-
Notifications
You must be signed in to change notification settings - Fork 0
/
check-md-links.js
127 lines (118 loc) · 3.59 KB
/
check-md-links.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env node
/**
* Run "./check-md-links.js" to scan all Markdown files in all
* subdirectories, find broken GitHub links, and write a report
* to "broken-links.json".
*/
const { readdirSync, statSync, readFileSync, writeFileSync } = require('fs');
const { join, extname, resolve } = require('path');
const axios = require('axios');
const markdownLinkExtractor = require('markdown-link-extractor');
// Directory names that are never descended into while scanning for Markdown files.
const excludes = ['node_modules'];
// File name of the JSON report; it is written into this script's directory (__dirname).
const brokenLinksFile = 'broken-links.json';
/**
 * Recursively collects the absolute paths of all Markdown (".md") files
 * below a directory.
 *
 * Directories whose name appears in `excludes` are not descended into.
 * `dirPath` is interpreted the way `fs` interprets it (a relative path is
 * relative to the current working directory), and every returned path is
 * resolved from that same location, so the result always refers to the files
 * that were actually listed, regardless of where this script is stored.
 *
 * @param {string} [dirPath='../'] - Directory to scan.
 * @param {string[]} [arrayOfFiles=[]] - Paths collected so far; not mutated.
 * @returns {string[]} A new array holding `arrayOfFiles` followed by the
 *   absolute paths of the Markdown files that were found.
 */
function getAllMdFiles(dirPath = '../', arrayOfFiles = []) {
  const files = [...arrayOfFiles];

  const collect = (currentDir) => {
    readdirSync(currentDir).forEach((file) => {
      // Resolve once and use the same path for stat, recursion and the result.
      const entryPath = resolve(currentDir, file);
      if (statSync(entryPath).isDirectory()) {
        // A directory is never reported as a file, even when it is excluded.
        if (!excludes.includes(file)) {
          collect(entryPath);
        }
      } else if (extname(file) === '.md') {
        files.push(entryPath);
      }
    });
  };

  collect(dirPath);
  return files;
}
/**
 * Collects the GitHub links contained in the Markdown files below a directory.
 *
 * Each Markdown file returned by `getAllMdFiles` is read as UTF-8 and passed
 * to `markdownLinkExtractor`. Only URLs that start with "https://github.com/"
 * are kept, and each (file, URL) pair is reported once, in order of first
 * appearance.
 *
 * @param {string} [dirPath='../'] - Directory handed to `getAllMdFiles`.
 * @returns {{ mdFile: string, url: string }[]} One entry per distinct
 *   GitHub link per Markdown file.
 */
function getGitHubLinks(dirPath = '../') {
  const githubLinks = [];
  // URLs already collected, grouped by file, so the duplicate check is a
  // constant-time lookup instead of a scan over everything found so far.
  const seenByFile = new Map();

  getAllMdFiles(dirPath).forEach((mdFile) => {
    const md = readFileSync(mdFile, 'utf8');
    const extracted = markdownLinkExtractor(md);
    // Older markdown-link-extractor releases return a plain array of URLs,
    // newer ones return an object with a `links` array; accept both shapes.
    const urls = Array.isArray(extracted) ? extracted : extracted.links;

    let seenUrls = seenByFile.get(mdFile);
    if (!seenUrls) {
      seenUrls = new Set();
      seenByFile.set(mdFile, seenUrls);
    }

    urls.forEach((url) => {
      if (!url.startsWith('https://github.com/') || seenUrls.has(url)) {
        return;
      }
      seenUrls.add(url);
      githubLinks.push({ mdFile, url });
    });
  });

  return githubLinks;
}
/**
 * Creates a promise that is fulfilled (with no value) once a timer elapses.
 *
 * @param {number} [ms=1000] - Timer duration in milliseconds.
 * @returns {Promise<void>} Promise settled by the timer callback.
 */
const delay = (ms = 1000) => {
  const startTimer = (resolve) => {
    setTimeout(resolve, ms);
  };
  return new Promise(startTimer);
};
/**
 * Removes, in place, the first element whose `url` and `mdFile` both equal
 * those of `object`. The array is left untouched when nothing matches.
 *
 * @param {{ url: string, mdFile: string }[]} [array=[]] - Array to modify.
 * @param {{ url: string, mdFile: string }} [object] - Link to look for.
 * @returns {void}
 */
const deleteObject = (array = [], object = { url: '', mdFile: '' }) => {
  for (let position = 0; position < array.length; position += 1) {
    const candidate = array[position];
    const isSameLink =
      candidate.url === object.url && candidate.mdFile === object.mdFile;
    if (isSameLink) {
      // Only the first match is removed; stop scanning right away.
      array.splice(position, 1);
      return;
    }
  }
};
/**
 * Logs a link entry to the console: the Markdown file path on one line,
 * then the URL on the next.
 *
 * @param {{ mdFile: string, url: string }} item - Link entry to print.
 * @returns {void}
 */
const printItem = (item) => {
  for (const field of ['mdFile', 'url']) {
    console.log(item[field]);
  }
};
/**
 * Requests every link, a few at a time, and returns the ones that failed.
 *
 * Links are taken from a private queue in chunks of `concurrency`; the
 * requests of one chunk run in parallel and the next chunk starts only after
 * all of them have settled. A fulfilled request counts as OK. A rejected
 * request counts as broken, except for HTTP 429, which puts the link back at
 * the front of the queue and pauses for `pauseTime` before continuing. A link
 * that keeps answering 429 is therefore retried indefinitely.
 *
 * @param {{ mdFile: string, url: string }[]} [items=[]] - Links to verify.
 *   Neither the array nor its elements are modified.
 * @param {number} [concurrency=3] - Requests per chunk; values below 1 are
 *   treated as 1 so the queue always drains.
 * @param {number} [pauseTime=10000] - Pause in milliseconds after a chunk
 *   that contained at least one 429 response.
 * @returns {Promise<{ mdFile: string, url: string, status: (number|undefined) }[]>}
 *   Copies of the broken links with the HTTP status of the failure, or
 *   `undefined` when the error carried no response.
 */
async function getBrokenLinksConcurrently(
  items = [],
  concurrency = 3,
  pauseTime = 10000,
) {
  const pending = [...items];
  const brokenLinks = [];
  // A chunk size below 1 would yield empty chunks and loop forever.
  const chunkSize = concurrency >= 1 ? concurrency : 1;

  while (pending.length > 0) {
    const chunk = pending.splice(0, chunkSize);
    const results = await Promise.allSettled(
      chunk.map((item) => axios.get(item.url)),
    );

    const rateLimited = [];
    results.forEach((result, index) => {
      // allSettled preserves input order, so results[index] belongs to
      // chunk[index]; nothing has to be recovered from the error object.
      const item = chunk[index];

      if (result.status === 'fulfilled') {
        printItem(item);
        console.log('OK');
        return;
      }

      const errorStatus = result.reason?.response?.status;
      if (errorStatus === 429) {
        rateLimited.push(item);
        return;
      }

      printItem(item);
      console.log('Broken');
      // Report a copy so the caller's objects stay untouched.
      brokenLinks.push({ ...item, status: errorStatus });
    });

    if (rateLimited.length > 0) {
      console.log(`Too many requests. Waiting ${pauseTime} ms.`);
      // Retry the throttled links first, in their original order.
      pending.unshift(...rateLimited);
      await delay(pauseTime);
    }
  }

  return brokenLinks;
}
/**
 * Runs the whole check: collects the GitHub links, verifies them and writes
 * the broken ones as JSON to `brokenLinksFile` inside this script's directory.
 *
 * Errors are not rethrown. They are logged with `console.error`, and the
 * process exit code is set to 1 so that a failed run is visible to whatever
 * started the script.
 *
 * @returns {Promise<void>} Settles once the report is written or the error
 *   has been logged.
 */
async function findBrokenLinks() {
  try {
    const githubLinks = getGitHubLinks();
    console.log(`Verifying ${githubLinks.length} links`);
    const brokenLinks = await getBrokenLinksConcurrently(githubLinks);
    console.log(`Writing broken links to ${brokenLinksFile}`);
    writeFileSync(
      join(__dirname, brokenLinksFile),
      JSON.stringify(brokenLinks),
      'utf8',
    );
  } catch (error) {
    console.error('Error', error);
    // Set the exit code instead of calling process.exit() so pending output
    // is still flushed before the process ends.
    process.exitCode = 1;
  }
}
// Start the check as soon as the script is executed; findBrokenLinks catches
// and logs its own errors, so the returned promise is not awaited here.
findBrokenLinks();