-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathwords.js
More file actions
53 lines (42 loc) · 1.15 KB
/
Copy pathwords.js
File metadata and controls
53 lines (42 loc) · 1.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env node
const path = require('path');
const log = require('verbalize');
const cheerio = require('cheerio');
const request = require('request');
const file = require('fs-utils');
// NOTE(review): presumably the label verbalize attaches to this script's log
// output — confirm that 'repos' is the intended name for words.js.
log.runner = 'repos';

// Path of the JSON file the scraped word list is written to. Declared with
// `let` (not `const`) so any existing reassignment of this binding keeps working.
let dest = 'words.json';

// Settings handed to `request`: the page to download plus an explicit
// User-Agent header.
const options = {
  url: 'https://en.wikipedia.org/wiki/Most_common_words_in_English',
  headers: { 'User-Agent': 'request' },
};

// Emit an empty log line, then announce which URL is about to be read.
log.writeln();
log.inform('reading', options.url);
/**
 * Parse the concatenated table text into ranked word entries.
 *
 * Every match is a run of digits immediately followed by one or more
 * non-whitespace characters, e.g. "1the" -> { rank: "1", word: "the" }.
 *
 * @param {string} content - Newline-separated text collected from the table rows.
 * @returns {Array<{rank: string, word: string}>} Entries in match order. `rank`
 *   is the matched digit string as-is; it is not converted to a number.
 */
function parseRankedWords(content) {
  // Fresh literal per call: a /g regex keeps `lastIndex` state between exec() calls.
  const pattern = /([\d]+)([\S]+)/g;
  const words = [];
  let match = pattern.exec(content);
  while (match !== null) {
    words.push({ rank: match[1], word: match[2] });
    match = pattern.exec(content);
  }
  return words;
}

/**
 * Response handler passed to `request`: extracts the ranked words from the
 * downloaded page and writes them to `dest` as JSON.
 *
 * On a transport error or a non-200 status the problem is logged and the
 * function returns without writing anything and without printing the
 * success message.
 *
 * @param {?Error} err - Transport-level error, if any.
 * @param {{statusCode: number}} response - HTTP response; only `statusCode` is read.
 * @param {string} body - Response body handed to `cheerio.load`.
 * @returns {void}
 */
function callback(err, response, body) {
  // Guard clause: `response` is only inspected when there is no transport error.
  if (err || response.statusCode !== 200) {
    log.error(err || `unexpected HTTP status ${response.statusCode}`);
    return;
  }

  const $ = cheerio.load(body);

  // Collect the text of the TD cells of every row of each `table.wikitable`,
  // one line per row. A regular function (not an arrow) is required because
  // cheerio binds `this` to the current row element.
  let content = '';
  $("table.wikitable tr").each(function () {
    content += $(this).find("td").text();
    content += '\n';
  });

  const words = parseRankedWords(content);
  file.writeJSONSync(dest, words);

  // Log a cwd-relative, forward-slash path without mutating the shared `dest`.
  const displayPath = path.relative(process.cwd(), dest).replace(/\\/g, '/');
  log.inform('writing', displayPath);

  // Success message: reached only when the file has been written.
  log.done('done');
}
// Start the download configured in `options`; `callback` is invoked with
// (err, response, body) once the request finishes.
request(options, callback);