Skip to content

Commit e3c72a8

Browse files
authored
Scrape CSS-in-JS, reduce dependencies, normalize url (projectwallace#10)
1 parent b6bde41 commit e3c72a8

File tree

4 files changed

+127
-875
lines changed

4 files changed

+127
-875
lines changed

dev.js

Lines changed: 0 additions & 6 deletions
This file was deleted.

index.js

Lines changed: 70 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,81 @@
11
const got = require('got')
2-
const puppeteer = require('puppeteer-core')
3-
const chrome = require('chrome-aws-lambda')
4-
const extractCss = require('extract-css-core')
2+
const chromium = require('chrome-aws-lambda')
3+
const normalizeUrl = require('normalize-url')
4+
5+
/**
 * Extract the CSS of the page at `url` using headless Chromium.
 *
 * Two sources are combined:
 *  - stylesheets without an `href` (read through `document.styleSheets`,
 *    which also picks up rules inserted at runtime, e.g. by CSS-in-JS);
 *  - the CSS coverage report, minus entries whose url equals the page url
 *    (those are treated as <style> tags and are already covered above).
 *
 * @param {string} url - Address of the page to scrape.
 * @returns {Promise<string>} All CSS found, concatenated into one string.
 * @throws {Error} When the page responds with an HTTP status >= 400, or when
 *   launching the browser / navigating / evaluating fails.
 */
const extractCss = async url => {
  const browser = await chromium.puppeteer.launch({
    executablePath: await chromium.executablePath,
    args: chromium.args,
    headless: chromium.headless
  })

  // Everything after launch runs inside try/finally so the browser is closed
  // on every exit path, including navigation timeouts and evaluation errors.
  try {
    const page = await browser.newPage()

    // Start CSS coverage. This is the meat and bones of this module
    await page.coverage.startCSSCoverage()

    const response = await page.goto(url, { waitUntil: 'networkidle2' })

    // Make sure that we only try to extract CSS from valid pages.
    // Bail out if the response is an invalid request (400, 500).
    // `goto` may resolve to null for some navigations, hence the guard.
    if (response && response.status() >= 400) {
      // `status` and `statusText` are methods on the response object
      throw new Error(
        `There was an error retrieving CSS from ${url}.\n\tHTTP status code: ${response.status()} (${response.statusText()})`
      )
    }

    // Coverage contains a lot of <style> and <link> CSS,
    // but not all...
    const coverage = await page.coverage.stopCSSCoverage()

    // Get all CSS generated with the CSSStyleSheet API
    // See: https://developer.mozilla.org/en-US/docs/Web/API/CSSRule/cssText
    const styleSheetsApiCss = await page.evaluate(() => {
      /* global document */
      return [...document.styleSheets]
        .filter(stylesheet => stylesheet.href === null)
        .map(stylesheet =>
          [...stylesheet.cssRules]
            .map(cssStyleRule => cssStyleRule.cssText)
            .join('')
        )
        .join('')
    })

    // Turn the coverage Array into a single string of CSS
    const coverageCss = coverage
      // Filter out the <style> tags that were found in the coverage
      // report since we've conducted our own search for them.
      // A coverage CSS item with the same url as the url of the page
      // we requested is an indication that this was a <style> tag
      .filter(styles => styles.url !== url)
      // The `text` property contains the actual CSS
      .map(({ text }) => text)
      .join('')

    return styleSheetsApiCss + coverageCss
  } finally {
    // Don't leave any resources behind
    await browser.close()
  }
}
566

667
module.exports = async (req, res) => {
7-
const url = req.url.slice(1)
68+
const url = normalizeUrl(req.url.slice(1), { stripWWW: false })
869

970
try {
1071
const css = url.endsWith('.css')
1172
? (await got(url)).body
12-
: await extractCss(url, {
13-
waitUntil: 'networkidle0',
14-
browserOverride: {
15-
executablePath: await chrome.executablePath,
16-
puppeteer,
17-
args: chrome.args
18-
}
19-
})
20-
res.statusCode = 200
73+
: await extractCss(url)
74+
2175
res.setHeader('Content-Type', 'text/css')
22-
return res.end(css)
76+
77+
return res.status(200).send(css)
2378
} catch (error) {
24-
res.statusCode = 400
25-
res.setHeader('Content-Type', 'application/json')
26-
return res.end(JSON.stringify(error, null, 2))
79+
return res.status(400).json(error)
2780
}
2881
}

0 commit comments

Comments
 (0)