Skip to content

Commit a1d1113

Browse files
committed
Add support for scraping CSS-in-JS based styles
1 parent 5576b77 commit a1d1113

10 files changed

+64
-42
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"/Users/bartveneman/www/projectwallace/modules/extract-css-core/src/index.js":{"path":"/Users/bartveneman/www/projectwallace/modules/extract-css-core/src/index.js","statementMap":{"0":{"start":{"line":1,"column":18},"end":{"line":1,"column":38}},"1":{"start":{"line":4,"column":1},"end":{"line":4,"column":30}},"2":{"start":{"line":5,"column":1},"end":{"line":5,"column":115}},"3":{"start":{"line":8,"column":0},"end":{"line":8,"column":43}},"4":{"start":{"line":10,"column":0},"end":{"line":78,"column":1}},"5":{"start":{"line":14,"column":1},"end":{"line":21,"column":2}},"6":{"start":{"line":18,"column":2},"end":{"line":20,"column":3}},"7":{"start":{"line":24,"column":17},"end":{"line":24,"column":60}},"8":{"start":{"line":27,"column":14},"end":{"line":27,"column":37}},"9":{"start":{"line":30,"column":1},"end":{"line":30,"column":39}},"10":{"start":{"line":31,"column":18},"end":{"line":31,"column":51}},"11":{"start":{"line":35,"column":1},"end":{"line":45,"column":2}},"12":{"start":{"line":36,"column":2},"end":{"line":36,"column":23}},"13":{"start":{"line":38,"column":2},"end":{"line":44,"column":3}},"14":{"start":{"line":49,"column":18},"end":{"line":49,"column":55}},"15":{"start":{"line":53,"column":27},"end":{"line":62,"column":3}},"16":{"start":{"line":55,"column":2},"end":{"line":61,"column":12}},"17":{"start":{"line":57,"column":4},"end":{"line":59,"column":14}},"18":{"start":{"line":58,"column":26},"end":{"line":58,"column":46}},"19":{"start":{"line":64,"column":1},"end":{"line":64,"column":22}},"20":{"start":{"line":67,"column":21},"end":{"line":75,"column":11}},"21":{"start":{"line":72,"column":20},"end":{"line":72,"column":38}},"22":{"start":{"line":74,"column":19},"end":{"line":74,"column":23}},"23":{"start":{"line":77,"column":1},"end":{"line":77,"column":56}}},"fnMap":{"0":{"name":"InvalidUrlError","decl":{"start":{"line":3,"column":9},"end":{"line":3,"column":24}},"loc":{"start":{"line":3,"column":56},"end":{"line":6,"column":1}},"line":3},"1":{"name":"(anonymous_1)","decl":{"start":{"line":10,"column":17},"end":{"line":10,"column":18}},"loc":{"start":{"line":13,"column":5},"end":{"line":78,"column":1}},"line":13},"2":{"name":"(anonymous_2)","decl":{"start":{"line":53,"column":47},"end":{"line":53,"column":48}},"loc":{"start":{"line":53,"column":53},"end":{"line":62,"column":2}},"line":53},"3":{"name":"(anonymous_3)","decl":{"start":{"line":56,"column":8},"end":{"line":56,"column":9}},"loc":{"start":{"line":57,"column":4},"end":{"line":59,"column":14}},"line":57},"4":{"name":"(anonymous_4)","decl":{"start":{"line":58,"column":10},"end":{"line":58,"column":11}},"loc":{"start":{"line":58,"column":26},"end":{"line":58,"column":46}},"line":58},"5":{"name":"(anonymous_5)","decl":{"start":{"line":72,"column":10},"end":{"line":72,"column":11}},"loc":{"start":{"line":72,"column":20},"end":{"line":72,"column":38}},"line":72},"6":{"name":"(anonymous_6)","decl":{"start":{"line":74,"column":7},"end":{"line":74,"column":8}},"loc":{"start":{"line":74,"column":19},"end":{"line":74,"column":23}},"line":74}},"branchMap":{"0":{"loc":{"start":{"line":12,"column":1},"end":{"line":12,"column":56}},"type":"default-arg","locations":[{"start":{"line":12,"column":54},"end":{"line":12,"column":56}}],"line":12},"1":{"loc":{"start":{"line":12,"column":2},"end":{"line":12,"column":28}},"type":"default-arg","locations":[{"start":{"line":12,"column":14},"end":{"line":12,"column":28}}],"line":12},"2":{"loc":{"start":{"line":12,"column":30},"end":{"line":12,"column":50}},"type":"default-arg","locations":[{"start":{"line":12,"column":46},"end":{"line":12,"column":50}}],"line":12},"3":{"loc":{"start":{"line":14,"column":1},"end":{"line":21,"column":2}},"type":"if","locations":[{"start":{"line":14,"column":1},"end":{"line":21,"column":2}},{"start":{"line":14,"column":1},"end":{"line":21,"column":2}}],"line":14},"4":{"loc":{"start":{"line":15,"column":2},"end":{"line":16,"column":62}},"type":"binary-expr","locations":[{"start":{"line":15,"column":2},"end":{"line":15,"column":24}},{"start":{"line":16,"column":3},"end":{"line":16,"column":29}},{"start":{"line":16,"column":33},"end":{"line":16,"column":61}}],"line":15},"5":{"loc":{"start":{"line":24,"column":17},"end":{"line":24,"column":60}},"type":"binary-expr","locations":[{"start":{"line":24,"column":17},"end":{"line":24,"column":30}},{"start":{"line":24,"column":35},"end":{"line":24,"column":59}}],"line":24},"6":{"loc":{"start":{"line":35,"column":1},"end":{"line":45,"column":2}},"type":"if","locations":[{"start":{"line":35,"column":1},"end":{"line":45,"column":2}},{"start":{"line":35,"column":1},"end":{"line":45,"column":2}}],"line":35}},"s":{"0":1,"1":1,"2":1,"3":1,"4":1,"5":10,"6":1,"7":9,"8":9,"9":9,"10":9,"11":8,"12":1,"13":1,"14":7,"15":7,"16":0,"17":0,"18":0,"19":0,"20":0,"21":0,"22":0,"23":0},"f":{"0":1,"1":10,"2":0,"3":0,"4":0,"5":0,"6":0},"b":{"0":[6],"1":[8],"2":[8],"3":[1,9],"4":[10,2,1],"5":[9,8],"6":[1,7]},"_coverageSchema":"43e27e138ebf9cfc5966b082cf9a028302ed4184","hash":"da844700a4f3d37989bef401a3b224cdb7987c35","contentHash":"82b903d865c7d43873bd0d3d7e3e8818b949355438f7f67188dd471700fd27ab"}}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"uuid":"09c5e365-77b7-49ac-ae2b-2b7d3158399a","parent":null,"pid":9437,"argv":["/usr/local/bin/node","/Users/bartveneman/www/projectwallace/modules/extract-css-core/node_modules/.bin/ava","test"],"execArgv":[],"cwd":"/Users/bartveneman/www/projectwallace/modules/extract-css-core","time":1562274553600,"ppid":9436,"root":"a1283c4c-7e12-4880-86a0-70e9ef5b71fe","coverageFilename":"/Users/bartveneman/www/projectwallace/modules/extract-css-core/.nyc_output/09c5e365-77b7-49ac-ae2b-2b7d3158399a.json","files":[]}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"uuid":"5df11076-ceea-47ff-8dba-9fde7457ec74","parent":"09c5e365-77b7-49ac-ae2b-2b7d3158399a","pid":9439,"argv":["/usr/local/bin/node","/Users/bartveneman/www/projectwallace/modules/extract-css-core/node_modules/ava/lib/worker/subprocess.js","undefined"],"execArgv":[],"cwd":"/Users/bartveneman/www/projectwallace/modules/extract-css-core","time":1562274559373,"ppid":9437,"root":"a1283c4c-7e12-4880-86a0-70e9ef5b71fe","coverageFilename":"/Users/bartveneman/www/projectwallace/modules/extract-css-core/.nyc_output/5df11076-ceea-47ff-8dba-9fde7457ec74.json","files":["/Users/bartveneman/www/projectwallace/modules/extract-css-core/src/index.js"]}

.nyc_output/processinfo/index.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"processes":{"09c5e365-77b7-49ac-ae2b-2b7d3158399a":{"parent":null,"children":["5df11076-ceea-47ff-8dba-9fde7457ec74"]},"5df11076-ceea-47ff-8dba-9fde7457ec74":{"parent":"09c5e365-77b7-49ac-ae2b-2b7d3158399a","children":[]}},"files":{"/Users/bartveneman/www/projectwallace/modules/extract-css-core/src/index.js":["5df11076-ceea-47ff-8dba-9fde7457ec74"]},"externalIds":{}}

src/index.js

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ module.exports = async (
2626
// Create a new page and navigate to it
2727
const page = await browser.newPage()
2828

29-
// Start CSS coverage. This is the meat and bones of this module
29+
// // Start CSS coverage. This is the meat and bones of this module
3030
await page.coverage.startCSSCoverage()
3131
const response = await page.goto(url, {waitUntil})
3232

@@ -48,17 +48,18 @@ module.exports = async (
4848
// but not all...
4949
const coverage = await page.coverage.stopCSSCoverage()
5050

51-
// Fetch all <style> tags from the page, because the coverage
52-
// API may have missed some JS-generated <style> tags.
53-
// Some of them *were* already caught by the coverage API,
54-
// but they will be removed later on to prevent duplicates.
55-
const styleTagsCss = (await page.$$eval('style', styles => {
56-
// Get the text inside each <style> tag and trim() the
57-
// results to prevent all the inside-html indentation
58-
// clogging up the results and making it look
59-
// bigger than it actually is
60-
return styles.map(style => style.innerHTML.trim())
61-
})).join('')
51+
// Get all CSS generated with the CSSStyleSheet API
52+
// See: https://developer.mozilla.org/en-US/docs/Web/API/CSSRule/cssText
53+
const styleSheetsApiCss = await page.evaluate(() => {
54+
/* global document */
55+
return [...document.styleSheets]
56+
.map(stylesheet =>
57+
[...stylesheet.cssRules]
58+
.map(cssStyleRule => cssStyleRule.cssText)
59+
.join('')
60+
)
61+
.join('')
62+
})
6263

6364
await browser.close()
6465

@@ -73,5 +74,5 @@ module.exports = async (
7374
.map(({text}) => text)
7475
.join('')
7576

76-
return Promise.resolve(coverageCss + styleTagsCss)
77+
return Promise.resolve(styleSheetsApiCss + coverageCss)
7778
}

test/index.js

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@ const puppeteerCore = require('puppeteer-core')
88
const extractCss = require('..')
99

1010
let server
11-
const expected = readFileSync(resolve(__dirname, 'fixture.css'), 'utf8')
11+
const fixture = readFileSync(resolve(__dirname, 'fixture.css'), 'utf8')
1212

1313
test.before(async () => {
1414
server = await createTestServer()
1515

1616
server.get('/fixture.css', (req, res) => {
17-
res.send(expected)
17+
res.send(fixture)
1818
})
1919
})
2020

@@ -27,11 +27,16 @@ test('it fetches css from a page with CSS in a server generated <link> inside th
2727
server.get(url, (req, res) => {
2828
res.send(`
2929
<!doctype html>
30-
<link rel="stylesheet" href="fixture.css" />
30+
<html>
31+
<head>
32+
<link rel="stylesheet" href="fixture.css" />
33+
</head>
34+
</html>
3135
`)
3236
})
3337

3438
const actual = await extractCss(server.url + url)
39+
const expected = fixture
3540

3641
t.is(actual, expected)
3742
})
@@ -41,13 +46,14 @@ test('it fetches css from a page with CSS in server generated <style> inside the
4146
server.get(url, (req, res) => {
4247
res.send(`
4348
<!doctype html>
44-
<style>${expected.trim()}</style>
49+
<style>${fixture}</style>
4550
`)
4651
})
4752

4853
const actual = await extractCss(server.url + url)
54+
const expected = 'body { color: teal; }'
4955

50-
t.is(actual, expected.trim())
56+
t.is(actual, expected)
5157
})
5258

5359
test('it finds JS generated <link /> CSS', async t => {
@@ -62,6 +68,7 @@ test('it finds JS generated <link /> CSS', async t => {
6268
})
6369

6470
const actual = await extractCss(server.url + path)
71+
const expected = fixture
6572

6673
t.is(actual, expected)
6774
})
@@ -77,7 +84,26 @@ test('it finds JS generated <style /> CSS', async t => {
7784
})
7885

7986
const actual = await extractCss(server.url + url, {waitUntil: 'load'})
80-
const expected = `body { color: teal; }`
87+
const expected = 'body { color: teal; }'
88+
89+
t.is(actual, expected)
90+
})
91+
92+
test('it finds css-in-js, like Styled Components', async t => {
93+
const url = '/css-in-js'
94+
const cssInJsExampleHtml = readFileSync(
95+
resolve(__dirname, 'css-in-js.html'),
96+
'utf8'
97+
)
98+
server.get(url, (req, res) => {
99+
res.send(cssInJsExampleHtml)
100+
})
101+
102+
const actual = await extractCss(server.url + url, {waitUntil: 'load'})
103+
// Color is RGB instead of Hex, because of serialization:
104+
// https://www.w3.org/TR/cssom-1/#serializing-css-values
105+
const expected =
106+
'html { color: rgb(255, 0, 0); }.hJHBhT { color: blue; font-family: sans-serif; font-size: 3em; }'
81107

82108
t.is(actual, expected)
83109
})
@@ -94,9 +120,11 @@ test('it combines server generated <link> and <style> tags with client side crea
94120

95121
const actual = await extractCss(server.url + path)
96122

123+
t.true(actual.includes('content: "js-style";'))
124+
t.true(actual.includes('content: "server-style";'))
125+
t.true(actual.includes(`body {`))
126+
t.true(actual.includes(`color: teal;`))
97127
t.snapshot(actual)
98-
t.true(actual.includes('counter-increment: 2;'))
99-
t.true(actual.includes('counter-increment: 3;'))
100128
})
101129

102130
test('it rejects if the url has an HTTP error status', async t => {
@@ -116,20 +144,17 @@ test('it accepts a browser override for usage with other browsers', async t => {
116144
res.send(`
117145
<!doctype html>
118146
<style>
119-
body::before {
120-
content: ${req.headers['user-agent']};
121-
}
147+
body::before { content: "${req.headers['user-agent']}"; }
122148
</style>
123149
`)
124150
})
125151
const customBrowser = await puppeteerCore.launch({
126152
executablePath: chromium.path,
127-
args: ["--user-agent='Extract CSS Core'"]
153+
args: ['--user-agent=Extract CSS Core']
128154
})
129155
const actual = await extractCss(server.url + path, {customBrowser})
130156

131-
t.snapshot(actual)
132-
t.true(actual.includes("content: 'Extract CSS Core';"))
157+
t.is(actual, 'body::before { content: "Extract CSS Core"; }')
133158
})
134159

135160
test('it rejects on an invalid customBrowser option', async t => {

test/kitchen-sink.html

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,18 @@
99

1010
<!-- Server generated style -->
1111
<style>
12-
.server-style {
13-
counter-increment: 2;
12+
.server-style::after {
13+
content: 'server-style';
1414
}
1515
</style>
1616

1717
<h1>Title</h1>
18+
<div class="server-style">server-style:</div>
1819

1920
<script>
2021
// Client generated style
2122
var style = document.createElement('style')
22-
style.textContent = '.js-style { counter-increment: 3; }'
23+
style.textContent = '.js-style::after { content: "js-style"; }'
2324
document.body.appendChild(style)
2425

2526
// Client generated link

test/snapshots/index.js.md

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,14 @@ The actual snapshot is saved in `index.js.snap`.
44

55
Generated by [AVA](https://ava.li).
66

7-
## it accepts a browser override for usage with other browsers
8-
9-
> Snapshot 1
10-
11-
`body::before {␊
12-
content: 'Extract CSS Core';␊
13-
}`
14-
157
## it combines server generated <link> and <style> tags with client side created <link> and <style> tags
168

179
> Snapshot 1
1810
19-
`body {␊
11+
`.server-style::after { content: "server-style"; }.js-style::after { content: "js-style"; }body {␊
2012
color: teal;␊
2113
}␊
2214
body {␊
2315
color: teal;␊
2416
}␊
25-
.server-style {␊
26-
counter-increment: 2;␊
27-
}.js-style { counter-increment: 3; }`
17+
`

test/snapshots/index.js.snap

-66 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)