Skip to content

Commit 69ecc54

Browse files
authored
keep a locally cached Page instance (projectwallace#18)
* keep a locally cached Page instance * use puppeteer 1.19.0
1 parent 4cc79c1 commit 69ecc54

File tree

6 files changed

+115
-282
lines changed

6 files changed

+115
-282
lines changed

.nowignore

Lines changed: 0 additions & 5 deletions
This file was deleted.

_chromium.js

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
const puppeteer = require('puppeteer-core')
2+
const chrome = require('chrome-aws-lambda')
3+
// Location of a locally installed Chrome binary, used only in development.
// Every platform other than Windows falls back to the macOS application path.
const exePath = process.platform !== 'win32' ?
	'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome' :
	'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe'

// True when the NOW_REGION environment variable equals 'dev1'
// (presumably the value set by `now dev` - confirm against the platform docs).
const isDev = process.env.NOW_REGION === 'dev1'
8+
9+
/**
 * Build the options object that is handed to `puppeteer.launch()`.
 *
 * In development (`isDev`) a locally installed Chrome at `exePath` is started
 * headless with no extra arguments. Otherwise the arguments, executable path
 * and headless flag are all taken from `chrome-aws-lambda`.
 *
 * @returns {Promise<Object>} An object with `args`, `executablePath` and `headless`.
 */
async function getOptions() {
	if (!isDev) {
		// Read in the same order as the object literal they end up in.
		const args = chrome.args
		const executablePath = await chrome.executablePath
		const headless = chrome.headless

		return {args, executablePath, headless}
	}

	return {args: [], executablePath: exePath, headless: true}
}
24+
25+
// Keep a locally cached 'page' object so that we
// don't have to request the browser instance to
// create a new one for each request.
let _page

// Promise of a launch that is currently in flight, so that callers arriving
// while the browser is still starting share it instead of starting their own.
let _launching

/**
 * Launch a browser, open a page in it and store that page in `_page`.
 *
 * @returns {Promise<Object>} The newly created Puppeteer page.
 * @throws Whatever `getOptions()`, `puppeteer.launch()` or `browser.newPage()` rejects with.
 */
async function launchPage() {
	try {
		const options = await getOptions()
		const browser = await puppeteer.launch(options)

		try {
			_page = await browser.newPage() // eslint-disable-line
		} catch (error) {
			// Without a page nothing references this browser any more,
			// so close it rather than leaving the process behind.
			await browser.close()
			throw error
		}

		return _page
	} finally {
		// Success or failure, this launch is over. Clearing the marker lets
		// a later call retry after a failure.
		_launching = undefined
	}
}

/**
 * Return the shared Puppeteer page, creating it on first use.
 *
 * A cached page is reused for as long as it reports that it is not closed.
 * Callers that arrive while a launch is in progress wait for that launch.
 *
 * @returns {Promise<Object>} The cached (or newly created) Puppeteer page.
 * @throws Propagates any launch failure; the next call tries again.
 */
async function getPage() {
	if (_page && !_page.isClosed()) {
		return _page
	}

	if (!_launching) {
		_launching = launchPage()
	}

	return _launching
}
40+
41+
exports.extractCss = async url => {
42+
const page = await getPage()
43+
44+
// Start CSS coverage. This is the meat and bones of this module
45+
await page.coverage.startCSSCoverage()
46+
47+
const response = await page.goto(url, {waitUntil: 'networkidle2'})
48+
49+
// Make sure that we only try to extract CSS from valid pages.
50+
// Bail out if the response is an invalid request (400, 500)
51+
if (response.status() >= 400) {
52+
return Promise.reject(
53+
new Error(
54+
`There was an error retrieving CSS from ${url}.\n\tHTTP status code: ${response.statusCode} (${response.statusText})`
55+
)
56+
)
57+
}
58+
59+
// Coverage contains a lot of <style> and <link> CSS,
60+
// but not all...
61+
const coverage = await page.coverage.stopCSSCoverage()
62+
63+
// Get all CSS generated with the CSSStyleSheet API
64+
// See: https://developer.mozilla.org/en-US/docs/Web/API/CSSRule/cssText
65+
const styleSheetsApiCss = await page.evaluate(() => {
66+
/* global document */
67+
return [...document.styleSheets]
68+
.filter(stylesheet => stylesheet.href === null)
69+
.map(stylesheet =>
70+
[...stylesheet.cssRules]
71+
.map(cssStyleRule => cssStyleRule.cssText)
72+
.join('')
73+
)
74+
.join('')
75+
})
76+
77+
// Turn the coverage Array into a single string of CSS
78+
const coverageCss = coverage
79+
// Filter out the <style> tags that were found in the coverage
80+
// report since we've conducted our own search for them.
81+
// A coverage CSS item with the same url as the url of the page
82+
// we requested is an indication that this was a <style> tag
83+
.filter(styles => styles.url !== url)
84+
// The `text` property contains the actual CSS
85+
.map(({text}) => text)
86+
.join('')
87+
88+
return Promise.resolve(styleSheetsApiCss + coverageCss)
89+
}

index.js

Lines changed: 9 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,105 +1,43 @@
1-
const got = require('got')
2-
const chromium = require('chrome-aws-lambda')
31
const normalizeUrl = require('normalize-url')
42
const isUrl = require('is-url')
53
const LRU = require('lru-cache')
4+
const {extractCss} = require('./_chromium')
65

76
// In-memory cache of extracted CSS: holds at most 500 entries and
// keeps each of them for one minute.
const CACHE_MAX_ENTRIES = 500
const CACHE_MAX_AGE_MS = 60 * 1000 // 1 minute

const cssCache = new LRU({max: CACHE_MAX_ENTRIES, maxAge: CACHE_MAX_AGE_MS})
1110

12-
const extractCss = async url => {
13-
const browser = await chromium.puppeteer.launch({
14-
executablePath: await chromium.executablePath,
15-
args: chromium.args,
16-
headless: true
17-
})
18-
19-
const page = await browser.newPage()
20-
21-
// Start CSS coverage. This is the meat and bones of this module
22-
await page.coverage.startCSSCoverage()
23-
24-
const response = await page.goto(url, {waitUntil: 'networkidle2'})
25-
26-
// Make sure that we only try to extract CSS from valid pages.
27-
// Bail out if the response is an invalid request (400, 500)
28-
if (response.status() >= 400) {
29-
await browser.close() // Don't leave any resources behind
30-
31-
return Promise.reject(
32-
new Error(
33-
`There was an error retrieving CSS from ${url}.\n\tHTTP status code: ${response.statusCode} (${response.statusText})`
34-
)
35-
)
36-
}
37-
38-
// Coverage contains a lot of <style> and <link> CSS,
39-
// but not all...
40-
const coverage = await page.coverage.stopCSSCoverage()
41-
42-
// Get all CSS generated with the CSSStyleSheet API
43-
// See: https://developer.mozilla.org/en-US/docs/Web/API/CSSRule/cssText
44-
const styleSheetsApiCss = await page.evaluate(() => {
45-
/* global document */
46-
return [...document.styleSheets]
47-
.filter(stylesheet => stylesheet.href === null)
48-
.map(stylesheet =>
49-
[...stylesheet.cssRules]
50-
.map(cssStyleRule => cssStyleRule.cssText)
51-
.join('')
52-
)
53-
.join('')
54-
})
55-
56-
await browser.close()
57-
58-
// Turn the coverage Array into a single string of CSS
59-
const coverageCss = coverage
60-
// Filter out the <style> tags that were found in the coverage
61-
// report since we've conducted our own search for them.
62-
// A coverage CSS item with the same url as the url of the page
63-
// we requested is an indication that this was a <style> tag
64-
.filter(styles => styles.url !== url)
65-
// The `text` property contains the actual CSS
66-
.map(({text}) => text)
67-
.join('')
68-
69-
return Promise.resolve(styleSheetsApiCss + coverageCss)
70-
}
71-
7211
module.exports = async (req, res) => {
7312
const url = normalizeUrl(req.url.slice(1), {stripWWW: false})
7413

7514
if (!isUrl(url)) {
7615
res.statusCode = 406
7716
res.setHeader('Content-Type', 'application/json')
17+
7818
return res.end(
7919
JSON.stringify({
8020
message: 'The provided URL is not valid'
8121
})
8222
)
8323
}
8424

25+
res.setHeader('Content-Type', 'text/css')
26+
res.statusCode = 200
27+
8528
if (cssCache.has(url)) {
86-
res.setHeader('Content-Type', 'text/css')
87-
res.statusCode = 200
8829
return res.end(cssCache.get(url))
8930
}
9031

9132
try {
92-
const css = url.endsWith('.css') ?
93-
(await got(url)).body :
94-
await extractCss(url)
95-
96-
res.setHeader('Content-Type', 'text/css')
97-
res.statusCode = 200
33+
const css = await extractCss(url)
9834
cssCache.set(url, css)
35+
9936
return res.end(css)
10037
} catch (error) {
10138
res.statusCode = 500
10239
res.setHeader('Content-Type', 'application/json')
40+
10341
return res.end(JSON.stringify(error))
10442
}
10543
}

0 commit comments

Comments
 (0)