diff --git a/package.json b/package.json index 3d3bceee..874be5af 100644 --- a/package.json +++ b/package.json @@ -25,7 +25,7 @@ "dependencies": { "cheerio": "1.0.0-rc.2", "css-tree": "1.0.0-alpha.28", - "csso": "~3.5.0", + "csso": "ianvonholt/csso", "filesize": "^3.5.11", "minimist": "^1.2.0", "puppeteer": "^1.4.0" diff --git a/src/run.js b/src/run.js index 3e154145..44f7a8d7 100644 --- a/src/run.js +++ b/src/run.js @@ -1,10 +1,12 @@ 'use strict'; +// @ts-ignore const puppeteer = require('puppeteer'); // @ts-ignore const csso = require('csso'); // @ts-ignore const csstree = require('css-tree'); +// @ts-ignore const cheerio = require('cheerio'); const utils = require('./utils'); const { createTracker } = require('./tracker'); @@ -12,6 +14,8 @@ const url = require('url'); const isOk = response => response.ok() || response.status() === 304; +const skippableResourceTypes = ['font', 'media', 'ping', 'xhr']; + /** * Take in a csstree AST, mutate it and return a csstree AST. * The mutation is about: @@ -126,7 +130,7 @@ const processPage = ({ let fulfilledPromise = false; const tracker = createTracker(page); - const safeReject = error => { + const safeReject = (error, fatal) => { if (!fulfilledPromise) { if (error.message.startsWith('Navigation Timeout Exceeded')) { const urls = tracker.urls(); @@ -138,8 +142,12 @@ const processPage = ({ error.message += `\nFor ${urls[0]}`; } } - tracker.dispose(); - reject(error); + if (fatal) { + tracker.dispose(); + reject(error); + } else { + console.warn(error); + } } }; @@ -181,7 +189,7 @@ const processPage = ({ request.abort(); } else if (!loadimages && resourceType === 'image') { request.abort(); - } else if (resourceType === 'font') { + } else if (skippableResourceTypes.includes(resourceType)) { request.abort(); } else if (stylesheetAsts[requestUrl]) { // no point downloading this again @@ -203,6 +211,12 @@ const processPage = ({ page.on('response', response => { const responseUrl = response.url(); const resourceType = response.request().resourceType(); + + // Log third-party content for possible future blacklisting + if (!responseUrl.startsWith(pageUrl)) { + console.log(resourceType, responseUrl); + } + if (response.status() >= 400) { return safeReject( new Error(`${response.status()} on ${responseUrl}`) @@ -218,6 +232,10 @@ const processPage = ({ redirectResponses[responseUrl] = redirectsTo; } else if (resourceType === 'stylesheet') { response.text().then(text => { + // Double semicolons can crash csso. + while (/;\s*;/.test(text)) { + text = text.replace(/;\s*;/g, ';'); + } const ast = csstree.parse(text); csstree.walk(ast, node => { if (node.type === 'Url') { @@ -264,7 +282,10 @@ const processPage = ({ await page.setJavaScriptEnabled(false); response = await page.goto(pageUrl); if (!isOk(response)) { - return safeReject(new Error(`${response.status()} on ${pageUrl}`)); + return safeReject( + new Error(`${response.status()} on ${pageUrl}`), + true // fatal + ); } const htmlVanilla = await page.content(); doms.push(cheerio.load(htmlVanilla)); @@ -288,7 +309,8 @@ const processPage = ({ response = await page.goto(pageUrl, { waitUntil: 'networkidle0' }); if (!isOk(response)) { return safeReject( - new Error(`${response.status()} on ${pageUrl} (second time)`) + new Error(`${response.status()} on ${pageUrl} (second time)`), + true // fatal ); } const evalWithJavascript = await page.evaluate(() => { @@ -306,7 +328,10 @@ const processPage = ({ link.media !== 'print' && !link.href.toLowerCase().startsWith('data:') ) { - hrefs.push(link.href); + // Fragments are omitted from request.url() and response.url(), + // so we also need to strip them here, otherwise the hrefs + // won't always match when we check for missing ASTs. + hrefs.push(link.href.replace(/#.*$/, '')); } }); return { @@ -333,7 +358,7 @@ const processPage = ({ resolve(); } } catch (e) { - return safeReject(e); + return safeReject(e, true); // fatal } }); @@ -405,8 +430,8 @@ const minimalcss = async options => { ); }); if (missingASTs.length) { - throw new Error( - `Found stylesheets that failed to download (${missingASTs})` + console.warn( + `Found stylesheets that failed to download: (${missingASTs})` ); } @@ -445,6 +470,9 @@ const minimalcss = async options => { } const ast = stylesheetAsts[href]; + // Missing stylesheet would crash csstree. + if (!ast) return; + csstree.walk(ast, { visit: 'Rule', enter: function(node, item, list) { @@ -456,6 +484,12 @@ const minimalcss = async options => { return; } + if (!node.prelude.children) { + // delete malformed rule + list.remove(item); + return; + } + node.prelude.children.forEach((node, item, list) => { // Translate selector's AST to a string and filter pseudos from it // This changes things like `a.button:active` to `a.button`