From 32b3b376f9709d7f329bf50fd76481f606bf3f18 Mon Sep 17 00:00:00 2001 From: Peter Bengtsson Date: Sun, 20 Sep 2015 16:48:06 -0700 Subject: [PATCH 1/2] ability to skip and ignore --- README.rst | 23 +++++++++++++++++ mincss/processor.py | 50 +++++++++++++++++++++++++++++++------ tests/ignore-inline.html | 19 ++++++++++++++ tests/ignore-link.html | 13 ++++++++++ tests/no-mincss-inline.html | 19 ++++++++++++++ tests/no-mincss-link.html | 13 ++++++++++ tests/test_mincss.py | 37 +++++++++++++++++++++++++++ tests/three-links.html | 20 +++++++++++++++ 8 files changed, 186 insertions(+), 8 deletions(-) create mode 100644 tests/ignore-inline.html create mode 100644 tests/ignore-link.html create mode 100644 tests/no-mincss-inline.html create mode 100644 tests/no-mincss-link.html create mode 100644 tests/three-links.html diff --git a/README.rst b/README.rst index 3476fbc..7f33bf0 100644 --- a/README.rst +++ b/README.rst @@ -71,3 +71,26 @@ this for example: } That tells ``mincss`` to ignore the whole block and all its selectors. + +Ignore CSS +---------- + +By default, ``mincss`` will find all ``<link>`` and ``<style>`` tags and process them. If you have a link or an inline +tag that you don't want ``mincss`` to even notice, simply add this attribute +to the tag: + +:: + + data-mincss="ignore" + +Leave CSS as is +--------------- + +One technique to have a specific CSS selector be ignored by ``mincss`` is to +put in a comment like ``/* no mincss */`` inside the CSS selectors +block. + +Another way is to leave the whole stylesheet as is. The advantage of doing +this is if you have a ``link`` or ``style`` tag that you want ``mincss`` +to ignore but still find and include in the parsed result. 
diff --git a/mincss/processor.py b/mincss/processor.py index 7e1e04a..3bfa4e2 100644 --- a/mincss/processor.py +++ b/mincss/processor.py @@ -26,6 +26,9 @@ unicode = str +INLINE = 'inline' +LINK = 'link' + RE_FIND_MEDIA = re.compile('(@media.+?)(\{)', re.DOTALL | re.MULTILINE) RE_NESTS = re.compile('@(-|keyframes).*?({)', re.DOTALL | re.M) RE_CLASS_DEF = re.compile('\.([\w-]+)') @@ -136,22 +139,26 @@ def process(self, *urls): for url in urls: self.process_url(url) - for identifier in sorted(self.blocks.keys(), key=lambda x: str(x[0])): + for identifier in sorted(self.blocks.keys()): content = self.blocks[identifier] processed = self._process_content(content, self._bodies) - if isinstance(identifier[0], int): - line, url = identifier + if identifier[1] == INLINE: + line, _, url, no_mincss = identifier + if no_mincss: + processed = content self.inlines.append( InlineResult( line, url, content, - processed + processed, ) ) else: - url, href = identifier + _, _, url, href, no_mincss = identifier + if no_mincss: + processed = content self.links.append( LinkResult( href, @@ -199,19 +206,44 @@ def process_html(self, html, url): # happend when the style tag has absolute nothing it # not even whitespace continue - for i, line in enumerate(lines): + no_mincss = False + try: + data_attrib = style.attrib['data-mincss'].lower() + if data_attrib == 'ignore': + continue + elif data_attrib == 'no': + no_mincss = True + + except KeyError: + # happens if the attribute key isn't there + pass + + for i, line in enumerate(lines, start=1): if line.count(first_line): - key = (i + 1, url) + key = (i, INLINE, url, no_mincss) self.blocks[key] = style.text break + i = 0 for link in CSSSelector('link')(page): if ( link.attrib.get('rel', '') == 'stylesheet' or link.attrib['href'].lower().split('?')[0].endswith('.css') ): + no_mincss = False + try: + data_attrib = link.attrib['data-mincss'].lower() + if data_attrib == 'ignore': + continue + if data_attrib == 'no': + no_mincss = True + except 
KeyError: + # happens if the attribute key isn't there + pass + link_url = self.make_absolute_url(url, link.attrib['href']) - key = (link_url, link.attrib['href']) + key = (i, LINK, link_url, link.attrib['href'], no_mincss) + i += 1 self.blocks[key] = self.download(link_url) if self.preserve_remote_urls: self.blocks[key] = self._rewrite_urls( @@ -337,6 +369,7 @@ def commentmatcher(match): ) for temp_key, old, __ in inner_improvements: + assert old in content content = content.replace(old, temp_key) _regex = re.compile('((.*?){(.*?)})', re.DOTALL | re.M) @@ -396,6 +429,7 @@ def matcher(match): fixed = _regex.sub(matcher, content) for temp_key, __, improved in inner_improvements: + assert temp_key in fixed fixed = fixed.replace(temp_key, improved) for temp_key, whole in comments: # note, `temp_key` might not be in the `fixed` thing because the diff --git a/tests/ignore-inline.html b/tests/ignore-inline.html new file mode 100644 index 0000000..f3a4bf4 --- /dev/null +++ b/tests/ignore-inline.html @@ -0,0 +1,19 @@ + + + + + test page + + + +

h1

+

h2

+

h3

+ + diff --git a/tests/ignore-link.html b/tests/ignore-link.html new file mode 100644 index 0000000..71d979e --- /dev/null +++ b/tests/ignore-link.html @@ -0,0 +1,13 @@ + + + + + test page + + + +

h1

+

h2

+

h3

+ + diff --git a/tests/no-mincss-inline.html b/tests/no-mincss-inline.html new file mode 100644 index 0000000..e3fc6e1 --- /dev/null +++ b/tests/no-mincss-inline.html @@ -0,0 +1,19 @@ + + + + + test page + + + +

h1

+

h2

+

h3

+ + diff --git a/tests/no-mincss-link.html b/tests/no-mincss-link.html new file mode 100644 index 0000000..bede122 --- /dev/null +++ b/tests/no-mincss-link.html @@ -0,0 +1,13 @@ + + + + + test page + + + +

h1

+

h2

+

h3

+ + diff --git a/tests/test_mincss.py b/tests/test_mincss.py index 2bcac99..b5f5858 100644 --- a/tests/test_mincss.py +++ b/tests/test_mincss.py @@ -42,6 +42,20 @@ def test_just_inline(self): for i, line in enumerate(expect.strip().splitlines()): eq_(line.strip(), lines_after[i].strip()) + def test_ignore_inline(self): + html = os.path.join(HERE, 'ignore-inline.html') + url = 'file://' + html + p = Processor() + p.process(url) + assert not p.inlines + + def test_no_mincss_inline(self): + html = os.path.join(HERE, 'no-mincss-inline.html') + url = 'file://' + html + p = Processor() + p.process(url) + eq_(p.inlines[0].before, p.inlines[0].after) + def test_html_with_empty_style_tag(self): html = os.path.join(HERE, 'one-2.html') url = 'file://' + html @@ -76,6 +90,29 @@ def test_just_one_link(self): for i, line in enumerate(expect.strip().splitlines()): eq_(line.strip(), lines_after[i].strip()) + def test_no_mincss_link(self): + html = os.path.join(HERE, 'no-mincss-link.html') + url = 'file://' + html + p = Processor() + p.process(url) + link = p.links[0] + eq_(link.before, link.after) + + def test_ignore_link(self): + html = os.path.join(HERE, 'ignore-link.html') + url = 'file://' + html + p = Processor() + p.process(url) + assert not p.links + + def test_respect_link_order(self): + html = os.path.join(HERE, 'three-links.html') + url = 'file://' + html + p = Processor() + p.process(url) + hrefs = [x.href for x in p.links] + eq_(hrefs, ['two.css', 'three.css']) + def test_one_link_two_different_pages(self): html = os.path.join(HERE, 'two.html') url1 = 'file://' + html diff --git a/tests/three-links.html b/tests/three-links.html new file mode 100644 index 0000000..ad16b5b --- /dev/null +++ b/tests/three-links.html @@ -0,0 +1,20 @@ + + + + + test page + + + + + +
+ one + one + three +
+ + + + From 186d055f11b5891e03d3f00d57e8d1bd6b67d006 Mon Sep 17 00:00:00 2001 From: Peter Bengtsson Date: Sun, 20 Sep 2015 21:13:46 -0700 Subject: [PATCH 2/2] test against a simpler site --- .travis.yml | 2 +- mincss/__main__.py | 2 +- mincss/processor.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1acd4be..4f72128 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,7 +37,7 @@ install: script: - nosetests -- mincss https://travis-ci.org +- mincss https://news.ycombinator.com deploy: provider: pypi diff --git a/mincss/__main__.py b/mincss/__main__.py index 16f3e37..7a05f7d 100755 --- a/mincss/__main__.py +++ b/mincss/__main__.py @@ -2,7 +2,7 @@ import sys -from . import main +from mincss import main if __name__ == '__main__': diff --git a/mincss/processor.py b/mincss/processor.py index 3bfa4e2..1c7cb11 100644 --- a/mincss/processor.py +++ b/mincss/processor.py @@ -369,7 +369,7 @@ def commentmatcher(match): ) for temp_key, old, __ in inner_improvements: - assert old in content + assert old in content, old content = content.replace(old, temp_key) _regex = re.compile('((.*?){(.*?)})', re.DOTALL | re.M)