Merge pull request #38 from peterbe/ability-to-skip-and-ignore

Peter Bengtsson · Peter Bengtsson · commit f7e778025b4d · 2015-09-20T21:36:17.000-07:00
ability to skip and ignore
diff --git a/.travis.yml b/.travis.yml
@@ -37,7 +37,7 @@ install:
 
 script:
 - nosetests
-- mincss https://travis-ci.org
+- mincss https://news.ycombinator.com
 
 deploy:
   provider: pypi
diff --git a/README.rst b/README.rst
@@ -71,3 +71,26 @@ this for example:
     }
 
 That tells ``mincss`` to ignore the whole block and all its selectors.
+
+Ignore CSS
+----------
+
+By default, ``mincss`` will find all ``<link rel="stylesheet" ...`` and
+``<style...>`` tags and process them. If you have a link or an inline
+tag that you don't want ``mincss`` to even notice, simply add this attribute
+to the tag:
+
+::
+
+    data-mincss="ignore"
+
+Leave CSS as is
+---------------
+
+One way to have ``mincss`` leave a specific CSS selector alone is to put a
+comment like ``/* no mincss */`` inside that selector's
+block.
+
+Another way is to leave the whole stylesheet as is by adding the attribute
+``data-mincss="no"`` to a ``link`` or ``style`` tag. ``mincss`` then leaves
+that CSS unchanged but still finds it and includes it in the parsed result.
diff --git a/mincss/__main__.py b/mincss/__main__.py
@@ -2,7 +2,7 @@
 
 import sys
 
-from . import main
+from mincss import main
 
 
 if __name__ == '__main__':
diff --git a/mincss/processor.py b/mincss/processor.py
@@ -26,6 +26,9 @@
     unicode = str
 
 
+INLINE = 'inline'
+LINK = 'link'
+
 RE_FIND_MEDIA = re.compile('(@media.+?)(\{)', re.DOTALL | re.MULTILINE)
 RE_NESTS = re.compile('@(-|keyframes).*?({)', re.DOTALL | re.M)
 RE_CLASS_DEF = re.compile('\.([\w-]+)')
@@ -136,22 +139,26 @@ def process(self, *urls):
         for url in urls:
             self.process_url(url)
 
-        for identifier in sorted(self.blocks.keys(), key=lambda x: str(x[0])):
+        for identifier in sorted(self.blocks.keys()):
             content = self.blocks[identifier]
             processed = self._process_content(content, self._bodies)
 
-            if isinstance(identifier[0], int):
-                line, url = identifier
+            if identifier[1] == INLINE:
+                line, _, url, no_mincss = identifier
+                if no_mincss:
+                    processed = content
                 self.inlines.append(
                     InlineResult(
                         line,
                         url,
                         content,
-                        processed
+                        processed,
                     )
                 )
             else:
-                url, href = identifier
+                _, _, url, href, no_mincss = identifier
+                if no_mincss:
+                    processed = content
                 self.links.append(
                     LinkResult(
                         href,
@@ -199,19 +206,44 @@ def process_html(self, html, url):
                 # happend when the style tag has absolute nothing it
                 # not even whitespace
                 continue
-            for i, line in enumerate(lines):
+            no_mincss = False
+            try:
+                data_attrib = style.attrib['data-mincss'].lower()
+                if data_attrib == 'ignore':
+                    continue
+                elif data_attrib == 'no':
+                    no_mincss = True
+
+            except KeyError:
+                # happens if the attribute key isn't there
+                pass
+
+            for i, line in enumerate(lines, start=1):
                 if line.count(first_line):
-                    key = (i + 1, url)
+                    key = (i, INLINE, url, no_mincss)
                     self.blocks[key] = style.text
                     break
 
+        i = 0
         for link in CSSSelector('link')(page):
             if (
                 link.attrib.get('rel', '') == 'stylesheet' or
                 link.attrib['href'].lower().split('?')[0].endswith('.css')
             ):
+                no_mincss = False
+                try:
+                    data_attrib = link.attrib['data-mincss'].lower()
+                    if data_attrib == 'ignore':
+                        continue
+                    if data_attrib == 'no':
+                        no_mincss = True
+                except KeyError:
+                    # happens if the attribute key isn't there
+                    pass
+
                 link_url = self.make_absolute_url(url, link.attrib['href'])
-                key = (link_url, link.attrib['href'])
+                key = (i, LINK, link_url, link.attrib['href'], no_mincss)
+                i += 1
                 self.blocks[key] = self.download(link_url)
                 if self.preserve_remote_urls:
                     self.blocks[key] = self._rewrite_urls(
@@ -337,6 +369,7 @@ def commentmatcher(match):
             )
 
         for temp_key, old, __ in inner_improvements:
+            assert old in content, old
             content = content.replace(old, temp_key)
 
         _regex = re.compile('((.*?){(.*?)})', re.DOTALL | re.M)
@@ -396,6 +429,7 @@ def matcher(match):
         fixed = _regex.sub(matcher, content)
 
         for temp_key, __, improved in inner_improvements:
+            assert temp_key in fixed
             fixed = fixed.replace(temp_key, improved)
         for temp_key, whole in comments:
             # note, `temp_key` might not be in the `fixed` thing because the
diff --git a/tests/ignore-inline.html b/tests/ignore-inline.html
@@ -0,0 +1,19 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta charset='utf-8'>
+    <title>test page</title>
+    <style data-mincss="ignore">
+    h1, h2, h3 { text-align: center; }
+    h3, h4 { font-family: serif; }
+    .foobar { delete:me }
+    .foobar, h4, h2 { color:red }
+    #none, .exists { delete: me-too; }
+    </style>
+  </head>
+  <body>
+    <h1>h1</h1>
+    <h2>h2</h2>
+    <h3>h3</h3>
+  </body>
+</html>
diff --git a/tests/ignore-link.html b/tests/ignore-link.html
@@ -0,0 +1,13 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta charset='utf-8'>
+    <title>test page</title>
+    <link rel="stylesheet" href='two.css' data-mincss='ignore'>
+  </head>
+  <body>
+    <h1>h1</h1>
+    <h2>h2</h2>
+    <h3>h3</h3>
+  </body>
+</html>
diff --git a/tests/no-mincss-inline.html b/tests/no-mincss-inline.html
@@ -0,0 +1,19 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta charset='utf-8'>
+    <title>test page</title>
+    <style data-mincss="no">
+    h1, h2, h3 { text-align: center; }
+    h3, h4 { font-family: serif; }
+    .foobar { delete:me }
+    .foobar, h4, h2 { color:red }
+    #none, .exists { delete: me-too; }
+    </style>
+  </head>
+  <body>
+    <h1>h1</h1>
+    <h2>h2</h2>
+    <h3>h3</h3>
+  </body>
+</html>
diff --git a/tests/no-mincss-link.html b/tests/no-mincss-link.html
@@ -0,0 +1,13 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta charset='utf-8'>
+    <title>test page</title>
+    <link rel="stylesheet" href='two.css' data-mincss="no">
+  </head>
+  <body>
+    <h1>h1</h1>
+    <h2>h2</h2>
+    <h3>h3</h3>
+  </body>
+</html>
diff --git a/tests/test_mincss.py b/tests/test_mincss.py
@@ -42,6 +42,20 @@ def test_just_inline(self):
         for i, line in enumerate(expect.strip().splitlines()):
             eq_(line.strip(), lines_after[i].strip())
 
+    def test_ignore_inline(self):
+        html = os.path.join(HERE, 'ignore-inline.html')
+        url = 'file://' + html
+        p = Processor()
+        p.process(url)
+        assert not p.inlines
+
+    def test_no_mincss_inline(self):
+        html = os.path.join(HERE, 'no-mincss-inline.html')
+        url = 'file://' + html
+        p = Processor()
+        p.process(url)
+        eq_(p.inlines[0].before, p.inlines[0].after)
+
     def test_html_with_empty_style_tag(self):
         html = os.path.join(HERE, 'one-2.html')
         url = 'file://' + html
@@ -76,6 +90,29 @@ def test_just_one_link(self):
         for i, line in enumerate(expect.strip().splitlines()):
             eq_(line.strip(), lines_after[i].strip())
 
+    def test_no_mincss_link(self):
+        html = os.path.join(HERE, 'no-mincss-link.html')
+        url = 'file://' + html
+        p = Processor()
+        p.process(url)
+        link = p.links[0]
+        eq_(link.before, link.after)
+
+    def test_ignore_link(self):
+        html = os.path.join(HERE, 'ignore-link.html')
+        url = 'file://' + html
+        p = Processor()
+        p.process(url)
+        assert not p.links
+
+    def test_respect_link_order(self):
+        html = os.path.join(HERE, 'three-links.html')
+        url = 'file://' + html
+        p = Processor()
+        p.process(url)
+        hrefs = [x.href for x in p.links]
+        eq_(hrefs, ['two.css', 'three.css'])
+
     def test_one_link_two_different_pages(self):
         html = os.path.join(HERE, 'two.html')
         url1 = 'file://' + html
diff --git a/tests/three-links.html b/tests/three-links.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta charset='utf-8'>
+    <title>test page</title>
+    <!-- alphabetically, "three.css" comes before "two.css"
+    but that shouldn't matter -->
+    <link rel="stylesheet" href='two.css'>
+    <link rel="stylesheet" href='three.css'>
+  </head>
+  <body>
+    <div class="container">
+    <a href="#" class="one">one</a>
+    <a href="#" class="two">one</a>
+    <a href="#" class="three">three</a>
+    </div>
+
+    <input type="search">
+  </body>
+</html>