From 32b3b376f9709d7f329bf50fd76481f606bf3f18 Mon Sep 17 00:00:00 2001
From: Peter Bengtsson
Date: Sun, 20 Sep 2015 16:48:06 -0700
Subject: [PATCH 1/2] ability to skip and ignore
---
README.rst | 23 +++++++++++++++++
mincss/processor.py | 50 +++++++++++++++++++++++++++++++------
tests/ignore-inline.html | 19 ++++++++++++++
tests/ignore-link.html | 13 ++++++++++
tests/no-mincss-inline.html | 19 ++++++++++++++
tests/no-mincss-link.html | 13 ++++++++++
tests/test_mincss.py | 37 +++++++++++++++++++++++++++
tests/three-links.html | 20 +++++++++++++++
8 files changed, 186 insertions(+), 8 deletions(-)
create mode 100644 tests/ignore-inline.html
create mode 100644 tests/ignore-link.html
create mode 100644 tests/no-mincss-inline.html
create mode 100644 tests/no-mincss-link.html
create mode 100644 tests/three-links.html
diff --git a/README.rst b/README.rst
index 3476fbc..7f33bf0 100644
--- a/README.rst
+++ b/README.rst
@@ -71,3 +71,26 @@ this for example:
}
That tells ``mincss`` to ignore the whole block and all its selectors.
+
+Ignore CSS
+----------
+
+By default, ``mincss`` will find all ``<link>`` and ``<style>`` tags and process them.
+If you have a link or an inline style tag that you don't want ``mincss`` to even
+notice, simply add this attribute to the tag:
+
+::
+
+ data-mincss="ignore"
+
+Leave CSS as is
+---------------
+
+One technique to have a specific CSS selector be ignored by ``mincss`` is to
+put in a comment like ``/* no mincss */`` inside the CSS selectors
+block.
+
+Another way is to leave the whole stylesheet as is by adding ``data-mincss="no"``
+to its ``link`` or ``style`` tag. The advantage of doing this is that ``mincss``
+leaves the CSS unprocessed but still finds it and includes it in the parsed result.
diff --git a/mincss/processor.py b/mincss/processor.py
index 7e1e04a..3bfa4e2 100644
--- a/mincss/processor.py
+++ b/mincss/processor.py
@@ -26,6 +26,9 @@
unicode = str
+INLINE = 'inline'
+LINK = 'link'
+
RE_FIND_MEDIA = re.compile('(@media.+?)(\{)', re.DOTALL | re.MULTILINE)
RE_NESTS = re.compile('@(-|keyframes).*?({)', re.DOTALL | re.M)
RE_CLASS_DEF = re.compile('\.([\w-]+)')
@@ -136,22 +139,26 @@ def process(self, *urls):
for url in urls:
self.process_url(url)
- for identifier in sorted(self.blocks.keys(), key=lambda x: str(x[0])):
+ for identifier in sorted(self.blocks.keys()):
content = self.blocks[identifier]
processed = self._process_content(content, self._bodies)
- if isinstance(identifier[0], int):
- line, url = identifier
+ if identifier[1] == INLINE:
+ line, _, url, no_mincss = identifier
+ if no_mincss:
+ processed = content
self.inlines.append(
InlineResult(
line,
url,
content,
- processed
+ processed,
)
)
else:
- url, href = identifier
+ _, _, url, href, no_mincss = identifier
+ if no_mincss:
+ processed = content
self.links.append(
LinkResult(
href,
@@ -199,19 +206,44 @@ def process_html(self, html, url):
# happend when the style tag has absolute nothing it
# not even whitespace
continue
- for i, line in enumerate(lines):
+ no_mincss = False
+ try:
+ data_attrib = style.attrib['data-mincss'].lower()
+ if data_attrib == 'ignore':
+ continue
+ elif data_attrib == 'no':
+ no_mincss = True
+
+ except KeyError:
+ # happens if the attribute key isn't there
+ pass
+
+ for i, line in enumerate(lines, start=1):
if line.count(first_line):
- key = (i + 1, url)
+ key = (i, INLINE, url, no_mincss)
self.blocks[key] = style.text
break
+ i = 0
for link in CSSSelector('link')(page):
if (
link.attrib.get('rel', '') == 'stylesheet' or
link.attrib['href'].lower().split('?')[0].endswith('.css')
):
+ no_mincss = False
+ try:
+ data_attrib = link.attrib['data-mincss'].lower()
+ if data_attrib == 'ignore':
+ continue
+ if data_attrib == 'no':
+ no_mincss = True
+ except KeyError:
+ # happens if the attribute key isn't there
+ pass
+
link_url = self.make_absolute_url(url, link.attrib['href'])
- key = (link_url, link.attrib['href'])
+ key = (i, LINK, link_url, link.attrib['href'], no_mincss)
+ i += 1
self.blocks[key] = self.download(link_url)
if self.preserve_remote_urls:
self.blocks[key] = self._rewrite_urls(
@@ -337,6 +369,7 @@ def commentmatcher(match):
)
for temp_key, old, __ in inner_improvements:
+ assert old in content
content = content.replace(old, temp_key)
_regex = re.compile('((.*?){(.*?)})', re.DOTALL | re.M)
@@ -396,6 +429,7 @@ def matcher(match):
fixed = _regex.sub(matcher, content)
for temp_key, __, improved in inner_improvements:
+ assert temp_key in fixed
fixed = fixed.replace(temp_key, improved)
for temp_key, whole in comments:
# note, `temp_key` might not be in the `fixed` thing because the
diff --git a/tests/ignore-inline.html b/tests/ignore-inline.html
new file mode 100644
index 0000000..f3a4bf4
--- /dev/null
+++ b/tests/ignore-inline.html
@@ -0,0 +1,19 @@
+
+
+
+
+ test page
+
+
+
+