From 7f23c9a3ed68463b344244eac1d4c4bea1a53fed Mon Sep 17 00:00:00 2001 From: Peter Bengtsson Date: Wed, 3 Jan 2018 10:57:07 -0500 Subject: [PATCH] split on : better, fixes #51 --- mincss/processor.py | 10 +++++++--- tests/complex-selector.html | 9 +++++++++ tests/test_mincss.py | 2 ++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/mincss/processor.py b/mincss/processor.py index bd8c3e9..05119f3 100644 --- a/mincss/processor.py +++ b/mincss/processor.py @@ -43,6 +43,12 @@ VENDOR_PREFIXED_PSEUDO_CLASSES = re.compile( ':-(webkit|moz)-' ) +# For matching things like "foo:bar" and '"foo:ing":bar' because it's +# not enough to just do a split on ':' since the ':' might be inside +# quotation marks. E.g. 'a[href^="javascript:"]' +PSEUDO_SELECTOR = re.compile( + r':(?=([^"\'\\]*(\\.|["\']([^"\'\\]*\\.)*[^"\'\\]*[\'"]))*[^"\']*$)' +) EXCEPTIONAL_SELECTORS = ( 'html', @@ -504,8 +510,6 @@ def _found(self, bodies, selector): # If the last part of the selector is a tag like # ".foo blockquote" or "sometag" then we can look for it # in plain HTML as a form of optimization - last_part = selector.split()[-1] - # if self._all_tags and '"' not in selector: if not re.findall('[^\w \.]', selector): # It's a trivial selector. Like "tag.myclass", # or ".one.two". Let's look for some cheap wins @@ -525,7 +529,7 @@ def _found(self, bodies, selector): def _simplified_selector(selector): # If the select has something like :active or :hover, # then evaluate it as if it's without that pseudo class - return selector.split(':')[0].strip() + return PSEUDO_SELECTOR.split(selector)[0].strip() def _selector_query_found(self, bodies, selector): if '}' in selector: diff --git a/tests/complex-selector.html b/tests/complex-selector.html index 14f057f..40916d3 100644 --- a/tests/complex-selector.html +++ b/tests/complex-selector.html @@ -6,11 +6,20 @@

h1

actually +
+ Stuff +
diff --git a/tests/test_mincss.py b/tests/test_mincss.py index ee462d3..cd661d6 100644 --- a/tests/test_mincss.py +++ b/tests/test_mincss.py @@ -346,6 +346,8 @@ def test_complex_colons_in_selector_expression(self): after = p.inlines[0].after ok_('a[href^="javascript:"] { color: pink; }' in after) ok_('a[href^="javascript:"]:after { content: "x"; }' in after) + ok_('.ui[class*="4:3"].embed' in after) + ok_('.ui[class*="6:9"].embed' not in after) def test_before_after(self): html = os.path.join(HERE, 'before-after.html')