From 7f23c9a3ed68463b344244eac1d4c4bea1a53fed Mon Sep 17 00:00:00 2001
From: Peter Bengtsson
Date: Wed, 3 Jan 2018 10:57:07 -0500
Subject: [PATCH] split on : better, fixes #51
---
mincss/processor.py | 10 +++++++---
tests/complex-selector.html | 9 +++++++++
tests/test_mincss.py | 2 ++
3 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/mincss/processor.py b/mincss/processor.py
index bd8c3e9..05119f3 100644
--- a/mincss/processor.py
+++ b/mincss/processor.py
@@ -43,6 +43,12 @@
VENDOR_PREFIXED_PSEUDO_CLASSES = re.compile(
':-(webkit|moz)-'
)
+# Matches a ':' that is not inside quotation marks, e.g. the one in
+# "foo:bar" or the last one in '"foo:ing":bar'. A plain split on ':'
+# would wrongly cut selectors like 'a[href^="javascript:"]'.
+PSEUDO_SELECTOR = re.compile(
+ r':(?=([^"\'\\]*(\\.|["\']([^"\'\\]*\\.)*[^"\'\\]*[\'"]))*[^"\']*$)'
+)
EXCEPTIONAL_SELECTORS = (
'html',
@@ -504,8 +510,6 @@ def _found(self, bodies, selector):
# If the last part of the selector is a tag like
# ".foo blockquote" or "sometag" then we can look for it
# in plain HTML as a form of optimization
- last_part = selector.split()[-1]
- # if self._all_tags and '"' not in selector:
if not re.findall('[^\w \.]', selector):
# It's a trivial selector. Like "tag.myclass",
# or ".one.two". Let's look for some cheap wins
@@ -525,7 +529,7 @@ def _found(self, bodies, selector):
def _simplified_selector(selector):
# If the select has something like :active or :hover,
# then evaluate it as if it's without that pseudo class
- return selector.split(':')[0].strip()
+ return PSEUDO_SELECTOR.split(selector)[0].strip()
def _selector_query_found(self, bodies, selector):
if '}' in selector:
diff --git a/tests/complex-selector.html b/tests/complex-selector.html
index 14f057f..40916d3 100644
--- a/tests/complex-selector.html
+++ b/tests/complex-selector.html
@@ -6,11 +6,20 @@
h1
actually
+
+ Stuff
+