From a554067c6713dfa7f63ade526686e00c78c3cd4a Mon Sep 17 00:00:00 2001
From: Peter Bengtsson
Date: Tue, 21 Nov 2017 08:31:08 -0500
Subject: [PATCH 1/9] use the utf-8 codec to write outputs (#50)
---
run.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/run.py b/run.py
index 93e3682..bb4d10b 100755
--- a/run.py
+++ b/run.py
@@ -41,10 +41,12 @@ def run(args):
#print("AFTER ".ljust(79, '-'))
#print(link.after)
orig_name = link.href.split('/')[-1]
- with codecs.open(os.path.join(output_dir, orig_name), 'w') as f:
+ fn = os.path.join(output_dir, orig_name)
+ with codecs.open(fn, 'w', 'utf-8') as f:
f.write(link.after)
before_name = 'before_' + link.href.split('/')[-1]
- with codecs.open(os.path.join(output_dir, before_name), 'w') as f:
+ fn = os.path.join(output_dir, before_name)
+ with codecs.open(fn, 'w', 'utf-8') as f:
f.write(link.before)
print("Files written to", output_dir)
print()
From b1b195c7cf808e5e427264200e70285d70ccc915 Mon Sep 17 00:00:00 2001
From: Peter Bengtsson
Date: Wed, 3 Jan 2018 10:59:58 -0500
Subject: [PATCH 2/9] split on : better, fixes #51 (#52)
---
mincss/processor.py | 10 +++++++---
tests/complex-selector.html | 9 +++++++++
tests/test_mincss.py | 2 ++
3 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/mincss/processor.py b/mincss/processor.py
index bd8c3e9..05119f3 100644
--- a/mincss/processor.py
+++ b/mincss/processor.py
@@ -43,6 +43,12 @@
VENDOR_PREFIXED_PSEUDO_CLASSES = re.compile(
':-(webkit|moz)-'
)
+# Matches a ':' only when it sits outside quotation marks. A plain
+# split on ':' is not enough because the ':' might be inside a quoted
+# value, e.g. 'a[href^="javascript:"]', which must stay in one piece.
+PSEUDO_SELECTOR = re.compile(
+ r':(?=([^"\'\\]*(\\.|["\']([^"\'\\]*\\.)*[^"\'\\]*[\'"]))*[^"\']*$)'
+)
EXCEPTIONAL_SELECTORS = (
'html',
@@ -504,8 +510,6 @@ def _found(self, bodies, selector):
# If the last part of the selector is a tag like
# ".foo blockquote" or "sometag" then we can look for it
# in plain HTML as a form of optimization
- last_part = selector.split()[-1]
- # if self._all_tags and '"' not in selector:
if not re.findall('[^\w \.]', selector):
# It's a trivial selector. Like "tag.myclass",
# or ".one.two". Let's look for some cheap wins
@@ -525,7 +529,7 @@ def _found(self, bodies, selector):
def _simplified_selector(selector):
# If the select has something like :active or :hover,
# then evaluate it as if it's without that pseudo class
- return selector.split(':')[0].strip()
+ return PSEUDO_SELECTOR.split(selector)[0].strip()
def _selector_query_found(self, bodies, selector):
if '}' in selector:
diff --git a/tests/complex-selector.html b/tests/complex-selector.html
index 14f057f..40916d3 100644
--- a/tests/complex-selector.html
+++ b/tests/complex-selector.html
@@ -6,11 +6,20 @@
h1
actually
+
+ Stuff
+