Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions example/page.css

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion example/page.html
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
<div id="content">
<p class="foo">Expect this to be blue</p>
</div>

<div class="ui form">
nothing here
<div class="fields"></div>
</div>
<script>
onload = function() {
var parent = document.getElementById('content');
Expand Down
10 changes: 10 additions & 0 deletions example/peterbe-nomincss.css

Large diffs are not rendered by default.

172 changes: 172 additions & 0 deletions example/peterbe.css

Large diffs are not rendered by default.

367 changes: 367 additions & 0 deletions example/peterbe.html

Large diffs are not rendered by default.

38 changes: 0 additions & 38 deletions example/run_mincss

This file was deleted.

75 changes: 75 additions & 0 deletions example/run_mincss.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/usr/bin/env python
from __future__ import print_function
import sys, os
import time
sys.path.insert(0, os.path.abspath('.'))
from mincss.processor import Processor

# URL processed when the script is invoked without any URL arguments.
DEFAULT_URL = 'http://localhost:9000/page.html'

def run(url):
    """Process *url* with a fresh ``Processor`` and print a short summary.

    The summary consists of the wall-clock time spent in
    ``Processor.process`` followed by the accumulated stylesheet sizes
    before and after processing.

    NOTE: the detailed per-inline / per-link "BEFORE"/"AFTER" dump (which
    iterated ``p.inlines`` and ``p.links`` and fed the size totals) is
    currently disabled, so both totals are always reported as 0.0Kb.
    """
    processor = Processor()
    started = time.time()
    processor.process(url)
    finished = time.time()

    print("INLINES ".ljust(79, '-'))

    # Nothing accumulates into these while the detailed dump is disabled.
    size_before = 0
    size_after = 0

    summary = (
        ("TOOK:", "%.5fs" % (finished - started)),
        ("TOTAL SIZE BEFORE:", "%.1fKb" % (size_before / 1024.0)),
        ("TOTAL SIZE AFTER:", "%.1fKb" % (size_after / 1024.0)),
    )
    for label, value in summary:
        print(label.ljust(20), value)


if __name__ == '__main__':
    # Every command-line argument is treated as a URL to process; with no
    # arguments, fall back to the single default URL.
    for target in (sys.argv[1:] or [DEFAULT_URL]):
        run(target)
3 changes: 0 additions & 3 deletions example/run_server

This file was deleted.

81 changes: 58 additions & 23 deletions mincss/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
# NOTE: raw string literals are used throughout so that regex escapes such
# as ``\{``, ``\.``, ``\w`` and ``\s`` are not (invalid) Python string
# escapes; the compiled patterns are unchanged.

# Captures an "@media ..." prelude and the opening brace that follows it.
RE_FIND_MEDIA = re.compile(r'(@media.+?)(\{)', re.DOTALL | re.MULTILINE)
# Matches an at-rule starting with "@-" or "@keyframes" up to its first "{".
RE_NESTS = re.compile(r'@(-|keyframes).*?({)', re.DOTALL | re.M)
# Captures class names, i.e. the word/hyphen run following a ".".
RE_CLASS_DEF = re.compile(r'\.([\w-]+)')
# Captures word runs at the very start of the string or right after
# whitespace -- the tag-name candidates of a simple selector.
RE_SELECTOR_TAGS = re.compile(r'(^|\s)(\w+)')
# Captures ids, i.e. the word/hyphen run following a "#".
RE_ID_DEF = re.compile(r'#([\w-]+)')
MOUSE_PSEUDO_CLASSES = re.compile(
':(link|hover|active|focus|visited)$', re.M | re.I
Expand Down Expand Up @@ -88,6 +89,7 @@ def __init__(self,
self.optimize_lookup = optimize_lookup
self._all_ids = set()
self._all_classes = set()
self._all_tags = set()
self.phantomjs = phantomjs
self.phantomjs_options = phantomjs_options
self._downloaded = {}
Expand Down Expand Up @@ -182,6 +184,21 @@ def process_url(self, url):
html = self.download(url)
self.process_html(html.strip(), url=url)

def _find_all_ids_classes_and_tags(self, element):
    """Record the ids, classes and tags of every descendant of *element*.

    Results are added to ``self._all_ids``, ``self._all_classes`` and
    ``self._all_tags``.  *element* itself is not inspected; only the
    nodes below it are.

    The tree is walked with an explicit work list rather than one Python
    call per nesting level, so very deep documents cannot exhaust the
    interpreter's recursion limit.

    Assumes iterating an element yields its direct children and that
    each node exposes ``attrib`` and ``tag`` (as lxml elements do).
    """
    pending = list(element)
    while pending:
        node = pending.pop()

        identifier = node.attrib.get('id')
        if identifier:
            self._all_ids.add(identifier)

        classes = node.attrib.get('class')
        if classes:
            # The class attribute is a whitespace-separated list.
            self._all_classes.update(classes.split())

        self._all_tags.add(node.tag)

        # Queue the children; visiting order is irrelevant because the
        # results are collected into sets.
        pending.extend(node)

def process_html(self, html, url):
parser = etree.HTMLParser(encoding='utf-8')
tree = etree.fromstring(html.encode('utf-8'), parser).getroottree()
Expand All @@ -195,14 +212,8 @@ def process_html(self, html, url):
body, = CSSSelector('body')(page)
self._bodies.append(body)
if self.optimize_lookup:
for each in body.iter():
identifier = each.attrib.get('id')
if identifier:
self._all_ids.add(identifier)
classes = each.attrib.get('class')
if classes:
for class_ in classes.split():
self._all_classes.add(class_)
self._all_tags.add('body')
self._find_all_ids_classes_and_tags(body)

for style in CSSSelector('style')(page):
try:
Expand Down Expand Up @@ -404,18 +415,25 @@ def matcher(match):
s = selector.strip()
if s in EXCEPTIONAL_SELECTORS:
continue
simplified = self._simplified_selector(s)

if s in _already_found:
if simplified.endswith('>'):
# A selector like "foo.bar > :first-child" is valid,
# but once simplified you're left with
# "foo.bar >", which can never be found because
# it's an invalid selector. Best to avoid.
found = True
elif simplified in _already_found:
found = True
elif s in _already_tried:
elif simplified in _already_tried:
found = False
else:
found = self._found(bodies, s)
found = self._found(bodies, simplified)

if found:
_already_found.add(s)
_already_found.add(simplified)
else:
_already_tried.add(s)
_already_tried.add(simplified)
perfect = False
improved = re.sub(
'%s,?\s*' % re.escape(s),
Expand Down Expand Up @@ -481,24 +499,41 @@ def _found(self, bodies, selector):
# don't bother then
return False

r = self._selector_query_found(bodies, selector)
return r
# If the last part of the selector is a tag like
# ".foo blockquote" or "sometag" then we can look for it
# in plain HTML as a form of optimization
last_part = selector.split()[-1]
# if self._all_tags and '"' not in selector:
if not re.findall('[^\w \.]', selector):
# It's a trivial selector. Like "tag.myclass",
# or ".one.two". Let's look for some cheap wins
if self._all_tags:
# If the selector is quite simple, we can fish out
# all tags mentioned in it and do a quick lookup using
# simply the tag name.
for prefix, tag in RE_SELECTOR_TAGS.findall(selector):
if tag not in self._all_tags:
# If the tag doesn't even exist in the HTML,
# don't bother.
return False

return self._selector_query_found(bodies, selector)

def _selector_query_found(self, bodies, selector):
@staticmethod
def _simplified_selector(selector):
    """Return *selector* truncated at its first ':' and stripped.

    A selector carrying something like ``:active``, ``:hover`` or
    ``::before`` is thereby evaluated as if it were written without
    that pseudo part.  Everything from the first colon onwards is
    dropped, so ``"foo.bar > :first-child"`` becomes ``"foo.bar >"``.
    """
    # No need to test for particular pseudo-classes first: the result is
    # the same cut-at-first-colon whether or not one is present.
    base, _, _ = selector.partition(':')
    return base.strip()

def _selector_query_found(self, bodies, selector):
if '}' in selector:
# XXX does this ever happen any more?
return

if selector.endswith('>'):
# A selector ending in a dangling '>' is invalid and can never match.
return False

for body in bodies:
try:
for _ in CSSSelector(selector)(body):
Expand Down
5 changes: 5 additions & 0 deletions tests/before-after.html
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,10 @@
<li>First</li>
<li>Second</li>
</ol>
<blockquote>
<p>
Something
</p>
</blockquote>
</body>
</html>