diff --git a/.gitignore b/.gitignore index 0d5f4b2..c69ca8d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,12 @@ +.*.swo +.*.swp output/ proxy/.cache/ docs/_build/ -mincss.egg-info/ +*.egg +*.egg-info /build/ /dist/ simple.js +*.pyc +__pycache__/ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..7264492 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,13 @@ +language: python + +python: + - "2.7" + - "3.3" + - "3.4" + +install: + - python setup.py --quiet install + +script: + - ./setup.py test + - mincss --output=./output https://travis-ci.org diff --git a/MANIFEST.in b/MANIFEST.in index fcf2f66..dd38543 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ include LICENSE -include README.md +include README.rst include docs/changelog.rst include requirements.txt include mincss/download.js diff --git a/README.md b/README.rst similarity index 64% rename from README.md rename to README.rst index 556ec6e..96d4487 100644 --- a/README.md +++ b/README.rst @@ -1,17 +1,37 @@ mincss ====== +.. image:: https://travis-ci.org/myint/mincss.png?branch=master + :target: https://travis-ci.org/myint/mincss + :alt: Build status + Clears the junk out of your CSS by finding out which selectors are actually not used in your HTML. -By Peter Bengtsson, 2012-2013 +This is an unofficial fork (of https://pypi.python.org/pypi/mincss) that runs +on both Python 2 and 3. + +Example +------- + +To output to a directory called ``cleaned``:: + + $ mincss --output=./cleaned https://github.com + + +Installation +------------ + +From pip:: + + $ pip install --upgrade mincss3k Why? ---- With the onslaught of Twitter Bootstrap upon the world it's very -tempting to just download their whole fat 80+Kb CSS and serve it up -even though you're not using half of the HTML that it styles. +tempting to just download their whole fat CSS and serve it up even +though you're not using half of the HTML that it styles. There's also the case of websites that have changed over time but without the CSS getting the same amount of love refactoring. Then it's @@ -24,8 +44,8 @@ you're not using. Whitespace compression? ----------------------- -No, that's a separate concern. This tool works independent of -whitespace compression/optimization. +No, that's a separate concern. This tool works independent of whitespace +compression/optimization. For example, if you have a build step or a runtime step that converts all your CSS files into one (concatenation) and trims away all the @@ -42,9 +62,11 @@ because at the moment ``mincss`` is entirely static. So what is a web developer to do? Simple, use ``/* no mincss */`` like this for example: +:: + .logged-in-info { /* no mincss */ - color: pink; + color: pink; } That tells ``mincss`` to ignore the whole block and all its selectors. diff --git a/docs/conf.py b/docs/conf.py index 12798d0..0bfa03f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -3,7 +3,8 @@ # mincss documentation build configuration file, created by # sphinx-quickstart on Fri Jan 11 14:08:28 2013. # -# This file is execfile()d with the current directory set to its containing dir. +# This file is execfile()d with the current directory set to its containing +# dir. # # Note that not all possible configuration values are present in this # autogenerated file. @@ -11,20 +12,19 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. #sys.path.insert(0, os.path.abspath('.')) -# -- General configuration ----------------------------------------------------- +# -- General configuration ----------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. #needs_sphinx = '1.0' -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [] # Add any paths that contain templates here, relative to this directory. @@ -66,7 +66,8 @@ # directories to ignore when looking for source files. exclude_patterns = ['_build'] -# The reST default role (used for this markup: `text`) to use for all documents. +# The reST default role (used for this markup: `text`) to use for all +# documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. @@ -87,7 +88,7 @@ #modindex_common_prefix = [] -# -- Options for HTML output --------------------------------------------------- +# -- Options for HTML output --------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. @@ -167,24 +168,25 @@ htmlhelp_basename = 'mincssdoc' -# -- Options for LaTeX output -------------------------------------------------- +# -- Options for LaTeX output -------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # Additional stuff for the LaTeX preamble. + #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). +# (source start file, target name, title, author, documentclass +# [howto/manual]). latex_documents = [ - ('index', 'mincss.tex', u'mincss Documentation', - u'Peter Bengtsson', 'manual'), + ('index', 'mincss.tex', u'mincss Documentation', + u'Peter Bengtsson', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of @@ -208,7 +210,7 @@ #latex_domain_indices = True -# -- Options for manual page output -------------------------------------------- +# -- Options for manual page output -------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). @@ -221,15 +223,15 @@ #man_show_urls = False -# -- Options for Texinfo output ------------------------------------------------ +# -- Options for Texinfo output ------------------------------------------ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'mincss', u'mincss Documentation', - u'Peter Bengtsson', 'mincss', 'One line description of project.', - 'Miscellaneous'), + ('index', 'mincss', u'mincss Documentation', + u'Peter Bengtsson', 'mincss', 'One line description of project.', + 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. diff --git a/mincss/__init__.py b/mincss/__init__.py index ef72cc0..e46aee1 100644 --- a/mincss/__init__.py +++ b/mincss/__init__.py @@ -1 +1 @@ -__version__ = '0.8.1' +__version__ = '0.9' diff --git a/mincss/__main__.py b/mincss/__main__.py new file mode 100755 index 0000000..c1d5339 --- /dev/null +++ b/mincss/__main__.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python + +from __future__ import absolute_import + +import sys + +from . import main + + +if __name__ == '__main__': + sys.exit(main.main()) diff --git a/mincss/main.py b/mincss/main.py new file mode 100644 index 0000000..d8bfc3f --- /dev/null +++ b/mincss/main.py @@ -0,0 +1,70 @@ +from __future__ import absolute_import +from __future__ import print_function + +import io +import os +import sys +import time + +from .processor import Processor + + +def run(args): + options = {'debug': args.verbose} + if args.phantomjs_path: + options['phantomjs'] = args.phantomjs_path + elif args.phantomjs: + options['phantomjs'] = True + p = Processor(**options) + t0 = time.time() + p.process(args.url) + t1 = time.time() + print('TOTAL TIME ', t1 - t0, file=sys.stderr) + for inline in p.inlines: + print('ON', inline.url, file=sys.stderr) + print('AT line', inline.line, file=sys.stderr) + print('BEFORE '.ljust(79, '-'), file=sys.stderr) + print(inline.before, file=sys.stderr) + print('AFTER '.ljust(79, '-'), file=sys.stderr) + print(inline.after, file=sys.stderr) + print(file=sys.stderr) + + if not os.path.isdir(args.output): + os.mkdir(args.output) + for link in p.links: + print('FOR', link.href) + orig_name = link.href.split('/')[-1] + with io.open(os.path.join(args.output, orig_name), 'w') as f: + f.write(link.after) + before_name = 'before_' + link.href.split('/')[-1] + with io.open(os.path.join(args.output, before_name), 'w') as f: + f.write(link.before) + print('Files written to\n', args.output, file=sys.stderr) + print( + '(from %d to %d saves %d)' % + (len(link.before), len(link.after), + len(link.before) - len(link.after)), + file=sys.stderr + ) + + return 0 + + +def main(): + import argparse + parser = argparse.ArgumentParser() + add = parser.add_argument + add('url', type=str, + help='URL to process') + add('-o', '--output', action='store', required=True, + help='directory where to put output') + add('-v', '--verbose', action='store_true', + help='increase output verbosity') + add('--phantomjs', action='store_true', + help='Use PhantomJS to download the source') + add('--phantomjs-path', action='store', + default='', + help='Where is the phantomjs executable') + + args = parser.parse_args() + return run(args) diff --git a/mincss/processor.py b/mincss/processor.py index 8645e0d..aba6e59 100644 --- a/mincss/processor.py +++ b/mincss/processor.py @@ -1,20 +1,37 @@ +from __future__ import absolute_import +from __future__ import print_function + +import collections +import contextlib +import functools import os import sys -import functools import random import re -import urlparse import time import subprocess + +try: + from urllib.parse import urljoin + from urllib.request import urlopen +except ImportError: + from urlparse import urljoin + from urllib import urlopen + from lxml import etree from lxml.cssselect import CSSSelector, SelectorSyntaxError, ExpressionError -import urllib -RE_FIND_MEDIA = re.compile("(@media.+?)(\{)", re.DOTALL | re.MULTILINE) -RE_NESTS = re.compile('@(-|keyframes).*?({)', re.DOTALL | re.M) -RE_CLASS_DEF = re.compile('\.([\w-]+)') -RE_ID_DEF = re.compile('#([\w-]+)') +try: + unicode +except NameError: + unicode = str + + +RE_FIND_MEDIA = re.compile(r'(@media.+?)(\{)', re.DOTALL | re.MULTILINE) +RE_NESTS = re.compile(r'@(-|keyframes).*?({)', re.DOTALL | re.M) +RE_CLASS_DEF = re.compile(r'\.([\w-]+)') +RE_ID_DEF = re.compile(r'#([\w-]+)') EXCEPTIONAL_SELECTORS = ( @@ -29,13 +46,13 @@ class ParserError(Exception): - """happens when we fail to parse the HTML""" - pass + + """happens when we fail to parse the HTML.""" class DownloadError(Exception): - """happens when we fail to down the URL""" - pass + + """happens when we fail to down the URL.""" def _get_random_string(): @@ -55,8 +72,8 @@ def __init__(self, optimize_lookup=True): self.debug = debug self.preserve_remote_urls = preserve_remote_urls - self.tab = ' ' * 4 - self.blocks = {} + self.inline_blocks = collections.OrderedDict() + self.link_blocks = collections.OrderedDict() self.inlines = [] self.links = [] self._bodies = [] @@ -68,14 +85,15 @@ def __init__(self, def _download(self, url): try: - response = urllib.urlopen(url) - if response.getcode() is not None: - if response.getcode() != 200: - raise DownloadError( - '%s -- %s ' % (url, response.getcode()) - ) - content = response.read() - return unicode(content, 'utf-8') + with contextlib.closing(urlopen(url)) as response: + if response.getcode() is not None: + if response.getcode() != 200: + raise DownloadError( + '%s -- %s ' % (url, response.getcode()) + ) + content = response.read() + return unicode(content, + get_charset(response)) except IOError: raise IOError(url) @@ -101,15 +119,15 @@ def _download_with_phantomjs(self, url): t0 = time.time() process = subprocess.Popen( - ' '.join(command), - shell=True, + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) - out, err = process.communicate() + out = process.communicate()[0] t1 = time.time() if self.debug: - print "Took", t1 - t0, "seconds to download with PhantomJS" + print('Took', t1 - t0, 'seconds to download with PhantomJS', + file=sys.stderr) return unicode(out, 'utf-8') @@ -117,31 +135,30 @@ def process(self, *urls): for url in urls: self.process_url(url) - for identifier in sorted(self.blocks.keys()): - content = self.blocks[identifier] + for (identifier, content) in self.inline_blocks.items(): processed = self._process_content(content, self._bodies) - if isinstance(identifier[0], int): - line, url = identifier - # inline - self.inlines.append( - InlineResult( - line, - url, - content, - processed - ) + (line, url) = identifier + self.inlines.append( + InlineResult( + line, + url, + content, + processed ) - else: - url, href = identifier - self.links.append( - LinkResult( - href, - #url, - content, - processed - ) + ) + + for (identifier, content) in self.link_blocks.items(): + processed = self._process_content(content, self._bodies) + + href = identifier[1] + self.links.append( + LinkResult( + href, + content, + processed ) + ) def process_url(self, url): if self.phantomjs: @@ -151,33 +168,37 @@ def process_url(self, url): self.process_html(html.strip(), url=url) def process_html(self, html, url): - parser = etree.HTMLParser() - tree = etree.fromstring(html, parser).getroottree() + parser = etree.HTMLParser(encoding='utf-8') + tree = etree.fromstring(html.encode('utf-8'), parser).getroottree() page = tree.getroot() if page is None: - print repr(html) - raise ParserError("Could not parse the html") + print(repr(html), file=sys.stderr) + raise ParserError('Could not parse the html') lines = html.splitlines() body, = CSSSelector('body')(page) self._bodies.append(body) if self.optimize_lookup: for each in body.iter(): - id = each.attrib.get('id') - if id: - self._all_ids.add(id) + identifier = each.attrib.get('id') + if identifier: + self._all_ids.add(identifier) classes = each.attrib.get('class') if classes: for class_ in classes.split(): self._all_classes.add(class_) for style in CSSSelector('style')(page): - first_line = style.text.strip().splitlines()[0] + try: + first_line = style.text.strip().splitlines()[0] + except (AttributeError, IndexError): + continue + for i, line in enumerate(lines): if line.count(first_line): key = (i + 1, url) - self.blocks[key] = style.text + self.inline_blocks[key] = style.text break for link in CSSSelector('link')(page): @@ -187,10 +208,10 @@ def process_html(self, html, url): ): link_url = self.make_absolute_url(url, link.attrib['href']) key = (link_url, link.attrib['href']) - self.blocks[key] = self._download(link_url) + self.link_blocks[key] = self._download(link_url) if self.preserve_remote_urls: - self.blocks[key] = self._rewrite_urls( - self.blocks[key], + self.link_blocks[key] = self._rewrite_urls( + self.link_blocks[key], link_url ) @@ -206,8 +227,9 @@ def _rewrite_urls(self, content, link_url): Then rewrite this to become: background: url(http://cdn.example.org/foo.png) + """ - css_url_regex = re.compile('url\(([^\)]+)\)') + css_url_regex = re.compile(r'url\(([^\)]+)\)') def css_url_replacer(match, href=None): filename = match.groups()[0] @@ -223,13 +245,7 @@ def css_url_replacer(match, href=None): # this is a known IE hack in CSS return bail - #if not filename.startswith('/'): - # joined = os.path.join( - # os.path.dirname(href), - # filename - # ) - - new_filename = urlparse.urljoin(href, filename) + new_filename = urljoin(href, filename) return 'url("%s")' % new_filename content = css_url_regex.sub( @@ -265,7 +281,7 @@ def commentmatcher(match): elif nearest_close > -1 and nearest_open > -1: outside = nearest_close > nearest_open else: - raise Exception("can this happen?!") + raise Exception('can this happen?!') if outside: temp_key = '@%scomment{}' % _get_random_string() @@ -289,7 +305,7 @@ def commentmatcher(match): nests = [(m.group(1), m) for m in RE_NESTS.finditer(content)] _nests = [] - for start, m in nests: + for _, m in nests: __, whole = self._get_contents(m, content) _nests.append(whole) # once all nests have been spotted, temporarily replace them @@ -312,7 +328,6 @@ def commentmatcher(match): else: improved = '' temp_key = '@%s{}' % _get_random_string() - #content = content.replace(whole, temp_key) inner_improvements.append( (temp_key, whole, improved) ) @@ -321,7 +336,7 @@ def commentmatcher(match): assert old in content content = content.replace(old, temp_key) - _regex = re.compile('((.*?){(.*?)})', re.DOTALL | re.M) + _regex = re.compile(r'((.*?){(.*?)})', re.DOTALL | re.M) _already_found = set() _already_tried = set() @@ -359,7 +374,7 @@ def matcher(match): _already_tried.add(s) perfect = False improved = re.sub( - '%s,?\s*' % re.escape(s), + r'%s,?\s*' % re.escape(s), '', improved, count=1 @@ -371,7 +386,7 @@ def matcher(match): if not improved.strip(): return '' else: - improved = re.sub(',\s*$', ' ', improved) + improved = re.sub(r',\s*$', ' ', improved) whole = whole.replace(selectors, improved) return whole @@ -391,12 +406,12 @@ def _get_contents(self, match, original_content): # we are starting the character after the first opening brace open_braces = 1 position = match.end() - content = "" + content = '' while open_braces > 0: c = original_content[position] - if c == "{": + if c == '{': open_braces += 1 - if c == "}": + if c == '}': open_braces -= 1 content += c position += 1 @@ -422,7 +437,6 @@ def _found(self, bodies, selector): # don't bother then return False - #print "SELECTOR", repr(selector) r = self._selector_query_found(bodies, selector) return r @@ -435,22 +449,23 @@ def _selector_query_found(self, bodies, selector): for body in bodies: try: - for each in CSSSelector(selector)(body): + for _ in CSSSelector(selector)(body): return True except SelectorSyntaxError: - print >>sys.stderr, "TROUBLEMAKER" - print >>sys.stderr, repr(selector) + print('TROUBLEMAKER', file=sys.stderr) + print(repr(selector), file=sys.stderr) except ExpressionError: - print >>sys.stderr, "EXPRESSIONERROR" - print >>sys.stderr, repr(selector) + print('EXPRESSIONERROR', file=sys.stderr) + print(repr(selector), file=sys.stderr) return False @staticmethod def make_absolute_url(url, href): - return urlparse.urljoin(url, href) + return urljoin(url, href) class _Result(object): + def __init__(self, before, after): self.before = before self.after = after @@ -468,5 +483,19 @@ class LinkResult(_Result): def __init__(self, href, *args): self.href = href - #self.url = url super(LinkResult, self).__init__(*args) + + +def get_charset(response, default='utf-8'): + """Return encoding.""" + try: + # Python 3. + return response.info().get_param('charset', default) + except AttributeError: + # Python 2. + content_type = response.headers['content-type'] + split_on = 'charset=' + if split_on in content_type: + return content_type.split(split_on)[-1] + else: + return default diff --git a/proxy/app.py b/proxy/app.py index 811769a..de50216 100755 --- a/proxy/app.py +++ b/proxy/app.py @@ -1,4 +1,6 @@ #!/usr/bin/env python + +from __future__ import print_function import codecs import datetime import os @@ -6,11 +8,16 @@ import logging import hashlib import re -import urllib -import urlparse import shutil import time +try: + from urllib.parse import urljoin, urlparse + from urllib.request import urlopen +except ImportError: + from urlparse import urljoin, urlparse + from urllib import urlopen + from lxml import etree from lxml.cssselect import CSSSelector @@ -23,6 +30,12 @@ from mincss.processor import Processor +try: + unicode +except NameError: + unicode = str + + CACHE_DIR = os.path.join( os.path.dirname(__file__), '.cache' @@ -35,21 +48,21 @@ ) -@app.route("/cache/") +@app.route('/cache/') def cache(path): source = os.path.join(CACHE_DIR, path) with open(source) as f: response = make_response(f.read()) - response.headers["Content-type"] = "text/css" + response.headers['Content-type'] = 'text/css' return response def download(url): - html = urllib.urlopen(url).read() + html = urlopen(url).read() return unicode(html, 'utf-8') -@app.route("/") +@app.route('/') def proxy(path): if path == 'favicon.ico': abort(404) @@ -57,14 +70,14 @@ def proxy(path): if not path.count('://'): url = 'http://' + url - query = urlparse.urlparse(request.url).query + query = urlparse(request.url).query if query: url += '?%s' % query logging.info('Downloading %s' % url) t0 = time.time() html = download(url) t1 = time.time() - print "%.4f seconds to download" % (t1 - t0) + print('%.4f seconds to download' % (t1 - t0)) p = Processor(debug=False, optimize_lookup=True) # since we've already download the HTML @@ -73,7 +86,7 @@ def proxy(path): t1 = time.time() p.process() t2 = time.time() - print "%.4f seconds to parse and process" % (t2 - t1) + print('%.4f seconds to parse and process' % (t2 - t1)) collect_stats = request.args.get('MINCSS_STATS', False) stats = [] @@ -94,15 +107,7 @@ def css_url_replacer(match, href=None): # this is a known IE hack in CSS return bail - #if not filename.startswith('/'): - # filename = os.path.normpath( - # os.path.join( - # os.path.dirname(href), - # filename - # ) - # ) - - new_filename = urlparse.urljoin(url, filename) + new_filename = urljoin(url, filename) return 'url("%s")' % new_filename for i, each in enumerate(p.inlines): @@ -125,11 +130,9 @@ def css_url_replacer(match, href=None): # lxml inserts a doctype if none exists, so only include it in # the root if it was in the original html. was_doctype = tree.docinfo.doctype - #root = tree if stripped.startswith(tree.docinfo.doctype) else page links = dict((x.href, x) for x in p.links) - #all_lines = html.splitlines() for link in CSSSelector('link')(page): if ( link.attrib.get('rel', '') == 'stylesheet' or @@ -168,7 +171,7 @@ def css_url_replacer(match, href=None): for img in CSSSelector('img, script')(page): if 'src' in img.attrib: - orig_src = urlparse.urljoin(url, img.attrib['src']) + orig_src = urljoin(url, img.attrib['src']) img.attrib['src'] = orig_src for a in CSSSelector('a')(page): @@ -186,10 +189,9 @@ def css_url_replacer(match, href=None): if href.startswith('/'): a.attrib['href'] = ( '/' + - urlparse.urljoin(url, a.attrib['href']) + urljoin(url, a.attrib['href']) .replace('http://', '') ) - #else: if collect_stats: a.attrib['href'] = add_collect_stats_qs( a.attrib['href'], @@ -210,9 +212,10 @@ def css_url_replacer(match, href=None): def add_collect_stats_qs(url, value): - """ - if :url is `page.html?foo=bar` - return `page.html?foo=bar&MINCSS_STATS=:value` + """if :url is `page.html?foo=bar` return. + + `page.html?foo=bar&MINCSS_STATS=:value` + """ if '?' in url: url += '&' @@ -283,21 +286,23 @@ def summorize_stats_html(stats): def sizeof(num): for x in ['bytes', 'KB', 'MB', 'GB']: if num < 1024.0 and num > -1024.0: - return "%3.1f%s" % (num, x) + return '%3.1f%s' % (num, x) num /= 1024.0 - return "%3.1f%s" % (num, 'TB') + return '%3.1f%s' % (num, 'TB') def mkdir(newdir): """works the way a good mkdir should :) + - already exists, silently complete - regular file in the way, raise an exception - parent directory(ies) does not exist, make them as well + """ if os.path.isdir(newdir): return if os.path.isfile(newdir): - raise OSError("a file with the same name as the desired " + raise OSError('a file with the same name as the desired ' "dir, '%s', already exists." % newdir) head, tail = os.path.split(newdir) if head and not os.path.isdir(head): @@ -317,7 +322,7 @@ def _find_link(line, href): return each -if __name__ == "__main__": +if __name__ == '__main__': app.run(debug=True) try: shutil.rmtree(CACHE_DIR) diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index e87585d..0000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -lxml -cssselect diff --git a/run.py b/run.py deleted file mode 100755 index 66655f7..0000000 --- a/run.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python -import os -import sys -import time - -# make sure it's running the mincss here and not anything installed -sys.path.insert(0, os.path.dirname(__file__)) -from mincss.processor import Processor - - -def run(args): - options = {'debug': args.verbose} - if args.phantomjs_path: - options['phantomjs'] = args.phantomjs_path - elif args.phantomjs: - options['phantomjs'] = True - p = Processor(**options) - t0 = time.time() - p.process(args.url) - t1 = time.time() - print "TOTAL TIME ", t1 - t0 - for inline in p.inlines: - print "ON", inline.url - print "AT line", inline.line - print "BEFORE ".ljust(79, '-') - print inline.before - print "AFTER ".ljust(79, '-') - print inline.after - print - - output_dir = args.outputdir - if not os.path.isdir(output_dir): - os.mkdir(output_dir) - for link in p.links: - print "FOR", link.href - #print "BEFORE ".ljust(79, '-') - #print link.before - #print "AFTER ".ljust(79, '-') - #print link.after - orig_name = link.href.split('/')[-1] - with open(os.path.join(output_dir, orig_name), 'w') as f: - f.write(link.after) - before_name = 'before_' + link.href.split('/')[-1] - with open(os.path.join(output_dir, before_name), 'w') as f: - f.write(link.before.encode('utf-8')) - print "Files written to", output_dir - print - print ( - '(from %d to %d saves %d)' % - (len(link.before), len(link.after), - len(link.before) - len(link.after)) - ) - - return 0 - - -if __name__ == '__main__': - import argparse - parser = argparse.ArgumentParser() - add = parser.add_argument - add("url", type=str, - help="URL to process") - add("--outputdir", action="store", - default="./output", - help="directory where to put output (default ./output)") - add("-v", "--verbose", action="store_true", - help="increase output verbosity") - add("--phantomjs", action="store_true", - help="Use PhantomJS to download the source") - add("--phantomjs-path", action="store", - default="", - help="Where is the phantomjs executable") - - args = parser.parse_args() - sys.exit(run(args)) diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index d0e729f..f0459b3 --- a/setup.py +++ b/setup.py @@ -1,61 +1,42 @@ -import codecs -import os -import re - - -# Prevent spurious errors during `python setup.py test`, a la -# http://www.eby-sarna.com/pipermail/peak/2010-May/003357.html: -try: - import multiprocessing -except ImportError: - pass - -from setuptools import setup, find_packages - - -def read(*parts): - return codecs.open(os.path.join(os.path.dirname(__file__), *parts)).read() - - -def find_version(*file_paths): - version_file = read(*file_paths) - version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", - version_file, re.M) - if version_match: - return version_match.group(1) - raise RuntimeError("Unable to find version string.") - -def find_install_requires(): - return [x.strip() for x in - read('requirements.txt').splitlines() - if x.strip() and not x.startswith('#')] - - -setup( - name='mincss', - version=find_version('mincss/__init__.py'), - description='clears the junk out of your CSS', - long_description=read('README.md') + '\n\n' + - '\n'.join(read('docs', 'changelog.rst') - .splitlines()[1:]), - author='Peter Bengtsson', - author_email='mail@peterbe.com', - license='BSD', - packages=find_packages(), - include_package_data=True, - zip_safe=False, - classifiers=[ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: BSD License', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', - ], - install_requires=find_install_requires(), - tests_require=['nose'], - test_suite='tests', - url='http://github.com/peterbe/mincss' -) +#!/usr/bin/env python + +import ast + +import setuptools + + +def version(): + """Return version string.""" + with open('mincss/__init__.py') as input_file: + for line in input_file: + if line.startswith('__version__'): + return ast.parse(line).body[0].value.s + + +with open('README.rst') as readme: + setuptools.setup( + name='mincss3k', + version=version(), + description='Clears the junk out of your CSS.', + long_description=readme.read(), + license='BSD', + packages=setuptools.find_packages(), + include_package_data=True, + zip_safe=False, + classifiers=[ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: BSD License', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', + ], + install_requires=['lxml', 'cssselect'], + entry_points={'console_scripts': ['mincss=mincss.main:main']}, + test_suite='tests.test_mincss', + url='https://github.com/myint/mincss' + ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/fake_phantomjs b/tests/fake_phantomjs index 3916d2f..202d293 100755 --- a/tests/fake_phantomjs +++ b/tests/fake_phantomjs @@ -1,11 +1,18 @@ #!/usr/bin/env python -import urllib + +import sys + +try: + from urllib.request import urlopen +except ImportError: + from urllib import urlopen + def run(url): - print urllib.urlopen(url).read() + print(urlopen(url).read().decode('utf-8')) return 0 + if __name__ == '__main__': - import sys url = sys.argv[-1] sys.exit(run(url)) diff --git a/tests/test_mincss.py b/tests/test_mincss.py old mode 100644 new mode 100755 index e53d995..642931a --- a/tests/test_mincss.py +++ b/tests/test_mincss.py @@ -1,6 +1,9 @@ +#!/usr/bin/env python + +from __future__ import absolute_import + import os import unittest -from nose.tools import eq_, ok_ # make sure it's running the mincss here and not anything installed import sys @@ -8,12 +11,18 @@ from mincss.processor import Processor -HERE = os.path.dirname(__file__) +try: + unicode +except NameError: + unicode = str + -PHANTOMJS = os.path.join(HERE, 'fake_phantomjs') +HERE = os.path.realpath(os.path.dirname(__file__)) +PHANTOMJS = os.path.realpath(os.path.join(HERE, 'fake_phantomjs')) -class TestMinCSS(unittest.TestCase): + +class Tests(unittest.TestCase): def test_just_inline(self): html = os.path.join(HERE, 'one.html') @@ -24,8 +33,8 @@ def test_just_inline(self): # one.html only has 1 block on inline CSS inline = p.inlines[0] lines_after = inline.after.strip().splitlines() - eq_(inline.line, 7) - ok_(len(inline.after) < len(inline.before)) + self.assertEqual(inline.line, 7) + self.assertTrue(len(inline.after) < len(inline.before)) # compare line by line expect = ''' @@ -34,7 +43,7 @@ def test_just_inline(self): h2 { color:red } ''' for i, line in enumerate(expect.strip().splitlines()): - eq_(line.strip(), lines_after[i].strip()) + self.assertEqual(line.strip(), lines_after[i].strip()) def test_just_one_link(self): html = os.path.join(HERE, 'two.html') @@ -43,9 +52,8 @@ def test_just_one_link(self): p.process(url) # two.html only has 1 link CSS ref link = p.links[0] - eq_(link.href, 'two.css') - #eq_(link.url, url.replace('.html', '.css')) - ok_(len(link.after) < len(link.before)) + self.assertEqual(link.href, 'two.css') + self.assertTrue(len(link.after) < len(link.before)) lines_after = link.after.splitlines() # compare line by line expect = ''' @@ -55,7 +63,7 @@ def test_just_one_link(self): h2 { color:red } ''' for i, line in enumerate(expect.strip().splitlines()): - eq_(line.strip(), lines_after[i].strip()) + self.assertEqual(line.strip(), lines_after[i].strip()) def test_one_link_two_different_pages(self): html = os.path.join(HERE, 'two.html') @@ -66,9 +74,8 @@ def test_one_link_two_different_pages(self): p.process(url1, url2) # two.html only has 1 link CSS ref link = p.links[0] - eq_(link.href, 'two.css') - #eq_(link.url, url1.replace('.html', '.css')) - ok_(len(link.after) < len(link.before)) + self.assertEqual(link.href, 'two.css') + self.assertTrue(len(link.after) < len(link.before)) lines_after = link.after.splitlines() # compare line by line expect = ''' @@ -79,7 +86,7 @@ def test_one_link_two_different_pages(self): .foobar, h2 { color:red } ''' for i, line in enumerate(expect.strip().splitlines()): - eq_(line.strip(), lines_after[i].strip()) + self.assertEqual(line.strip(), lines_after[i].strip()) def test_pseudo_selectors_hell(self): html = os.path.join(HERE, 'three.html') @@ -89,31 +96,30 @@ def test_pseudo_selectors_hell(self): # two.html only has 1 link CSS ref link = p.links[0] after = link.after - ok_('a.three:hover' in after) - ok_('a.hundred:link' not in after) + self.assertTrue('a.three:hover' in after) + self.assertTrue('a.hundred:link' not in after) - ok_('.container > a.one' in after) - ok_('.container > a.notused' not in after) - ok_('input[type="button"]' not in after) + self.assertTrue('.container > a.one' in after) + self.assertTrue('.container > a.notused' not in after) + self.assertTrue('input[type="button"]' not in after) - ok_('input[type="search"]::-webkit-search-decoration' in after) - ok_('input[type="reset"]::-webkit-search-decoration' not in after) + self.assertTrue('input[type="search"]::-webkit-search-decoration' in after) + self.assertTrue('input[type="reset"]::-webkit-search-decoration' not in after) - ok_('@media (max-width: 900px)' in after) - ok_('.container .two' in after) - ok_('a.four' not in after) + self.assertTrue('@media (max-width: 900px)' in after) + self.assertTrue('.container .two' in after) + self.assertTrue('a.four' not in after) - ok_('::-webkit-input-placeholder' in after) - ok_(':-moz-placeholder {' in after) - ok_('div::-moz-focus-inner' in after) - ok_('button::-moz-focus-inner' not in after) + self.assertTrue('::-webkit-input-placeholder' in after) + self.assertTrue(':-moz-placeholder {' in after) + self.assertTrue('div::-moz-focus-inner' in after) + self.assertTrue('button::-moz-focus-inner' not in after) - ok_('@-webkit-keyframes progress-bar-stripes' in after) - ok_('from {' in after) - #print after + self.assertTrue('@-webkit-keyframes progress-bar-stripes' in after) + self.assertTrue('from {' in after) # some day perhaps this can be untangled and parsed too - ok_('@import url(other.css)' in after) + self.assertTrue('@import url(other.css)' in after) def test_media_query_simple(self): html = os.path.join(HERE, 'four.html') @@ -123,12 +129,11 @@ def test_media_query_simple(self): link = p.links[0] after = link.after - #print repr(after) - ok_('/* A comment */' in after, after) - ok_('@media (max-width: 900px) {' in after, after) - ok_('.container .two {' in after, after) - ok_('.container .nine {' not in after, after) - ok_('a.four' not in after, after) + self.assertTrue('/* A comment */' in after, after) + self.assertTrue('@media (max-width: 900px) {' in after, after) + self.assertTrue('.container .two {' in after, after) + self.assertTrue('.container .nine {' not in after, after) + self.assertTrue('a.four' not in after, after) def test_double_classes(self): html = os.path.join(HERE, 'five.html') @@ -137,11 +142,11 @@ def test_double_classes(self): p.process(url) after = p.links[0].after - eq_(after.count('{'), after.count('}')) - ok_('input.span6' in after) - ok_('.uneditable-input.span9' in after) - ok_('.uneditable-{' not in after) - ok_('.uneditable-input.span3' not in after) + self.assertEqual(after.count('{'), after.count('}')) + self.assertTrue('input.span6' in after) + self.assertTrue('.uneditable-input.span9' in after) + self.assertTrue('.uneditable-{' not in after) + self.assertTrue('.uneditable-input.span3' not in after) def test_complicated_keyframes(self): html = os.path.join(HERE, 'six.html') @@ -150,10 +155,10 @@ def test_complicated_keyframes(self): p.process(url) after = p.inlines[0].after - eq_(after.count('{'), after.count('}')) - ok_('.pull-left' in after) - ok_('.pull-right' in after) - ok_('.pull-middle' not in after) + self.assertEqual(after.count('{'), after.count('}')) + self.assertTrue('.pull-left' in after) + self.assertTrue('.pull-right' in after) + self.assertTrue('.pull-middle' not in after) def test_ignore_annotations(self): html = os.path.join(HERE, 'seven.html') @@ -162,16 +167,16 @@ def test_ignore_annotations(self): p.process(url) after = p.inlines[0].after - eq_(after.count('{'), after.count('}')) - ok_('/* Leave this comment as is */' in after) - ok_('/* Lastly leave this as is */' in after) - ok_('/* Also stick around */' in after) - ok_('/* leave untouched */' in after) - ok_('.north' in after) - ok_('.south' in after) - ok_('.east' not in after) - ok_('.west' in after) - ok_('no mincss' not in after) + self.assertEqual(after.count('{'), after.count('}')) + self.assertTrue('/* Leave this comment as is */' in after) + self.assertTrue('/* Lastly leave this as is */' in after) + self.assertTrue('/* Also stick around */' in after) + self.assertTrue('/* leave untouched */' in after) + self.assertTrue('.north' in after) + self.assertTrue('.south' in after) + self.assertTrue('.east' not in after) + self.assertTrue('.west' in after) + self.assertTrue('no mincss' not in after) def test_non_ascii_html(self): html = os.path.join(HERE, 'eight.html') @@ -180,8 +185,8 @@ def test_non_ascii_html(self): p.process(url) after = p.inlines[0].after - ok_(isinstance(after, unicode)) - ok_(u'Varf\xf6r st\xe5r det h\xe4r?' in after) + self.assertTrue(isinstance(after, unicode)) + self.assertTrue(u'Varf\xf6r st\xe5r det h\xe4r?' in after) def test_preserve_remote_urls(self): html = os.path.join(HERE, 'nine.html') @@ -190,13 +195,13 @@ def test_preserve_remote_urls(self): p.process(url) after = p.links[0].after - ok_("url('http://www.google.com/north.png')" in after) + self.assertTrue("url('http://www.google.com/north.png')" in after) url = 'file://' + HERE + '/deeper/south.png' - ok_('url("%s")' % url in after) + self.assertTrue('url("%s")' % url in after) # since local file URLs don't have a domain, this is actually expected - ok_('url("file:///east.png")' in after) + self.assertTrue('url("file:///east.png")' in after) url = 'file://' + HERE + '/west.png' - ok_('url("%s")' % url in after) + self.assertTrue('url("%s")' % url in after) def test_download_with_phantomjs(self): html = os.path.join(HERE, 'one.html') @@ -210,8 +215,8 @@ def test_download_with_phantomjs(self): # one.html only has 1 block on inline CSS inline = p.inlines[0] lines_after = inline.after.strip().splitlines() - eq_(inline.line, 7) - ok_(len(inline.after) < len(inline.before)) + self.assertEqual(inline.line, 7) + self.assertTrue(len(inline.after) < len(inline.before)) # compare line by line expect = ''' @@ -220,43 +225,47 @@ def test_download_with_phantomjs(self): h2 { color:red } ''' for i, line in enumerate(expect.strip().splitlines()): - eq_(line.strip(), lines_after[i].strip()) + self.assertEqual(line.strip(), lines_after[i].strip()) def test_make_absolute_url(self): p = Processor() - eq_( + self.assertEqual( p.make_absolute_url('http://www.com/', './style.css'), 'http://www.com/style.css' ) - eq_( + self.assertEqual( p.make_absolute_url('http://www.com', './style.css'), 'http://www.com/style.css' ) - eq_( + self.assertEqual( p.make_absolute_url('http://www.com', '//cdn.com/style.css'), 'http://cdn.com/style.css' ) - eq_( + self.assertEqual( p.make_absolute_url('http://www.com/', '//cdn.com/style.css'), 'http://cdn.com/style.css' ) - eq_( + self.assertEqual( p.make_absolute_url('http://www.com/', '/style.css'), 'http://www.com/style.css' ) - eq_( + self.assertEqual( p.make_absolute_url('http://www.com/elsewhere', '/style.css'), 'http://www.com/style.css' ) - eq_( + self.assertEqual( p.make_absolute_url('http://www.com/elsewhere/', '/style.css'), 'http://www.com/style.css' ) - eq_( + self.assertEqual( p.make_absolute_url('http://www.com/elsewhere/', './style.css'), 'http://www.com/elsewhere/style.css' ) - eq_( + self.assertEqual( p.make_absolute_url('http://www.com/elsewhere', './style.css'), 'http://www.com/style.css' ) + + +if __name__ == '__main__': + unittest.main()