Skip to content

Commit f7e7780

Browse files
author
Peter Bengtsson
committed
Merge pull request #38 from peterbe/ability-to-skip-and-ignore
ability to skip and ignore
2 parents 14895dc + 186d055 commit f7e7780

File tree

10 files changed

+188
-10
lines changed

10 files changed

+188
-10
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ install:
3737

3838
script:
3939
- nosetests
40-
- mincss https://travis-ci.org
40+
- mincss https://news.ycombinator.com
4141

4242
deploy:
4343
provider: pypi

README.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,26 @@ this for example:
7171
}
7272

7373
That tells ``mincss`` to ignore the whole block and all its selectors.
74+
75+
Ignore CSS
76+
----------
77+
78+
By default, ``mincss`` will find all ``<link rel="stylesheet" ...`` and
79+
``<style...>`` tags and process them. If you have a link or an inline
80+
tag that you don't want ``mincss`` to even notice, simply add this attribute
81+
to the tag:
82+
83+
::
84+
85+
data-mincss="ignore"
86+
87+
Leave CSS as is
88+
---------------
89+
90+
One technique to have a specific CSS selector be ignored by ``mincss`` is to
91+
put in a comment like ``/* no mincss */`` inside the CSS selector's
92+
block.
93+
94+
Another way is to leave the whole stylesheet as is, by adding ``data-mincss="no"`` to the tag. This is useful
95+
when you have a ``link`` or ``style`` tag whose CSS ``mincss`` should
96+
leave untouched but still find and include in the parsed result.

mincss/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import sys
44

5-
from . import main
5+
from mincss import main
66

77

88
if __name__ == '__main__':

mincss/processor.py

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@
2626
unicode = str
2727

2828

29+
INLINE = 'inline'
30+
LINK = 'link'
31+
2932
RE_FIND_MEDIA = re.compile('(@media.+?)(\{)', re.DOTALL | re.MULTILINE)
3033
RE_NESTS = re.compile('@(-|keyframes).*?({)', re.DOTALL | re.M)
3134
RE_CLASS_DEF = re.compile('\.([\w-]+)')
@@ -136,22 +139,26 @@ def process(self, *urls):
136139
for url in urls:
137140
self.process_url(url)
138141

139-
for identifier in sorted(self.blocks.keys(), key=lambda x: str(x[0])):
142+
for identifier in sorted(self.blocks.keys()):
140143
content = self.blocks[identifier]
141144
processed = self._process_content(content, self._bodies)
142145

143-
if isinstance(identifier[0], int):
144-
line, url = identifier
146+
if identifier[1] == INLINE:
147+
line, _, url, no_mincss = identifier
148+
if no_mincss:
149+
processed = content
145150
self.inlines.append(
146151
InlineResult(
147152
line,
148153
url,
149154
content,
150-
processed
155+
processed,
151156
)
152157
)
153158
else:
154-
url, href = identifier
159+
_, _, url, href, no_mincss = identifier
160+
if no_mincss:
161+
processed = content
155162
self.links.append(
156163
LinkResult(
157164
href,
@@ -199,19 +206,44 @@ def process_html(self, html, url):
199206
# happens when the style tag has absolutely nothing in it,
200207
# not even whitespace
201208
continue
202-
for i, line in enumerate(lines):
209+
no_mincss = False
210+
try:
211+
data_attrib = style.attrib['data-mincss'].lower()
212+
if data_attrib == 'ignore':
213+
continue
214+
elif data_attrib == 'no':
215+
no_mincss = True
216+
217+
except KeyError:
218+
# happens if the attribute key isn't there
219+
pass
220+
221+
for i, line in enumerate(lines, start=1):
203222
if line.count(first_line):
204-
key = (i + 1, url)
223+
key = (i, INLINE, url, no_mincss)
205224
self.blocks[key] = style.text
206225
break
207226

227+
i = 0
208228
for link in CSSSelector('link')(page):
209229
if (
210230
link.attrib.get('rel', '') == 'stylesheet' or
211231
link.attrib['href'].lower().split('?')[0].endswith('.css')
212232
):
233+
no_mincss = False
234+
try:
235+
data_attrib = link.attrib['data-mincss'].lower()
236+
if data_attrib == 'ignore':
237+
continue
238+
if data_attrib == 'no':
239+
no_mincss = True
240+
except KeyError:
241+
# happens if the attribute key isn't there
242+
pass
243+
213244
link_url = self.make_absolute_url(url, link.attrib['href'])
214-
key = (link_url, link.attrib['href'])
245+
key = (i, LINK, link_url, link.attrib['href'], no_mincss)
246+
i += 1
215247
self.blocks[key] = self.download(link_url)
216248
if self.preserve_remote_urls:
217249
self.blocks[key] = self._rewrite_urls(
@@ -337,6 +369,7 @@ def commentmatcher(match):
337369
)
338370

339371
for temp_key, old, __ in inner_improvements:
372+
assert old in content, old
340373
content = content.replace(old, temp_key)
341374

342375
_regex = re.compile('((.*?){(.*?)})', re.DOTALL | re.M)
@@ -396,6 +429,7 @@ def matcher(match):
396429
fixed = _regex.sub(matcher, content)
397430

398431
for temp_key, __, improved in inner_improvements:
432+
assert temp_key in fixed
399433
fixed = fixed.replace(temp_key, improved)
400434
for temp_key, whole in comments:
401435
# note, `temp_key` might not be in the `fixed` thing because the

tests/ignore-inline.html

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<meta charset='utf-8'>
5+
<title>test page</title>
6+
<style data-mincss="ignore">
7+
h1, h2, h3 { text-align: center; }
8+
h3, h4 { font-family: serif; }
9+
.foobar { delete:me }
10+
.foobar, h4, h2 { color:red }
11+
#none, .exists { delete: me-too; }
12+
</style>
13+
</head>
14+
<body>
15+
<h1>h1</h1>
16+
<h2>h2</h2>
17+
<h3>h3</h3>
18+
</body>
19+
</html>

tests/ignore-link.html

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<meta charset='utf-8'>
5+
<title>test page</title>
6+
<link rel="stylesheet" href='two.css' data-mincss='ignore'>
7+
</head>
8+
<body>
9+
<h1>h1</h1>
10+
<h2>h2</h2>
11+
<h3>h3</h3>
12+
</body>
13+
</html>

tests/no-mincss-inline.html

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<meta charset='utf-8'>
5+
<title>test page</title>
6+
<style data-mincss="no">
7+
h1, h2, h3 { text-align: center; }
8+
h3, h4 { font-family: serif; }
9+
.foobar { delete:me }
10+
.foobar, h4, h2 { color:red }
11+
#none, .exists { delete: me-too; }
12+
</style>
13+
</head>
14+
<body>
15+
<h1>h1</h1>
16+
<h2>h2</h2>
17+
<h3>h3</h3>
18+
</body>
19+
</html>

tests/no-mincss-link.html

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<meta charset='utf-8'>
5+
<title>test page</title>
6+
<link rel="stylesheet" href='two.css' data-mincss="no">
7+
</head>
8+
<body>
9+
<h1>h1</h1>
10+
<h2>h2</h2>
11+
<h3>h3</h3>
12+
</body>
13+
</html>

tests/test_mincss.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,20 @@ def test_just_inline(self):
4242
for i, line in enumerate(expect.strip().splitlines()):
4343
eq_(line.strip(), lines_after[i].strip())
4444

45+
def test_ignore_inline(self):
46+
html = os.path.join(HERE, 'ignore-inline.html')
47+
url = 'file://' + html
48+
p = Processor()
49+
p.process(url)
50+
assert not p.inlines
51+
52+
def test_no_mincss_inline(self):
53+
html = os.path.join(HERE, 'no-mincss-inline.html')
54+
url = 'file://' + html
55+
p = Processor()
56+
p.process(url)
57+
eq_(p.inlines[0].before, p.inlines[0].after)
58+
4559
def test_html_with_empty_style_tag(self):
4660
html = os.path.join(HERE, 'one-2.html')
4761
url = 'file://' + html
@@ -76,6 +90,29 @@ def test_just_one_link(self):
7690
for i, line in enumerate(expect.strip().splitlines()):
7791
eq_(line.strip(), lines_after[i].strip())
7892

93+
def test_no_mincss_link(self):
94+
html = os.path.join(HERE, 'no-mincss-link.html')
95+
url = 'file://' + html
96+
p = Processor()
97+
p.process(url)
98+
link = p.links[0]
99+
eq_(link.before, link.after)
100+
101+
def test_ignore_link(self):
102+
html = os.path.join(HERE, 'ignore-link.html')
103+
url = 'file://' + html
104+
p = Processor()
105+
p.process(url)
106+
assert not p.links
107+
108+
def test_respect_link_order(self):
109+
html = os.path.join(HERE, 'three-links.html')
110+
url = 'file://' + html
111+
p = Processor()
112+
p.process(url)
113+
hrefs = [x.href for x in p.links]
114+
eq_(hrefs, ['two.css', 'three.css'])
115+
79116
def test_one_link_two_different_pages(self):
80117
html = os.path.join(HERE, 'two.html')
81118
url1 = 'file://' + html

tests/three-links.html

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<meta charset='utf-8'>
5+
<title>test page</title>
6+
<!-- alphabetically, "three.css" comes before "two.css"
7+
but that shouldn't matter -->
8+
<link rel="stylesheet" href='two.css'>
9+
<link rel="stylesheet" href='three.css'>
10+
</head>
11+
<body>
12+
<div class="container">
13+
<a href="#" class="one">one</a>
14+
<a href="#" class="two">one</a>
15+
<a href="#" class="three">three</a>
16+
</div>
17+
18+
<input type="search">
19+
</body>
20+
</html>

0 commit comments

Comments
 (0)