|
26 | 26 | unicode = str |
27 | 27 |
|
28 | 28 |
|
| 29 | +INLINE = 'inline' |
| 30 | +LINK = 'link' |
| 31 | + |
29 | 32 | RE_FIND_MEDIA = re.compile('(@media.+?)(\{)', re.DOTALL | re.MULTILINE) |
30 | 33 | RE_NESTS = re.compile('@(-|keyframes).*?({)', re.DOTALL | re.M) |
31 | 34 | RE_CLASS_DEF = re.compile('\.([\w-]+)') |
@@ -136,22 +139,26 @@ def process(self, *urls): |
136 | 139 | for url in urls: |
137 | 140 | self.process_url(url) |
138 | 141 |
|
139 | | - for identifier in sorted(self.blocks.keys(), key=lambda x: str(x[0])): |
| 142 | + for identifier in sorted(self.blocks.keys()): |
140 | 143 | content = self.blocks[identifier] |
141 | 144 | processed = self._process_content(content, self._bodies) |
142 | 145 |
|
143 | | - if isinstance(identifier[0], int): |
144 | | - line, url = identifier |
| 146 | + if identifier[1] == INLINE: |
| 147 | + line, _, url, no_mincss = identifier |
| 148 | + if no_mincss: |
| 149 | + processed = content |
145 | 150 | self.inlines.append( |
146 | 151 | InlineResult( |
147 | 152 | line, |
148 | 153 | url, |
149 | 154 | content, |
150 | | - processed |
| 155 | + processed, |
151 | 156 | ) |
152 | 157 | ) |
153 | 158 | else: |
154 | | - url, href = identifier |
| 159 | + _, _, url, href, no_mincss = identifier |
| 160 | + if no_mincss: |
| 161 | + processed = content |
155 | 162 | self.links.append( |
156 | 163 | LinkResult( |
157 | 164 | href, |
@@ -199,19 +206,44 @@ def process_html(self, html, url): |
199 | 206 | # happens when the style tag has absolutely nothing in it, |
200 | 207 | # not even whitespace |
201 | 208 | continue |
202 | | - for i, line in enumerate(lines): |
| 209 | + no_mincss = False |
| 210 | + try: |
| 211 | + data_attrib = style.attrib['data-mincss'].lower() |
| 212 | + if data_attrib == 'ignore': |
| 213 | + continue |
| 214 | + elif data_attrib == 'no': |
| 215 | + no_mincss = True |
| 216 | + |
| 217 | + except KeyError: |
| 218 | + # happens if the attribute key isn't there |
| 219 | + pass |
| 220 | + |
| 221 | + for i, line in enumerate(lines, start=1): |
203 | 222 | if line.count(first_line): |
204 | | - key = (i + 1, url) |
| 223 | + key = (i, INLINE, url, no_mincss) |
205 | 224 | self.blocks[key] = style.text |
206 | 225 | break |
207 | 226 |
|
| 227 | + i = 0 |
208 | 228 | for link in CSSSelector('link')(page): |
209 | 229 | if ( |
210 | 230 | link.attrib.get('rel', '') == 'stylesheet' or |
211 | 231 | link.attrib['href'].lower().split('?')[0].endswith('.css') |
212 | 232 | ): |
| 233 | + no_mincss = False |
| 234 | + try: |
| 235 | + data_attrib = link.attrib['data-mincss'].lower() |
| 236 | + if data_attrib == 'ignore': |
| 237 | + continue |
| 238 | + if data_attrib == 'no': |
| 239 | + no_mincss = True |
| 240 | + except KeyError: |
| 241 | + # happens if the attribute key isn't there |
| 242 | + pass |
| 243 | + |
213 | 244 | link_url = self.make_absolute_url(url, link.attrib['href']) |
214 | | - key = (link_url, link.attrib['href']) |
| 245 | + key = (i, LINK, link_url, link.attrib['href'], no_mincss) |
| 246 | + i += 1 |
215 | 247 | self.blocks[key] = self.download(link_url) |
216 | 248 | if self.preserve_remote_urls: |
217 | 249 | self.blocks[key] = self._rewrite_urls( |
@@ -337,6 +369,7 @@ def commentmatcher(match): |
337 | 369 | ) |
338 | 370 |
|
339 | 371 | for temp_key, old, __ in inner_improvements: |
| 372 | + assert old in content, old |
340 | 373 | content = content.replace(old, temp_key) |
341 | 374 |
|
342 | 375 | _regex = re.compile('((.*?){(.*?)})', re.DOTALL | re.M) |
@@ -396,6 +429,7 @@ def matcher(match): |
396 | 429 | fixed = _regex.sub(matcher, content) |
397 | 430 |
|
398 | 431 | for temp_key, __, improved in inner_improvements: |
| 432 | + assert temp_key in fixed |
399 | 433 | fixed = fixed.replace(temp_key, improved) |
400 | 434 | for temp_key, whole in comments: |
401 | 435 | # note, `temp_key` might not be in the `fixed` thing because the |
|
0 commit comments