From 856621a21479fd51daebfce42d5d94ddabef13e3 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Tue, 24 Apr 2012 17:03:29 +0200
Subject: [PATCH 001/192] Add support for :lang(), close #3
---
CHANGES | 11 +++++++++++
cssselect/__init__.py | 2 +-
cssselect/tests.py | 13 ++++++++-----
cssselect/xpath.py | 21 ++++++++++++++++-----
docs/index.rst | 3 +--
5 files changed, 37 insertions(+), 13 deletions(-)
diff --git a/CHANGES b/CHANGES
index d4ac88b..4df6f02 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,17 @@
Changelog
=========
+Version 0.6
+-----------
+
+* In ``setup.py`` use setuptools/distribute if available, but fall back
+ on distutils.
+* Implement the ``:lang()`` pseudo-class, although it is only based on
+ ``xml:lang`` or ``lang`` attributes. If the document language is known from
+ some other meta-data (like a ``Content-Language`` HTTP header or ``<meta>``
+ element), a workaround is to set a lang attribute on the root element.
+
+
Version 0.5
-----------
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index 9f17ddb..5c70835 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -18,5 +18,5 @@
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '0.5'
+VERSION = '0.6'
__version__ = VERSION
diff --git a/cssselect/tests.py b/cssselect/tests.py
index 94e8f79..12b43aa 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -18,7 +18,6 @@
"""
import sys
-import operator
import unittest
from lxml import etree, html
@@ -392,7 +391,6 @@ def xpath(css):
self.assertRaises(ExpressionError, xpath, ':last-of-type')
self.assertRaises(ExpressionError, xpath, ':nth-of-type(1)')
self.assertRaises(ExpressionError, xpath, ':nth-last-of-type(1)')
- self.assertRaises(ExpressionError, xpath, ':lang(fr)')
self.assertRaises(ExpressionError, xpath, ':nth-child(n-)')
self.assertRaises(ExpressionError, xpath, ':after')
self.assertRaises(ExpressionError, xpath, ':lorem-ipsum')
@@ -497,8 +495,14 @@ def pcss(main, *selectors, **kwargs):
assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == [
'foobar-div']
assert pcss('div[foobar~="cd"]') == []
- assert pcss('*[lang|="en"]', '[lang|="en-US"]') == ['second-li']
+ assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ['second-li']
+ # Attribute values are case sensitive
+ assert pcss('*[lang|="en"]', '[lang|="en-US"]') == []
assert pcss('*[lang|="e"]') == []
+ # ... :lang() is not.
+ assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [
+ 'second-li', 'li-div']
+ assert pcss(':lang("e")', html_only=True) == []
assert pcss('li:nth-child(3)') == ['third-li']
assert pcss('li:nth-child(10)') == []
assert pcss('li:nth-child(2n)', 'li:nth-child(even)',
@@ -524,7 +528,6 @@ def pcss(main, *selectors, **kwargs):
assert pcss('li div:only-child') == ['li-div']
assert pcss('div *:only-child') == ['li-div', 'foobar-span']
self.assertRaises(ExpressionError, pcss, 'p *:only-of-type')
- self.assertRaises(ExpressionError, pcss, 'p:lang(fr)')
assert pcss('p:only-of-type') == ['paragraph']
assert pcss('a:empty', 'a:EMpty') == ['name-anchor']
assert pcss('li:empty') == [
@@ -661,7 +664,7 @@ def count(selector):
link
- content
- -
+
-
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 7a865c3..53ba40b 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -116,6 +116,10 @@ class GenericTranslator(object):
#: http://www.w3.org/TR/selectors/#id-selectors
id_attribute = 'id'
+ #: The attribute used for ``:lang()`` depends on the document language:
+ #: http://www.w3.org/TR/selectors/#lang-pseudo
+ lang_attribute = 'xml:lang'
+
#: The case sensitivity of document language element names,
#: attribute names, and attribute values in selectors depends
#: on the document language.
@@ -366,11 +370,15 @@ def xpath_contains_function(self, xpath, function):
return xpath.add_condition('contains(string(.), %s)'
% self.xpath_literal(function.arguments))
- def function_unsupported(self, xpath, pseudo):
- raise ExpressionError(
- "The pseudo-class :%s() is not supported" % pseudo.name)
-
- xpath_lang_function = function_unsupported
+ def xpath_lang_function(self, xpath, function):
+ return xpath.add_condition(
+ "ancestor-or-self::*[@lang][1][starts-with(concat("
+ # XPath 1.0 has no lower-case function...
+ "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
+ "'abcdefghijklmnopqrstuvwxyz'), "
+ "'-'), %s)]"
+ % (self.lang_attribute, self.xpath_literal(
+ function.arguments.lower() + '-')))
# Pseudo: dispatch by pseudo-class name
@@ -497,6 +505,9 @@ class HTMLTranslator(GenericTranslator):
are case-insensitive.
"""
+
+ lang_attribute = 'lang'
+
def __init__(self, xhtml=False):
self.xhtml = xhtml # Might be useful for sub-classes?
if not xhtml:
diff --git a/docs/index.rst b/docs/index.rst
index 5cf8743..9aec19e 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -92,7 +92,6 @@ they never match:
These applicable pseudo-classes are not yet implemented:
-* ``:lang(language)``
* ``*:first-of-type``, ``*:last-of-type``, ``*:nth-of-type``,
``*:nth-last-of-type``, ``*:only-of-type``. All of these work when
you specify an element type, but not with ``*``
@@ -136,7 +135,7 @@ implemented without forking or monkey-patching cssselect.
The "customization API" is the set of methods in translation classes
and their signature. You can look at the `source code`_ to see how it works.
However, be aware that this API is not very stable yet. It might change
-and break you sub-class.
+and break your sub-class.
.. _source code: https://github.com/SimonSapin/cssselect/blob/master/cssselect/xpath.py
From ab92f28755392a1a2a0f3edaaf58321352fbf0a1 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Tue, 24 Apr 2012 17:03:53 +0200
Subject: [PATCH 002/192] Tag v0.6
---
CHANGES | 2 ++
1 file changed, 2 insertions(+)
diff --git a/CHANGES b/CHANGES
index 4df6f02..c4d3968 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,8 @@ Changelog
Version 0.6
-----------
+Released on 2012-04-24.
+
* In ``setup.py`` use setuptools/distribute if available, but fall back
on distutils.
* Implement the ``:lang()`` pseudo-class, although it is only based on
From 19443c77dd923f91902fe7a18fe5cea07ce80a43 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Wed, 25 Apr 2012 13:14:28 +0200
Subject: [PATCH 003/192] Make sure Selector.pseudo_element is unicode, not a
Symbol.
---
CHANGES | 9 +++++++++
cssselect/__init__.py | 2 +-
cssselect/parser.py | 4 ++--
cssselect/tests.py | 10 ++++++----
4 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/CHANGES b/CHANGES
index c4d3968..c72c2ec 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,15 @@
Changelog
=========
+Version 0.6.1
+-------------
+
+Released on 2012-04-25.
+
+Make sure that internal token objects do not "leak" into the public API and
+:attr:`Selector.pseudo_element` is a unicode string.
+
+
Version 0.6
-----------
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index 5c70835..08b529e 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -18,5 +18,5 @@
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '0.6'
+VERSION = '0.6.1'
__version__ = VERSION
diff --git a/cssselect/parser.py b/cssselect/parser.py
index e38ae07..d76742d 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -381,7 +381,7 @@ def parse_simple_selector(stream, inside_negation=False):
"Expected ']', got '%s'" % next)
elif peek == '::':
stream.next()
- pseudo_element = stream.next_symbol()
+ pseudo_element = _unicode(stream.next_symbol())
continue
elif peek == ':':
stream.next()
@@ -389,7 +389,7 @@ def parse_simple_selector(stream, inside_negation=False):
if ident in ('first-line', 'first-letter', 'before', 'after'):
# Special case: CSS 2.1 pseudo-elements can have a single ':'
# Any new pseudo-element must have two.
- pseudo_element = ident
+ pseudo_element = _unicode(ident)
continue
if stream.peek() != '(':
result = Pseudo(result, ident)
diff --git a/cssselect/tests.py b/cssselect/tests.py
index 12b43aa..fea4b67 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -23,7 +23,7 @@
from lxml import etree, html
from cssselect import (parse, GenericTranslator, HTMLTranslator,
SelectorSyntaxError, ExpressionError)
-from cssselect.parser import tokenize, parse_series
+from cssselect.parser import tokenize, parse_series, _unicode
class TestCssselect(unittest.TestCase):
@@ -131,9 +131,11 @@ def test_pseudo_elements(self):
def parse_pseudo(css):
result = []
for selector in parse(css):
- result.append((
- repr(selector.parsed_tree).replace("(u'", "('"),
- selector.pseudo_element))
+ pseudo = selector.pseudo_element
+ # No Symbol here
+ assert pseudo is None or type(pseudo) is _unicode
+ selector = repr(selector.parsed_tree).replace("(u'", "('")
+ result.append((selector, pseudo))
return result
def parse_one(css):
From 7189f52304f82c10d755b56012a2e61e98e6806b Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 7 Jun 2012 17:46:30 +0200
Subject: [PATCH 004/192] Rewrite the tokenizer to conform to the spec grammar.
---
cssselect/parser.py | 484 +++++++++++++++++++++++---------------------
cssselect/tests.py | 157 +++++++-------
cssselect/xpath.py | 40 ++--
3 files changed, 355 insertions(+), 326 deletions(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index d76742d..9ccd66f 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -14,6 +14,7 @@
import sys
import re
+import operator
if sys.version_info[0] < 3:
@@ -118,7 +119,11 @@ def __init__(self, selector, name, arguments):
def __repr__(self):
return '%s[%r:%s(%r)]' % (
- self.__class__.__name__, self.selector, self.name, self.arguments)
+ self.__class__.__name__, self.selector, self.name,
+ [token.value for token in self.arguments])
+
+ def argument_types(self):
+ return [token.type for token in self.arguments]
def specificity(self):
a, b, c = self.selector.specificity()
@@ -174,10 +179,10 @@ def __init__(self, selector, namespace, attrib, operator, value):
self.value = value
def __repr__(self):
- if self.namespace == '*':
- attrib = self.attrib
- else:
+ if self.namespace:
attrib = '%s|%s' % (self.namespace, self.attrib)
+ else:
+ attrib = self.attrib
if self.operator == 'exists':
return '%s[%r[%s]]' % (
self.__class__.__name__, self.selector, attrib)
@@ -195,24 +200,25 @@ def specificity(self):
class Element(object):
"""
Represents namespace|element
+
+ `None` is for the universal selector '*'
+
"""
- def __init__(self, namespace, element):
+ def __init__(self, namespace=None, element=None):
self.namespace = namespace
self.element = element
def __repr__(self):
- if self.namespace == '*':
- element = self.element
- else:
- element = '%s|%s' % (self.namespace, self.element)
- return '%s[%s]' % (
- self.__class__.__name__, element)
+ element = self.element or '*'
+ if self.namespace:
+ element = '%s|%s' % (self.namespace, element)
+ return '%s[%s]' % (self.__class__.__name__, element)
def specificity(self):
- if self.element == '*':
- return 0, 0, 0
- else:
+ if self.element:
return 0, 0, 1
+ else:
+ return 0, 0, 0
class Hash(object):
@@ -256,9 +262,15 @@ def specificity(self):
#### Parser
-_el_re = re.compile(r'^\s*(\w+)$')
-_id_re = re.compile(r'^\s*(\w*)#(\w+)\s*$')
-_class_re = re.compile(r'^\s*(\w*)\.(\w+)\s*$')
+# foo
+_el_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$')
+
+# foo#bar or #bar
+_id_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$')
+
+# foo.bar or .bar
+_class_re = re.compile(
+ r'^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$')
def parse(css):
@@ -279,36 +291,35 @@ def parse(css):
# Fast path for simple cases
match = _el_re.match(css)
if match:
- return [Selector(Element('*', match.group(1)))]
+ return [Selector(Element(element=match.group(1)))]
match = _id_re.match(css)
if match is not None:
- return [Selector(Hash(Element(
- '*', match.group(1) or '*'), match.group(2)))]
+ return [Selector(Hash(Element(element=match.group(1) or None),
+ match.group(2)))]
match = _class_re.match(css)
if match is not None:
- return [Selector(Class(Element(
- '*', match.group(1) or '*'), match.group(2)))]
+ return [Selector(Class(Element(element=match.group(1) or None),
+ match.group(2)))]
stream = TokenStream(tokenize(css))
stream.source = css
- try:
- return list(parse_selector_group(stream))
- except SelectorSyntaxError:
- e = sys.exc_info()[1]
- message = "%s at %s -> %r" % (
- e, stream.used, stream.peek())
- e.msg = message
- if sys.version_info < (2,6):
- e.message = message
- e.args = tuple([message])
- raise
+ return list(parse_selector_group(stream))
+# except SelectorSyntaxError:
+# e = sys.exc_info()[1]
+# message = "%s at %s -> %r" % (
+# e, stream.used, stream.peek())
+# e.msg = message
+# if sys.version_info < (2,6):
+# e.message = message
+# e.args = tuple([message])
+# raise
def parse_selector_group(stream):
stream.skip_whitespace()
while 1:
yield Selector(*parse_selector(stream))
- if stream.peek() == ',':
+ if stream.peek() == ('DELIM', ','):
stream.next()
stream.skip_whitespace()
else:
@@ -319,14 +330,15 @@ def parse_selector(stream):
while 1:
stream.skip_whitespace()
peek = stream.peek()
- if peek == ',' or peek is None:
+ if peek in (('EOF', None), ('DELIM', ',')):
break
if pseudo_element:
raise SelectorSyntaxError(
- 'A pseudo-element must be at the end of a selector')
- if peek in ('+', '>', '~'):
+ 'Got pseudo-element ::%s not at the end of a selector'
+ % pseudo_element)
+ if peek.is_delim('+', '>', '~'):
# A combinator
- combinator = stream.next()
+ combinator = stream.next().value
stream.skip_whitespace()
else:
# By exclusion, the last parse_simple_selector() ended
@@ -339,59 +351,54 @@ def parse_selector(stream):
def parse_simple_selector(stream, inside_negation=False):
stream.skip_whitespace()
+ selector_start = len(stream.used)
peek = stream.peek()
- consumed = len(stream.used)
- if peek == '*' or isinstance(peek, Symbol):
- next = stream.next()
- if stream.peek() == '|':
- namespace = next
+ if peek.type == 'IDENT' or peek == ('DELIM', '*'):
+ if peek.type == 'IDENT':
+ namespace = stream.next().value
+ else:
stream.next()
- element = stream.next_symbol_or_star()
+ namespace = None
+ if stream.peek() == ('DELIM', '|'):
+ stream.next()
+ element = stream.next_ident_or_star()
else:
- namespace = '*'
- element = next
+ element = namespace
+ namespace = None
else:
- element = namespace = '*'
+ element = namespace = None
result = Element(namespace, element)
pseudo_element = None
while 1:
peek = stream.peek()
- if peek in (None, ' ', ',', '+', '>', '~') or (
- inside_negation and peek == ')'):
+ if peek.type in ('S', 'EOF') or peek.is_delim(',', '+', '>', '~') or (
+ inside_negation and peek == ('DELIM', ')')):
break
if pseudo_element:
raise SelectorSyntaxError(
- 'A pseudo-element must be at the end of a selector')
- if peek == '#':
- stream.next()
- result = Hash(result, stream.next_symbol())
- continue
- elif peek == '.':
+ 'Got pseudo-element ::%s not at the end of a selector'
+ % pseudo_element)
+ if peek.type == 'HASH':
+ result = Hash(result, stream.next().value)
+ elif peek == ('DELIM', '.'):
stream.next()
- result = Class(result, stream.next_symbol())
- continue
- elif peek == '[':
+ result = Class(result, stream.next_ident())
+ elif peek == ('DELIM', '['):
stream.next()
result = parse_attrib(result, stream)
- next = stream.next()
- if next == ']':
- continue
- else:
- raise SelectorSyntaxError(
- "Expected ']', got '%s'" % next)
- elif peek == '::':
+ elif peek == ('DELIM', ':'):
stream.next()
- pseudo_element = _unicode(stream.next_symbol())
- continue
- elif peek == ':':
- stream.next()
- ident = stream.next_symbol()
+ if stream.peek() == ('DELIM', ':'):
+ stream.next()
+ pseudo_element = stream.next_ident()
+ continue
+ ident = stream.next_ident()
if ident in ('first-line', 'first-letter', 'before', 'after'):
# Special case: CSS 2.1 pseudo-elements can have a single ':'
# Any new pseudo-element must have two.
pseudo_element = _unicode(ident)
continue
- if stream.peek() != '(':
+ if stream.peek() != ('DELIM', '('):
result = Pseudo(result, ident)
continue
stream.next()
@@ -401,60 +408,90 @@ def parse_simple_selector(stream, inside_negation=False):
raise SelectorSyntaxError('Got nested :not()')
argument, argument_pseudo_element = parse_simple_selector(
stream, inside_negation=True)
+ next = stream.next()
if argument_pseudo_element:
raise SelectorSyntaxError(
- 'Pseudo-elements are not allowed inside :not()')
+ 'Got pseudo-element ::%s inside :not() at %s'
+ % (argument_pseudo_element, next.pos))
+ if next != ('DELIM', ')'):
+ raise SelectorSyntaxError("Expected ')', got %s" % (next,))
result = Negation(result, argument)
else:
- peek = stream.peek()
- if isinstance(peek, (Symbol, String)):
- argument = stream.next()
- else:
+ arguments = []
+ while 1:
+ stream.skip_whitespace()
+ next = stream.next()
+ if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [
+ ('DELIM', '+'), ('DELIM', '-')]:
+ arguments.append(next)
+ elif next == ('DELIM', ')'):
+ break
+ else:
+ raise SelectorSyntaxError(
+ "Expected an argument, got %s" % (next,))
+ if not arguments:
raise SelectorSyntaxError(
- "Expected argument, got '%s'" % peek)
- result = Function(result, ident, argument)
- stream.skip_whitespace()
- next = stream.next()
- if next == ')':
- continue
- else:
- raise SelectorSyntaxError(
- "Expected ')', got '%s'" % next)
+ "Expected at least one argument, got %s" % (next,))
+ result = Function(result, ident, arguments)
else:
raise SelectorSyntaxError(
- "Expected selector, got '%s'" % peek)
- if consumed == len(stream.used):
+ "Expected selector, got %s" % (peek,))
+ if len(stream.used) == selector_start:
raise SelectorSyntaxError(
- "Expected selector, got '%s'" % stream.peek())
+ "Expected selector, got %s" % (stream.peek(),))
return result, pseudo_element
def parse_attrib(selector, stream):
stream.skip_whitespace()
- attrib = stream.next_symbol_or_star()
- if attrib == '*' and stream.peek() != '|':
+ attrib = stream.next_ident_or_star()
+ if attrib is None and stream.peek() != ('DELIM', '|'):
raise SelectorSyntaxError(
- "Expected '|', got '%s'" % stream.peek())
- if stream.peek() == '|':
- namespace = attrib
+ "Expected '|', got %s" % (stream.peek(),))
+ if stream.peek() == ('DELIM', '|'):
stream.next()
- attrib = stream.next_symbol()
+ if stream.peek() == ('DELIM', '='):
+ namespace = None
+ stream.next()
+ op = '|='
+ else:
+ namespace = attrib
+ attrib = stream.next_ident()
+ op = None
else:
- namespace = '*'
- stream.skip_whitespace()
- if stream.peek() == ']':
- return Attrib(selector, namespace, attrib, 'exists', None)
- op = stream.next()
- if not op in ('^=', '$=', '*=', '=', '~=', '|=', '!='):
- raise SelectorSyntaxError(
- "Operator expected, got '%s'" % op)
+ namespace = op = None
+ if op is None:
+ stream.skip_whitespace()
+ next = stream.next()
+ if next == ('DELIM', ']'):
+ return Attrib(selector, namespace, attrib, 'exists', None)
+ elif next == ('DELIM', '='):
+ op = '='
+ elif next.is_delim('^', '$', '*', '~', '|', '!') and (
+ stream.peek() == ('DELIM', '=')):
+ op = next.value + '='
+ stream.next()
+ else:
+ raise SelectorSyntaxError(
+ "Operator expected, got %s" % (next,))
stream.skip_whitespace()
value = stream.next()
- if not isinstance(value, (Symbol, String)):
+ if value.type not in ('IDENT', 'STRING'):
raise SelectorSyntaxError(
- "Expected string or symbol, got '%s'" % value)
+ "Expected string or ident, got %s" % (value,))
stream.skip_whitespace()
- return Attrib(selector, namespace, attrib, op, value)
+ next = stream.next()
+ if next != ('DELIM', ']'):
+ raise SelectorSyntaxError(
+ "Expected ']', got %s" % (next,))
+ return Attrib(selector, namespace, attrib, op, value.value)
+
+
+def parse_series_from_tokens(tokens):
+ for token in tokens:
+ if token.type == 'STRING':
+ raise ValueError('String tokens not allowed in series.')
+ return parse_series(''.join(token.value for token in tokens))
def parse_series(s):
@@ -465,6 +502,7 @@ def parse_series(s):
:returns: :``(a, b)``
"""
+ s = s.strip()
if s == 'odd':
return (2, 1)
elif s == 'even':
@@ -490,139 +528,136 @@ def parse_series(s):
#### Token objects
-class _UniToken(_unicode):
- def __new__(cls, contents, pos):
- obj = _unicode.__new__(cls, contents)
+class Token(tuple):
+ def __new__(cls, type_, value, pos):
+ obj = tuple.__new__(cls, (type_, value))
obj.pos = pos
return obj
def __repr__(self):
- return '%s(%s, %r)' % (
- self.__class__.__name__,
- _unicode.__repr__(self),
- self.pos)
+ return '<%s %r at %i>' % (self.type, self.value, self.pos)
-class Symbol(_UniToken):
- pass
+ def is_delim(self, *values):
+ return self.type == 'DELIM' and self.value in values
-class String(_UniToken):
- pass
+ type = property(operator.itemgetter(0))
+ value = property(operator.itemgetter(1))
-class Token(_UniToken):
- pass
+
+class EOFToken(Token):
+ def __new__(cls, pos):
+ return Token.__new__(cls, 'EOF', None, pos)
+
+ def __repr__(self):
+ return '<%s at %i>' % (self.type, self.pos)
#### Tokenizer
-_match_whitespace = re.compile(r'\s+', re.UNICODE).match
-_replace_comments = re.compile(r'/\*.*?\*/', re.DOTALL).sub
+class TokenMacros:
+ unicode_escape = r'\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?'
+ escape = unicode_escape + r'|\\[^\n\r\f0-9a-f]'
+ string_escape = r'\\(?:\n|\r\n|\r|\f)|' + escape
+ nonascii = r'[^\0-\177]'
+ nmchar = '[_a-z0-9-]|%s|%s' % (escape, nonascii)
+ nmstart = '[_a-z]|%s|%s' % (escape, nonascii)
+
+def _compile(pattern):
+ return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match
+
+_match_whitespace = _compile(r'[ \t\r\n\f]+')
+_match_number = _compile(r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
+_match_hash = _compile(r'#(?:%(nmchar)s)+')
+_match_ident = _compile(r'-?(?:%(nmstart)s)(?:%(nmchar)s)*')
+_match_string_by_quote = {
+ "'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"),
+ '"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'),
+}
+
+_sub_simple_escape = re.compile(r'\\(.)').sub
+_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub
+_sub_newline_escape =re.compile(r'\\(?:\n|\r\n|\r|\f)').sub
+
+# Same as r'\1', but faster on CPython
+_replace_simple = operator.methodcaller('group', 1)
+
+def _replace_unicode(match):
+ codepoint = int(match.group(1), 16)
+ if codepoint > sys.maxunicode:
+ codepoint = 0xFFFD
+ return _unichr(codepoint)
+
+
+def unescape_ident(value):
+ value = _sub_unicode_escape(_replace_unicode, value)
+ value = _sub_simple_escape(_replace_simple, value)
+ return value
-_match_count_number = re.compile(r'[+-]?\d*n(?:[+-]\d+)?').match
def tokenize(s):
pos = 0
- s = _replace_comments('', s)
len_s = len(s)
while pos < len_s:
match = _match_whitespace(s, pos=pos)
if match:
- yield Token(' ', pos)
+ yield Token('S', ' ', pos)
pos = match.end()
continue
- match = _match_count_number(s, pos=pos)
- if match and match.group() != 'n':
- sym = s[pos:match.end()]
- yield Symbol(sym, pos)
+
+ match = _match_ident(s, pos=pos)
+ if match:
+ value = _sub_simple_escape(_replace_simple,
+ _sub_unicode_escape(_replace_unicode, match.group()))
+ yield Token('IDENT', value, pos)
pos = match.end()
continue
- c = s[pos]
- c2 = s[pos:pos+2]
- if c2 in ('~=', '|=', '^=', '$=', '*=', '::', '!='):
- yield Token(c2, pos)
- pos += 2
- continue
- if c in '>+~,.*=[]()|:#':
- yield Token(c, pos)
- pos += 1
- continue
- if c == '"' or c == "'":
- # Quoted string
- old_pos = pos
- sym, pos = tokenize_escaped_string(s, pos)
- yield String(sym, old_pos)
- continue
- old_pos = pos
- sym, pos = tokenize_symbol(s, pos)
- yield Symbol(sym, old_pos)
- continue
-split_at_string_escapes = re.compile(r'(\\(?:%s))'
- % '|'.join(['[A-Fa-f0-9]{1,6}(?:\r\n|\s)?',
- '[^A-Fa-f0-9]'])).split
+ match = _match_hash(s, pos=pos)
+ if match:
+ value = _sub_simple_escape(_replace_simple,
+ _sub_unicode_escape(_replace_unicode, match.group()[1:]))
+ yield Token('HASH', value, pos)
+ pos = match.end()
+ continue
+ quote = s[pos]
+ if quote in _match_string_by_quote:
+ match = _match_string_by_quote[quote](s, pos=pos + 1)
+ assert match, 'Should have found at least an empty match'
+ end_pos = match.end()
+ if end_pos == len_s:
+ raise SelectorSyntaxError('Unclosed string at %s' % pos)
+ if s[end_pos] != quote: # body match stopped before a closing quote
+ raise SelectorSyntaxError('Invalid string at %s' % pos)
+ value = _sub_simple_escape(_replace_simple,
+ _sub_unicode_escape(_replace_unicode,
+ _sub_newline_escape('', match.group())))
+ yield Token('STRING', value, pos)
+ pos = end_pos + 1
+ continue
-def unescape_string_literal(literal):
- substrings = []
- for substring in split_at_string_escapes(literal):
- if not substring:
+ match = _match_number(s, pos=pos)
+ if match:
+ value = match.group()
+ yield Token('NUMBER', value, pos)
+ pos = match.end()
continue
- elif '\\' in substring:
- if substring[0] == '\\' and len(substring) > 1:
- substring = substring[1:]
- if substring[0] in '0123456789ABCDEFabcdef':
- # int() correctly ignores the potentially trailing whitespace
- substring = _unichr(int(substring, 16))
+
+ pos2 = pos + 2
+ if s[pos:pos2] == '/*':
+ pos = s.find('*/', pos2)
+ if pos == -1:
+ pos = len_s
else:
- raise SelectorSyntaxError(
- "Invalid escape sequence %r in string %r"
- % (substring.split('\\')[1], literal))
- substrings.append(substring)
- return ''.join(substrings)
-
-
-def tokenize_escaped_string(s, pos):
- quote = s[pos]
- assert quote in ('"', "'")
- pos = pos+1
- start = pos
- while 1:
- next = s.find(quote, pos)
- if next == -1:
- raise SelectorSyntaxError(
- "Expected closing %s for string in: %r"
- % (quote, s[start:]))
- result = s[start:next]
- if result.endswith('\\'):
- # next quote character is escaped
- pos = next+1
+ pos += 2
continue
- if '\\' in result:
- result = unescape_string_literal(result)
- return result, next+1
-
-_illegal_symbol = re.compile(r'[^\w\\-]', re.UNICODE)
+ yield Token('DELIM', s[pos], pos)
+ pos += 1
-def tokenize_symbol(s, pos):
- start = pos
- match = _illegal_symbol.search(s, pos=pos)
- if match:
- if match.start() == pos:
- raise SelectorSyntaxError(
- "Unexpected symbol: %r" % s[pos])
- result = s[start:match.start()]
- pos = match.start()
- else:
- result = s[start:]
- pos = len(s)
- try:
- result = result.encode('ASCII', 'backslashreplace').decode('unicode_escape')
- except UnicodeDecodeError:
- e = sys.exc_info()[1]
- raise SelectorSyntaxError(
- "Bad symbol %r: %s" % (result, e))
- return result, pos
+ assert pos == len_s
+ yield EOFToken(pos)
class TokenStream(object):
@@ -644,36 +679,33 @@ def next(self):
self.used.append(self.peeked)
return self.peeked
else:
- try:
- next = self.next_token()
- self.used.append(next)
- return next
- except StopIteration:
- return None
+ next = self.next_token()
+ self.used.append(next)
+ return next
def peek(self):
if not self._peeking:
- try:
- self.peeked = self.next_token()
- except StopIteration:
- return None
+ self.peeked = self.next_token()
self._peeking = True
return self.peeked
- def next_symbol(self):
+ def next_ident(self):
next = self.next()
- if not isinstance(next, Symbol):
- raise SelectorSyntaxError(
- "Expected symbol, got '%s'" % next)
- return next
+ if next.type != 'IDENT':
+ raise SelectorSyntaxError('Expected ident, got %s' % (next,))
+ return next.value
- def next_symbol_or_star(self):
+ def next_ident_or_star(self):
next = self.next()
- if next != '*' and not isinstance(next, Symbol):
+ if next.type == 'IDENT':
+ return next.value
+ elif next == ('DELIM', '*'):
+ return None
+ else:
raise SelectorSyntaxError(
- "Expected symbol or '*', got '%s'" % next)
- return next
+ "Expected ident or '*', got %s" % (next,))
def skip_whitespace(self):
- if self.peek() == ' ':
+ peek = self.peek()
+ if peek.type == 'S':
self.next()
diff --git a/cssselect/tests.py b/cssselect/tests.py
index fea4b67..ff0effa 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -26,21 +26,42 @@
from cssselect.parser import tokenize, parse_series, _unicode
+if sys.version_info[0] < 3:
+ # Python 2
+ def u(text):
+ return text.decode('utf8')
+else:
+ # Python 3
+ def u(text):
+ return text
+
+
class TestCssselect(unittest.TestCase):
def test_tokenizer(self):
- tokens = [repr(item).replace("u'", "'")
- for item in tokenize('E > f[a~="y\\"x"]')]
+ tokens = [
+ repr(item).replace("u'", "'") # Py 2/3
+ for item in tokenize(
+ u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)'))]
assert tokens == [
- "Symbol('E', 0)",
- "Token(' ', 1)",
- "Token('>', 2)",
- "Token(' ', 3)",
- "Symbol('f', 4)",
- "Token('[', 5)",
- "Symbol('a', 6)",
- "Token('~=', 7)",
- "String('y\"x', 9)",
- "Token(']', 15)"]
+ "",
+ "",
+ "' at 5>",
+ "",
+ # the no-break space is not whitespace in CSS
+ r"",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ ]
def test_parser(self):
def repr_parse(css):
@@ -98,28 +119,26 @@ def parse_many(first, *others):
'Attrib[Element[a][name]]']
assert parse_many('a [name]') == [
'CombinedSelector[Element[a] Attrib[Element[*][name]]]']
- assert parse_many('a[rel="include"]') == [
- "Attrib[Element[a][rel = String('include', 6)]]"]
- assert parse_many('a[rel = include]') == [
- "Attrib[Element[a][rel = Symbol('include', 8)]]"]
- assert parse_many("a[hreflang |= 'en']") == [
- "Attrib[Element[a][hreflang |= String('en', 14)]]"]
+ assert parse_many('a[rel="include"]', 'a[rel = include]') == [
+ "Attrib[Element[a][rel = 'include']]"]
+ assert parse_many("a[hreflang |= 'en']", "a[hreflang|=en]") == [
+ "Attrib[Element[a][hreflang |= 'en']]"]
assert parse_many('div:nth-child(10)') == [
- "Function[Element[div]:nth-child(Symbol('10', 14))]"]
+ "Function[Element[div]:nth-child(['10'])]"]
assert parse_many(':nth-child(2n+2)') == [
- "Function[Element[*]:nth-child(Symbol('2n+2', 11))]"]
+ "Function[Element[*]:nth-child(['2', 'n', '+2'])]"]
assert parse_many('div:nth-of-type(10)') == [
- "Function[Element[div]:nth-of-type(Symbol('10', 16))]"]
+ "Function[Element[div]:nth-of-type(['10'])]"]
assert parse_many('div div:nth-of-type(10) .aclass') == [
'CombinedSelector[CombinedSelector[Element[div] '
- "Function[Element[div]:nth-of-type(Symbol('10', 20))]] "
+ "Function[Element[div]:nth-of-type(['10'])]] "
' Class[Element[*].aclass]]']
assert parse_many('label:only') == [
'Pseudo[Element[label]:only]']
assert parse_many('a:lang(fr)') == [
- "Function[Element[a]:lang(Symbol('fr', 7))]"]
+ "Function[Element[a]:lang(['fr'])]"]
assert parse_many('div:contains("foo")') == [
- "Function[Element[div]:contains(String('foo', 13))]"]
+ "Function[Element[div]:contains(['foo'])]"]
assert parse_many('div#foobar') == [
'Hash[Element[div]#foobar]']
assert parse_many('div:not(div.foo)') == [
@@ -213,99 +232,65 @@ def get_error(css):
return str(sys.exc_info()[1]).replace("(u'", "('")
assert get_error('attributes(href)/html/body/a') == (
- "Expected selector, got '(' at "
- "[Symbol('attributes', 0)] -> Token('(', 10)")
+ "Expected selector, got ")
assert get_error('attributes(href)') == (
- "Expected selector, got '(' at "
- "[Symbol('attributes', 0)] -> Token('(', 10)")
+ "Expected selector, got ")
assert get_error('html/body/a') == (
- "Unexpected symbol: '/' at [Symbol('html', 0)] -> None")
+ "Expected selector, got ")
assert get_error(' ') == (
- "Expected selector, got 'None' at [Token(' ', 0)] -> None")
+ "Expected selector, got ")
assert get_error('div, ') == (
- "Expected selector, got 'None' at "
- "[Symbol('div', 0), Token(',', 3), Token(' ', 4)] -> None")
+ "Expected selector, got ")
assert get_error(' , div') == (
- "Expected selector, got ',' at "
- "[Token(' ', 0)] -> Token(',', 1)")
+ "Expected selector, got ")
assert get_error('p, , div') == (
- "Expected selector, got ',' at "
- "[Symbol('p', 0), Token(',', 1), Token(' ', 2)] -> Token(',', 3)")
+ "Expected selector, got ")
assert get_error('div > ') == (
- "Expected selector, got 'None' at "
- "[Symbol('div', 0), Token(' ', 3), Token('>', 4), Token(' ', 5)]"
- " -> None")
+ "Expected selector, got ")
assert get_error(' > div') == (
- "Expected selector, got '>' at [Token(' ', 0)] -> Token('>', 2)")
+ "Expected selector, got ' at 2>")
assert get_error('foo|#bar') == (
- "Expected symbol or '*', got '#' at "
- "[Symbol('foo', 0), Token('|', 3), "
- "Token('#', 4)] -> Symbol('bar', 5)")
+ "Expected ident or '*', got ")
assert get_error('#.foo') == (
- "Expected symbol, got '.' at "
- "[Token('#', 0), Token('.', 1)] -> Symbol('foo', 2)")
+ "Expected selector, got ")
assert get_error('.#foo') == (
- "Expected symbol, got '#' at "
- "[Token('.', 0), Token('#', 1)] -> Symbol('foo', 2)")
+ "Expected ident, got ")
assert get_error(':#foo') == (
- "Expected symbol, got '#' at "
- "[Token(':', 0), Token('#', 1)] -> Symbol('foo', 2)")
+ "Expected ident, got ")
assert get_error('[*]') == (
- "Expected '|', got ']' at "
- "[Token('[', 0), Token('*', 1)] -> Token(']', 2)")
+ "Expected '|', got ")
assert get_error('[foo|]') == (
- "Expected symbol, got ']' at "
- "[Token('[', 0), Symbol('foo', 1), Token('|', 4), Token(']', 5)]"
- " -> None")
+ "Expected ident, got ")
assert get_error('[#]') == (
- "Expected symbol or '*', got '#' at "
- "[Token('[', 0), Token('#', 1)] -> Token(']', 2)")
+ "Expected ident or '*', got ")
assert get_error('[foo=#]') == (
- "Expected string or symbol, got '#' at "
- "[Token('[', 0), Symbol('foo', 1), Token('=', 4), Token('#', 5)]"
- " -> Token(']', 6)")
+ "Expected string or ident, got ")
assert get_error(':nth-child()') == (
- "Expected argument, got ')' at "
- "[Token(':', 0), Symbol('nth-child', 1), Token('(', 10)]"
- " -> Token(')', 11)")
+ "Expected at least one argument, got ")
assert get_error('[href]a') == (
- "Expected selector, got 'a' at "
- "[Token('[', 0), Symbol('href', 1), Token(']', 5)]"
- " -> Symbol('a', 6)")
+ "Expected selector, got ")
assert get_error('[rel=stylesheet]') == None
assert get_error('[rel:stylesheet]') == (
- "Operator expected, got ':' at [Token('[', 0), Symbol('rel', 1), "
- "Token(':', 4)] -> Symbol('stylesheet', 5)")
+ "Operator expected, got ")
assert get_error('[rel=stylesheet') == (
- "Expected ']', got 'None' at [Token('[', 0), Symbol('rel', 1), "
- "Token('=', 4), Symbol('stylesheet', 5)] -> None")
+ "Expected ']', got ")
assert get_error(':lang(fr)') == None
assert get_error(':lang(fr') == (
- "Expected ')', got 'None' at [Token(':', 0), Symbol('lang', 1), "
- "Token('(', 5), Symbol('fr', 6)] -> None")
+ "Expected an argument, got ")
assert get_error(':contains("foo') == (
- "Expected closing \" for string in: 'foo' at "
- "[Token(':', 0), Symbol('contains', 1), Token('(', 9)] -> None")
+ "Unclosed string at 10")
assert get_error('foo!') == (
- "Unexpected symbol: '!' at [Symbol('foo', 0)] -> None")
+ "Expected selector, got ")
# Mis-placed pseudo-elements
assert get_error('a:before:empty') == (
- "A pseudo-element must be at the end of a selector at "
- "[Symbol('a', 0), Token(':', 1), Symbol('before', 2)] "
- "-> Token(':', 8)")
+ "Got pseudo-element ::before not at the end of a selector")
assert get_error('li:before a') == (
- "A pseudo-element must be at the end of a selector at "
- "[Symbol('li', 0), Token(':', 2), Symbol('before', 3), "
- "Token(' ', 9)] -> Symbol('a', 10)")
+ "Got pseudo-element ::before not at the end of a selector")
assert get_error(':not(:before)') == (
- "Pseudo-elements are not allowed inside :not() at "
- "[Token(':', 0), Symbol('not', 1), Token('(', 4), Token(':', 5),"
- " Symbol('before', 6)] -> Token(')', 12)")
+ "Got pseudo-element ::before inside :not() at 12")
assert get_error(':not(:not(a))') == (
- "Got nested :not() at [Token(':', 0), Symbol('not', 1), "
- "Token('(', 4), Token(':', 5), Symbol('not', 6), Token('(', 9)]"
- " -> Symbol('a', 10)")
+ "Got nested :not()")
def test_translation(self):
def xpath(css):
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 53ba40b..2c11fc5 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -15,7 +15,7 @@
import sys
import re
-from cssselect.parser import parse, parse_series, SelectorError
+from cssselect.parser import parse, parse_series_from_tokens, SelectorError
if sys.version_info[0] < 3:
@@ -250,10 +250,10 @@ def xpath_attrib(self, selector):
name = selector.attrib.lower()
else:
name = selector.attrib
- if selector.namespace == '*':
- name = '@' + name
- else:
+ if selector.namespace:
name = '@%s:%s' % (selector.namespace, name)
+ else:
+ name = '@' + name
if self.lower_case_attribute_values:
value = selector.value.lower()
else:
@@ -274,11 +274,12 @@ def xpath_hash(self, id_selector):
def xpath_element(self, selector):
"""Translate a type or universal selector."""
- if self.lower_case_element_names:
- element = selector.element.lower()
- else:
- element = selector.element
- if selector.namespace != '*':
+ element = selector.element
+ if not element:
+ element = '*'
+ elif self.lower_case_element_names:
+ element = element.lower()
+ if selector.namespace:
# Namespace prefixes are case-sensitive.
# http://www.w3.org/TR/css3-namespace/#prefixes
element = '%s:%s' % (selector.namespace, element)
@@ -311,7 +312,7 @@ def xpath_indirect_adjacent_combinator(self, left, right):
def xpath_nth_child_function(self, xpath, function, last=False,
add_name_test=True):
try:
- a, b = parse_series(function.arguments)
+ a, b = parse_series_from_tokens(function.arguments)
except ValueError:
raise ExpressionError("Invalid series: '%r'" % function.arguments)
if add_name_test:
@@ -367,18 +368,29 @@ def xpath_nth_last_of_type_function(self, xpath, function):
add_name_test=False)
def xpath_contains_function(self, xpath, function):
- return xpath.add_condition('contains(string(.), %s)'
- % self.xpath_literal(function.arguments))
+ # Defined there, removed in later drafts:
+ # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
+ if function.argument_types() not in (['STRING'], ['IDENT']):
+ raise ExpressionError(
+ "Expected a single string or ident for :contains(), got %r"
+ % function.arguments)
+ value = function.arguments[0].value
+ return xpath.add_condition(
+ 'contains(string(.), %s)' % self.xpath_literal(value))
def xpath_lang_function(self, xpath, function):
+ if function.argument_types() not in (['STRING'], ['IDENT']):
+ raise ExpressionError(
+ "Expected a single string or ident for :lang(), got %r"
+ % function.arguments)
+ value = function.arguments[0].value
return xpath.add_condition(
"ancestor-or-self::*[@lang][1][starts-with(concat("
# XPath 1.0 has no lower-case function...
"translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
"'abcdefghijklmnopqrstuvwxyz'), "
"'-'), %s)]"
- % (self.lang_attribute, self.xpath_literal(
- function.arguments.lower() + '-')))
+ % (self.lang_attribute, self.xpath_literal(value.lower() + '-')))
# Pseudo: dispatch by pseudo-class name
From c221b7bdc5328368279732ad5e82736639a1c066 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 14 Jun 2012 10:37:54 +0200
Subject: [PATCH 005/192] Workaround element/attribute names with special
characters
For element names, these are equivalent in XPath:
foo
*[name() = "foo"]
And for attribute names:
@foo
attribute::*[name() = "foo"]
The former is faster but some characters are not allowed in it.
Since I am not sure which characters, only use it for "safe" names
that match ^[a-zA-Z_][a-zA-Z0-9_.-]*$
This is overly restrictive, but should cover every name actually used
in XML, HTML, SVG, etc.
---
cssselect/tests.py | 14 ++++++++++++++
cssselect/xpath.py | 32 +++++++++++++++++++++++++-------
2 files changed, 39 insertions(+), 7 deletions(-)
diff --git a/cssselect/tests.py b/cssselect/tests.py
index ff0effa..ab3c558 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -373,6 +373,17 @@ def xpath(css):
"e/following-sibling::f")
assert xpath('div#container p') == (
"div[@id = 'container']/descendant-or-self::*/p")
+
+ # Invalid characters in XPath element names
+ assert xpath(r'di\a0 v') == (
+ "*[name() = 'di\xa0v']")
+ assert xpath(r'di\[v') == (
+ "*[name() = 'di[v']")
+ assert xpath(r'[h\a0 ref]') == (
+ "*[attribute::*[name() = 'h\xa0ref']]")
+ assert xpath(r'[h\]ref]') == (
+ "*[attribute::*[name() = 'h]ref']]")
+
self.assertRaises(ExpressionError, xpath, ':first-of-type')
self.assertRaises(ExpressionError, xpath, ':only-of-type')
self.assertRaises(ExpressionError, xpath, ':last-of-type')
@@ -551,6 +562,9 @@ def pcss(main, *selectors, **kwargs):
assert pcss('ol :Not(li[class])') == [
'first-li', 'second-li', 'li-div',
'fifth-li', 'sixth-li', 'seventh-li']
+ # Invalid characters in XPath element names, should not crash
+ assert pcss(r'di\a0 v', r'div\[') == []
+ assert pcss(r'[h\a0 ref]', r'[h\]ref]') == []
# HTML-specific
assert pcss(':link', html_only=True) == [
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 2c11fc5..d479510 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -83,6 +83,11 @@ def join(self, combiner, other):
split_at_single_quotes = re.compile("('+)").split
+# The spec is actually more permissive than that, but don’t bother.
+# This is just for the fast path.
+# http://www.w3.org/TR/REC-xml/#NT-NameStartChar
+is_safe_name = re.compile('^[a-zA-Z_][a-zA-Z0-9_.-]*$').match
+
#### Translation
@@ -181,7 +186,9 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::'):
tree = getattr(selector, 'parsed_tree', None)
if not tree:
raise TypeError('Expected a parsed selector, got %r' % (selector,))
- return (prefix or '') + _unicode(self.xpath(tree))
+ xpath = self.xpath(tree)
+ assert isinstance(xpath, XPathExpr) # help debug a missing 'return'
+ return (prefix or '') + _unicode(xpath)
@staticmethod
def xpath_literal(s):
@@ -250,15 +257,19 @@ def xpath_attrib(self, selector):
name = selector.attrib.lower()
else:
name = selector.attrib
+ safe = is_safe_name(name)
if selector.namespace:
- name = '@%s:%s' % (selector.namespace, name)
+ name = '%s:%s' % (selector.namespace, name)
+ safe = safe and is_safe_name(selector.namespace)
+ if safe:
+ attrib = '@' + name
else:
- name = '@' + name
+ attrib = 'attribute::*[name() = %s]' % self.xpath_literal(name)
if self.lower_case_attribute_values:
value = selector.value.lower()
else:
value = selector.value
- return method(self.xpath(selector.selector), name, value)
+ return method(self.xpath(selector.selector), attrib, value)
def xpath_class(self, class_selector):
"""Translate a class selector."""
@@ -277,13 +288,20 @@ def xpath_element(self, selector):
element = selector.element
if not element:
element = '*'
- elif self.lower_case_element_names:
- element = element.lower()
+ safe = True
+ else:
+ safe = is_safe_name(element)
+ if self.lower_case_element_names:
+ element = element.lower()
if selector.namespace:
# Namespace prefixes are case-sensitive.
# http://www.w3.org/TR/css3-namespace/#prefixes
element = '%s:%s' % (selector.namespace, element)
- return XPathExpr(element=element)
+ safe = safe and is_safe_name(selector.namespace)
+ xpath = XPathExpr(element=element)
+ if not safe:
+ xpath.add_name_test()
+ return xpath
# CombinedSelector: dispatch by combinator
From d405f8930b76d39e8f70f6394575f889343b5477 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 14 Jun 2012 12:22:10 +0200
Subject: [PATCH 006/192] Add tests for series with whitespace
Together with the previous 2 commits, this fixes #2 and #7
---
cssselect/parser.py | 18 +++++++-----------
cssselect/tests.py | 37 ++++++++++++++++++++++++++-----------
cssselect/xpath.py | 4 ++--
3 files changed, 35 insertions(+), 24 deletions(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 9ccd66f..a7d9889 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -487,22 +487,18 @@ def parse_attrib(selector, stream):
return Attrib(selector, namespace, attrib, op, value.value)
-def parse_series_from_tokens(tokens):
- for token in tokens:
- if token.type == 'STRING':
- raise ValueError('String tokens not allowed in series.')
- return parse_series(''.join(token.value for token in tokens))
-
-
-def parse_series(s):
+def parse_series(tokens):
"""
- Parses things like '1n+2', or 'an+b' generally
+ Parses the arguments for :nth-child() and friends.
- :raises: :class:`ValueError`
+ :raises: A list of tokens
:returns: :``(a, b)``
"""
- s = s.strip()
+ for token in tokens:
+ if token.type == 'STRING':
+ raise ValueError('String tokens not allowed in series.')
+ s = ''.join(token.value for token in tokens).strip()
if s == 'odd':
return (2, 1)
elif s == 'even':
diff --git a/cssselect/tests.py b/cssselect/tests.py
index ab3c558..796537b 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -435,17 +435,32 @@ def test_unicode_escapes(self):
'''descendant-or-self::*[@aval = "' '"]''')
def test_series(self):
- assert parse_series('1n+3') == (1, 3)
- assert parse_series('n-5') == (1, -5)
- assert parse_series('odd') == (2, 1)
- assert parse_series('even') == (2, 0)
- assert parse_series('3n') == (3, 0)
- assert parse_series('n') == (1, 0)
- assert parse_series('+n') == (1, 0)
- assert parse_series('-n') == (-1, 0)
- assert parse_series('5') == (0, 5)
- self.assertRaises(ValueError, parse_series, 'foo')
- self.assertRaises(ValueError, parse_series, 'n+')
+ def series(css):
+ selector, = parse(':nth-child(%s)' % css)
+ args = selector.parsed_tree.arguments
+ try:
+ return parse_series(args)
+ except ValueError:
+ return None
+
+ assert series('1n+3') == (1, 3)
+ assert series('1n +3') == (1, 3)
+ assert series('1n + 3') == (1, 3)
+ assert series('1n+ 3') == (1, 3)
+ assert series('1n-3') == (1, -3)
+ assert series('1n -3') == (1, -3)
+ assert series('1n - 3') == (1, -3)
+ assert series('1n- 3') == (1, -3)
+ assert series('n-5') == (1, -5)
+ assert series('odd') == (2, 1)
+ assert series('even') == (2, 0)
+ assert series('3n') == (3, 0)
+ assert series('n') == (1, 0)
+ assert series('+n') == (1, 0)
+ assert series('-n') == (-1, 0)
+ assert series('5') == (0, 5)
+ assert series('foo') == None
+ assert series('n+') == None
def test_select(self):
document = etree.fromstring(HTML_IDS)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index d479510..bc42077 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -15,7 +15,7 @@
import sys
import re
-from cssselect.parser import parse, parse_series_from_tokens, SelectorError
+from cssselect.parser import parse, parse_series, SelectorError
if sys.version_info[0] < 3:
@@ -330,7 +330,7 @@ def xpath_indirect_adjacent_combinator(self, left, right):
def xpath_nth_child_function(self, xpath, function, last=False,
add_name_test=True):
try:
- a, b = parse_series_from_tokens(function.arguments)
+ a, b = parse_series(function.arguments)
except ValueError:
raise ExpressionError("Invalid series: '%r'" % function.arguments)
if add_name_test:
From e48ecc642d48cf545a7169ec20b211cdc105694e Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 14 Jun 2012 16:52:19 +0200
Subject: [PATCH 007/192] Fix #10: '~=', '^=' and '*=' attribute operators with
an empty string
---
cssselect/tests.py | 5 +++++
cssselect/xpath.py | 42 ++++++++++++++++++++++++++++++------------
2 files changed, 35 insertions(+), 12 deletions(-)
diff --git a/cssselect/tests.py b/cssselect/tests.py
index 796537b..42312f9 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -502,11 +502,16 @@ def pcss(main, *selectors, **kwargs):
assert pcss('a[rel]') == ['tag-anchor', 'nofollow-anchor']
assert pcss('a[rel="tag"]') == ['tag-anchor']
assert pcss('a[href*="localhost"]') == ['tag-anchor']
+ assert pcss('a[href*=""]') == []
assert pcss('a[href^="http"]') == ['tag-anchor', 'nofollow-anchor']
assert pcss('a[href^="http:"]') == ['tag-anchor']
+ assert pcss('a[href^=""]') == []
assert pcss('a[href$="org"]') == ['nofollow-anchor']
+ assert pcss('a[href$=""]') == []
assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == [
'foobar-div']
+ assert pcss('[foobar~="ab bc"]',
+ '[foobar~=""]', '[foobar~=" \t"]') == []
assert pcss('div[foobar~="cd"]') == []
assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ['second-li']
# Attribute values are case sensitive
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index bc42077..efdc1e1 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -88,6 +88,9 @@ def join(self, combiner, other):
# http://www.w3.org/TR/REC-xml/#NT-NameStartChar
is_safe_name = re.compile('^[a-zA-Z_][a-zA-Z0-9_.-]*$').match
+# Test that the string is not empty and does not contain whitespace
+is_non_whitespace = re.compile(r'^[^ \t\r\n\f]+$').match
+
#### Translation
@@ -490,9 +493,12 @@ def xpath_attrib_different(self, xpath, name, value):
return xpath
def xpath_attrib_includes(self, xpath, name, value):
- xpath.add_condition(
- "%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
- % (name, name, self.xpath_literal(' '+value+' ')))
+ if is_non_whitespace(value):
+ xpath.add_condition(
+ "%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
+ % (name, name, self.xpath_literal(' '+value+' ')))
+ else:
+ xpath.add_condition('0')
return xpath
def xpath_attrib_dashmatch(self, xpath, name, value):
@@ -504,19 +510,31 @@ def xpath_attrib_dashmatch(self, xpath, name, value):
return xpath
def xpath_attrib_prefixmatch(self, xpath, name, value):
- return xpath.add_condition('%s and starts-with(%s, %s)' % (
- name, name, self.xpath_literal(value)))
+ if value:
+ xpath.add_condition('%s and starts-with(%s, %s)' % (
+ name, name, self.xpath_literal(value)))
+ else:
+ xpath.add_condition('0')
+ return xpath
def xpath_attrib_suffixmatch(self, xpath, name, value):
- # Oddly there is a starts-with in XPath 1.0, but not ends-with
- return xpath.add_condition(
- '%s and substring(%s, string-length(%s)-%s) = %s'
- % (name, name, name, len(value)-1, self.xpath_literal(value)))
+ if value:
+ # Oddly there is a starts-with in XPath 1.0, but not ends-with
+ xpath.add_condition(
+ '%s and substring(%s, string-length(%s)-%s) = %s'
+ % (name, name, name, len(value)-1, self.xpath_literal(value)))
+ else:
+ xpath.add_condition('0')
+ return xpath
def xpath_attrib_substringmatch(self, xpath, name, value):
- # Attribute selectors are case sensitive
- return xpath.add_condition('%s and contains(%s, %s)' % (
- name, name, self.xpath_literal(value)))
+ if value:
+ # Attribute selectors are case sensitive
+ xpath.add_condition('%s and contains(%s, %s)' % (
+ name, name, self.xpath_literal(value)))
+ else:
+ xpath.add_condition('0')
+ return xpath
class HTMLTranslator(GenericTranslator):
From 13023edb0d2dac9fa3bca2db54877b5fab57862b Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 14 Jun 2012 17:00:15 +0200
Subject: [PATCH 008/192] Changelog for 0.7
---
CHANGES | 16 ++++++++++++++++
cssselect/__init__.py | 2 +-
2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/CHANGES b/CHANGES
index c72c2ec..908787b 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,22 @@
Changelog
=========
+Version 0.7
+-----------
+
+Released on 2012-06-14.
+
+Bug fix release: see #2, #7 and #10 on GitHub.
+
+* The tokenizer and parser have been rewritten to be much closer to the
+ specified grammar. In particular, non-ASCII characters and backslash-escapes
+ are now handled correctly.
+* Special characters are protected in the output so that generated XPath
+ exrpessions should always be valid
+* The ``~=``, ``^=`` and ``*=`` attribute operators now correctly never match
+ when used with an empty string.
+
+
Version 0.6.1
-------------
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index 08b529e..2ea6409 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -18,5 +18,5 @@
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '0.6.1'
+VERSION = '0.7'
__version__ = VERSION
From c192fcb38d4a147b83cc46e32cfad4ba800ed180 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 14 Jun 2012 17:33:25 +0200
Subject: [PATCH 009/192] Make pseudo-elements lower-case in the ASCII range.
See http://www.w3.org/TR/selectors/#casesens
Pseudo-classes were already case-insensitive, but the
lower-casing was moved to the parser.
---
cssselect/parser.py | 14 +++++++++++---
cssselect/tests.py | 24 +++++++++++++-----------
cssselect/xpath.py | 4 ++--
3 files changed, 26 insertions(+), 16 deletions(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index a7d9889..46c1c98 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -25,6 +25,11 @@
_unichr = chr
+def ascii_lower(string):
+ """Lower-case, but only in the ASCII range."""
+ return string.encode('utf8').lower().decode('utf8')
+
+
class SelectorError(Exception):
"""Common parent for :class:`SelectorSyntaxError` and
:class:`ExpressionError`.
@@ -52,6 +57,8 @@ class Selector(object):
"""
def __init__(self, tree, pseudo_element=None):
self.parsed_tree = tree
+ if pseudo_element is not None:
+ pseudo_element = ascii_lower(pseudo_element)
#: The identifier for the pseudo-element as a string, or ``None``.
#:
#: +-------------------------+----------------+----------------+
@@ -114,7 +121,7 @@ class Function(object):
"""
def __init__(self, selector, name, arguments):
self.selector = selector
- self.name = name
+ self.name = ascii_lower(name)
self.arguments = arguments
def __repr__(self):
@@ -137,7 +144,7 @@ class Pseudo(object):
"""
def __init__(self, selector, ident):
self.selector = selector
- self.ident = ident
+ self.ident = ascii_lower(ident)
def __repr__(self):
return '%s[%r:%s]' % (
@@ -393,7 +400,8 @@ def parse_simple_selector(stream, inside_negation=False):
pseudo_element = stream.next_ident()
continue
ident = stream.next_ident()
- if ident in ('first-line', 'first-letter', 'before', 'after'):
+ if ident.lower() in ('first-line', 'first-letter',
+ 'before', 'after'):
# Special case: CSS 2.1 pseudo-elements can have a single ':'
# Any new pseudo-element must have two.
pseudo_element = _unicode(ident)
diff --git a/cssselect/tests.py b/cssselect/tests.py
index 42312f9..7170c2a 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -167,17 +167,17 @@ def parse_one(css):
assert parse_one(':empty') == ('Pseudo[Element[*]:empty]', None)
# Special cases for CSS 2.1 pseudo-elements
- assert parse_one(':before') == ('Element[*]', 'before')
- assert parse_one(':after') == ('Element[*]', 'after')
- assert parse_one(':first-line') == ('Element[*]', 'first-line')
- assert parse_one(':first-letter') == ('Element[*]', 'first-letter')
+ assert parse_one(':BEfore') == ('Element[*]', 'before')
+ assert parse_one(':aftER') == ('Element[*]', 'after')
+ assert parse_one(':First-Line') == ('Element[*]', 'first-line')
+ assert parse_one(':First-Letter') == ('Element[*]', 'first-letter')
- assert parse_one('::before') == ('Element[*]', 'before')
- assert parse_one('::after') == ('Element[*]', 'after')
- assert parse_one('::first-line') == ('Element[*]', 'first-line')
- assert parse_one('::first-letter') == ('Element[*]', 'first-letter')
+ assert parse_one('::befoRE') == ('Element[*]', 'before')
+ assert parse_one('::AFter') == ('Element[*]', 'after')
+ assert parse_one('::firsT-linE') == ('Element[*]', 'first-line')
+ assert parse_one('::firsT-letteR') == ('Element[*]', 'first-letter')
- assert parse_one('::selection') == ('Element[*]', 'selection')
+ assert parse_one('::Selection') == ('Element[*]', 'selection')
assert parse_one('foo:after') == ('Element[foo]', 'after')
assert parse_one('foo::selection') == ('Element[foo]', 'selection')
assert parse_one('lorem#ipsum ~ a#b.c[href]:empty::selection') == (
@@ -346,13 +346,15 @@ def xpath(css):
"e[last() = 1]")
assert xpath('e:empty') == (
"e[not(*) and not(normalize-space())]")
+ assert xpath('e:EmPTY') == (
+ "e[not(*) and not(normalize-space())]")
assert xpath('e:root') == (
"e[not(parent::*)]")
assert xpath('e:hover') == (
"e[0]") # never matches
assert xpath('e:contains("foo")') == (
"e[contains(string(.), 'foo')]")
- assert xpath('e:contains(foo)') == (
+ assert xpath('e:ConTains(foo)') == (
"e[contains(string(.), 'foo')]")
assert xpath('e.warning') == (
"e[@class and contains("
@@ -361,7 +363,7 @@ def xpath(css):
"e[@id = 'myid']")
assert xpath('e:not(:nth-child(odd))') == (
"e[not((position() -1) mod 2 = 0 and position() >= 1)]")
- assert xpath('e:not(*)') == (
+ assert xpath('e:nOT(*)') == (
"e[0]") # never matches
assert xpath('e f') == (
"e/descendant-or-self::*/f")
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index efdc1e1..8f77c86 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -234,7 +234,7 @@ def xpath_negation(self, negation):
def xpath_function(self, function):
"""Translate a functional pseudo-class."""
- method = 'xpath_%s_function' % function.name.replace('-', '_').lower()
+ method = 'xpath_%s_function' % function.name.replace('-', '_')
method = getattr(self, method, None)
if not method:
raise ExpressionError(
@@ -243,7 +243,7 @@ def xpath_function(self, function):
def xpath_pseudo(self, pseudo):
"""Translate a pseudo-class."""
- method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_').lower()
+ method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_')
method = getattr(self, method, None)
if not method:
# TODO: better error message for pseudo-elements?
From 12e04521b5615cb22e9fe5966b2e243c62e319eb Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 14 Jun 2012 23:33:56 +0200
Subject: [PATCH 010/192] Fix lack of operator.methodcaller in Python <2.6
---
cssselect/parser.py | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 46c1c98..67da69c 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -584,7 +584,12 @@ def _compile(pattern):
_sub_newline_escape =re.compile(r'\\(?:\n|\r\n|\r|\f)').sub
# Same as r'\1', but faster on CPython
-_replace_simple = operator.methodcaller('group', 1)
+if hasattr(operator, 'methodcaller'):
+ # Python 2.6+
+ _replace_simple = operator.methodcaller('group', 1)
+else:
+ def _replace_simple(match):
+ return match.group(1)
def _replace_unicode(match):
codepoint = int(match.group(1), 16)
From 06da45755992e1ea80364f254c064e4a6e36af07 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 14 Jun 2012 23:34:41 +0200
Subject: [PATCH 011/192] Fix unicode in tests with Python 2.x
---
cssselect/parser.py | 2 +-
cssselect/tests.py | 13 ++++++-------
2 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 67da69c..f423f30 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -539,7 +539,7 @@ def __new__(cls, type_, value, pos):
return obj
def __repr__(self):
- return '<%s %r at %i>' % (self.type, self.value, self.pos)
+ return "<%s '%s' at %i>" % (self.type, self.value, self.pos)
def is_delim(self, *values):
return self.type == 'DELIM' and self.value in values
diff --git a/cssselect/tests.py b/cssselect/tests.py
index 7170c2a..eaf165c 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -39,16 +39,15 @@ def u(text):
class TestCssselect(unittest.TestCase):
def test_tokenizer(self):
tokens = [
- repr(item).replace("u'", "'") # Py 2/3
- for item in tokenize(
+ _unicode(item) for item in tokenize(
u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)'))]
assert tokens == [
- "",
+ u(""),
"",
"' at 5>",
"",
# the no-break space is not whitespace in CSS
- r"",
+ u(""), # f\xa0
"",
"",
"",
@@ -294,7 +293,7 @@ def get_error(css):
def test_translation(self):
def xpath(css):
- return str(GenericTranslator().css_to_xpath(css, prefix=''))
+ return _unicode(GenericTranslator().css_to_xpath(css, prefix=''))
assert xpath('*') == "*"
assert xpath('e') == "e"
@@ -378,11 +377,11 @@ def xpath(css):
# Invalid characters in XPath element names
assert xpath(r'di\a0 v') == (
- "*[name() = 'di\xa0v']")
+ u("*[name() = 'di v']")) # di\xa0v
assert xpath(r'di\[v') == (
"*[name() = 'di[v']")
assert xpath(r'[h\a0 ref]') == (
- "*[attribute::*[name() = 'h\xa0ref']]")
+ u("*[attribute::*[name() = 'h ref']]")) # h\xa0ref
assert xpath(r'[h\]ref]') == (
"*[attribute::*[name() = 'h]ref']]")
From 4f1fb912e808ca8838eaa6d7d51dbcdef23d651b Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 14 Jun 2012 23:36:49 +0200
Subject: [PATCH 012/192] Changelog for 0.7.1
---
CHANGES | 12 ++++++++++++
cssselect/__init__.py | 2 +-
2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/CHANGES b/CHANGES
index 908787b..c278667 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,18 @@
Changelog
=========
+Version 0.7.1
+-------------
+
+Released on 2012-06-14. Code name *remember-to-test-with-tox*.
+
+0.7 broke the parser in Python 2.4 and 2.5; the tests in 2.x.
+Now all is well again.
+
+Also, pseudo-elements are now correctly made lower-case. (They are supposed
+to be case-insensitive.)
+
+
Version 0.7
-----------
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index 2ea6409..f1c00b0 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -18,5 +18,5 @@
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '0.7'
+VERSION = '0.7.1'
__version__ = VERSION
From b0b462453c002c55dfd4cca7dd6ebbb53fc339e7 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 14 Jun 2012 23:40:40 +0200
Subject: [PATCH 013/192] Add a config file for Travis CI.
---
.travis.yml | 13 +++++++++++++
1 file changed, 13 insertions(+)
create mode 100644 .travis.yml
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..487a6ee
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,13 @@
+language: python
+
+python:
+ - "2.5"
+ - "2.6"
+ - "2.7"
+ - "3.1"
+ - "3.2"
+
+install:
+ - pip install --use-mirrors lxml -e .
+
+script: py.test
From 3e5abd8e33db470edc487a518a06f44478b7c9c1 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Fri, 29 Jun 2012 15:13:58 +0200
Subject: [PATCH 014/192] Do the right thing with non-ASCII pseudo-classes. Fix #14
Make sure that getattr() with a default does not raise an
UnicodeError or a TypeError on either Py2 or 3.
Instead, all non-ASCII pseudo-classes are invalid selectors
(as a nonexistent pseudo-class should be.)
---
cssselect/tests.py | 1 +
cssselect/xpath.py | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/cssselect/tests.py b/cssselect/tests.py
index eaf165c..8fce905 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -385,6 +385,7 @@ def xpath(css):
assert xpath(r'[h\]ref]') == (
"*[attribute::*[name() = 'h]ref']]")
+ self.assertRaises(ExpressionError, xpath, u(':fİrst-child'))
self.assertRaises(ExpressionError, xpath, ':first-of-type')
self.assertRaises(ExpressionError, xpath, ':only-of-type')
self.assertRaises(ExpressionError, xpath, ':last-of-type')
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 8f77c86..e48dc52 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -235,6 +235,8 @@ def xpath_negation(self, negation):
def xpath_function(self, function):
"""Translate a functional pseudo-class."""
method = 'xpath_%s_function' % function.name.replace('-', '_')
+ # getattr() with a non-ASCII name fails on Python 2.x
+ method = method.encode('ascii', 'replace').decode('ascii')
method = getattr(self, method, None)
if not method:
raise ExpressionError(
@@ -244,6 +246,8 @@ def xpath_function(self, function):
def xpath_pseudo(self, pseudo):
"""Translate a pseudo-class."""
method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_')
+ # getattr() with a non-ASCII name fails on Python 2.x
+ method = method.encode('ascii', 'replace').decode('ascii')
method = getattr(self, method, None)
if not method:
# TODO: better error message for pseudo-elements?
From 9846271380f1187221d5b404409a54e81a7e84bf Mon Sep 17 00:00:00 2001
From: Simon Potter
Date: Thu, 15 Nov 2012 21:44:32 +1300
Subject: [PATCH 015/192] Use XPath 'lang()' in XML docs.
---
cssselect/tests.py | 38 ++++++++++++++++++++++++++++++++++++++
cssselect/xpath.py | 21 +++++++++++++++------
2 files changed, 53 insertions(+), 6 deletions(-)
diff --git a/cssselect/tests.py b/cssselect/tests.py
index 8fce905..c89e8ce 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -464,6 +464,30 @@ def series(css):
assert series('foo') == None
assert series('n+') == None
+ def test_lang(self):
+ document = etree.fromstring(XMLLANG_IDS)
+ sort_key = dict(
+ (el, count) for count, el in enumerate(document.getiterator())
+ ).__getitem__
+ css_to_xpath = GenericTranslator().css_to_xpath
+
+ def langid(selector):
+ xpath = css_to_xpath(selector)
+ items = document.xpath(xpath)
+ items.sort(key=sort_key)
+ return [element.get('id', 'nil') for element in items]
+
+ assert langid(':lang("EN")') == ['first', 'second', 'third', 'fourth']
+ assert langid(':lang("en-us")') == ['second', 'fourth']
+ assert langid(':lang(en-nz)') == ['third']
+ assert langid(':lang(fr)') == ['fifth']
+ assert langid(':lang(ru)') == ['sixth']
+ assert langid(":lang('ZH')") == ['eighth']
+ assert langid(':lang(de) :lang(zh)') == ['eighth']
+ assert langid(':lang(en), :lang(zh)') == [
+ 'first', 'second', 'third', 'fourth', 'eighth']
+ assert langid(':lang(es)') == []
+
def test_select(self):
document = etree.fromstring(HTML_IDS)
sort_key = dict(
@@ -675,6 +699,20 @@ def count(selector):
assert count('div[class!=madeup]') == 243 # ? Seems right
assert count('div[class~=dialog]') == 51 # ? Seems right
+XMLLANG_IDS = '''
+
+ a
+ b
+ c
+ d
+ e
+ f
+
+
+
+
+'''
+
HTML_IDS = '''
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index e48dc52..c67c0e6 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -410,12 +410,7 @@ def xpath_lang_function(self, xpath, function):
% function.arguments)
value = function.arguments[0].value
return xpath.add_condition(
- "ancestor-or-self::*[@lang][1][starts-with(concat("
- # XPath 1.0 has no lower-case function...
- "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
- "'abcdefghijklmnopqrstuvwxyz'), "
- "'-'), %s)]"
- % (self.lang_attribute, self.xpath_literal(value.lower() + '-')))
+ "lang(%s)" % (self.xpath_literal(value)))
# Pseudo: dispatch by pseudo-class name
@@ -575,6 +570,20 @@ def xpath_checked_pseudo(self, xpath):
"and (name(.) = 'input' or name(.) = 'command')"
"and (@type = 'checkbox' or @type = 'radio'))")
+ def xpath_lang_function(self, xpath, function):
+ if function.argument_types() not in (['STRING'], ['IDENT']):
+ raise ExpressionError(
+ "Expected a single string or ident for :lang(), got %r"
+ % function.arguments)
+ value = function.arguments[0].value
+ return xpath.add_condition(
+ "ancestor-or-self::*[@lang][1][starts-with(concat("
+ # XPath 1.0 has no lower-case function...
+ "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
+ "'abcdefghijklmnopqrstuvwxyz'), "
+ "'-'), %s)]"
+ % (self.lang_attribute, self.xpath_literal(value.lower() + '-')))
+
def xpath_link_pseudo(self, xpath):
return xpath.add_condition("@href and "
"(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')")
From 7712c2a124910bfb9126f64aad4f2f84e0ee6e58 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 15 Nov 2012 09:58:18 +0100
Subject: [PATCH 016/192] Add Simon Potter to authors.
---
AUTHORS | 1 +
1 file changed, 1 insertion(+)
diff --git a/AUTHORS b/AUTHORS
index 8c69e8f..a4ae5f1 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,4 +1,5 @@
Ian Bicking
Laurence Rowe
+Simon Potter
Simon Sapin
Stefan Behnel
From ac10a368052b2975384f65541d4473c46769b96e Mon Sep 17 00:00:00 2001
From: Simon Potter
Date: Thu, 15 Nov 2012 23:07:06 +1300
Subject: [PATCH 017/192] Using string-length() to test for emptiness of text
nodes.
Whitespace is *not* empty.
---
cssselect/tests.py | 6 +++---
cssselect/xpath.py | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/cssselect/tests.py b/cssselect/tests.py
index c89e8ce..4c64275 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -344,9 +344,9 @@ def xpath(css):
assert xpath('e:only-of-type') == (
"e[last() = 1]")
assert xpath('e:empty') == (
- "e[not(*) and not(normalize-space())]")
+ "e[not(*) and not(string-length())]")
assert xpath('e:EmPTY') == (
- "e[not(*) and not(normalize-space())]")
+ "e[not(*) and not(string-length())]")
assert xpath('e:root') == (
"e[not(parent::*)]")
assert xpath('e:hover') == (
@@ -575,7 +575,7 @@ def pcss(main, *selectors, **kwargs):
assert pcss('p:only-of-type') == ['paragraph']
assert pcss('a:empty', 'a:EMpty') == ['name-anchor']
assert pcss('li:empty') == [
- 'third-li', 'fourth-li', 'fifth-li', 'sixth-li', 'seventh-li']
+ 'third-li', 'fourth-li', 'fifth-li', 'sixth-li']
assert pcss(':root', 'html:root') == ['html']
assert pcss('li:root', '* :root') == []
assert pcss('*:contains("link")', ':CONtains("link")') == [
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index c67c0e6..a56b697 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -454,7 +454,7 @@ def xpath_only_of_type_pseudo(self, xpath):
return xpath.add_condition('last() = 1')
def xpath_empty_pseudo(self, xpath):
- return xpath.add_condition("not(*) and not(normalize-space())")
+ return xpath.add_condition("not(*) and not(string-length())")
def pseudo_never_matches(self, xpath):
"""Common implementation for pseudo-classes that never match."""
From 91e752d4994f3d95b89b850b96672f47418623d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Gra=C3=B1a?=
Date: Thu, 10 Jan 2013 14:27:46 -0200
Subject: [PATCH 018/192] Let extended translators override what XPathExpr
class is used
GenericTranslator offers an excellent way to support custom selectors
through method hooks, allowing a *new* XPathExpr to be returned from these
hooks.
The main problem is that returning extended `XPathExpr` instances fails
for combiners because `XPathExpr.join()` assumes a fixed set of XPathExpr
instance attributes (element, path and condition) to copy from `other` to `self`.
`XPathExpr.join()` can be extended in a subclass, but that requires the `left`
xpath instance to be of the extended class too, and right now we can
only control the type of the `right` xpath.
The problem can be mitigated by recasting every xpath returned from
`GenericTranslator.xpath_element()`, which only works because it is the
only hook that instantiates `XPathExpr`.
The proposed change allows projects extending GenericTranslator to also
safely extend `XPathExpr` to correctly support combiners in extended
features.
---
cssselect/xpath.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index a56b697..4b74997 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -146,6 +146,9 @@ class GenericTranslator(object):
lower_case_attribute_names = False
lower_case_attribute_values = False
+ # class used to represent and xpath expression
+ xpathexpr_cls = XPathExpr
+
def css_to_xpath(self, css, prefix='descendant-or-self::'):
"""Translate a *group of selectors* to XPath.
@@ -190,7 +193,7 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::'):
if not tree:
raise TypeError('Expected a parsed selector, got %r' % (selector,))
xpath = self.xpath(tree)
- assert isinstance(xpath, XPathExpr) # help debug a missing 'return'
+ assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return'
return (prefix or '') + _unicode(xpath)
@staticmethod
@@ -305,7 +308,7 @@ def xpath_element(self, selector):
# http://www.w3.org/TR/css3-namespace/#prefixes
element = '%s:%s' % (selector.namespace, element)
safe = safe and is_safe_name(selector.namespace)
- xpath = XPathExpr(element=element)
+ xpath = self.xpathexpr_cls(element=element)
if not safe:
xpath.add_name_test()
return xpath
From eac05a4743a52a4e09de181b3db362bb21daa672 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 10 Jan 2013 19:07:24 +0100
Subject: [PATCH 019/192] =?UTF-8?q?HERE=C2=A0BE=C2=A0DRAGONS?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
cssselect/xpath.py | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 4b74997..69e636d 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -102,6 +102,19 @@ class GenericTranslator(object):
of element names and attribute names.
"""
+
+ ####
+ #### HERE BE DRAGONS
+ ####
+ #### You are welcome to hook into this to change some behavior,
+ #### but do so at your own risks.
+ #### Until is has recieved a lot more work and review,
+ #### I reserve the right to change this API in backward-incompatible ways
+ #### with any minor version of cssselect.
+ #### See https://github.com/SimonSapin/cssselect/pull/22
+ #### -- Simon Sapin.
+ ####
+
combinator_mapping = {
' ': 'descendant',
'>': 'child',
From 1b95a44cd990abe3682b8c2ec250478d3673fd97 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Fri, 15 Mar 2013 16:53:28 +0100
Subject: [PATCH 020/192] Changelog for 0.8
---
CHANGES | 24 ++++++++++++++++++++++++
cssselect/__init__.py | 2 +-
tox.ini | 2 +-
3 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/CHANGES b/CHANGES
index c278667..fdc1615 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,30 @@
Changelog
=========
+Version 0.8
+-----------
+
+Released on 2013-03-15.
+
+Improvements:
+
+* `#22 `_
+ Let extended translators override what XPathExpr class is used
+* `#19 `_
+ Use the built-in ``lang()`` XPath function
+ for implementing the ``:lang()`` pseudo-class
+ with XML documents.
+ This is probably faster than ``ancestor-or-self::``.
+
+Bug fixes:
+
+* `#14 `_
+ Fix non-ASCII pseudo-classes. (Invalid selector instead of crash.)
+* `#20 `_
+ As per the spec, elements containing only whitespace are not considered empty
+ for the ``:empty`` pseudo-class.
+
+
Version 0.7.1
-------------
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index f1c00b0..fd341ab 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -18,5 +18,5 @@
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '0.7.1'
+VERSION = '0.8'
__version__ = VERSION
diff --git a/tox.ini b/tox.ini
index 9a552c2..ad83007 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = py24,py25,py26,py27,py31,py32
+envlist = py24,py25,py26,py27,py31,py32,py33
[testenv]
deps=lxml
From e6de035685c666eda774deda8db4b69a43cca64d Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Sun, 15 Sep 2013 11:10:42 +0200
Subject: [PATCH 021/192] Travis-CI config: remove Python 3.1, add Python 3.3
Python 3.1 is not supported anymore on Travis
http://about.travis-ci.org/docs/user/ci-environment/#Python-VM-images
---
.travis.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.travis.yml b/.travis.yml
index 487a6ee..4c5fdf7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,8 +4,8 @@ python:
- "2.5"
- "2.6"
- "2.7"
- - "3.1"
- "3.2"
+ - "3.3"
install:
- pip install --use-mirrors lxml -e .
From 9fff95b04e89d2afa12cd1c73a0bc7da475ccff2 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Sun, 15 Sep 2013 11:02:09 +0200
Subject: [PATCH 022/192] Remove string() conversion of element in "contains"
function translation
---
cssselect/tests.py | 4 ++--
cssselect/xpath.py | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/cssselect/tests.py b/cssselect/tests.py
index 4c64275..851cc85 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -352,9 +352,9 @@ def xpath(css):
assert xpath('e:hover') == (
"e[0]") # never matches
assert xpath('e:contains("foo")') == (
- "e[contains(string(.), 'foo')]")
+ "e[contains(., 'foo')]")
assert xpath('e:ConTains(foo)') == (
- "e[contains(string(.), 'foo')]")
+ "e[contains(., 'foo')]")
assert xpath('e.warning') == (
"e[@class and contains("
"concat(' ', normalize-space(@class), ' '), ' warning ')]")
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 69e636d..4821099 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -417,7 +417,7 @@ def xpath_contains_function(self, xpath, function):
% function.arguments)
value = function.arguments[0].value
return xpath.add_condition(
- 'contains(string(.), %s)' % self.xpath_literal(value))
+ 'contains(., %s)' % self.xpath_literal(value))
def xpath_lang_function(self, xpath, function):
if function.argument_types() not in (['STRING'], ['IDENT']):
From ecda4b9a1d55102f09eb27606b8ff04f6b4a2854 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Sun, 15 Sep 2013 10:32:27 +0200
Subject: [PATCH 023/192] Nicer exception on unknown node type in the parsed
tree
---
cssselect/xpath.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 69e636d..4b42614 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -226,7 +226,9 @@ def xpath_literal(s):
def xpath(self, parsed_selector):
"""Translate any parsed selector object."""
type_name = type(parsed_selector).__name__
- method = getattr(self, 'xpath_%s' % type_name.lower())
+ method = getattr(self, 'xpath_%s' % type_name.lower(), None)
+ if method is None:
+ raise ExpressionError('%s is not supported.' % type_name)
return method(parsed_selector)
From 039a844bceb1584306d28647b4bf6170a324b69b Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Sun, 15 Sep 2013 22:58:45 +0200
Subject: [PATCH 024/192] Add Paul Tremberth to authors.
---
AUTHORS | 1 +
1 file changed, 1 insertion(+)
diff --git a/AUTHORS b/AUTHORS
index a4ae5f1..43be02e 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,5 +1,6 @@
Ian Bicking
Laurence Rowe
+Paul Tremberth
Simon Potter
Simon Sapin
Stefan Behnel
From a4b12ae07c1d7ef71b7aae15034b259d209d7960 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Sun, 15 Sep 2013 23:54:54 +0200
Subject: [PATCH 025/192] Add parser support for functional pseudo-elements.
See #29.
---
cssselect/parser.py | 63 +++++++++++++++++++++++++++----------
cssselect/tests.py | 76 +++++++++++++++++++++++++++++++++++++++++++--
cssselect/xpath.py | 34 +++++++++++++-------
3 files changed, 141 insertions(+), 32 deletions(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index f423f30..217ecd5 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -57,7 +57,8 @@ class Selector(object):
"""
def __init__(self, tree, pseudo_element=None):
self.parsed_tree = tree
- if pseudo_element is not None:
+ if pseudo_element is not None and not isinstance(
+ pseudo_element, FunctionalPseudoElement):
pseudo_element = ascii_lower(pseudo_element)
#: The identifier for the pseudo-element as a string, or ``None``.
#:
@@ -78,6 +79,8 @@ def __init__(self, tree, pseudo_element=None):
self.pseudo_element = pseudo_element
def __repr__(self):
+ if isinstance(self.pseudo_element, FunctionalPseudoElement):
+ pseudo_element = repr(self.pseudo_element)
if self.pseudo_element:
pseudo_element = '::%s' % self.pseudo_element
else:
@@ -115,6 +118,28 @@ def specificity(self):
return a, b, c
+class FunctionalPseudoElement(object):
+ """
+ Represents selector::name(expr)
+ """
+ def __init__(self, name, arguments):
+ self.name = ascii_lower(name)
+ self.arguments = arguments
+
+ def __repr__(self):
+ return '%s[::%s(%r)]' % (
+ self.__class__.__name__, self.name,
+ [token.value for token in self.arguments])
+
+ def argument_types(self):
+ return [token.type for token in self.arguments]
+
+ def specificity(self):
+ a, b, c = self.selector.specificity()
+ b += 1
+ return a, b, c
+
+
class Function(object):
"""
Represents selector:name(expr)
@@ -398,6 +423,10 @@ def parse_simple_selector(stream, inside_negation=False):
if stream.peek() == ('DELIM', ':'):
stream.next()
pseudo_element = stream.next_ident()
+ if stream.peek() == ('DELIM', '('):
+ stream.next()
+ pseudo_element = FunctionalPseudoElement(
+ pseudo_element, parse_arguments(stream))
continue
ident = stream.next_ident()
if ident.lower() in ('first-line', 'first-letter',
@@ -425,22 +454,7 @@ def parse_simple_selector(stream, inside_negation=False):
raise SelectorSyntaxError("Expected ')', got %s" % (next,))
result = Negation(result, argument)
else:
- arguments = []
- while 1:
- stream.skip_whitespace()
- next = stream.next()
- if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [
- ('DELIM', '+'), ('DELIM', '-')]:
- arguments.append(next)
- elif next == ('DELIM', ')'):
- break
- else:
- raise SelectorSyntaxError(
- "Expected an argument, got %s" % (next,))
- if not arguments:
- raise SelectorSyntaxError(
- "Expected at least one argument, got %s" % (next,))
- result = Function(result, ident, arguments)
+ result = Function(result, ident, parse_arguments(stream))
else:
raise SelectorSyntaxError(
"Expected selector, got %s" % (peek,))
@@ -450,6 +464,21 @@ def parse_simple_selector(stream, inside_negation=False):
return result, pseudo_element
+def parse_arguments(stream):
+ arguments = []
+ while 1:
+ stream.skip_whitespace()
+ next = stream.next()
+ if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [
+ ('DELIM', '+'), ('DELIM', '-')]:
+ arguments.append(next)
+ elif next == ('DELIM', ')'):
+ return arguments
+ else:
+ raise SelectorSyntaxError(
+ "Expected an argument, got %s" % (next,))
+
+
def parse_attrib(selector, stream):
stream.skip_whitespace()
attrib = stream.next_ident_or_star()
diff --git a/cssselect/tests.py b/cssselect/tests.py
index 851cc85..7665733 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -23,7 +23,9 @@
from lxml import etree, html
from cssselect import (parse, GenericTranslator, HTMLTranslator,
SelectorSyntaxError, ExpressionError)
-from cssselect.parser import tokenize, parse_series, _unicode
+from cssselect.parser import (tokenize, parse_series, _unicode,
+ FunctionalPseudoElement)
+from cssselect.xpath import _unicode_safe_getattr, XPathExpr
if sys.version_info[0] < 3:
@@ -150,6 +152,7 @@ def parse_pseudo(css):
result = []
for selector in parse(css):
pseudo = selector.pseudo_element
+ pseudo = _unicode(pseudo) if pseudo else pseudo
# No Symbol here
assert pseudo is None or type(pseudo) is _unicode
selector = repr(selector.parsed_tree).replace("(u'", "('")
@@ -176,6 +179,10 @@ def parse_one(css):
assert parse_one('::firsT-linE') == ('Element[*]', 'first-line')
assert parse_one('::firsT-letteR') == ('Element[*]', 'first-letter')
+ assert parse_one('::text-content') == ('Element[*]', 'text-content')
+ assert parse_one('::attr(name)') == (
+ "Element[*]", "FunctionalPseudoElement[::attr(['name'])]")
+
assert parse_one('::Selection') == ('Element[*]', 'selection')
assert parse_one('foo:after') == ('Element[foo]', 'after')
assert parse_one('foo::selection') == ('Element[foo]', 'selection')
@@ -264,8 +271,6 @@ def get_error(css):
"Expected ident or '*', got ")
assert get_error('[foo=#]') == (
"Expected string or ident, got ")
- assert get_error(':nth-child()') == (
- "Expected at least one argument, got ")
assert get_error('[href]a') == (
"Expected selector, got ")
assert get_error('[rel=stylesheet]') == None
@@ -436,6 +441,71 @@ def test_unicode_escapes(self):
assert css_to_xpath('*[aval="\'\\20\r\n \'"]') == (
'''descendant-or-self::*[@aval = "' '"]''')
+ def test_xpath_pseudo_elements(self):
+ class CustomTranslator(GenericTranslator):
+ def xpath_pseudo_element(self, xpath, pseudo_element):
+ if isinstance(pseudo_element, FunctionalPseudoElement):
+ method = 'xpath_%s_functional_pseudo_element' % (
+ pseudo_element.name.replace('-', '_'))
+ method = _unicode_safe_getattr(self, method, None)
+ if not method:
+ raise ExpressionError(
+ "The functional pseudo-element ::%s() is unknown"
+ % functional.name)
+ xpath = method(xpath, pseudo_element.arguments)
+ else:
+ method = 'xpath_%s_simple_pseudo_element' % (
+ pseudo_element.replace('-', '_'))
+ method = _unicode_safe_getattr(self, method, None)
+ if not method:
+ raise ExpressionError(
+ "The pseudo-element ::%s is unknown"
+ % pseudo_element)
+ xpath = method(xpath)
+ return xpath
+
+ # functional pseudo-class:
+ # elements that have a certain number of attributes
+ def xpath_nb_attr_function(self, xpath, function):
+ nb_attributes = int(function.arguments[0].value)
+ return xpath.add_condition(
+ "count(@*)=%d" % nb_attributes)
+
+ # pseudo-class:
+ # elements that have 5 attributes
+ def xpath_five_attributes_pseudo(self, xpath):
+ return xpath.add_condition("count(@*)=5")
+
+ # functional pseudo-element:
+ # element's attribute by name
+ def xpath_attr_functional_pseudo_element(self, xpath, arguments):
+ attribute_name = arguments[0].value
+ other = XPathExpr('@%s' % attribute_name, '', )
+ return xpath.join('/', other)
+
+ # pseudo-element:
+ # element's text() nodes
+ def xpath_text_node_simple_pseudo_element(self, xpath):
+ other = XPathExpr('text()', '', )
+ return xpath.join('/', other)
+
+ # pseudo-element:
+ # element's href attribute
+ def xpath_attr_href_simple_pseudo_element(self, xpath):
+ other = XPathExpr('@href', '', )
+ return xpath.join('/', other)
+
+ def xpath(css):
+ return _unicode(CustomTranslator().css_to_xpath(css))
+
+ assert xpath(':five-attributes') == "descendant-or-self::*[count(@*)=5]"
+ assert xpath(':nb-attr(3)') == "descendant-or-self::*[count(@*)=3]"
+ assert xpath('::attr(href)') == "descendant-or-self::*/@href"
+ assert xpath('::text-node') == "descendant-or-self::*/text()"
+ assert xpath('::attr-href') == "descendant-or-self::*/@href"
+ assert xpath('p img::attr(src)') == (
+ "descendant-or-self::p/descendant-or-self::*/img/@src")
+
def test_series(self):
def series(css):
selector, = parse(':nth-child(%s)' % css)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 3e742bf..e37a742 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -26,6 +26,12 @@
_unicode = str
+def _unicode_safe_getattr(obj, name, default=None):
+ # getattr() with a non-ASCII name fails on Python 2.x
+ name = name.encode('ascii', 'replace').decode('ascii')
+ return getattr(obj, name, default)
+
+
class ExpressionError(SelectorError, RuntimeError):
"""Unknown or unsupported selector (eg. pseudo-class)."""
@@ -178,14 +184,9 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'):
The equivalent XPath 1.0 expression as an Unicode string.
"""
- selectors = parse(css)
- for selector in selectors:
- if selector.pseudo_element:
- raise ExpressionError('Pseudo-elements are not supported.')
-
return ' | '.join(
self.selector_to_xpath(selector, prefix)
- for selector in selectors)
+ for selector in parse(css))
def selector_to_xpath(self, selector, prefix='descendant-or-self::'):
"""Translate a parsed selector to XPath.
@@ -207,8 +208,21 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::'):
raise TypeError('Expected a parsed selector, got %r' % (selector,))
xpath = self.xpath(tree)
assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return'
+ if selector.pseudo_element:
+ xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
return (prefix or '') + _unicode(xpath)
+ def xpath_pseudo_element(self, xpath, pseudo_element):
+ """Translate a pseudo-element.
+
+ Defaults to not supporting pseudo-elements at all,
+ but can be overridden by sub-classes.
+
+ """
+ if pseudo_element:
+ raise ExpressionError('Pseudo-elements are not supported.')
+ return xpath
+
@staticmethod
def xpath_literal(s):
s = _unicode(s)
@@ -253,9 +267,7 @@ def xpath_negation(self, negation):
def xpath_function(self, function):
"""Translate a functional pseudo-class."""
method = 'xpath_%s_function' % function.name.replace('-', '_')
- # getattr() with a non-ASCII name fails on Python 2.x
- method = method.encode('ascii', 'replace').decode('ascii')
- method = getattr(self, method, None)
+ method = _unicode_safe_getattr(self, method, None)
if not method:
raise ExpressionError(
"The pseudo-class :%s() is unknown" % function.name)
@@ -264,9 +276,7 @@ def xpath_function(self, function):
def xpath_pseudo(self, pseudo):
"""Translate a pseudo-class."""
method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_')
- # getattr() with a non-ASCII name fails on Python 2.x
- method = method.encode('ascii', 'replace').decode('ascii')
- method = getattr(self, method, None)
+ method = _unicode_safe_getattr(self, method, None)
if not method:
# TODO: better error message for pseudo-elements?
raise ExpressionError(
From f8a89bfae5f76499aa8795fe97b7fff8841ed729 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Sun, 15 Sep 2013 23:58:21 +0200
Subject: [PATCH 026/192] Document functional pseudo-elements.
---
cssselect/parser.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 217ecd5..1f9e7df 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -60,7 +60,9 @@ def __init__(self, tree, pseudo_element=None):
if pseudo_element is not None and not isinstance(
pseudo_element, FunctionalPseudoElement):
pseudo_element = ascii_lower(pseudo_element)
- #: The identifier for the pseudo-element as a string, or ``None``.
+ #: A :class:`FunctionalPseudoElement`,
+ #: or the identifier for the pseudo-element as a string,
+ # or ``None``.
#:
#: +-------------------------+----------------+----------------+
#: | | Selector | Pseudo-element |
From e1a0f0def44aff9c4769cbd2fe924e80e2c45c1b Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Mon, 16 Sep 2013 00:01:48 +0200
Subject: [PATCH 027/192] Remove unnecessary check.
---
cssselect/xpath.py | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index e37a742..a5d3b9b 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -219,9 +219,7 @@ def xpath_pseudo_element(self, xpath, pseudo_element):
but can be overridden by sub-classes.
"""
- if pseudo_element:
- raise ExpressionError('Pseudo-elements are not supported.')
- return xpath
+ raise ExpressionError('Pseudo-elements are not supported.')
@staticmethod
def xpath_literal(s):
From d7e78ee9359407bdc96417d6ecf730f7af02889b Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Mon, 16 Sep 2013 13:36:04 +0200
Subject: [PATCH 028/192] Fix exception message in functional pseudo-element
test example
---
cssselect/tests.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cssselect/tests.py b/cssselect/tests.py
index 7665733..e7c0193 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -451,7 +451,7 @@ def xpath_pseudo_element(self, xpath, pseudo_element):
if not method:
raise ExpressionError(
"The functional pseudo-element ::%s() is unknown"
- % functional.name)
+ % pseudo_element.name)
xpath = method(xpath, pseudo_element.arguments)
else:
method = 'xpath_%s_simple_pseudo_element' % (
From 06ca3147aedd9d4b1d01aac9e2b1de627dbe1827 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Fri, 11 Oct 2013 16:16:29 +0100
Subject: [PATCH 029/192] Document FunctionalPseudoElement.
---
cssselect/__init__.py | 4 ++--
cssselect/parser.py | 41 ++++++++++++++++++++++++++++-------------
docs/index.rst | 2 ++
3 files changed, 32 insertions(+), 15 deletions(-)
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index fd341ab..bb0d59d 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -13,8 +13,8 @@
"""
-from cssselect.parser import (parse, Selector, SelectorError,
- SelectorSyntaxError)
+from cssselect.parser import (parse, Selector, FunctionalPseudoElement,
+ SelectorError, SelectorSyntaxError)
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 1f9e7df..f2b32b7 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -64,18 +64,20 @@ def __init__(self, tree, pseudo_element=None):
#: or the identifier for the pseudo-element as a string,
# or ``None``.
#:
- #: +-------------------------+----------------+----------------+
- #: | | Selector | Pseudo-element |
- #: +=========================+================+================+
- #: | CSS3 syntax | ``a::before`` | ``'before'`` |
- #: +-------------------------+----------------+----------------+
- #: | Older syntax | ``a:before`` | ``'before'`` |
- #: +-------------------------+----------------+----------------+
- #: | From the Lists3_ draft, | ``li::marker`` | ``'marker'`` |
- #: | not in Selectors3 | | |
- #: +-------------------------+----------------+----------------+
- #: | Invalid pseudo-class | ``li:marker`` | ``None`` |
- #: +-------------------------+----------------+----------------+
+ #: +-------------------------+----------------+--------------------------------+
+ #: | | Selector | Pseudo-element |
+ #: +=========================+================+================================+
+ #: | CSS3 syntax | ``a::before`` | ``'before'`` |
+ #: +-------------------------+----------------+--------------------------------+
+ #: | Older syntax | ``a:before`` | ``'before'`` |
+ #: +-------------------------+----------------+--------------------------------+
+ #: | From the Lists3_ draft, | ``li::marker`` | ``'marker'`` |
+ #: | not in Selectors3 | | |
+ #: +-------------------------+----------------+--------------------------------+
+ #: | Invalid pseudo-class | ``li:marker`` | ``None`` |
+ #: +-------------------------+----------------+--------------------------------+
+ #: | Functinal | ``a::foo(2)`` | ``FunctionalPseudoElement(…)`` |
+ #: +-------------------------+----------------+--------------------------------+
#:
#: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement
self.pseudo_element = pseudo_element
@@ -122,7 +124,20 @@ def specificity(self):
class FunctionalPseudoElement(object):
"""
- Represents selector::name(expr)
+ Represents selector::name(arguments)
+
+ .. attribute:: name
+
+ The name (identifier) of the pseudo-element, as a string.
+
+ .. attribute:: arguments
+
+ The arguments of the pseudo-element, as a list of tokens.
+
+ **Note:** tokens are not part of the public API,
+ and may change between cssselect versions.
+ Use at your own risks.
+
"""
def __init__(self, name, arguments):
self.name = ascii_lower(name)
diff --git a/docs/index.rst b/docs/index.rst
index 9aec19e..4ac7401 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -54,6 +54,8 @@ is a group of two selectors.
.. autoclass:: Selector()
:members:
+.. autoclass:: FunctionalPseudoElement
+
.. autoclass:: GenericTranslator
:members: css_to_xpath, selector_to_xpath
From 12c00f5a14f3e68064bd15af15466fa8a27e0026 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Fri, 11 Oct 2013 16:17:00 +0100
Subject: [PATCH 030/192] Changelog for 0.9
Releasing on PyPI fixes #33.
---
CHANGES | 8 ++++++++
cssselect/__init__.py | 2 +-
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/CHANGES b/CHANGES
index fdc1615..5527a2e 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,14 @@
Changelog
=========
+Version 0.9
+-----------
+
+Released on 2013-10-11.
+
+Add parser support for :attr:`functional pseudo-elements `.
+
+
Version 0.8
-----------
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index bb0d59d..1d0438b 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -18,5 +18,5 @@
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '0.8'
+VERSION = '0.9'
__version__ = VERSION
From 4230c8d210c6ff30e27869a75bc39524f8d4e246 Mon Sep 17 00:00:00 2001
From: Mikhail Korobov
Date: Tue, 15 Oct 2013 17:15:30 +0600
Subject: [PATCH 031/192] Fix tox.ini for Python 2.5
See https://bitbucket.org/hpk42/tox/issue/117/tox-160-breaks-when-running-tests-under
---
tox.ini | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tox.ini b/tox.ini
index ad83007..dde0507 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,3 +4,7 @@ envlist = py24,py25,py26,py27,py31,py32,py33
[testenv]
deps=lxml
commands = python cssselect/tests.py
+
+[testenv:py25]
+setenv =
+ PIP_INSECURE = 1
From c4adf6424d243b5141bc62f920ff078fead4939e Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Tue, 15 Oct 2013 15:56:53 +0100
Subject: [PATCH 032/192] Drop official support for Python 2.4 and 3.1.
Fix #35
Fix #36
---
CHANGES | 10 ++++++++++
README.rst | 2 +-
setup.py | 2 --
tox.ini | 2 +-
4 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/CHANGES b/CHANGES
index 5527a2e..d8aadeb 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,16 @@
Changelog
=========
+Version 0.10
+------------
+
+Not released yet.
+
+Drop official support for Python 2.4 and 3.1, as testing was becoming difficult.
+Nothing will break overnight, but future releases may on may not work on these versions.
+Older releases will remain available on PyPI.
+
+
Version 0.9
-----------
diff --git a/README.rst b/README.rst
index fa53a5b..f523c7f 100644
--- a/README.rst
+++ b/README.rst
@@ -17,7 +17,7 @@ extracted as a stand-alone project.
Quick facts:
* Free software: BSD licensed
-* Compatible with Python 2.4+ and 3.x
+* Compatible with Python 2.5+ and 3.2+
* Latest documentation `on python.org `_
* Source, issues and pull requests `on Github
`_
diff --git a/setup.py b/setup.py
index 4f9b076..bd1e385 100644
--- a/setup.py
+++ b/setup.py
@@ -34,12 +34,10 @@
'Intended Audience :: Developers',
'License :: OSI Approved :: BSD License',
'Programming Language :: Python :: 2',
- 'Programming Language :: Python :: 2.4',
'Programming Language :: Python :: 2.5',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.1',
'Programming Language :: Python :: 3.2',
],
**extra_kwargs
diff --git a/tox.ini b/tox.ini
index dde0507..ca053d8 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = py24,py25,py26,py27,py31,py32,py33
+envlist = py25,py26,py27,py32,py33
[testenv]
deps=lxml
From efc1f7c2485eba5355c0e3c2662a7ba9a5f39c93 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Tue, 15 Oct 2013 15:58:39 +0100
Subject: [PATCH 033/192] Ack some contributors.
---
AUTHORS | 3 +++
1 file changed, 3 insertions(+)
diff --git a/AUTHORS b/AUTHORS
index 43be02e..bf826b9 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,6 +1,9 @@
+Daniel Graña
Ian Bicking
Laurence Rowe
+Mikhail Korobov
Paul Tremberth
Simon Potter
Simon Sapin
Stefan Behnel
+Varialus
From d29ac49aa67fa24ada2ffeacb9ccc6e3d56e0c27 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 17 Oct 2013 14:28:02 +0100
Subject: [PATCH 034/192] Switch back to default to ignoring pseudo-elements
... rather than rejecting them.
Fix Kozea/WeasyPrint#128
---
cssselect/tests.py | 5 +++++
cssselect/xpath.py | 20 ++++++++++++--------
2 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/cssselect/tests.py b/cssselect/tests.py
index e7c0193..8b69740 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -380,6 +380,11 @@ def xpath(css):
assert xpath('div#container p') == (
"div[@id = 'container']/descendant-or-self::*/p")
+ selector, = parse('e:after')
+ assert selector.pseudo_element == 'after'
+ # Pseudo-element is ignored:
+ assert GenericTranslator().selector_to_xpath(selector, prefix='') == "e"
+
# Invalid characters in XPath element names
assert xpath(r'di\a0 v') == (
u("*[name() = 'di v']")) # di\xa0v
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index a5d3b9b..8d8b1d3 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -184,19 +184,23 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'):
The equivalent XPath 1.0 expression as an Unicode string.
"""
- return ' | '.join(
- self.selector_to_xpath(selector, prefix)
- for selector in parse(css))
+ return ' | '.join(self.selector_to_xpath(selector, prefix,
+ translate_pseudo_elements=True)
+ for selector in parse(css))
- def selector_to_xpath(self, selector, prefix='descendant-or-self::'):
+ def selector_to_xpath(self, selector, prefix='descendant-or-self::',
+ translate_pseudo_elements=False):
"""Translate a parsed selector to XPath.
- The :attr:`~Selector.pseudo_element` attribute of the selector
- is ignored. It is the caller's responsibility to reject selectors
- with pseudo-elements, or to account for them somehow.
:param selector:
A parsed :class:`Selector` object.
+ :param translate_pseudo_elements:
+ Unless this is set to ``True`` (as :meth:`css_to_xpath` does),
+ the :attr:`~Selector.pseudo_element` attribute of the selector
+ is ignored.
+ It is the caller's responsibility to reject selectors
+ with pseudo-elements, or to account for them somehow.
:raises:
:class:`ExpressionError` on unknown/unsupported selectors.
:returns:
@@ -208,7 +212,7 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::'):
raise TypeError('Expected a parsed selector, got %r' % (selector,))
xpath = self.xpath(tree)
assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return'
- if selector.pseudo_element:
+ if translate_pseudo_elements and selector.pseudo_element:
xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
return (prefix or '') + _unicode(xpath)
From 070cc0dfb266f96d9cfa5b75f13949f38fa7b7e3 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 17 Oct 2013 14:38:45 +0100
Subject: [PATCH 035/192] Document the 'prefix' parameter.
---
cssselect/xpath.py | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 8d8b1d3..e5e74b9 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -176,6 +176,9 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'):
:param css:
A *group of selectors* as an Unicode string.
+ :param prefix:
+ This string is prepended to the XPath expression for each selector.
+ The default makes selectors scoped to the context node’s subtree.
:raises:
:class:`SelectorSyntaxError` on invalid selectors,
:class:`ExpressionError` on unknown/unsupported selectors,
@@ -195,6 +198,9 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::',
:param selector:
A parsed :class:`Selector` object.
+ :param prefix:
+ This string is prepended to the resulting XPath expression.
+ The default makes selectors scoped to the context node’s subtree.
:param translate_pseudo_elements:
Unless this is set to ``True`` (as :meth:`css_to_xpath` does),
the :attr:`~Selector.pseudo_element` attribute of the selector
From 2bec9474eca74c4e1ca62a5ba1ca3bf781eda482 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 17 Oct 2013 14:38:59 +0100
Subject: [PATCH 036/192] Changelog for 0.9.1
---
CHANGES | 27 ++++++++++++++++++++-------
cssselect/__init__.py | 2 +-
2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/CHANGES b/CHANGES
index d8aadeb..edbbaca 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,14 +1,21 @@
Changelog
=========
-Version 0.10
-------------
+Version 0.9.1
+-------------
-Not released yet.
+Released on 2013-10-17.
-Drop official support for Python 2.4 and 3.1, as testing was becoming difficult.
-Nothing will break overnight, but future releases may on may not work on these versions.
-Older releases will remain available on PyPI.
+* **Backward incompatible change from 0.9**:
+ :meth:`~GenericTranslator.selector_to_xpath` defaults to
+ ignoring pseudo-elements,
+ as it did in 0.8 and previous versions.
+ (:meth:`~GenericTranslator.css_to_xpath` doesn’t change.)
+* Drop official support for Python 2.4 and 3.1,
+ as testing was becoming difficult.
+ Nothing will break overnight,
+ but future releases may or may not work on these versions.
+ Older releases will remain available on PyPI.
Version 0.9
@@ -16,7 +23,13 @@ Version 0.9
Released on 2013-10-11.
-Add parser support for :attr:`functional pseudo-elements `.
+Add parser support for :attr:`functional
+pseudo-elements <Selector.pseudo_element>`.
+
+*Update:*
+This version accidentally introduced a **backward incompatible** change:
+:meth:`~GenericTranslator.selector_to_xpath` defaults to
+rejecting pseudo-elements instead of ignoring them.
Version 0.8
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index 1d0438b..871f1b2 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -18,5 +18,5 @@
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '0.9'
+VERSION = '0.9.1'
__version__ = VERSION
From 2db1cd30a2e8ff9e6f53963b2cc4f98a12ba3a2b Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 17 Oct 2013 17:54:15 +0100
Subject: [PATCH 037/192] Fix #39: Selector.__repr__ with functional
pseudo-elements
---
cssselect/parser.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index f2b32b7..8426ab0 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -85,7 +85,7 @@ def __init__(self, tree, pseudo_element=None):
def __repr__(self):
if isinstance(self.pseudo_element, FunctionalPseudoElement):
pseudo_element = repr(self.pseudo_element)
- if self.pseudo_element:
+ elif self.pseudo_element:
pseudo_element = '::%s' % self.pseudo_element
else:
pseudo_element = ''
From fd5944a9490f50c94385d6c949fc1d6f39070d18 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Thu, 17 Oct 2013 17:58:28 +0100
Subject: [PATCH 038/192] Fix #40: broken reporting on selector syntax error
---
cssselect/parser.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 8426ab0..d71fdda 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -684,7 +684,7 @@ def tokenize(s):
if end_pos == len_s:
raise SelectorSyntaxError('Unclosed string at %s' % pos)
if s[end_pos] != quote:
- raise SelectorSyntaxError('Invalid string at %s' % next_pos)
+ raise SelectorSyntaxError('Invalid string at %s' % pos)
value = _sub_simple_escape(_replace_simple,
_sub_unicode_escape(_replace_unicode,
_sub_newline_escape('', match.group())))
From 38e2edb4a2bebbce1972b24d9cabaedfa14459b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Gra=C3=B1a?=
Date: Thu, 17 Oct 2013 15:21:59 -0200
Subject: [PATCH 039/192] Improve selector_to_xpath tests on pseudo-elements
---
cssselect/tests.py | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/cssselect/tests.py b/cssselect/tests.py
index 8b69740..a1fdc9e 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -196,6 +196,21 @@ def parse_one(css):
('Element[bar]', None),
('Element[baz]', 'after')]
+ # Special cases for CSS 2.1 pseudo-elements are ignored by default
+ for pseudo in ('after', 'before', 'first-line', 'first-letter'):
+ selector, = parse('e:%s' % pseudo)
+ assert selector.pseudo_element == pseudo
+ assert GenericTranslator().selector_to_xpath(selector, prefix='') == "e"
+
+ # Pseudo Elements are ignored by default, but if allowed they are not
+ # supported by GenericTranslator
+ tr = GenericTranslator()
+ selector, = parse('e::foo')
+ assert selector.pseudo_element == 'foo'
+ assert tr.selector_to_xpath(selector, prefix='') == "e"
+ self.assertRaises(ExpressionError, tr.selector_to_xpath, selector,
+ translate_pseudo_elements=True)
+
def test_specificity(self):
def specificity(css):
selectors = parse(css)
@@ -380,11 +395,6 @@ def xpath(css):
assert xpath('div#container p') == (
"div[@id = 'container']/descendant-or-self::*/p")
- selector, = parse('e:after')
- assert selector.pseudo_element == 'after'
- # Pseudo-element is ignored:
- assert GenericTranslator().selector_to_xpath(selector, prefix='') == "e"
-
# Invalid characters in XPath element names
assert xpath(r'di\a0 v') == (
u("*[name() = 'di v']")) # di\xa0v
From 9c9aa138b874eec92808fbe4018322bd4c127367 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Fri, 25 Jul 2014 10:02:05 +0200
Subject: [PATCH 040/192] Remove Python 2.5 as Travis CI Py2.5 support ended
See http://blog.travis-ci.com/2013-11-18-upcoming-build-environment-updates/
and https://github.com/travis-ci/travis-ci/issues/1668#issuecomment-29151484
Current supported versions:
- 2.6
- 2.7
- 3.2
- 3.3
- 3.4
- pypy
(Source: http://docs.travis-ci.com/user/ci-environment/#Python-VM-images)
---
.travis.yml | 1 -
1 file changed, 1 deletion(-)
diff --git a/.travis.yml b/.travis.yml
index 4c5fdf7..93ad08a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,7 +1,6 @@
language: python
python:
- - "2.5"
- "2.6"
- "2.7"
- "3.2"
From 4112258c92e594fc71f4749954aef6d868e30b24 Mon Sep 17 00:00:00 2001
From: James Salter
Date: Mon, 26 Jan 2015 13:57:30 +0000
Subject: [PATCH 041/192] change coding: utf8 to utf-8
---
cssselect/__init__.py | 2 +-
cssselect/parser.py | 2 +-
cssselect/tests.py | 2 +-
cssselect/xpath.py | 2 +-
setup.py | 2 +-
5 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index 871f1b2..544a058 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -1,4 +1,4 @@
-# coding: utf8
+# coding: utf-8
"""
CSS Selectors based on XPath
============================
diff --git a/cssselect/parser.py b/cssselect/parser.py
index d71fdda..1383c8c 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -1,4 +1,4 @@
-# coding: utf8
+# coding: utf-8
"""
cssselect.parser
================
diff --git a/cssselect/tests.py b/cssselect/tests.py
index a1fdc9e..ec77c6d 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-# coding: utf8
+# coding: utf-8
"""
Tests for cssselect
===================
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index e5e74b9..1f2bdd5 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -1,4 +1,4 @@
-# coding: utf8
+# coding: utf-8
"""
cssselect.xpath
===============
diff --git a/setup.py b/setup.py
index bd1e385..42bde1f 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-# coding: utf8
+# coding: utf-8
import re
import os.path
From 26e321fa1746f3ad91e1e578e6790566eed68e54 Mon Sep 17 00:00:00 2001
From: Nik Nyby
Date: Fri, 19 Jun 2015 00:35:26 -0400
Subject: [PATCH 042/192] Add python 3.4
---
.travis.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/.travis.yml b/.travis.yml
index 93ad08a..ba56d9a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,6 +5,7 @@ python:
- "2.7"
- "3.2"
- "3.3"
+ - "3.4"
install:
- pip install --use-mirrors lxml -e .
From 5aceab8a2d35e2df4f6277586bceab5cbc0edf16 Mon Sep 17 00:00:00 2001
From: Thomas Grainger
Date: Fri, 2 Oct 2015 00:53:35 +0100
Subject: [PATCH 043/192] Support universal wheels
---
setup.cfg | 3 +++
1 file changed, 3 insertions(+)
diff --git a/setup.cfg b/setup.cfg
index ccddf11..7a3317f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -8,3 +8,6 @@ upload-dir = docs/_build/html
[pytest]
python_files=tests.py
+
+[bdist_wheel]
+universal = 1
From 4234fa7ffe55b323a68e82b41328e90028d45b5f Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Wed, 15 Jun 2016 15:58:22 +0200
Subject: [PATCH 044/192] Amend source code encoding
Following
https://docs.python.org/3/tutorial/interpreter.html#source-code-encoding
https://docs.python.org/2/tutorial/interpreter.html#source-code-encoding
---
cssselect/__init__.py | 2 +-
cssselect/parser.py | 2 +-
cssselect/tests.py | 2 +-
cssselect/xpath.py | 4 ++--
setup.py | 2 +-
5 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index 544a058..d31e164 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -1,4 +1,4 @@
-# coding: utf-8
+# -*- coding: utf-8 -*-
"""
CSS Selectors based on XPath
============================
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 1383c8c..d155252 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -1,4 +1,4 @@
-# coding: utf-8
+# -*- coding: utf-8 -*-
"""
cssselect.parser
================
diff --git a/cssselect/tests.py b/cssselect/tests.py
index ec77c6d..567e3c5 100755
--- a/cssselect/tests.py
+++ b/cssselect/tests.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-# coding: utf-8
+# -*- coding: utf-8 -*-
"""
Tests for cssselect
===================
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 1f2bdd5..7e34f7f 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -1,4 +1,4 @@
-# coding: utf-8
+# -*- coding: utf-8 -*-
"""
cssselect.xpath
===============
@@ -108,7 +108,7 @@ class GenericTranslator(object):
of element names and attribute names.
"""
-
+
####
#### HERE BE DRAGONS
####
diff --git a/setup.py b/setup.py
index 42bde1f..208eef6 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-# coding: utf-8
+# -*- coding: utf-8 -*-
import re
import os.path
From 71e2bd767915faa44b39654c230ebd1c9aabc4fe Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Wed, 15 Jun 2016 16:50:52 +0200
Subject: [PATCH 045/192] Add Python 3.5 env to Travis CI
---
.travis.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/.travis.yml b/.travis.yml
index ba56d9a..acb3eab 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,6 +6,7 @@ python:
- "3.2"
- "3.3"
- "3.4"
+ - "3.5"
install:
- pip install --use-mirrors lxml -e .
From f47fcc111ffc44970a2ca3b9403f0b495b2bc3e6 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Wed, 15 Jun 2016 17:27:12 +0200
Subject: [PATCH 046/192] Update setup.py and README with new links
---
README.rst | 8 ++++----
cssselect/xpath.py | 4 ++--
docs/index.rst | 2 +-
setup.py | 9 ++++++---
4 files changed, 13 insertions(+), 10 deletions(-)
diff --git a/README.rst b/README.rst
index f523c7f..2e7964c 100644
--- a/README.rst
+++ b/README.rst
@@ -9,8 +9,8 @@ to find the matching elements in an XML or HTML document.
This module used to live inside of lxml as ``lxml.cssselect`` before it was
extracted as a stand-alone project.
-.. _CSS3 Selectors: http://www.w3.org/TR/2011/REC-css3-selectors-20110929/
-.. _XPath 1.0: http://www.w3.org/TR/xpath/
+.. _CSS3 Selectors: https://www.w3.org/TR/css3-selectors/
+.. _XPath 1.0: https://www.w3.org/TR/xpath/
.. _lxml: http://lxml.de/
@@ -18,8 +18,8 @@ Quick facts:
* Free software: BSD licensed
* Compatible with Python 2.5+ and 3.2+
-* Latest documentation `on python.org `_
+* Latest documentation `on python.org <https://pythonhosted.org/cssselect/>`_
* Source, issues and pull requests `on Github
- `_
+ <https://github.com/scrapy/cssselect>`_
* Releases `on PyPI `_
* Install with ``pip install cssselect``
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 1f2bdd5..f387239 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -108,7 +108,7 @@ class GenericTranslator(object):
of element names and attribute names.
"""
-
+
####
#### HERE BE DRAGONS
####
@@ -117,7 +117,7 @@ class GenericTranslator(object):
#### Until is has recieved a lot more work and review,
#### I reserve the right to change this API in backward-incompatible ways
#### with any minor version of cssselect.
- #### See https://github.com/SimonSapin/cssselect/pull/22
+ #### See https://github.com/scrapy/cssselect/pull/22
#### -- Simon Sapin.
####
diff --git a/docs/index.rst b/docs/index.rst
index 4ac7401..fe473f7 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -139,7 +139,7 @@ and their signature. You can look at the `source code`_ to see how it works.
However, be aware that this API is not very stable yet. It might change
and break your sub-class.
-.. _source code: https://github.com/SimonSapin/cssselect/blob/master/cssselect/xpath.py
+.. _source code: https://github.com/scrapy/cssselect/blob/master/cssselect/xpath.py
Namespaces
diff --git a/setup.py b/setup.py
index 42bde1f..464d6f5 100644
--- a/setup.py
+++ b/setup.py
@@ -21,12 +21,12 @@
version=VERSION,
author='Ian Bicking',
author_email='ianb@colorstudy.com',
- maintainer='Simon Sapin',
- maintainer_email='simon.sapin@exyr.org',
+ maintainer='Paul Tremberth',
+ maintainer_email='paul.tremberth@gmail.com',
description=
'cssselect parses CSS3 Selectors and translates them to XPath 1.0',
long_description=README,
- url='http://packages.python.org/cssselect/',
+ url='https://pythonhosted.org/cssselect/',
license='BSD',
packages=['cssselect'],
classifiers=[
@@ -39,6 +39,9 @@
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.2',
+ 'Programming Language :: Python :: 3.3',
+ 'Programming Language :: Python :: 3.4',
+ 'Programming Language :: Python :: 3.5',
],
**extra_kwargs
)
From 279a361db001812a8339b543b715281312799805 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Wed, 15 Jun 2016 17:35:55 +0200
Subject: [PATCH 047/192] Use bumpversion for versioning
---
.bumpversion.cfg | 6 ++++++
1 file changed, 6 insertions(+)
create mode 100644 .bumpversion.cfg
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
new file mode 100644
index 0000000..a576861
--- /dev/null
+++ b/.bumpversion.cfg
@@ -0,0 +1,6 @@
+[bumpversion]
+current_version = 0.9.1
+commit = True
+tag = True
+
+[bumpversion:file:cssselect/__init__.py]
From f4273b06ca9711a7e9e85f682fa756b73341b204 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Wed, 15 Jun 2016 18:04:42 +0200
Subject: [PATCH 048/192] Update changelog for upcoming 0.9.2 release
---
CHANGES | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/CHANGES b/CHANGES
index edbbaca..5ae9a39 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,17 @@
Changelog
=========
+Version 0.9.2
+-------------
+
+Released on 2016-06-15.
+
+* Distribute as universal wheel.
+* Add support for Python 3.3, 3.4 and 3.5.
+* Drop support for Python 2.5 as testing is getting difficult.
+* Improve tests on pseudo-elements.
+
+
Version 0.9.1
-------------
From 46728304b93888edb672ad6bd05bccfb6b5f7124 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Wed, 15 Jun 2016 18:05:34 +0200
Subject: [PATCH 049/192] Drop Python 2.5 support
---
setup.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/setup.py b/setup.py
index fc40517..b4d0941 100644
--- a/setup.py
+++ b/setup.py
@@ -34,7 +34,6 @@
'Intended Audience :: Developers',
'License :: OSI Approved :: BSD License',
'Programming Language :: Python :: 2',
- 'Programming Language :: Python :: 2.5',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
From 97e6a3a6b5a932a80de7456b4dca9ad36feabf43 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Wed, 15 Jun 2016 18:10:33 +0200
Subject: [PATCH 050/192] Update authors list
---
AUTHORS | 3 +++
1 file changed, 3 insertions(+)
diff --git a/AUTHORS b/AUTHORS
index bf826b9..70ca409 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,9 +1,12 @@
Daniel Graña
Ian Bicking
+James Salter
Laurence Rowe
Mikhail Korobov
+Nik Nyby
Paul Tremberth
Simon Potter
Simon Sapin
Stefan Behnel
+Thomas Grainger
Varialus
From e687f1eeb97316bbbbdac25cd8a7bf6dfe56700f Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Wed, 15 Jun 2016 18:14:32 +0200
Subject: [PATCH 051/192] Update minimal Python 2.x version compatibility in
README (now 2.6+)
---
README.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.rst b/README.rst
index 2e7964c..155149d 100644
--- a/README.rst
+++ b/README.rst
@@ -17,7 +17,7 @@ extracted as a stand-alone project.
Quick facts:
* Free software: BSD licensed
-* Compatible with Python 2.5+ and 3.2+
+* Compatible with Python 2.6+ and 3.2+
* Latest documentation `on python.org `_
* Source, issues and pull requests `on Github
`_
From 07fdcccf220f8003c4ef44b898e94ecb144d3528 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Wed, 15 Jun 2016 18:26:47 +0200
Subject: [PATCH 052/192] =?UTF-8?q?Bump=20version:=200.9.1=20=E2=86=92=200?=
=?UTF-8?q?.9.2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.bumpversion.cfg | 3 ++-
cssselect/__init__.py | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index a576861..a674e10 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,6 +1,7 @@
[bumpversion]
-current_version = 0.9.1
+current_version = 0.9.2
commit = True
tag = True
[bumpversion:file:cssselect/__init__.py]
+
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index d31e164..ed330ac 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -18,5 +18,5 @@
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '0.9.1'
+VERSION = '0.9.2'
__version__ = VERSION
From af30afc98b928b381a2885e0567c019c130db9d8 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Wed, 15 Jun 2016 22:43:09 +0200
Subject: [PATCH 053/192] Use "classic" theme for Sphinx docs
---
docs/conf.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/conf.py b/docs/conf.py
index 22e6032..b2612d0 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -95,7 +95,7 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
-#html_theme = 'agogo'
+html_theme = 'classic'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
From ebd6f42459e12532233d6c44c71ae6b36a582288 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Thu, 16 Jun 2016 13:16:42 +0200
Subject: [PATCH 054/192] Move tests file + add codecov on Travis
---
.coveragerc | 1 +
.travis.yml | 10 +++++++++-
setup.cfg | 2 +-
tests/__init__.py | 0
cssselect/tests.py => tests/test_cssselect.py | 0
tox.ini | 9 +++++++--
6 files changed, 18 insertions(+), 4 deletions(-)
create mode 100644 tests/__init__.py
rename cssselect/tests.py => tests/test_cssselect.py (100%)
mode change 100755 => 100644
diff --git a/.coveragerc b/.coveragerc
index 2ee5ff3..ed1fac6 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,5 +1,6 @@
[run]
branch = True
+source = cssselect
[report]
exclude_lines =
diff --git a/.travis.yml b/.travis.yml
index acb3eab..8a4af19 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -10,5 +10,13 @@ python:
install:
- pip install --use-mirrors lxml -e .
+ - pip install -U codecov pytest-cov
+ - if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]];
+ then pip uninstall -y coverage && pip install "coverage<4";
+ fi
-script: py.test
+script:
+ py.test --cov-report term --cov=cssselect
+
+after_success:
+ codecov
diff --git a/setup.cfg b/setup.cfg
index 7a3317f..270daee 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -7,7 +7,7 @@ build-dir = docs/_build
upload-dir = docs/_build/html
[pytest]
-python_files=tests.py
+testpaths = tests
[bdist_wheel]
universal = 1
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/cssselect/tests.py b/tests/test_cssselect.py
old mode 100755
new mode 100644
similarity index 100%
rename from cssselect/tests.py
rename to tests/test_cssselect.py
diff --git a/tox.ini b/tox.ini
index ca053d8..a971384 100644
--- a/tox.ini
+++ b/tox.ini
@@ -2,8 +2,13 @@
envlist = py25,py26,py27,py32,py33
[testenv]
-deps=lxml
-commands = python cssselect/tests.py
+deps=
+ lxml
+ pytest
+ pytest-cov
+
+commands =
+ py.test --cov-report term --cov=cssselect
[testenv:py25]
setenv =
From 7c7d1a6786e3c2cb9012bb2abb1fee24a402a0bb Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Wed, 23 Jul 2014 14:32:00 +0200
Subject: [PATCH 055/192] Fix :nth-*(an+b) pseudo-classes selectors for
negative a's
Fix :nth-last-child(1)/:nth-last-of-type(1) translations (fixes #15)
---
cssselect/xpath.py | 82 +++++++++++++++++++++++++++++++----------
tests/test_cssselect.py | 38 +++++++++++++------
2 files changed, 89 insertions(+), 31 deletions(-)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index b0913ab..49e60ce 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -379,37 +379,81 @@ def xpath_nth_child_function(self, xpath, function, last=False,
if add_name_test:
xpath.add_name_test()
xpath.add_star_prefix()
- if a == 0:
- if last:
- b = 'last() - %s' % b
- return xpath.add_condition('position() = %s' % b)
+ # non-last
+ # --------
+ # position() = an+b
+ # -> position() - b = an
+ #
+ # if a < 0:
+ # position() - b <= 0
+ # -> position() <= b
+ #
+ # last
+ # ----
+ # last() - position() = an+b -1
+ # -> last() - position() - b +1 = an
+ #
+ # if a < 0:
+ # last() - position() - b +1 <= 0
+ # -> position() >= last() - b +1
+ #
+ # -b +1 = -(b-1)
if last:
- # FIXME: I'm not sure if this is right
- a = -a
- b = -b
+ b = b - 1
if b > 0:
b_neg = str(-b)
else:
b_neg = '+%s' % (-b)
+ if a == 0:
+ if last:
+ # http://www.w3.org/TR/selectors/#nth-last-child-pseudo
+ # The :nth-last-child(an+b) pseudo-class notation represents
+ # an element that has an+b-1 siblings after it in the document tree
+ #
+ # last() - position() = an+b-1
+ # -> position() = last() -b +1 (for a==0)
+ #
+ if b == 0:
+ b = 'last()'
+ else:
+ b = 'last() %s' % b_neg
+ return xpath.add_condition('position() = %s' % b)
if a != 1:
- expr = ['(position() %s) mod %s = 0' % (b_neg, a)]
+ # last() - position() - b +1 = an
+ if last:
+ left = 'last() - position()'
+ # position() - b = an
+ else:
+ left = 'position()'
+ if b != 0:
+ left = '%s %s' % (left, b_neg)
+ if last or b != 0:
+ left = '(%s)' % left
+ expr = ['%s mod %s = 0' % (left, a)]
else:
expr = []
- if b >= 0:
- expr.append('position() >= %s' % b)
- elif b < 0 and last:
- expr.append('position() < (last() %s)' % b)
+ if last:
+ if b == 0:
+ right = 'last()'
+ else:
+ right = 'last() %s' % b_neg
+ if a > 0:
+ expr.append('(position() <= %s)' % right)
+ else:
+ expr.append('(position() >= %s)' % right)
+ else:
+ # position() > 0 so if b < 0, then position() > b
+ # also, position() >= 1 always
+ if b > 1:
+ if a > 0:
+ expr.append('position() >= %s' % b)
+ else:
+ expr.append('position() <= %s' % b)
+
expr = ' and '.join(expr)
if expr:
xpath.add_condition(expr)
return xpath
- # FIXME: handle an+b, odd, even
- # an+b means every-a, plus b, e.g., 2n+1 means odd
- # 0n+b means b
- # n+0 means a=1, i.e., all elements
- # an means every a elements, i.e., 2n means even
- # -n means -1n
- # -1n+6 means elements 6 and previous
def xpath_nth_last_child_function(self, xpath, function):
return self.xpath_nth_child_function(xpath, function, last=True)
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index 567e3c5..97b9202 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -336,19 +336,30 @@ def xpath(css):
"@hreflang = 'en' or starts-with(@hreflang, 'en-'))]")
assert xpath('e:nth-child(1)') == (
"*/*[name() = 'e' and (position() = 1)]")
+ assert xpath('e:nth-child(3n+2)') == (
+ "*/*[name() = 'e' and ((position() -2) mod 3 = 0 and position() >= 2)]")
+ assert xpath('e:nth-child(3n-2)') == (
+ "*/*[name() = 'e' and ((position() +2) mod 3 = 0)]")
+ assert xpath('e:nth-child(-n+6)') == (
+ "*/*[name() = 'e' and ((position() -6) mod -1 = 0 and position() <= 6)]")
assert xpath('e:nth-last-child(1)') == (
- "*/*[name() = 'e' and (position() = last() - 1)]")
+ "*/*[name() = 'e' and (position() = last())]")
+ assert xpath('e:nth-last-child(2n)') == (
+ "*/*[name() = 'e' and ("
+ "(last() - position() +1) mod 2 = 0 and (position() <= last() +1))]")
assert xpath('e:nth-last-child(2n+2)') == (
"*/*[name() = 'e' and ("
- "(position() +2) mod -2 = 0 and position() < (last() -2))]")
+ "(last() - position() -1) mod 2 = 0 and (position() <= last() -1))]")
+ # represents the two last e elements
+ assert xpath('e:nth-last-child(-n+2)') == (
+ "*/*[name() = 'e' and ("
+ "(last() - position() -1) mod -1 = 0 and (position() >= last() -1))]")
assert xpath('e:nth-of-type(1)') == (
"*/e[position() = 1]")
assert xpath('e:nth-last-of-type(1)') == (
- "*/e[position() = last() - 1]")
- assert xpath('e:nth-last-of-type(1)') == (
- "*/e[position() = last() - 1]")
+ "*/e[position() = last()]")
assert xpath('div e:nth-last-of-type(1) .aclass') == (
- "div/descendant-or-self::*/e[position() = last() - 1]"
+ "div/descendant-or-self::*/e[position() = last()]"
"/descendant-or-self::*/*[@class and contains("
"concat(' ', normalize-space(@class), ' '), ' aclass ')]")
assert xpath('e:first-child') == (
@@ -381,7 +392,7 @@ def xpath(css):
assert xpath('e#myid') == (
"e[@id = 'myid']")
assert xpath('e:not(:nth-child(odd))') == (
- "e[not((position() -1) mod 2 = 0 and position() >= 1)]")
+ "e[not((position() -1) mod 2 = 0)]")
assert xpath('e:nOT(*)') == (
"e[0]") # never matches
assert xpath('e f') == (
@@ -643,16 +654,19 @@ def pcss(main, *selectors, **kwargs):
# FIXME: I'm not 100% sure this is right:
assert pcss('li:nth-child(3n+1)') == [
'first-li', 'fourth-li', 'seventh-li']
- assert pcss('li:nth-last-child(0)') == [
- 'seventh-li']
+ assert pcss('li:nth-child(-n+3)') == [
+ 'first-li', 'second-li', 'third-li']
+ assert pcss('li:nth-child(-2n+4)') == ['second-li', 'fourth-li']
+ assert pcss('li:nth-last-child(0)') == []
+ assert pcss('li:nth-last-child(1)') == ['seventh-li']
assert pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)') == [
'second-li', 'fourth-li', 'sixth-li']
- assert pcss('li:nth-last-child(2n+2)') == ['second-li', 'fourth-li']
+ assert pcss('li:nth-last-child(2n+2)') == [
+ 'second-li', 'fourth-li', 'sixth-li']
assert pcss('ol:first-of-type') == ['first-ol']
assert pcss('ol:nth-child(1)') == []
assert pcss('ol:nth-of-type(2)') == ['second-ol']
- # FIXME: like above', '(1) or (2)?
- assert pcss('ol:nth-last-of-type(1)') == ['first-ol']
+ assert pcss('ol:nth-last-of-type(1)') == ['second-ol']
assert pcss('span:only-child') == ['foobar-span']
assert pcss('li div:only-child') == ['li-div']
assert pcss('div *:only-child') == ['li-div', 'foobar-span']
From fa02721c896301de4bfcfac4f705cf83f3425179 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Thu, 7 Jul 2016 23:49:26 +0200
Subject: [PATCH 056/192] Add more tests for *-last-*(an+1)
---
tests/test_cssselect.py | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index 97b9202..2203d02 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -347,9 +347,15 @@ def xpath(css):
assert xpath('e:nth-last-child(2n)') == (
"*/*[name() = 'e' and ("
"(last() - position() +1) mod 2 = 0 and (position() <= last() +1))]")
+ assert xpath('e:nth-last-child(2n+1)') == (
+ "*/*[name() = 'e' and ("
+ "(last() - position()) mod 2 = 0 and (position() <= last()))]")
assert xpath('e:nth-last-child(2n+2)') == (
"*/*[name() = 'e' and ("
"(last() - position() -1) mod 2 = 0 and (position() <= last() -1))]")
+ assert xpath('e:nth-last-child(3n+1)') == (
+ "*/*[name() = 'e' and ("
+ "(last() - position()) mod 3 = 0 and (position() <= last()))]")
# represents the two last e elements
assert xpath('e:nth-last-child(-n+2)') == (
"*/*[name() = 'e' and ("
@@ -661,8 +667,12 @@ def pcss(main, *selectors, **kwargs):
assert pcss('li:nth-last-child(1)') == ['seventh-li']
assert pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)') == [
'second-li', 'fourth-li', 'sixth-li']
+ assert pcss('li:nth-last-child(2n+1)') == [
+ 'first-li', 'third-li', 'fifth-li', 'seventh-li']
assert pcss('li:nth-last-child(2n+2)') == [
'second-li', 'fourth-li', 'sixth-li']
+ assert pcss('li:nth-last-child(3n+1)') == [
+ 'first-li', 'fourth-li', 'seventh-li']
assert pcss('ol:first-of-type') == ['first-ol']
assert pcss('ol:nth-child(1)') == []
assert pcss('ol:nth-of-type(2)') == ['second-ol']
From cca952942b1149075fa386f67a7f928811cf73c9 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Fri, 8 Jul 2016 00:14:58 +0200
Subject: [PATCH 057/192] Remove FIXME comment
---
tests/test_cssselect.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index 2203d02..2638ed6 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -657,7 +657,6 @@ def pcss(main, *selectors, **kwargs):
assert pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)') == [
'first-li', 'third-li', 'fifth-li', 'seventh-li']
assert pcss('li:nth-child(2n+4)') == ['fourth-li', 'sixth-li']
- # FIXME: I'm not 100% sure this is right:
assert pcss('li:nth-child(3n+1)') == [
'first-li', 'fourth-li', 'seventh-li']
assert pcss('li:nth-child(-n+3)') == [
From f0e100c7bfd3ae93b29a6f1af7e4388a97ddf4f8 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Tue, 12 Jul 2016 13:23:11 +0200
Subject: [PATCH 058/192] Count siblings instead of using context position()
---
cssselect/xpath.py | 143 +++++++++++++++++++++++-----------------
tests/test_cssselect.py | 54 +++++++++------
2 files changed, 118 insertions(+), 79 deletions(-)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 49e60ce..f6ef64c 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -376,79 +376,102 @@ def xpath_nth_child_function(self, xpath, function, last=False,
a, b = parse_series(function.arguments)
except ValueError:
raise ExpressionError("Invalid series: '%r'" % function.arguments)
+
+ # for the siblings count node-test,
+ # `add_name_test` boolean is inverted and somewhat counter-intuitive:
+ #
+ # nth_of_type() calls nth_child(add_name_test=False)
if add_name_test:
+ nodetest = '*'
xpath.add_name_test()
+ else:
+ nodetest = '%s' % xpath.element
+
xpath.add_star_prefix()
- # non-last
- # --------
- # position() = an+b
- # -> position() - b = an
+
+ # From https://www.w3.org/TR/css3-selectors/#structural-pseudos:
#
- # if a < 0:
- # position() - b <= 0
- # -> position() <= b
+ # :nth-child(an+b)
+ # an+b-1 siblings before
+ #
+ # :nth-last-child(an+b)
+ # an+b-1 siblings after
+ #
+ # :nth-of-type(an+b)
+ # an+b-1 siblings with the same expanded element name before
+ #
+ # :nth-last-of-type(an+b)
+ # an+b-1 siblings with the same expanded element name after
+ #
+ # So,
+ # for :nth-child and :nth-of-type
+ #
+ # count(preceding-sibling::) = an+b-1
+ #
+ # for :nth-last-child and :nth-last-of-type
+ #
+ # count(following-sibling::) = an+b-1
#
- # last
- # ----
- # last() - position() = an+b -1
- # -> last() - position() - b +1 = an
+ # therefore,
+ # count(...) - (b-1) ≡ 0 (mod a)
+ #
+ # if a == 0:
+ # ~~~~~~~~~~
+ # count(...) = b-1
#
# if a < 0:
- # last() - position() - b +1 <= 0
- # -> position() >= last() - b +1
+ # ~~~~~~~~~
+ # count(...) - b +1 <= 0
+ # -> count(...) <= b-1
#
- # -b +1 = -(b-1)
- if last:
- b = b - 1
- if b > 0:
- b_neg = str(-b)
+ # if a > 0:
+ # ~~~~~~~~~
+ # count(...) - b +1 >= 0
+ # -> count(...) >= b-1
+
+ # count siblings before or after the element
+ if not last:
+ siblings_count = 'count(preceding-sibling::%s)' % nodetest
else:
- b_neg = '+%s' % (-b)
+ siblings_count = 'count(following-sibling::%s)' % nodetest
+
+ # work with b-1 instead
+ b = b - 1
+
+ # if a == 0:
+ # ~~~~~~~~~~
+ # count(...) = b-1
if a == 0:
- if last:
- # http://www.w3.org/TR/selectors/#nth-last-child-pseudo
- # The :nth-last-child(an+b) pseudo-class notation represents
- # an element that has an+b-1 siblings after it in the document tree
- #
- # last() - position() = an+b-1
- # -> position() = last() -b +1 (for a==0)
- #
- if b == 0:
- b = 'last()'
- else:
- b = 'last() %s' % b_neg
- return xpath.add_condition('position() = %s' % b)
- if a != 1:
- # last() - position() - b +1 = an
- if last:
- left = 'last() - position()'
- # position() - b = an
- else:
- left = 'position()'
- if b != 0:
- left = '%s %s' % (left, b_neg)
- if last or b != 0:
- left = '(%s)' % left
- expr = ['%s mod %s = 0' % (left, a)]
- else:
+ return xpath.add_condition('%s = %s' % (siblings_count, b))
+
+ # special case for operations modulo 1
+ if abs(a) == 1:
expr = []
- if last:
- if b == 0:
- right = 'last()'
- else:
- right = 'last() %s' % b_neg
- if a > 0:
- expr.append('(position() <= %s)' % right)
- else:
- expr.append('(position() >= %s)' % right)
else:
- # position() > 0 so if b < 0, then position() > b
- # also, position() >= 1 always
- if b > 1:
- if a > 0:
- expr.append('position() >= %s' % b)
+ # count(...) - (b-1) ≡ 0 (mod a)
+ left = siblings_count
+ b_neg = -b
+
+ # this is to simplify things like "(... +3) % -3"
+ if a != 0:
+ b_neg = b_neg % abs(a)
+
+ if b_neg != 0:
+ if b_neg < 0:
+ b_neg = str(b_neg)
else:
- expr.append('position() <= %s' % b)
+ b_neg = '+%s' % (b_neg)
+ left = '(%s %s)' % (left, b_neg)
+
+ expr = ['%s mod %s = 0' % (left, a)]
+
+ if a > 0:
+ # siblings count is always > 0
+ # so the following predicate only matter for b > 0
+ if b > 0:
+ expr.append('%s >= %s' % (siblings_count, b))
+ else:
+ expr.append('%s <= %s' % (siblings_count, b))
expr = ' and '.join(expr)
if expr:
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index 2638ed6..fd14934 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -335,37 +335,37 @@ def xpath(css):
"e[@hreflang and ("
"@hreflang = 'en' or starts-with(@hreflang, 'en-'))]")
assert xpath('e:nth-child(1)') == (
- "*/*[name() = 'e' and (position() = 1)]")
+ "*/*[name() = 'e' and (count(preceding-sibling::*) = 0)]")
assert xpath('e:nth-child(3n+2)') == (
- "*/*[name() = 'e' and ((position() -2) mod 3 = 0 and position() >= 2)]")
+ "*/*[name() = 'e' and ("
+ "(count(preceding-sibling::*) +2) mod 3 = 0 and "
+ "count(preceding-sibling::*) >= 1)]")
assert xpath('e:nth-child(3n-2)') == (
- "*/*[name() = 'e' and ((position() +2) mod 3 = 0)]")
+ "*/*[name() = 'e' and ("
+ "count(preceding-sibling::*) mod 3 = 0)]")
assert xpath('e:nth-child(-n+6)') == (
- "*/*[name() = 'e' and ((position() -6) mod -1 = 0 and position() <= 6)]")
+ "*/*[name() = 'e' and (count(preceding-sibling::*) <= 5)]")
assert xpath('e:nth-last-child(1)') == (
- "*/*[name() = 'e' and (position() = last())]")
+ "*/*[name() = 'e' and (count(following-sibling::*) = 0)]")
assert xpath('e:nth-last-child(2n)') == (
- "*/*[name() = 'e' and ("
- "(last() - position() +1) mod 2 = 0 and (position() <= last() +1))]")
+ "*/*[name() = 'e' and ((count(following-sibling::*) +1) mod 2 = 0)]")
assert xpath('e:nth-last-child(2n+1)') == (
- "*/*[name() = 'e' and ("
- "(last() - position()) mod 2 = 0 and (position() <= last()))]")
+ "*/*[name() = 'e' and (count(following-sibling::*) mod 2 = 0)]")
assert xpath('e:nth-last-child(2n+2)') == (
"*/*[name() = 'e' and ("
- "(last() - position() -1) mod 2 = 0 and (position() <= last() -1))]")
+ "(count(following-sibling::*) +1) mod 2 = 0 and "
+ "count(following-sibling::*) >= 1)]")
assert xpath('e:nth-last-child(3n+1)') == (
- "*/*[name() = 'e' and ("
- "(last() - position()) mod 3 = 0 and (position() <= last()))]")
+ "*/*[name() = 'e' and (count(following-sibling::*) mod 3 = 0)]")
# represents the two last e elements
assert xpath('e:nth-last-child(-n+2)') == (
- "*/*[name() = 'e' and ("
- "(last() - position() -1) mod -1 = 0 and (position() >= last() -1))]")
+ "*/*[name() = 'e' and (count(following-sibling::*) <= 1)]")
assert xpath('e:nth-of-type(1)') == (
- "*/e[position() = 1]")
+ "*/e[count(preceding-sibling::e) = 0]")
assert xpath('e:nth-last-of-type(1)') == (
- "*/e[position() = last()]")
+ "*/e[count(following-sibling::e) = 0]")
assert xpath('div e:nth-last-of-type(1) .aclass') == (
- "div/descendant-or-self::*/e[position() = last()]"
+ "div/descendant-or-self::*/e[count(following-sibling::e) = 0]"
"/descendant-or-self::*/*[@class and contains("
"concat(' ', normalize-space(@class), ' '), ' aclass ')]")
assert xpath('e:first-child') == (
@@ -398,7 +398,7 @@ def xpath(css):
assert xpath('e#myid') == (
"e[@id = 'myid']")
assert xpath('e:not(:nth-child(odd))') == (
- "e[not((position() -1) mod 2 = 0)]")
+ "e[not(count(preceding-sibling::*) mod 2 = 0)]")
assert xpath('e:nOT(*)') == (
"e[0]") # never matches
assert xpath('e f') == (
@@ -409,6 +409,8 @@ def xpath(css):
"e/following-sibling::*[name() = 'f' and (position() = 1)]")
assert xpath('e ~ f') == (
"e/following-sibling::f")
+ assert xpath('e ~ f:nth-child(3)') == (
+ "e/following-sibling::*[name() = 'f' and (count(preceding-sibling::*) = 2)]")
assert xpath('div#container p') == (
"div[@id = 'container']/descendant-or-self::*/p")
@@ -649,7 +651,8 @@ def pcss(main, *selectors, **kwargs):
assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [
'second-li', 'li-div']
assert pcss(':lang("e")', html_only=True) == []
- assert pcss('li:nth-child(3)') == ['third-li']
+ assert pcss('li:nth-child(3)',
+ '#first-li ~ :nth-child(3)') == ['third-li']
assert pcss('li:nth-child(10)') == []
assert pcss('li:nth-child(2n)', 'li:nth-child(even)',
'li:nth-child(2n+0)') == [
@@ -676,6 +679,17 @@ def pcss(main, *selectors, **kwargs):
assert pcss('ol:nth-child(1)') == []
assert pcss('ol:nth-of-type(2)') == ['second-ol']
assert pcss('ol:nth-last-of-type(1)') == ['second-ol']
+
+ # "+" and "~" tests
+ assert pcss('ol#first-ol li + li:nth-child(4)') == ['fourth-li']
+ assert pcss('li + li:nth-child(1)') == []
+ assert pcss('li ~ li:nth-child(2n+1)') == [
+ 'third-li', 'fifth-li', 'seventh-li'
+ ] # all but the first
+ assert pcss('li ~ li:nth-last-child(2n+1)') == [
+ 'third-li', 'fifth-li', 'seventh-li'
+ ] # all but the first
+
assert pcss('span:only-child') == ['foobar-span']
assert pcss('li div:only-child') == ['li-div']
assert pcss('div *:only-child') == ['li-div', 'foobar-span']
@@ -716,6 +730,8 @@ def pcss(main, *selectors, **kwargs):
assert pcss('ol :Not(li[class])') == [
'first-li', 'second-li', 'li-div',
'fifth-li', 'sixth-li', 'seventh-li']
+ assert pcss('ol.a.b.c > li.c:nth-child(3)') == ['third-li']
+
# Invalid characters in XPath element names, should not crash
assert pcss(r'di\a0 v', r'div\[') == []
assert pcss(r'[h\a0 ref]', r'[h\]ref]') == []
From d86287dc211b5b75c549aa2febb0ad4ece0ead02 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Tue, 12 Jul 2016 16:21:06 +0200
Subject: [PATCH 059/192] Remove '*/'-prefix and use siblings counts for
:only-* pseudo-classes
---
cssselect/xpath.py | 25 ++++++-------------
tests/test_cssselect.py | 55 +++++++++++++++++++----------------------
2 files changed, 33 insertions(+), 47 deletions(-)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index f6ef64c..ad6decb 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -351,7 +351,7 @@ def xpath_element(self, selector):
def xpath_descendant_combinator(self, left, right):
"""right is a child, grand-child or further descendant of left"""
- return left.join('/descendant-or-self::*/', right)
+ return left.join('/descendant::', right)
def xpath_child_combinator(self, left, right):
"""right is an immediate child of left"""
@@ -383,12 +383,9 @@ def xpath_nth_child_function(self, xpath, function, last=False,
# nth_of_type() calls nth_child(add_name_test=False)
if add_name_test:
nodetest = '*'
- xpath.add_name_test()
else:
nodetest = '%s' % xpath.element
- xpath.add_star_prefix()
-
# From https://www.w3.org/TR/css3-selectors/#structural-pseudos:
#
# :nth-child(an+b)
@@ -522,39 +519,31 @@ def xpath_root_pseudo(self, xpath):
return xpath.add_condition("not(parent::*)")
def xpath_first_child_pseudo(self, xpath):
- xpath.add_star_prefix()
- xpath.add_name_test()
- return xpath.add_condition('position() = 1')
+ return xpath.add_condition('count(preceding-sibling::*) = 0')
def xpath_last_child_pseudo(self, xpath):
- xpath.add_star_prefix()
- xpath.add_name_test()
- return xpath.add_condition('position() = last()')
+ return xpath.add_condition('count(following-sibling::*) = 0')
def xpath_first_of_type_pseudo(self, xpath):
if xpath.element == '*':
raise ExpressionError(
"*:first-of-type is not implemented")
- xpath.add_star_prefix()
- return xpath.add_condition('position() = 1')
+ return xpath.add_condition('count(preceding-sibling::%s) = 0' % xpath.element)
def xpath_last_of_type_pseudo(self, xpath):
if xpath.element == '*':
raise ExpressionError(
"*:last-of-type is not implemented")
- xpath.add_star_prefix()
- return xpath.add_condition('position() = last()')
+ return xpath.add_condition('count(following-sibling::%s) = 0' % xpath.element)
def xpath_only_child_pseudo(self, xpath):
- xpath.add_name_test()
- xpath.add_star_prefix()
- return xpath.add_condition('last() = 1')
+ return xpath.add_condition('count(parent::*/child::*) = 1')
def xpath_only_of_type_pseudo(self, xpath):
if xpath.element == '*':
raise ExpressionError(
"*:only-of-type is not implemented")
- return xpath.add_condition('last() = 1')
+ return xpath.add_condition('count(parent::*/child::%s) = 1' % xpath.element)
def xpath_empty_pseudo(self, xpath):
return xpath.add_condition("not(*) and not(string-length())")
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index fd14934..fe564c0 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -335,51 +335,48 @@ def xpath(css):
"e[@hreflang and ("
"@hreflang = 'en' or starts-with(@hreflang, 'en-'))]")
assert xpath('e:nth-child(1)') == (
- "*/*[name() = 'e' and (count(preceding-sibling::*) = 0)]")
+ "e[count(preceding-sibling::*) = 0]")
assert xpath('e:nth-child(3n+2)') == (
- "*/*[name() = 'e' and ("
- "(count(preceding-sibling::*) +2) mod 3 = 0 and "
- "count(preceding-sibling::*) >= 1)]")
+ "e[(count(preceding-sibling::*) +2) mod 3 = 0 and "
+ "count(preceding-sibling::*) >= 1]")
assert xpath('e:nth-child(3n-2)') == (
- "*/*[name() = 'e' and ("
- "count(preceding-sibling::*) mod 3 = 0)]")
+ "e[count(preceding-sibling::*) mod 3 = 0]")
assert xpath('e:nth-child(-n+6)') == (
- "*/*[name() = 'e' and (count(preceding-sibling::*) <= 5)]")
+ "e[count(preceding-sibling::*) <= 5]")
assert xpath('e:nth-last-child(1)') == (
- "*/*[name() = 'e' and (count(following-sibling::*) = 0)]")
+ "e[count(following-sibling::*) = 0]")
assert xpath('e:nth-last-child(2n)') == (
- "*/*[name() = 'e' and ((count(following-sibling::*) +1) mod 2 = 0)]")
+ "e[(count(following-sibling::*) +1) mod 2 = 0]")
assert xpath('e:nth-last-child(2n+1)') == (
- "*/*[name() = 'e' and (count(following-sibling::*) mod 2 = 0)]")
+ "e[count(following-sibling::*) mod 2 = 0]")
assert xpath('e:nth-last-child(2n+2)') == (
- "*/*[name() = 'e' and ("
- "(count(following-sibling::*) +1) mod 2 = 0 and "
- "count(following-sibling::*) >= 1)]")
+ "e[(count(following-sibling::*) +1) mod 2 = 0 and "
+ "count(following-sibling::*) >= 1]")
assert xpath('e:nth-last-child(3n+1)') == (
- "*/*[name() = 'e' and (count(following-sibling::*) mod 3 = 0)]")
+ "e[count(following-sibling::*) mod 3 = 0]")
# represents the two last e elements
assert xpath('e:nth-last-child(-n+2)') == (
- "*/*[name() = 'e' and (count(following-sibling::*) <= 1)]")
+ "e[count(following-sibling::*) <= 1]")
assert xpath('e:nth-of-type(1)') == (
- "*/e[count(preceding-sibling::e) = 0]")
+ "e[count(preceding-sibling::e) = 0]")
assert xpath('e:nth-last-of-type(1)') == (
- "*/e[count(following-sibling::e) = 0]")
+ "e[count(following-sibling::e) = 0]")
assert xpath('div e:nth-last-of-type(1) .aclass') == (
- "div/descendant-or-self::*/e[count(following-sibling::e) = 0]"
- "/descendant-or-self::*/*[@class and contains("
+ "div/descendant::e[count(following-sibling::e) = 0]"
+ "/descendant::*[@class and contains("
"concat(' ', normalize-space(@class), ' '), ' aclass ')]")
assert xpath('e:first-child') == (
- "*/*[name() = 'e' and (position() = 1)]")
+ "e[count(preceding-sibling::*) = 0]")
assert xpath('e:last-child') == (
- "*/*[name() = 'e' and (position() = last())]")
+ "e[count(following-sibling::*) = 0]")
assert xpath('e:first-of-type') == (
- "*/e[position() = 1]")
+ "e[count(preceding-sibling::e) = 0]")
assert xpath('e:last-of-type') == (
- "*/e[position() = last()]")
+ "e[count(following-sibling::e) = 0]")
assert xpath('e:only-child') == (
- "*/*[name() = 'e' and (last() = 1)]")
+ "e[count(parent::*/child::*) = 1]")
assert xpath('e:only-of-type') == (
- "e[last() = 1]")
+ "e[count(parent::*/child::e) = 1]")
assert xpath('e:empty') == (
"e[not(*) and not(string-length())]")
assert xpath('e:EmPTY') == (
@@ -402,7 +399,7 @@ def xpath(css):
assert xpath('e:nOT(*)') == (
"e[0]") # never matches
assert xpath('e f') == (
- "e/descendant-or-self::*/f")
+ "e/descendant::f")
assert xpath('e > f') == (
"e/f")
assert xpath('e + f') == (
@@ -410,9 +407,9 @@ def xpath(css):
assert xpath('e ~ f') == (
"e/following-sibling::f")
assert xpath('e ~ f:nth-child(3)') == (
- "e/following-sibling::*[name() = 'f' and (count(preceding-sibling::*) = 2)]")
+ "e/following-sibling::f[count(preceding-sibling::*) = 2]")
assert xpath('div#container p') == (
- "div[@id = 'container']/descendant-or-self::*/p")
+ "div[@id = 'container']/descendant::p")
# Invalid characters in XPath element names
assert xpath(r'di\a0 v') == (
@@ -538,7 +535,7 @@ def xpath(css):
assert xpath('::text-node') == "descendant-or-self::*/text()"
assert xpath('::attr-href') == "descendant-or-self::*/@href"
assert xpath('p img::attr(src)') == (
- "descendant-or-self::p/descendant-or-self::*/img/@src")
+ "descendant-or-self::p/descendant::img/@src")
def test_series(self):
def series(css):
From ae09a4c409ed2003273383aabaee53e1a8515015 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Tue, 12 Jul 2016 16:59:54 +0200
Subject: [PATCH 060/192] Simplify a/b if branches
---
cssselect/xpath.py | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index ad6decb..908b226 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -441,23 +441,22 @@ def xpath_nth_child_function(self, xpath, function, last=False,
if a == 0:
return xpath.add_condition('%s = %s' % (siblings_count, b))
- # special case for operations modulo 1
+ # operations modulo 1 or -1 are simpler, one only needs to verify:
+ # count(...) - (b-1) = 0, 1, 2, 3, etc., i.e. count(...) >= (b-1)
+ # or
+ # count(...) - (b-1) = 0, -1, -2, -3, etc., , i.e. count(...) <= (b-1)
if abs(a) == 1:
expr = []
else:
# count(...) - (b-1) ≡ 0 (mod a)
left = siblings_count
- b_neg = -b
- # this is to simplify things like "(... +3) % -3"
- if a != 0:
- b_neg = b_neg % abs(a)
+ # use modulo on 2nd term -(b-1) to simplify things like "(... +6) % -3",
+ # and also make it positive with |a|
+ b_neg = (-b) % abs(a)
if b_neg != 0:
- if b_neg < 0:
- b_neg = str(b_neg)
- else:
- b_neg = '+%s' % (b_neg)
+ b_neg = '+%s' % (b_neg)
left = '(%s %s)' % (left, b_neg)
expr = ['%s mod %s = 0' % (left, a)]
From 7fdcf083fa8632da8a7e54ee57a22a6cabae8e30 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Tue, 12 Jul 2016 17:27:38 +0200
Subject: [PATCH 061/192] Correct comment
---
cssselect/xpath.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 908b226..535b6d0 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -462,8 +462,8 @@ def xpath_nth_child_function(self, xpath, function, last=False,
expr = ['%s mod %s = 0' % (left, a)]
if a > 0:
- # siblings count is always > 0
- # so the following predicate only matter for b > 0
+ # siblings count is always >= 0,
+ # so the following predicate only matters for b > 0
if b > 0:
expr.append('%s >= %s' % (siblings_count, b))
else:
From f0c56955e11bb58d825f884a14008485ee37b998 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Sat, 16 Jul 2016 00:10:56 +0200
Subject: [PATCH 062/192] Rearrange a/b branches and add "early-exit"
conditions
---
cssselect/xpath.py | 94 ++++++++++++++++++++++++++---------------
tests/test_cssselect.py | 42 ++++++++++++++++--
2 files changed, 97 insertions(+), 39 deletions(-)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 535b6d0..418413c 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -377,15 +377,6 @@ def xpath_nth_child_function(self, xpath, function, last=False,
except ValueError:
raise ExpressionError("Invalid series: '%r'" % function.arguments)
- # for the siblings count node-test,
- # `add_name_test` boolean is inverted and somewhat counter-intuitive:
- #
- # nth_of_type() calls nth_child(add_name_test=False)
- if add_name_test:
- nodetest = '*'
- else:
- nodetest = '%s' % xpath.element
-
# From https://www.w3.org/TR/css3-selectors/#structural-pseudos:
#
# :nth-child(an+b)
@@ -426,52 +417,85 @@ def xpath_nth_child_function(self, xpath, function, last=False,
# count(...) - b +1 >= 0
# -> count(...) >= b-1
+ # work with b-1 instead
+ b_min_1 = b - 1
+
+ # early-exit condition 1:
+ # ~~~~~~~~~~~~~~~~~~~~~~~
+ # for a == 1, nth-*(an+b) means n+b-1 siblings before/after,
+ # and since n ∈ {0, 1, 2, ...}, if b-1<=0,
+ # there is always an "n" matching any number of siblings (maybe none)
+ if a == 1 and b_min_1 <=0:
+ return xpath
+
+ # early-exit condition 2:
+ # ~~~~~~~~~~~~~~~~~~~~~~~
+ # an+b-1 siblings with a<0 and (b-1)<0 is not possible
+ if a < 0 and b_min_1 < 0:
+ return xpath.add_condition('0')
+
+ # `add_name_test` boolean is inverted and somewhat counter-intuitive:
+ #
+ # nth_of_type() calls nth_child(add_name_test=False)
+ if add_name_test:
+ nodetest = '*'
+ else:
+ nodetest = '%s' % xpath.element
+
# count siblings before or after the element
if not last:
siblings_count = 'count(preceding-sibling::%s)' % nodetest
else:
siblings_count = 'count(following-sibling::%s)' % nodetest
- # work with b-1 instead
- b = b - 1
-
+ # special case of fixed position: nth-*(0n+b)
# if a == 0:
# ~~~~~~~~~~
- # count(...) = b-1
+ # count(***-sibling::***) = b-1
if a == 0:
- return xpath.add_condition('%s = %s' % (siblings_count, b))
+ return xpath.add_condition('%s = %s' % (siblings_count, b_min_1))
- # operations modulo 1 or -1 are simpler, one only needs to verify:
- # count(...) - (b-1) = 0, 1, 2, 3, etc., i.e. count(...) >= (b-1)
- # or
- # count(...) - (b-1) = 0, -1, -2, -3, etc., , i.e. count(...) <= (b-1)
- if abs(a) == 1:
- expr = []
+ expr = []
+
+ if a > 0:
+ # siblings count, an+b-1, is always >= 0,
+ # so if a>0, and (b-1)<=0, an "n" exists to satisfy this,
+ # therefore, the predicate is only interesting if (b-1)>0
+ if b_min_1 > 0:
+ expr.append('%s >= %s' % (siblings_count, b_min_1))
else:
- # count(...) - (b-1) ≡ 0 (mod a)
+ # if a<0, and (b-1)<0, no "n" satisfies this,
+ # this is tested above as an early exist condition
+ # otherwise,
+ expr.append('%s <= %s' % (siblings_count, b_min_1))
+
+ # operations modulo 1 or -1 are simpler, one only needs to verify:
+ #
+ # - either:
+ # count(***-sibling::***) - (b-1) = n = 0, 1, 2, 3, etc.,
+ # i.e. count(***-sibling::***) >= (b-1)
+ #
+ # - or:
+ # count(***-sibling::***) - (b-1) = -n = 0, -1, -2, -3, etc.,
+ # i.e. count(***-sibling::***) <= (b-1)
+ # we we just did above.
+ #
+ if abs(a) != 1:
+ # count(***-sibling::***) - (b-1) ≡ 0 (mod a)
left = siblings_count
- # use modulo on 2nd term -(b-1) to simplify things like "(... +6) % -3",
+ # apply "modulo a" on 2nd term, -(b-1),
+ # to simplify things like "(... +6) % -3",
# and also make it positive with |a|
- b_neg = (-b) % abs(a)
+ b_neg = (-b_min_1) % abs(a)
if b_neg != 0:
b_neg = '+%s' % (b_neg)
left = '(%s %s)' % (left, b_neg)
- expr = ['%s mod %s = 0' % (left, a)]
-
- if a > 0:
- # siblings count is always >= 0,
- # so the following predicate only matters for b > 0
- if b > 0:
- expr.append('%s >= %s' % (siblings_count, b))
- else:
- expr.append('%s <= %s' % (siblings_count, b))
+ expr.append('%s mod %s = 0' % (left, a))
- expr = ' and '.join(expr)
- if expr:
- xpath.add_condition(expr)
+ xpath.add_condition(' and '.join(expr))
return xpath
def xpath_nth_last_child_function(self, xpath, function):
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index fe564c0..cdd8d8b 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -334,15 +334,37 @@ def xpath(css):
assert xpath('e[hreflang|="en"]') == (
"e[@hreflang and ("
"@hreflang = 'en' or starts-with(@hreflang, 'en-'))]")
+
+ # --- nth-* and nth-last-* -------------------------------------
assert xpath('e:nth-child(1)') == (
"e[count(preceding-sibling::*) = 0]")
+
+ # always true
+ assert xpath('e:nth-child(n)') == (
+ "e")
+ assert xpath('e:nth-child(n+1)') == (
+ "e")
+ # always true too
+ assert xpath('e:nth-child(n-10)') == (
+ "e")
+ # b=2 is the limit...
+ assert xpath('e:nth-child(n+2)') == (
+ "e[count(preceding-sibling::*) >= 1]")
+ # always false
+ assert xpath('e:nth-child(-n)') == (
+ "e[0]")
+ # equivalent to first child
+ assert xpath('e:nth-child(-n+1)') == (
+ "e[count(preceding-sibling::*) <= 0]")
+
assert xpath('e:nth-child(3n+2)') == (
- "e[(count(preceding-sibling::*) +2) mod 3 = 0 and "
- "count(preceding-sibling::*) >= 1]")
+ "e[count(preceding-sibling::*) >= 1 and "
+ "(count(preceding-sibling::*) +2) mod 3 = 0]")
assert xpath('e:nth-child(3n-2)') == (
"e[count(preceding-sibling::*) mod 3 = 0]")
assert xpath('e:nth-child(-n+6)') == (
"e[count(preceding-sibling::*) <= 5]")
+
assert xpath('e:nth-last-child(1)') == (
"e[count(following-sibling::*) = 0]")
assert xpath('e:nth-last-child(2n)') == (
@@ -350,13 +372,14 @@ def xpath(css):
assert xpath('e:nth-last-child(2n+1)') == (
"e[count(following-sibling::*) mod 2 = 0]")
assert xpath('e:nth-last-child(2n+2)') == (
- "e[(count(following-sibling::*) +1) mod 2 = 0 and "
- "count(following-sibling::*) >= 1]")
+ "e[count(following-sibling::*) >= 1 and "
+ "(count(following-sibling::*) +1) mod 2 = 0]")
assert xpath('e:nth-last-child(3n+1)') == (
"e[count(following-sibling::*) mod 3 = 0]")
# represents the two last e elements
assert xpath('e:nth-last-child(-n+2)') == (
"e[count(following-sibling::*) <= 1]")
+
assert xpath('e:nth-of-type(1)') == (
"e[count(preceding-sibling::e) = 0]")
assert xpath('e:nth-last-of-type(1)') == (
@@ -365,6 +388,7 @@ def xpath(css):
"div/descendant::e[count(following-sibling::e) = 0]"
"/descendant::*[@class and contains("
"concat(' ', normalize-space(@class), ' '), ' aclass ')]")
+
assert xpath('e:first-child') == (
"e[count(preceding-sibling::*) = 0]")
assert xpath('e:last-child') == (
@@ -648,6 +672,16 @@ def pcss(main, *selectors, **kwargs):
assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [
'second-li', 'li-div']
assert pcss(':lang("e")', html_only=True) == []
+
+ # --- nth-* and nth-last-* -------------------------------------
+
+ # select nothing
+ assert pcss('li:nth-child(-n)') == []
+ # select all children
+ assert pcss('li:nth-child(n)') == [
+ 'first-li', 'second-li', 'third-li', 'fourth-li',
+ 'fifth-li', 'sixth-li', 'seventh-li']
+
assert pcss('li:nth-child(3)',
'#first-li ~ :nth-child(3)') == ['third-li']
assert pcss('li:nth-child(10)') == []
From 10dbd58073327554599cfde453a97d8b4ba2b60f Mon Sep 17 00:00:00 2001
From: Mikhail Korobov
Date: Thu, 8 Sep 2016 19:34:38 +0500
Subject: [PATCH 063/192] TST don't use unsupported pip option
---
.travis.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.travis.yml b/.travis.yml
index 8a4af19..e666cf7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,7 +9,7 @@ python:
- "3.5"
install:
- - pip install --use-mirrors lxml -e .
+ - pip install lxml -e .
- pip install -U codecov pytest-cov
- if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]];
then pip uninstall -y coverage && pip install "coverage<4";
From b5d095316acd59d0364f5d8b6cd3c997bedecf36 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Fri, 9 Sep 2016 11:16:47 +0200
Subject: [PATCH 064/192] Force py.test version before 3.0
py.test 3.0 dropped support for Python 3.2
https://github.com/pytest-dev/pytest/issues/1627
---
tox.ini | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tox.ini b/tox.ini
index a971384..7a3359a 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,7 @@ envlist = py25,py26,py27,py32,py33
[testenv]
deps=
lxml
- pytest
+ pytest<3
pytest-cov
commands =
From 16ce549f59d893fd07382e48640e957639459a52 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Fri, 9 Sep 2016 11:42:47 +0200
Subject: [PATCH 065/192] Travis: uninstall pytest before forcing version < 3
---
.travis.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.travis.yml b/.travis.yml
index e666cf7..a89d5b3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,7 +12,7 @@ install:
- pip install lxml -e .
- pip install -U codecov pytest-cov
- if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]];
- then pip uninstall -y coverage && pip install "coverage<4";
+ then pip uninstall -y coverage pytest && pip install "coverage<4" && pip install "pytest<3";
fi
script:
From e84f1b50803522a528314d8a39fdf5c728d0e004 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Mon, 12 Sep 2016 18:17:12 +0200
Subject: [PATCH 066/192] Revert descendant combinator change
---
cssselect/xpath.py | 2 +-
tests/test_cssselect.py | 10 +++++-----
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 418413c..698748a 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -351,7 +351,7 @@ def xpath_element(self, selector):
def xpath_descendant_combinator(self, left, right):
"""right is a child, grand-child or further descendant of left"""
- return left.join('/descendant::', right)
+ return left.join('/descendant-or-self::*/', right)
def xpath_child_combinator(self, left, right):
"""right is an immediate child of left"""
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index cdd8d8b..4a0bd39 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -385,8 +385,8 @@ def xpath(css):
assert xpath('e:nth-last-of-type(1)') == (
"e[count(following-sibling::e) = 0]")
assert xpath('div e:nth-last-of-type(1) .aclass') == (
- "div/descendant::e[count(following-sibling::e) = 0]"
- "/descendant::*[@class and contains("
+ "div/descendant-or-self::*/e[count(following-sibling::e) = 0]"
+ "/descendant-or-self::*/*[@class and contains("
"concat(' ', normalize-space(@class), ' '), ' aclass ')]")
assert xpath('e:first-child') == (
@@ -423,7 +423,7 @@ def xpath(css):
assert xpath('e:nOT(*)') == (
"e[0]") # never matches
assert xpath('e f') == (
- "e/descendant::f")
+ "e/descendant-or-self::*/f")
assert xpath('e > f') == (
"e/f")
assert xpath('e + f') == (
@@ -433,7 +433,7 @@ def xpath(css):
assert xpath('e ~ f:nth-child(3)') == (
"e/following-sibling::f[count(preceding-sibling::*) = 2]")
assert xpath('div#container p') == (
- "div[@id = 'container']/descendant::p")
+ "div[@id = 'container']/descendant-or-self::*/p")
# Invalid characters in XPath element names
assert xpath(r'di\a0 v') == (
@@ -559,7 +559,7 @@ def xpath(css):
assert xpath('::text-node') == "descendant-or-self::*/text()"
assert xpath('::attr-href') == "descendant-or-self::*/@href"
assert xpath('p img::attr(src)') == (
- "descendant-or-self::p/descendant::img/@src")
+ "descendant-or-self::p/descendant-or-self::*/img/@src")
def test_series(self):
def series(css):
From 9a1a071e0cb30193d578087d68887cff097ad1cf Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Mon, 17 Oct 2016 12:00:32 +0200
Subject: [PATCH 067/192] Update changelog for upcoming 1.0 release
---
CHANGES | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/CHANGES b/CHANGES
index 5ae9a39..aac466c 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,16 @@
Changelog
=========
+Version 1.0.0
+-------------
+
+Released on YYYY-MM-DD.
+
+* Add code coverage reports.
+* Fix ``:nth-*(an+b)`` pseudo-classes selectors.
+ (except ``*:nth-child()`` which looks untranslatable to XPath 1.0.)
+
+
Version 0.9.2
-------------
From e1b501c02289fecdb1a4f17498161a49d11d1871 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Fri, 21 Oct 2016 12:35:15 +0200
Subject: [PATCH 068/192] Set date for 1.0 release
---
CHANGES | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CHANGES b/CHANGES
index aac466c..94abe77 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,7 +4,7 @@ Changelog
Version 1.0.0
-------------
-Released on YYYY-MM-DD.
+Released on 2016-10-21.
* Add code coverage reports.
* Fix ``:nth-*(an+b)`` pseudo-classes selectors.
From 4d59c719b2eca9062dd4deccb1e985c9c182fb37 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Fri, 21 Oct 2016 12:48:44 +0200
Subject: [PATCH 069/192] =?UTF-8?q?Bump=20version:=200.9.2=20=E2=86=92=201?=
=?UTF-8?q?.0.0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.bumpversion.cfg | 2 +-
cssselect/__init__.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index a674e10..426ea28 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 0.9.2
+current_version = 1.0.0
commit = True
tag = True
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index ed330ac..f46a0e4 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -18,5 +18,5 @@
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '0.9.2'
+VERSION = '1.0.0'
__version__ = VERSION
From 9c2cdb3c601eed13cc2118a0aff8c0b83b9991d7 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Fri, 9 Sep 2016 10:53:29 +0200
Subject: [PATCH 070/192] Travis: remove py32 build after py.test>=3.0 dropped
support for it
See http://doc.pytest.org/en/latest/changelog.html
and https://github.com/pytest-dev/pytest/pull/1627
---
.travis.yml | 4 ----
1 file changed, 4 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index a89d5b3..bf21f78 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,7 +3,6 @@ language: python
python:
- "2.6"
- "2.7"
- - "3.2"
- "3.3"
- "3.4"
- "3.5"
@@ -11,9 +10,6 @@ python:
install:
- pip install lxml -e .
- pip install -U codecov pytest-cov
- - if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]];
- then pip uninstall -y coverage pytest && pip install "coverage<4" && pip install "pytest<3";
- fi
script:
py.test --cov-report term --cov=cssselect
From f01843dcedc00804f437b7dfaff5ccae34abfa5c Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Fri, 9 Sep 2016 11:06:39 +0200
Subject: [PATCH 071/192] Remove Py3.2 classifier from setup.py
---
setup.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/setup.py b/setup.py
index b4d0941..5d5ec02 100644
--- a/setup.py
+++ b/setup.py
@@ -37,7 +37,6 @@
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.2',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
From ecbe1a86f920d83bc76966b5538ff5875ff34b3d Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Fri, 6 Jan 2017 18:17:27 +0100
Subject: [PATCH 072/192] Add Python 3.6 build on Travis CI
---
.travis.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/.travis.yml b/.travis.yml
index a89d5b3..a1fcdb1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,6 +7,7 @@ python:
- "3.3"
- "3.4"
- "3.5"
+ - "3.6"
install:
- pip install lxml -e .
From a448648cf7bf69085be53f23db0cbf9ee9dce96c Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Fri, 6 Jan 2017 18:33:31 +0100
Subject: [PATCH 073/192] Add Py3.6 classifier to setup.py
---
setup.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/setup.py b/setup.py
index 5d5ec02..6651017 100644
--- a/setup.py
+++ b/setup.py
@@ -40,6 +40,7 @@
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
+ 'Programming Language :: Python :: 3.6',
],
**extra_kwargs
)
From 8bfdcc65319537b74467e6d7cc2ce616edd77425 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Fri, 6 Jan 2017 18:34:07 +0100
Subject: [PATCH 074/192] Update README
---
README.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.rst b/README.rst
index 155149d..9ccfc4d 100644
--- a/README.rst
+++ b/README.rst
@@ -17,7 +17,7 @@ extracted as a stand-alone project.
Quick facts:
* Free software: BSD licensed
-* Compatible with Python 2.6+ and 3.2+
+* Compatible with Python 2.6+ and 3.3+
* Latest documentation `on python.org `_
* Source, issues and pull requests `on Github
`_
From df496e7be1924e27df469d7a1d77525ce501ffa8 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Tue, 10 Jan 2017 15:02:15 +0100
Subject: [PATCH 075/192] Move docs to ReadTheDocs
---
README.rst | 2 +-
docs/conf.py | 2 +-
setup.py | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/README.rst b/README.rst
index 9ccfc4d..587c2d7 100644
--- a/README.rst
+++ b/README.rst
@@ -18,7 +18,7 @@ Quick facts:
* Free software: BSD licensed
* Compatible with Python 2.6+ and 3.3+
-* Latest documentation `on python.org `_
+* Latest documentation `on Read the Docs `_
* Source, issues and pull requests `on Github
`_
* Releases `on PyPI `_
diff --git a/docs/conf.py b/docs/conf.py
index b2612d0..aa897ef 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -43,7 +43,7 @@
# General information about the project.
project = 'cssselect'
-copyright = '2012, Simon Sapin'
+copyright = '2012-2017, Simon Sapin, Scrapy developers'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
diff --git a/setup.py b/setup.py
index 6651017..199ffc7 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@
description=
'cssselect parses CSS3 Selectors and translates them to XPath 1.0',
long_description=README,
- url='https://pythonhosted.org/cssselect/',
+ url='https://github.com/scrapy/cssselect',
license='BSD',
packages=['cssselect'],
classifiers=[
From 3987b7c957edc105fde0b4c022a50bd060be6afe Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Tue, 10 Jan 2017 15:40:39 +0100
Subject: [PATCH 076/192] Add automatic PyPI deploy to Travis CI config
---
.travis.yml | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index e9dadc2..ca91911 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,12 +1,11 @@
language: python
-
python:
- - "2.6"
- - "2.7"
- - "3.3"
- - "3.4"
- - "3.5"
- - "3.6"
+ - '2.6'
+ - '2.7'
+ - '3.3'
+ - '3.4'
+ - '3.5'
+ - '3.6'
install:
- pip install lxml -e .
@@ -17,3 +16,13 @@ script:
after_success:
codecov
+
+deploy:
+ provider: pypi
+ distributions: sdist bdist_wheel
+ user: redapple
+ password:
+ secure: T1PBD+ocIGwHMbBHPqzu7UZxpkB0w98KtEIkNzLXNQcF7JpjugZNwz4xX2xVhi8yvUQ257VtLSKpIOT2FWxrfLrgTZKbTd6Q7V5Lf3HKzLomOKUKMAd54gsOuismE27CT/SHbexskACgwVwkyG9Y3dlG6m/ZBgqoPAGaJrScjEU=
+ on:
+ tags: true
+ repo: scrapy/cssselect
From 5824741722f841dcf2ffe5818d0b426312e857bb Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Tue, 10 Jan 2017 15:51:31 +0100
Subject: [PATCH 077/192] Only deploy from Python 3.6
---
.travis.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/.travis.yml b/.travis.yml
index ca91911..5ddb1fd 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -26,3 +26,4 @@ deploy:
on:
tags: true
repo: scrapy/cssselect
+ condition: "$TRAVIS_PYTHON_VERSION == '3.6'"
From 18d38aefc0334918eb8fa2b896009478a7175859 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Tue, 10 Jan 2017 16:01:41 +0100
Subject: [PATCH 078/192] Update pytest section in setup.cfg
---
setup.cfg | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/setup.cfg b/setup.cfg
index 270daee..b8c93b1 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -6,7 +6,7 @@ build-dir = docs/_build
[upload_sphinx] # Sphinx-PyPI-upload
upload-dir = docs/_build/html
-[pytest]
+[tool:pytest]
testpaths = tests
[bdist_wheel]
From ed1a15d32e97b6ec11cbeffdeeaf44be8b3e35dc Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Tue, 10 Jan 2017 15:32:09 +0100
Subject: [PATCH 079/192] Update changelog for 1.0.1
---
CHANGES | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/CHANGES b/CHANGES
index 94abe77..9238537 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,14 @@
Changelog
=========
+Version 1.0.1
+-------------
+
+Released on 2017-01-XX.
+
+* Add support for Python 3.6.
+* Documentation hosted `on Read the Docs `_
+
Version 1.0.0
-------------
From fee89dfe7453b58b231c2fd1d37621ac30f2450d Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Tue, 10 Jan 2017 16:05:40 +0100
Subject: [PATCH 080/192] Set release date for version 1.0.1
---
CHANGES | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CHANGES b/CHANGES
index 9238537..92b0371 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,7 +4,7 @@ Changelog
Version 1.0.1
-------------
-Released on 2017-01-XX.
+Released on 2017-01-10.
* Add support for Python 3.6.
* Documentation hosted `on Read the Docs `_
From 7b40f4e59fa7fa9da0fcae29874a9a3a5e120509 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Tue, 10 Jan 2017 16:13:30 +0100
Subject: [PATCH 081/192] =?UTF-8?q?Bump=20version:=201.0.0=20=E2=86=92=201?=
=?UTF-8?q?.0.1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.bumpversion.cfg | 2 +-
cssselect/__init__.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 426ea28..92c7bcb 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 1.0.0
+current_version = 1.0.1
commit = True
tag = True
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index f46a0e4..3b06261 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -18,5 +18,5 @@
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '1.0.0'
+VERSION = '1.0.1'
__version__ = VERSION
From 386afc5f001d2c4d6742ac378db25238db8cd671 Mon Sep 17 00:00:00 2001
From: Paul Tremberth
Date: Tue, 10 Jan 2017 16:33:54 +0100
Subject: [PATCH 082/192] Do not upload docs when deploying to PyPI from Travis
CI
---
.travis.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/.travis.yml b/.travis.yml
index 5ddb1fd..cc709f1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,6 +20,7 @@ after_success:
deploy:
provider: pypi
distributions: sdist bdist_wheel
+ skip_upload_docs: true
user: redapple
password:
secure: T1PBD+ocIGwHMbBHPqzu7UZxpkB0w98KtEIkNzLXNQcF7JpjugZNwz4xX2xVhi8yvUQ257VtLSKpIOT2FWxrfLrgTZKbTd6Q7V5Lf3HKzLomOKUKMAd54gsOuismE27CT/SHbexskACgwVwkyG9Y3dlG6m/ZBgqoPAGaJrScjEU=
From a0e47cca81cc079d26e504d51989c12a5e79d1b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Skytt=C3=A4?=
Date: Thu, 2 Mar 2017 11:41:43 +0200
Subject: [PATCH 083/192] Spelling fixes
---
cssselect/xpath.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 698748a..d0eb2cb 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -114,7 +114,7 @@ class GenericTranslator(object):
####
#### You are welcome to hook into this to change some behavior,
#### but do so at your own risks.
- #### Until is has recieved a lot more work and review,
+ #### Until it has received a lot more work and review,
#### I reserve the right to change this API in backward-incompatible ways
#### with any minor version of cssselect.
#### See https://github.com/scrapy/cssselect/pull/22
From 8b3aa08f2e7e133f2a7df4c6941d843165720ac9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Skytt=C3=A4?=
Date: Thu, 2 Mar 2017 11:43:06 +0200
Subject: [PATCH 084/192] Python 3.6 invalid escape sequence deprecation fix
https://docs.python.org/3/whatsnew/3.6.html#deprecated-python-behavior
---
cssselect/parser.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index d155252..fe5f53c 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -617,7 +617,7 @@ def _compile(pattern):
return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match
_match_whitespace = _compile(r'[ \t\r\n\f]+')
-_match_number = _compile('[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
+_match_number = _compile(r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
_match_hash = _compile('#(?:%(nmchar)s)+')
_match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*')
_match_string_by_quote = {
From daca23b513f755298b166b328e97c0d830a9fcf1 Mon Sep 17 00:00:00 2001
From: Hugo
Date: Thu, 14 Dec 2017 17:13:19 +0200
Subject: [PATCH 085/192] Ignore IDE metadata
---
.gitignore | 1 +
1 file changed, 1 insertion(+)
diff --git a/.gitignore b/.gitignore
index 4c89f4c..b0ab86a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@
/dist
/docs/_build
/.coverage
+.idea
\ No newline at end of file
From 83014a796af107b6eb934e085011ecdf85dc4c42 Mon Sep 17 00:00:00 2001
From: Hugo
Date: Thu, 14 Dec 2017 17:17:49 +0200
Subject: [PATCH 086/192] Drop support for EOL Python 2.6
---
.travis.yml | 1 -
README.rst | 4 ++--
cssselect/parser.py | 9 +--------
setup.py | 2 +-
tox.ini | 6 +-----
5 files changed, 5 insertions(+), 17 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index cc709f1..61edf5a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,5 @@
language: python
python:
- - '2.6'
- '2.7'
- '3.3'
- '3.4'
diff --git a/README.rst b/README.rst
index 587c2d7..972b06b 100644
--- a/README.rst
+++ b/README.rst
@@ -17,9 +17,9 @@ extracted as a stand-alone project.
Quick facts:
* Free software: BSD licensed
-* Compatible with Python 2.6+ and 3.3+
+* Compatible with Python 2.7 and 3.3+
* Latest documentation `on Read the Docs `_
-* Source, issues and pull requests `on Github
+* Source, issues and pull requests `on GitHub
`_
* Releases `on PyPI `_
* Install with ``pip install cssselect``
diff --git a/cssselect/parser.py b/cssselect/parser.py
index fe5f53c..dd4709a 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -358,8 +358,6 @@ def parse(css):
# message = "%s at %s -> %r" % (
# e, stream.used, stream.peek())
# e.msg = message
-# if sys.version_info < (2,6):
-# e.message = message
# e.args = tuple([message])
# raise
@@ -630,12 +628,7 @@ def _compile(pattern):
_sub_newline_escape =re.compile(r'\\(?:\n|\r\n|\r|\f)').sub
# Same as r'\1', but faster on CPython
-if hasattr(operator, 'methodcaller'):
- # Python 2.6+
- _replace_simple = operator.methodcaller('group', 1)
-else:
- def _replace_simple(match):
- return match.group(1)
+_replace_simple = operator.methodcaller('group', 1)
def _replace_unicode(match):
codepoint = int(match.group(1), 16)
diff --git a/setup.py b/setup.py
index 199ffc7..032aa89 100644
--- a/setup.py
+++ b/setup.py
@@ -29,12 +29,12 @@
url='https://github.com/scrapy/cssselect',
license='BSD',
packages=['cssselect'],
+ python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*',
classifiers=[
'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
'License :: OSI Approved :: BSD License',
'Programming Language :: Python :: 2',
- 'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.3',
diff --git a/tox.ini b/tox.ini
index 7a3359a..a019f4e 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = py25,py26,py27,py32,py33
+envlist = py27,py33,py34,py35,py36
[testenv]
deps=
@@ -9,7 +9,3 @@ deps=
commands =
py.test --cov-report term --cov=cssselect
-
-[testenv:py25]
-setenv =
- PIP_INSECURE = 1
From c040d86c5458547bbbf80c5fd4aa9ce771f85234 Mon Sep 17 00:00:00 2001
From: Hugo
Date: Thu, 14 Dec 2017 17:19:16 +0200
Subject: [PATCH 087/192] Use 'is' to compare with None
---
tests/test_cssselect.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index 4a0bd39..f01aa7f 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -288,12 +288,12 @@ def get_error(css):
"Expected string or ident, got ")
assert get_error('[href]a') == (
"Expected selector, got ")
- assert get_error('[rel=stylesheet]') == None
+ assert get_error('[rel=stylesheet]') is None
assert get_error('[rel:stylesheet]') == (
"Operator expected, got ")
assert get_error('[rel=stylesheet') == (
"Expected ']', got ")
- assert get_error(':lang(fr)') == None
+ assert get_error(':lang(fr)') is None
assert get_error(':lang(fr') == (
"Expected an argument, got ")
assert get_error(':contains("foo') == (
@@ -586,8 +586,8 @@ def series(css):
assert series('+n') == (1, 0)
assert series('-n') == (-1, 0)
assert series('5') == (0, 5)
- assert series('foo') == None
- assert series('n+') == None
+ assert series('foo') is None
+ assert series('n+') is None
def test_lang(self):
document = etree.fromstring(XMLLANG_IDS)
From 1060ca1f3f1746caad8673f0c99299a389f1bc7c Mon Sep 17 00:00:00 2001
From: Hugo
Date: Thu, 14 Dec 2017 17:20:08 +0200
Subject: [PATCH 088/192] Remove redundant parentheses
---
cssselect/parser.py | 10 +++++-----
cssselect/xpath.py | 2 +-
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index dd4709a..9bb039c 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -552,14 +552,14 @@ def parse_series(tokens):
raise ValueError('String tokens not allowed in series.')
s = ''.join(token.value for token in tokens).strip()
if s == 'odd':
- return (2, 1)
+ return 2, 1
elif s == 'even':
- return (2, 0)
+ return 2, 0
elif s == 'n':
- return (1, 0)
+ return 1, 0
if 'n' not in s:
# Just b
- return (0, int(s))
+ return 0, int(s)
a, b = s.split('n', 1)
if not a:
a = 1
@@ -571,7 +571,7 @@ def parse_series(tokens):
b = 0
else:
b = int(b)
- return (a, b)
+ return a, b
#### Token objects
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index d0eb2cb..22cd029 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -490,7 +490,7 @@ def xpath_nth_child_function(self, xpath, function, last=False,
b_neg = (-b_min_1) % abs(a)
if b_neg != 0:
- b_neg = '+%s' % (b_neg)
+ b_neg = '+%s' % b_neg
left = '(%s %s)' % (left, b_neg)
expr.append('%s mod %s = 0' % (left, a))
From 6a53f24f3d3118d7e0ae86b2ed7521d6370608d4 Mon Sep 17 00:00:00 2001
From: Hugo
Date: Fri, 22 Dec 2017 10:55:55 +0200
Subject: [PATCH 089/192] Drop support for EOL Python 3.3
---
.travis.yml | 1 -
README.rst | 2 +-
setup.py | 3 +--
tox.ini | 2 +-
4 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index 61edf5a..d86d0a8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,7 +1,6 @@
language: python
python:
- '2.7'
- - '3.3'
- '3.4'
- '3.5'
- '3.6'
diff --git a/README.rst b/README.rst
index 972b06b..c19c6b3 100644
--- a/README.rst
+++ b/README.rst
@@ -17,7 +17,7 @@ extracted as a stand-alone project.
Quick facts:
* Free software: BSD licensed
-* Compatible with Python 2.7 and 3.3+
+* Compatible with Python 2.7 and 3.4+
* Latest documentation `on Read the Docs `_
* Source, issues and pull requests `on GitHub
`_
diff --git a/setup.py b/setup.py
index 032aa89..243927d 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
url='https://github.com/scrapy/cssselect',
license='BSD',
packages=['cssselect'],
- python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*',
+ python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*',
classifiers=[
'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
@@ -37,7 +37,6 @@
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
diff --git a/tox.ini b/tox.ini
index a019f4e..194490a 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = py27,py33,py34,py35,py36
+envlist = py27, py34, py35, py36
[testenv]
deps=
From d12b1418624faf166fdeb9db31ee95430d3c37c5 Mon Sep 17 00:00:00 2001
From: Mikhail Korobov
Date: Tue, 26 Dec 2017 17:37:14 +0500
Subject: [PATCH 090/192] badges in README
---
README.rst | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/README.rst b/README.rst
index c19c6b3..9bcd648 100644
--- a/README.rst
+++ b/README.rst
@@ -2,6 +2,22 @@
cssselect: CSS Selectors for Python
===================================
+.. image:: https://img.shields.io/pypi/v/cssselect.svg
+ :target: https://pypi.python.org/pypi/cssselect
+ :alt: PyPI Version
+
+.. image:: https://img.shields.io/pypi/pyversions/cssselect.svg
+ :target: https://pypi.python.org/pypi/cssselect
+ :alt: Supported Python Versions
+
+.. image:: https://img.shields.io/travis/scrapy/cssselect/master.svg
+ :target: https://travis-ci.org/scrapy/cssselect
+ :alt: Build Status
+
+.. image:: https://img.shields.io/codecov/c/github/scrapy/cssselect/master.svg
+ :target: https://codecov.io/github/scrapy/cssselect?branch=master
+ :alt: Coverage report
+
*cssselect* parses `CSS3 Selectors`_ and translate them to `XPath 1.0`_
expressions. Such expressions can be used in lxml_ or another XPath engine
to find the matching elements in an XML or HTML document.
From 73344698e95ce31433fad643598365f954488722 Mon Sep 17 00:00:00 2001
From: Mikhail Korobov
Date: Tue, 26 Dec 2017 17:42:16 +0500
Subject: [PATCH 091/192] DOC changelog
---
CHANGES | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/CHANGES b/CHANGES
index 92b0371..d8b27b6 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,15 @@
Changelog
=========
+Version 1.0.2
+-------------
+
+Released on 2017-12-26.
+
+* Drop support for Python 2.6 and Python 3.3.
+* Fix deprecation warning in Python 3.6.
+* Minor cleanups.
+
Version 1.0.1
-------------
From c42886850a86565a3eda081ecb9eaffdfddb29e8 Mon Sep 17 00:00:00 2001
From: Mikhail Korobov
Date: Tue, 26 Dec 2017 17:44:21 +0500
Subject: [PATCH 092/192] =?UTF-8?q?Bump=20version:=201.0.1=20=E2=86=92=201?=
=?UTF-8?q?.0.2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.bumpversion.cfg | 2 +-
cssselect/__init__.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 92c7bcb..e21dbfb 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 1.0.1
+current_version = 1.0.2
commit = True
tag = True
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index 3b06261..9180b72 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -18,5 +18,5 @@
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '1.0.1'
+VERSION = '1.0.2'
__version__ = VERSION
From 2e1234db300f4ad7f2372f15933da4f5a084b788 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Gra=C3=B1a?=
Date: Wed, 27 Dec 2017 12:13:36 -0300
Subject: [PATCH 093/192] Update travis->pypi credentials
---
.travis.yml | 24 ++++++++++--------------
1 file changed, 10 insertions(+), 14 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index d86d0a8..b76297f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,27 +1,23 @@
language: python
python:
- - '2.7'
- - '3.4'
- - '3.5'
- - '3.6'
-
+- '2.7'
+- '3.4'
+- '3.5'
+- '3.6'
install:
- - pip install lxml -e .
- - pip install -U codecov pytest-cov
-
+- pip install lxml -e .
+- pip install -U codecov pytest-cov
script:
- py.test --cov-report term --cov=cssselect
-
+- py.test --cov-report term --cov=cssselect
after_success:
- codecov
-
+- codecov
deploy:
provider: pypi
distributions: sdist bdist_wheel
skip_upload_docs: true
- user: redapple
+ user: scrapy
password:
- secure: T1PBD+ocIGwHMbBHPqzu7UZxpkB0w98KtEIkNzLXNQcF7JpjugZNwz4xX2xVhi8yvUQ257VtLSKpIOT2FWxrfLrgTZKbTd6Q7V5Lf3HKzLomOKUKMAd54gsOuismE27CT/SHbexskACgwVwkyG9Y3dlG6m/ZBgqoPAGaJrScjEU=
+ secure: UjCXD1ZfqgFcCs4ciPMJDaOQefV3ZOKZ8/dTZxcoaQlE1lr6CkaN6CfTdD50SX2M9uCNWvEcYnvs6U4SizgZ27MYzFWuHonED2alHKy4AtrxCEHD/+lGo9d18cNjLMPDZateX/lITjGiZ4rmYZNuA6wmA4P/bTmdazbSufcmMqY=
on:
tags: true
repo: scrapy/cssselect
From 720126ae39316dd21a4e03e56ccc0ba2c6a0fb24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Gra=C3=B1a?=
Date: Wed, 27 Dec 2017 12:39:26 -0300
Subject: [PATCH 094/192] Update changelog for 1.0.3 release
---
CHANGES | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/CHANGES b/CHANGES
index d8b27b6..0a0e137 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,13 @@
Changelog
=========
+Version 1.0.3
+-------------
+
+Released on 2017-12-27.
+
+* Fix artifact uploads to pypi
+
Version 1.0.2
-------------
From cb7a7e21de1ba9347d58a6a14b7c78b3de1f49ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Gra=C3=B1a?=
Date: Wed, 27 Dec 2017 12:39:30 -0300
Subject: [PATCH 095/192] =?UTF-8?q?Bump=20version:=201.0.2=20=E2=86=92=201?=
=?UTF-8?q?.0.3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.bumpversion.cfg | 2 +-
cssselect/__init__.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index e21dbfb..acb5a66 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 1.0.2
+current_version = 1.0.3
commit = True
tag = True
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index 9180b72..e9f9ce1 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -18,5 +18,5 @@
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '1.0.2'
+VERSION = '1.0.3'
__version__ = VERSION
From 4e90061eea44515c7c1c9e48c2b67a3a8489a692 Mon Sep 17 00:00:00 2001
From: Arthur Darcet
Date: Tue, 7 Mar 2017 15:29:46 +0100
Subject: [PATCH 096/192] add a method on the Selector class, to export back
the selector to css
---
cssselect/parser.py | 69 ++++++++++++++++++++++++++++++++++++++---
cssselect/xpath.py | 8 +++--
tests/test_cssselect.py | 31 ++++++++++++++++++
3 files changed, 101 insertions(+), 7 deletions(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 9bb039c..53a76bc 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -76,7 +76,7 @@ def __init__(self, tree, pseudo_element=None):
#: +-------------------------+----------------+--------------------------------+
#: | Invalid pseudo-class | ``li:marker`` | ``None`` |
#: +-------------------------+----------------+--------------------------------+
- #: | Functinal | ``a::foo(2)`` | ``FunctionalPseudoElement(…)`` |
+ #: | Functional | ``a::foo(2)`` | ``FunctionalPseudoElement(…)`` |
#: +-------------------------+----------------+--------------------------------+
#:
#: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement
@@ -92,6 +92,20 @@ def __repr__(self):
return '%s[%r%s]' % (
self.__class__.__name__, self.parsed_tree, pseudo_element)
+ def css(self):
+ """Return a CSS representation for this selector (a string)
+ """
+ if isinstance(self.pseudo_element, FunctionalPseudoElement):
+ pseudo_element = '::%s' % self.pseudo_element.css()
+ elif self.pseudo_element:
+ pseudo_element = '::%s' % self.pseudo_element
+ else:
+ pseudo_element = ''
+ res = '%s%s' % (self.parsed_tree.css(), pseudo_element)
+ if len(res) > 1:
+ res = res.lstrip('*')
+ return res
+
def specificity(self):
"""Return the specificity_ of this selector as a tuple of 3 integers.
@@ -116,6 +130,9 @@ def __repr__(self):
return '%s[%r.%s]' % (
self.__class__.__name__, self.selector, self.class_name)
+ def css(self):
+ return '%s.%s' % (self.selector.css(), self.class_name)
+
def specificity(self):
a, b, c = self.selector.specificity()
b += 1
@@ -151,6 +168,10 @@ def __repr__(self):
def argument_types(self):
return [token.type for token in self.arguments]
+ def css(self):
+ args = ''.join(token.css() for token in self.arguments)
+ return '%s(%s)' % (self.name, args)
+
def specificity(self):
a, b, c = self.selector.specificity()
b += 1
@@ -174,6 +195,10 @@ def __repr__(self):
def argument_types(self):
return [token.type for token in self.arguments]
+ def css(self):
+ args = ''.join(token.css() for token in self.arguments)
+ return '%s:%s(%s)' % (self.selector.css(), self.name, args)
+
def specificity(self):
a, b, c = self.selector.specificity()
b += 1
@@ -192,6 +217,9 @@ def __repr__(self):
return '%s[%r:%s]' % (
self.__class__.__name__, self.selector, self.ident)
+ def css(self):
+ return '%s:%s' % (self.selector.css(), self.ident)
+
def specificity(self):
a, b, c = self.selector.specificity()
b += 1
@@ -210,6 +238,10 @@ def __repr__(self):
return '%s[%r:not(%r)]' % (
self.__class__.__name__, self.selector, self.subselector)
+ def css(self):
+ return '%s:not(%s)' % (self.selector.css(),
+ self.subselector.css())
+
def specificity(self):
a1, b1, c1 = self.selector.specificity()
a2, b2, c2 = self.subselector.specificity()
@@ -238,7 +270,20 @@ def __repr__(self):
else:
return '%s[%r[%s %s %r]]' % (
self.__class__.__name__, self.selector, attrib,
- self.operator, self.value)
+ self.operator, self.value.value)
+
+ def css(self):
+ if self.namespace:
+ attrib = '%s|%s' % (self.namespace, self.attrib)
+ else:
+ attrib = self.attrib
+
+ if self.operator == 'exists':
+ op = attrib
+ else:
+ op = '%s%s%s' % (attrib, self.operator, self.value.css())
+
+ return '%s[%s]' % (self.selector.css(), op)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -258,10 +303,13 @@ def __init__(self, namespace=None, element=None):
self.element = element
def __repr__(self):
+ return '%s[%s]' % (self.__class__.__name__, self.css())
+
+ def css(self):
element = self.element or '*'
if self.namespace:
element = '%s|%s' % (self.namespace, element)
- return '%s[%s]' % (self.__class__.__name__, element)
+ return element
def specificity(self):
if self.element:
@@ -282,6 +330,9 @@ def __repr__(self):
return '%s[%r#%s]' % (
self.__class__.__name__, self.selector, self.id)
+ def css(self):
+ return '%s#%s' % (self.selector.css(), self.id)
+
def specificity(self):
a, b, c = self.selector.specificity()
a += 1
@@ -303,6 +354,10 @@ def __repr__(self):
return '%s[%r %s %r]' % (
self.__class__.__name__, self.selector, comb, self.subselector)
+ def css(self):
+ return '%s %s %s' % (self.selector.css(),
+ self.combinator, self.subselector.css())
+
def specificity(self):
a1, b1, c1 = self.selector.specificity()
a2, b2, c2 = self.subselector.specificity()
@@ -536,7 +591,7 @@ def parse_attrib(selector, stream):
if next != ('DELIM', ']'):
raise SelectorSyntaxError(
"Expected ']', got %s" % (next,))
- return Attrib(selector, namespace, attrib, op, value.value)
+ return Attrib(selector, namespace, attrib, op, value)
def parse_series(tokens):
@@ -591,6 +646,12 @@ def is_delim(self, *values):
type = property(operator.itemgetter(0))
value = property(operator.itemgetter(1))
+ def css(self):
+ if self.type == 'STRING':
+ return repr(self.value)
+ else:
+ return self.value
+
class EOFToken(Token):
def __new__(cls, pos):
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 22cd029..ad2ccbd 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -308,10 +308,12 @@ def xpath_attrib(self, selector):
attrib = '@' + name
else:
attrib = 'attribute::*[name() = %s]' % self.xpath_literal(name)
- if self.lower_case_attribute_values:
- value = selector.value.lower()
+ if selector.value is None:
+ value = None
+ elif self.lower_case_attribute_values:
+ value = selector.value.value.lower()
else:
- value = selector.value
+ value = selector.value.value
return method(self.xpath(selector.selector), attrib, value)
def xpath_class(self, class_selector):
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index f01aa7f..96e63f3 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -244,6 +244,37 @@ def specificity(css):
assert specificity('#lorem + foo#ipsum:first-child > bar:first-line'
) == (2, 1, 3)
+ def test_css_export(self):
+ def css2css(css, res=None):
+ selectors = parse(css)
+ assert len(selectors) == 1
+ assert selectors[0].css() == (res or css)
+
+ css2css('*')
+ css2css(' foo', 'foo')
+ css2css('Foo', 'Foo')
+ css2css(':empty ', ':empty')
+ css2css(':before', '::before')
+ css2css(':beFOre', '::before')
+ css2css('*:before', '::before')
+ css2css(':nth-child(2)')
+ css2css('.bar')
+ css2css('[baz]')
+ css2css('[baz="4"]', "[baz='4']")
+ css2css('[baz^="4"]', "[baz^='4']")
+ css2css('#lipsum')
+ css2css(':not(*)')
+ css2css(':not(foo)')
+ css2css(':not(*.foo)')
+ css2css(':not(*[foo])')
+ css2css(':not(*:empty)')
+ css2css(':not(*#foo)')
+ css2css('foo:empty')
+ css2css('foo::before')
+ css2css('foo:empty::before')
+ css2css('::name(arg + "val" - 3)', "::name(arg+'val'-3)")
+ css2css('#lorem + foo#ipsum:first-child > bar::first-line')
+
def test_parse_errors(self):
def get_error(css):
try:
From 8d0ff3e39c9c4806277e00ae517ab7da3b41d8f0 Mon Sep 17 00:00:00 2001
From: Arthur Darcet
Date: Mon, 11 Feb 2019 17:01:21 +0100
Subject: [PATCH 097/192] rename method to .canonical, and correctly strip
extra * in :not selectors (otherwise edge ignores them)
---
AUTHORS | 1 +
cssselect/parser.py | 49 +++++++++++++++++++++++------------------
tests/test_cssselect.py | 12 +++++-----
3 files changed, 35 insertions(+), 27 deletions(-)
diff --git a/AUTHORS b/AUTHORS
index 70ca409..66dcc22 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -10,3 +10,4 @@ Simon Sapin
Stefan Behnel
Thomas Grainger
Varialus
+Arthur Darcet
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 53a76bc..1aed6f8 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -92,16 +92,16 @@ def __repr__(self):
return '%s[%r%s]' % (
self.__class__.__name__, self.parsed_tree, pseudo_element)
- def css(self):
+ def canonical(self):
"""Return a CSS representation for this selector (a string)
"""
if isinstance(self.pseudo_element, FunctionalPseudoElement):
- pseudo_element = '::%s' % self.pseudo_element.css()
+ pseudo_element = '::%s' % self.pseudo_element.canonical()
elif self.pseudo_element:
pseudo_element = '::%s' % self.pseudo_element
else:
pseudo_element = ''
- res = '%s%s' % (self.parsed_tree.css(), pseudo_element)
+ res = '%s%s' % (self.parsed_tree.canonical(), pseudo_element)
if len(res) > 1:
res = res.lstrip('*')
return res
@@ -130,8 +130,8 @@ def __repr__(self):
return '%s[%r.%s]' % (
self.__class__.__name__, self.selector, self.class_name)
- def css(self):
- return '%s.%s' % (self.selector.css(), self.class_name)
+ def canonical(self):
+ return '%s.%s' % (self.selector.canonical(), self.class_name)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -168,7 +168,7 @@ def __repr__(self):
def argument_types(self):
return [token.type for token in self.arguments]
- def css(self):
+ def canonical(self):
args = ''.join(token.css() for token in self.arguments)
return '%s(%s)' % (self.name, args)
@@ -195,9 +195,9 @@ def __repr__(self):
def argument_types(self):
return [token.type for token in self.arguments]
- def css(self):
+ def canonical(self):
args = ''.join(token.css() for token in self.arguments)
- return '%s:%s(%s)' % (self.selector.css(), self.name, args)
+ return '%s:%s(%s)' % (self.selector.canonical(), self.name, args)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -217,8 +217,8 @@ def __repr__(self):
return '%s[%r:%s]' % (
self.__class__.__name__, self.selector, self.ident)
- def css(self):
- return '%s:%s' % (self.selector.css(), self.ident)
+ def canonical(self):
+ return '%s:%s' % (self.selector.canonical(), self.ident)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -238,9 +238,11 @@ def __repr__(self):
return '%s[%r:not(%r)]' % (
self.__class__.__name__, self.selector, self.subselector)
- def css(self):
- return '%s:not(%s)' % (self.selector.css(),
- self.subselector.css())
+ def canonical(self):
+ subsel = self.subselector.canonical()
+ if len(subsel) > 1:
+ subsel = subsel.lstrip('*')
+ return '%s:not(%s)' % (self.selector.canonical(), subsel)
def specificity(self):
a1, b1, c1 = self.selector.specificity()
@@ -272,7 +274,7 @@ def __repr__(self):
self.__class__.__name__, self.selector, attrib,
self.operator, self.value.value)
- def css(self):
+ def canonical(self):
if self.namespace:
attrib = '%s|%s' % (self.namespace, self.attrib)
else:
@@ -283,7 +285,7 @@ def css(self):
else:
op = '%s%s%s' % (attrib, self.operator, self.value.css())
- return '%s[%s]' % (self.selector.css(), op)
+ return '%s[%s]' % (self.selector.canonical(), op)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -303,9 +305,9 @@ def __init__(self, namespace=None, element=None):
self.element = element
def __repr__(self):
- return '%s[%s]' % (self.__class__.__name__, self.css())
+ return '%s[%s]' % (self.__class__.__name__, self.canonical())
- def css(self):
+ def canonical(self):
element = self.element or '*'
if self.namespace:
element = '%s|%s' % (self.namespace, element)
@@ -330,8 +332,8 @@ def __repr__(self):
return '%s[%r#%s]' % (
self.__class__.__name__, self.selector, self.id)
- def css(self):
- return '%s#%s' % (self.selector.css(), self.id)
+ def canonical(self):
+ return '%s#%s' % (self.selector.canonical(), self.id)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -354,9 +356,12 @@ def __repr__(self):
return '%s[%r %s %r]' % (
self.__class__.__name__, self.selector, comb, self.subselector)
- def css(self):
- return '%s %s %s' % (self.selector.css(),
- self.combinator, self.subselector.css())
+ def canonical(self):
+ subsel = self.subselector.canonical()
+ if len(subsel) > 1:
+ subsel = subsel.lstrip('*')
+ return '%s %s %s' % (
+ self.selector.canonical(), self.combinator, subsel)
def specificity(self):
a1, b1, c1 = self.selector.specificity()
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index 96e63f3..0819f25 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -248,7 +248,7 @@ def test_css_export(self):
def css2css(css, res=None):
selectors = parse(css)
assert len(selectors) == 1
- assert selectors[0].css() == (res or css)
+ assert selectors[0].canonical() == (res or css)
css2css('*')
css2css(' foo', 'foo')
@@ -262,18 +262,20 @@ def css2css(css, res=None):
css2css('[baz]')
css2css('[baz="4"]', "[baz='4']")
css2css('[baz^="4"]', "[baz^='4']")
+ css2css("[ns|attr='4']")
css2css('#lipsum')
css2css(':not(*)')
css2css(':not(foo)')
- css2css(':not(*.foo)')
- css2css(':not(*[foo])')
- css2css(':not(*:empty)')
- css2css(':not(*#foo)')
+ css2css(':not(*.foo)', ':not(.foo)')
+ css2css(':not(*[foo])', ':not([foo])')
+ css2css(':not(:empty)')
+ css2css(':not(#foo)')
css2css('foo:empty')
css2css('foo::before')
css2css('foo:empty::before')
css2css('::name(arg + "val" - 3)', "::name(arg+'val'-3)")
css2css('#lorem + foo#ipsum:first-child > bar::first-line')
+ css2css('foo > *')
def test_parse_errors(self):
def get_error(css):
From f52371a5821f6472129e4c47b4fdd54ed3a8e1f4 Mon Sep 17 00:00:00 2001
From: sortafreel
Date: Sat, 15 Jun 2019 22:55:12 +0300
Subject: [PATCH 098/192] css "^" as "." xpath symbol to use css "^ >" to get
immediate children
---
.gitignore | 4 +++-
cssselect/parser.py | 4 ++--
cssselect/xpath.py | 11 +++++++++++
3 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/.gitignore b/.gitignore
index b0ab86a..4436e5d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,6 @@
/dist
/docs/_build
/.coverage
-.idea
\ No newline at end of file
+.idea
+/venv
+*.vscode
\ No newline at end of file
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 9bb039c..61358d3 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -400,8 +400,8 @@ def parse_simple_selector(stream, inside_negation=False):
stream.skip_whitespace()
selector_start = len(stream.used)
peek = stream.peek()
- if peek.type == 'IDENT' or peek == ('DELIM', '*'):
- if peek.type == 'IDENT':
+ if peek.type == 'IDENT' or peek == ('DELIM', '*') or peek == ('DELIM', '^'):
+ if peek.type == 'IDENT' or peek == ('DELIM', '^'):
namespace = stream.next().value
else:
stream.next()
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 22cd029..4e5f85a 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -187,6 +187,14 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'):
The equivalent XPath 1.0 expression as an Unicode string.
"""
+ # no prefix if css immediate children (example: css "^ > div" to xpath "./div")
+ child_re = r'^[ \t\r\n\f]*\^[ \t\r\n\f]*>'
+ if re.match(child_re, css):
+ prefix = ''
+ # prefix = 'child::'
+ # css = re.sub(child_re, '', css)
+ # print('*' * 50)
+ # print(css)
return ' | '.join(self.selector_to_xpath(selector, prefix,
translate_pseudo_elements=True)
for selector in parse(css))
@@ -332,6 +340,9 @@ def xpath_element(self, selector):
if not element:
element = '*'
safe = True
+ if element == '^':
+ element = '.'
+ safe = True
else:
safe = is_safe_name(element)
if self.lower_case_element_names:
From 053f2669eef8c7942346ee7ee101777f0e267cbc Mon Sep 17 00:00:00 2001
From: sortafreel
Date: Sun, 16 Jun 2019 00:27:52 +0300
Subject: [PATCH 099/192] Implement CSS immediate children
---
cssselect/parser.py | 4 ++--
cssselect/xpath.py | 25 ++++++++++++++-----------
2 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 61358d3..11e9ff5 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -400,8 +400,8 @@ def parse_simple_selector(stream, inside_negation=False):
stream.skip_whitespace()
selector_start = len(stream.used)
peek = stream.peek()
- if peek.type == 'IDENT' or peek == ('DELIM', '*') or peek == ('DELIM', '^'):
- if peek.type == 'IDENT' or peek == ('DELIM', '^'):
+ if peek.type == 'IDENT' or peek == ('DELIM', '*') or peek == ('DELIM', '<'):
+ if peek.type == 'IDENT' or peek == ('DELIM', '<'):
namespace = stream.next().value
else:
stream.next()
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 4e5f85a..e71d21c 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -187,14 +187,6 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'):
The equivalent XPath 1.0 expression as an Unicode string.
"""
- # no prefix if css immediate children (example: css "^ > div" to xpath "./div")
- child_re = r'^[ \t\r\n\f]*\^[ \t\r\n\f]*>'
- if re.match(child_re, css):
- prefix = ''
- # prefix = 'child::'
- # css = re.sub(child_re, '', css)
- # print('*' * 50)
- # print(css)
return ' | '.join(self.selector_to_xpath(selector, prefix,
translate_pseudo_elements=True)
for selector in parse(css))
@@ -228,7 +220,18 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::',
assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return'
if translate_pseudo_elements and selector.pseudo_element:
xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
- return (prefix or '') + _unicode(xpath)
+
+ unicode_xpath = _unicode(xpath)
+ # CSS immediate children (CSS "<> div" to XPath "child::div" or "./div")
+ # Works only at the start of a selector
+ # Needed to get immediate children of a processed selector in Scrapy
+ # product = response.css('.product')
+ # name = product.css('<> div')
+ child_re = r'^[ \t\r\n\f]*\<[ \t\r\n\f]*\/'
+ if re.match(child_re, unicode_xpath):
+ prefix = 'child::'
+ unicode_xpath = re.sub(child_re, '', unicode_xpath)
+ return (prefix or '') + unicode_xpath
def xpath_pseudo_element(self, xpath, pseudo_element):
"""Translate a pseudo-element.
@@ -340,8 +343,8 @@ def xpath_element(self, selector):
if not element:
element = '*'
safe = True
- if element == '^':
- element = '.'
+ if element == '<':
+ element = '<'
safe = True
else:
safe = is_safe_name(element)
From 9ec22422722561060bca1d7805556c77681d7b18 Mon Sep 17 00:00:00 2001
From: sortafreel
Date: Sun, 16 Jun 2019 01:38:13 +0300
Subject: [PATCH 100/192] Add tests and errors handling.
---
cssselect/parser.py | 15 ++++++++++++++-
tests/test_cssselect.py | 31 +++++++++++++++++++++++++++++--
2 files changed, 43 insertions(+), 3 deletions(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 11e9ff5..5d9360c 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -401,8 +401,21 @@ def parse_simple_selector(stream, inside_negation=False):
selector_start = len(stream.used)
peek = stream.peek()
if peek.type == 'IDENT' or peek == ('DELIM', '*') or peek == ('DELIM', '<'):
- if peek.type == 'IDENT' or peek == ('DELIM', '<'):
+ if peek.type == 'IDENT':
namespace = stream.next().value
+ elif peek == ('DELIM', '<'):
+ if not (len(stream.used) == 0 or
+ (len(stream.used) == 1 and stream.used[0].type == 'S')):
+ raise SelectorSyntaxError(
+ 'Got immediate child pseudo-element "<>" not at the start of a selector'
+ )
+ namespace = stream.next().value
+ stream.skip_whitespace()
+ peek = stream.peek()
+ if not peek == ('DELIM', '>'):
+ raise SelectorSyntaxError(
+ 'Got incomplete immediate child pseudo-element "<>" (no ">")'
+ )
else:
stream.next()
namespace = None
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index f01aa7f..49bb7ba 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -42,7 +42,7 @@ class TestCssselect(unittest.TestCase):
def test_tokenizer(self):
tokens = [
_unicode(item) for item in tokenize(
- u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)'))]
+ u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)<'))]
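assert tokens == [
u("<IDENT 'E é' at 0>"),
"<S ' ' at 4>",
@@ -61,7 +61,8 @@ def test_tokenizer(self):
"<DELIM '(' at 24>",
"<NUMBER '-3.7' at 37>",
"<DELIM ')' at 41>",
- "<EOF at 42>",
+ "<DELIM '<' at 42>",
+ "<EOF at 43>",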
]
def test_parser(self):
@@ -146,6 +147,18 @@ def parse_many(first, *others):
'Negation[Element[div]:not(Class[Element[div].foo])]']
assert parse_many('td ~ th') == [
'CombinedSelector[Element[td] ~ Element[th]]']
+ # assert parse_many('<') == ['Element[<]']
+ # assert parse_many('<> foo') == [
+ # 'CombinedSelector[Element[<] > Element[foo]]'
+ # ]
+ # assert parse_many('<> foo bar > div') == [
+ # 'CombinedSelector[CombinedSelector[CombinedSelector[Element[<] > Element[foo]] '
+ # ' Element[bar]] > Element[div]]'
+ # ]
+ # assert parse_many('<> #foo #bar') == [
+ # 'CombinedSelector[CombinedSelector[Element[<] > Hash[Element[*]#foo]] '
+ # ' Hash[Element[*]#bar]]'
+ # ]
def test_pseudo_elements(self):
def parse_pseudo(css):
@@ -310,6 +323,12 @@ def get_error(css):
"Got pseudo-element ::before inside :not() at 12")
assert get_error(':not(:not(a))') == (
"Got nested :not()")
+ assert get_error('<> div <> header') == (
+ 'Got immediate child pseudo-element "<>" not at the start of a selector'
+ )
+ assert get_error('< div p') == (
+ 'Got incomplete immediate child pseudo-element "<>" (no ">")')
+ assert get_error('> div p') == ("Expected selector, got <DELIM '>' at 0>")
def test_translation(self):
def xpath(css):
@@ -483,6 +502,8 @@ def test_quoting(self):
'''descendant-or-self::*[@aval = '"']''')
assert css_to_xpath('*[aval=\'"""\']') == (
'''descendant-or-self::*[@aval = '"""']''')
+ assert css_to_xpath('<> div[dataimg=""]') == (
+ "child::div[@dataimg = '']")
def test_unicode_escapes(self):
# \22 == '"' \20 == ' '
@@ -672,6 +693,11 @@ def pcss(main, *selectors, **kwargs):
assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [
'second-li', 'li-div']
assert pcss(':lang("e")', html_only=True) == []
+ assert pcss('<> div') == []
+ assert pcss('<> body') == ['nil']
+ assert pcss('<> body > div') == ['outer-div', 'foobar-div']
+ assert pcss('<> head') == ['nil']
+ assert pcss('<> html') == []
# --- nth-* and nth-last-* -------------------------------------
@@ -853,6 +879,7 @@ def count(selector):
assert count('div[class|=dialog]') == 50 # ? Seems right
assert count('div[class!=madeup]') == 243 # ? Seems right
assert count('div[class~=dialog]') == 51 # ? Seems right
+ assert count('<> div') == 1
XMLLANG_IDS = '''
From 7c697daf87f1e7cea3f48a145b1cb7a5458750ad Mon Sep 17 00:00:00 2001
From: sortafreel
Date: Sun, 16 Jun 2019 01:45:25 +0300
Subject: [PATCH 101/192] Add more tests.
---
.gitignore | 2 +-
tests/test_cssselect.py | 23 +++++++++++------------
2 files changed, 12 insertions(+), 13 deletions(-)
diff --git a/.gitignore b/.gitignore
index 4436e5d..5c47adf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,4 +7,4 @@
/.coverage
.idea
/venv
-*.vscode
\ No newline at end of file
+*.vscode
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index 49bb7ba..f68893b 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -147,18 +147,17 @@ def parse_many(first, *others):
'Negation[Element[div]:not(Class[Element[div].foo])]']
assert parse_many('td ~ th') == [
'CombinedSelector[Element[td] ~ Element[th]]']
- # assert parse_many('<') == ['Element[<]']
- # assert parse_many('<> foo') == [
- # 'CombinedSelector[Element[<] > Element[foo]]'
- # ]
- # assert parse_many('<> foo bar > div') == [
- # 'CombinedSelector[CombinedSelector[CombinedSelector[Element[<] > Element[foo]] '
- # ' Element[bar]] > Element[div]]'
- # ]
- # assert parse_many('<> #foo #bar') == [
- # 'CombinedSelector[CombinedSelector[Element[<] > Hash[Element[*]#foo]] '
- # ' Hash[Element[*]#bar]]'
- # ]
+ assert parse_many('<> foo') == [
+ 'CombinedSelector[Element[<] > Element[foo]]'
+ ]
+ assert parse_many('<> foo bar > div') == [
+ 'CombinedSelector[CombinedSelector[CombinedSelector[Element[<] > Element[foo]] '
+ ' Element[bar]] > Element[div]]'
+ ]
+ assert parse_many('<> #foo #bar') == [
+ 'CombinedSelector[CombinedSelector[Element[<] > Hash[Element[*]#foo]] '
+ ' Hash[Element[*]#bar]]'
+ ]
def test_pseudo_elements(self):
def parse_pseudo(css):
From 37b3c0ffcd1db16ca240487f1e0f8bb716a3385c Mon Sep 17 00:00:00 2001
From: sortafreel
Date: Sun, 16 Jun 2019 02:12:07 +0300
Subject: [PATCH 102/192] Code review fixes.
---
.gitignore | 4 +---
cssselect/xpath.py | 1 -
2 files changed, 1 insertion(+), 4 deletions(-)
diff --git a/.gitignore b/.gitignore
index 5c47adf..b0ab86a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,4 @@
/dist
/docs/_build
/.coverage
-.idea
-/venv
-*.vscode
+.idea
\ No newline at end of file
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index e71d21c..e3843b5 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -344,7 +344,6 @@ def xpath_element(self, selector):
element = '*'
safe = True
if element == '<':
- element = '<'
safe = True
else:
safe = is_safe_name(element)
From 920b3d644fa62c95db40141c5322d38e98bbe8d3 Mon Sep 17 00:00:00 2001
From: sortafreel
Date: Sun, 16 Jun 2019 17:57:08 +0300
Subject: [PATCH 103/192] Change "<>" selector to ":scope"
---
cssselect/parser.py | 22 ++++++++-------------
cssselect/xpath.py | 23 +++++++++-------------
tests/test_cssselect.py | 43 ++++++++++++++++++++++-------------------
3 files changed, 40 insertions(+), 48 deletions(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 5d9360c..99b25a3 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -400,22 +400,9 @@ def parse_simple_selector(stream, inside_negation=False):
stream.skip_whitespace()
selector_start = len(stream.used)
peek = stream.peek()
- if peek.type == 'IDENT' or peek == ('DELIM', '*') or peek == ('DELIM', '<'):
+ if peek.type == 'IDENT' or peek == ('DELIM', '*'):
if peek.type == 'IDENT':
namespace = stream.next().value
- elif peek == ('DELIM', '<'):
- if not (len(stream.used) == 0 or
- (len(stream.used) == 1 and stream.used[0].type == 'S')):
- raise SelectorSyntaxError(
- 'Got immediate child pseudo-element "<>" not at the start of a selector'
- )
- namespace = stream.next().value
- stream.skip_whitespace()
- peek = stream.peek()
- if not peek == ('DELIM', '>'):
- raise SelectorSyntaxError(
- 'Got incomplete immediate child pseudo-element "<>" (no ">")'
- )
else:
stream.next()
namespace = None
@@ -465,6 +452,13 @@ def parse_simple_selector(stream, inside_negation=False):
continue
if stream.peek() != ('DELIM', '('):
result = Pseudo(result, ident)
+ if result.ident == 'scope':
+ if not (len(stream.used) == 2 or
+ (len(stream.used) == 3
+ and stream.used[0].type == 'S')):
+ raise SelectorSyntaxError(
+ 'Got immediate child pseudo-element ":scope" '
+ 'not at the start of a selector')
continue
stream.next()
stream.skip_whitespace()
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index e3843b5..d5bbf72 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -220,18 +220,7 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::',
assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return'
if translate_pseudo_elements and selector.pseudo_element:
xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
-
- unicode_xpath = _unicode(xpath)
- # CSS immediate children (CSS "<> div" to XPath "child::div" or "./div")
- # Works only at the start of a selector
- # Needed to get immediate children of a processed selector in Scrapy
- # product = response.css('.product')
- # name = product.css('<> div')
- child_re = r'^[ \t\r\n\f]*\<[ \t\r\n\f]*\/'
- if re.match(child_re, unicode_xpath):
- prefix = 'child::'
- unicode_xpath = re.sub(child_re, '', unicode_xpath)
- return (prefix or '') + unicode_xpath
+ return (prefix or '') + _unicode(xpath)
def xpath_pseudo_element(self, xpath, pseudo_element):
"""Translate a pseudo-element.
@@ -343,8 +332,6 @@ def xpath_element(self, selector):
if not element:
element = '*'
safe = True
- if element == '<':
- safe = True
else:
safe = is_safe_name(element)
if self.lower_case_element_names:
@@ -554,6 +541,14 @@ def xpath_lang_function(self, xpath, function):
def xpath_root_pseudo(self, xpath):
return xpath.add_condition("not(parent::*)")
+ # CSS immediate children (CSS ":scope > div" to XPath "child::div" or "./div")
+ # Works only at the start of a selector
+ # Needed to get immediate children of a processed selector in Scrapy
+ # for product in response.css('.product'):
+ # description = product.css(':scope > div::text').get()
+ def xpath_scope_pseudo(self, xpath):
+ return xpath.add_condition("1")
+
def xpath_first_child_pseudo(self, xpath):
return xpath.add_condition('count(preceding-sibling::*) = 0')
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index f68893b..0f2a836 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -147,18 +147,19 @@ def parse_many(first, *others):
'Negation[Element[div]:not(Class[Element[div].foo])]']
assert parse_many('td ~ th') == [
'CombinedSelector[Element[td] ~ Element[th]]']
- assert parse_many('<> foo') == [
- 'CombinedSelector[Element[<] > Element[foo]]'
+ assert parse_many(':scope > foo') == [
+ 'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]'
]
- assert parse_many('<> foo bar > div') == [
- 'CombinedSelector[CombinedSelector[CombinedSelector[Element[<] > Element[foo]] '
- ' Element[bar]] > Element[div]]'
+ assert parse_many(':scope > foo bar > div') == [
+ 'CombinedSelector[CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > '
+ 'Element[foo]] Element[bar]] > Element[div]]'
]
- assert parse_many('<> #foo #bar') == [
- 'CombinedSelector[CombinedSelector[Element[<] > Hash[Element[*]#foo]] '
- ' Hash[Element[*]#bar]]'
+ assert parse_many(':scope > #foo #bar') == [
+ 'CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > '
+ 'Hash[Element[*]#foo]] Hash[Element[*]#bar]]'
]
+ # TODO ADD TESTS
def test_pseudo_elements(self):
def parse_pseudo(css):
result = []
@@ -179,6 +180,7 @@ def parse_one(css):
assert parse_one('foo') == ('Element[foo]', None)
assert parse_one('*') == ('Element[*]', None)
assert parse_one(':empty') == ('Pseudo[Element[*]:empty]', None)
+ assert parse_one(':scope') == ('Pseudo[Element[*]:scope]', None)
# Special cases for CSS 2.1 pseudo-elements
assert parse_one(':BEfore') == ('Element[*]', 'before')
@@ -322,11 +324,9 @@ def get_error(css):
"Got pseudo-element ::before inside :not() at 12")
assert get_error(':not(:not(a))') == (
"Got nested :not()")
- assert get_error('<> div <> header') == (
- 'Got immediate child pseudo-element "<>" not at the start of a selector'
+ assert get_error(':scope > div :scope header') == (
+ 'Got immediate child pseudo-element ":scope" not at the start of a selector'
)
- assert get_error('< div p') == (
- 'Got incomplete immediate child pseudo-element "<>" (no ">")')
assert get_error('> div p') == ("Expected selector, got <DELIM '>' at 0>")
def test_translation(self):
@@ -501,8 +501,8 @@ def test_quoting(self):
'''descendant-or-self::*[@aval = '"']''')
assert css_to_xpath('*[aval=\'"""\']') == (
'''descendant-or-self::*[@aval = '"""']''')
- assert css_to_xpath('<> div[dataimg=""]') == (
- "child::div[@dataimg = '']")
+ assert css_to_xpath(':scope > div[dataimg=""]') == (
+ "descendant-or-self::*[1]/div[@dataimg = '']")
def test_unicode_escapes(self):
# \22 == '"' \20 == ' '
@@ -580,6 +580,7 @@ def xpath(css):
assert xpath('::attr-href') == "descendant-or-self::*/@href"
assert xpath('p img::attr(src)') == (
"descendant-or-self::p/descendant-or-self::*/img/@src")
+ assert xpath(':scope') == "descendant-or-self::*[1]"
def test_series(self):
def series(css):
@@ -692,11 +693,11 @@ def pcss(main, *selectors, **kwargs):
assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [
'second-li', 'li-div']
assert pcss(':lang("e")', html_only=True) == []
- assert pcss('<> div') == []
- assert pcss('<> body') == ['nil']
- assert pcss('<> body > div') == ['outer-div', 'foobar-div']
- assert pcss('<> head') == ['nil']
- assert pcss('<> html') == []
+ assert pcss(':scope > div') == []
+ assert pcss(':scope body') == ['nil']
+ assert pcss(':scope body > div') == ['outer-div', 'foobar-div']
+ assert pcss(':scope head') == ['nil']
+ assert pcss(':scope html') == []
# --- nth-* and nth-last-* -------------------------------------
@@ -878,7 +879,9 @@ def count(selector):
assert count('div[class|=dialog]') == 50 # ? Seems right
assert count('div[class!=madeup]') == 243 # ? Seems right
assert count('div[class~=dialog]') == 51 # ? Seems right
- assert count('<> div') == 1
+ assert count(':scope > div') == 1
+ assert count(':scope > div > div[class=dialog]') == 1
+ assert count(':scope > div div') == 242
XMLLANG_IDS = '''
From 97ab897ce8995662517d45fe417f63e1a1dcc73b Mon Sep 17 00:00:00 2001
From: sortafreel
Date: Sun, 16 Jun 2019 18:09:43 +0300
Subject: [PATCH 104/192] Add more tests.
---
tests/test_cssselect.py | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index 0f2a836..80dc687 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -150,6 +150,9 @@ def parse_many(first, *others):
assert parse_many(':scope > foo') == [
'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]'
]
+ assert parse_many(' :scope > foo') == [
+ 'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]'
+ ]
assert parse_many(':scope > foo bar > div') == [
'CombinedSelector[CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > '
'Element[foo]] Element[bar]] > Element[div]]'
@@ -205,10 +208,14 @@ def parse_one(css):
'Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]',
'selection')
- parse_pseudo('foo:before, bar, baz:after') == [
- ('Element[foo]', 'before'),
- ('Element[bar]', None),
- ('Element[baz]', 'after')]
+ assert parse_pseudo(':scope > div, foo bar') == [
+ ('CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]', None),
+ ('CombinedSelector[Element[foo] Element[bar]]', None)
+ ]
+ assert parse_pseudo('foo:before, bar, baz:after') == [
+ ('Element[foo]', 'before'), ('Element[bar]', None),
+ ('Element[baz]', 'after')
+ ]
# Special cases for CSS 2.1 pseudo-elements are ignored by default
for pseudo in ('after', 'before', 'first-line', 'first-letter'):
From 8cc4a266f4851e3b2502e8e740af9a9af8771ac0 Mon Sep 17 00:00:00 2001
From: sortafreel
Date: Sun, 16 Jun 2019 18:23:00 +0300
Subject: [PATCH 105/192] Lint
---
cssselect/parser.py | 3 ++-
tests/test_cssselect.py | 9 +++++----
2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 99b25a3..bcd1854 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -452,7 +452,8 @@ def parse_simple_selector(stream, inside_negation=False):
continue
if stream.peek() != ('DELIM', '('):
result = Pseudo(result, ident)
- if result.ident == 'scope':
+ if result.ident == 'scope' and repr(
+ result) == 'Pseudo[Element[*]:scope]':
if not (len(stream.used) == 2 or
(len(stream.used) == 3
and stream.used[0].type == 'S')):
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index 80dc687..5c97f30 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -42,7 +42,7 @@ class TestCssselect(unittest.TestCase):
def test_tokenizer(self):
tokens = [
_unicode(item) for item in tokenize(
- u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)<'))]
+ u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)'))]
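assert tokens == [
u("<IDENT 'E é' at 0>"),
"<S ' ' at 4>",
@@ -61,8 +61,7 @@ def test_tokenizer(self):
"<DELIM '(' at 24>",
"<NUMBER '-3.7' at 37>",
"<DELIM ')' at 41>",
- "<DELIM '<' at 42>",
- "<EOF at 43>",
+ "<EOF at 42>",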
]
def test_parser(self):
@@ -162,7 +161,6 @@ def parse_many(first, *others):
'Hash[Element[*]#foo]] Hash[Element[*]#bar]]'
]
- # TODO ADD TESTS
def test_pseudo_elements(self):
def parse_pseudo(css):
result = []
@@ -334,6 +332,9 @@ def get_error(css):
assert get_error(':scope > div :scope header') == (
'Got immediate child pseudo-element ":scope" not at the start of a selector'
)
+ assert get_error('div :scope header') == (
+ 'Got immediate child pseudo-element ":scope" not at the start of a selector'
+ )
assert get_error('> div p') == ("Expected selector, got <DELIM '>' at 0>")
def test_translation(self):
From 270f11835e81eba71441e53f4a555405df2e2a0c Mon Sep 17 00:00:00 2001
From: sortafreel
Date: Mon, 17 Jun 2019 14:53:19 +0300
Subject: [PATCH 106/192] Improve test coverage.
---
cssselect/parser.py | 3 +--
tests/test_cssselect.py | 12 +++++++++++-
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index bcd1854..3be71bb 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -452,8 +452,7 @@ def parse_simple_selector(stream, inside_negation=False):
continue
if stream.peek() != ('DELIM', '('):
result = Pseudo(result, ident)
- if result.ident == 'scope' and repr(
- result) == 'Pseudo[Element[*]:scope]':
+ if result.__repr__() == 'Pseudo[Element[*]:scope]':
if not (len(stream.used) == 2 or
(len(stream.used) == 3
and stream.used[0].type == 'S')):
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index 5c97f30..8b562da 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -178,6 +178,12 @@ def parse_one(css):
assert len(result) == 1
return result[0]
+ def test_pseudo_repr(css):
+ result = parse(css)
+ assert len(result) == 1
+ selector = result[0]
+ return selector.parsed_tree.__repr__()
+
assert parse_one('foo') == ('Element[foo]', None)
assert parse_one('*') == ('Element[*]', None)
assert parse_one(':empty') == ('Pseudo[Element[*]:empty]', None)
@@ -205,7 +211,6 @@ def parse_one(css):
'CombinedSelector[Hash[Element[lorem]#ipsum] ~ '
'Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]',
'selection')
-
assert parse_pseudo(':scope > div, foo bar') == [
('CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]', None),
('CombinedSelector[Element[foo] Element[bar]]', None)
@@ -230,6 +235,11 @@ def parse_one(css):
self.assertRaises(ExpressionError, tr.selector_to_xpath, selector,
translate_pseudo_elements=True)
+ # Special test for the unicode symbols and ':scope' element if check
+ # Errors if use repr() instead of __repr__()
+ assert test_pseudo_repr(u':fİrst-child') == u'Pseudo[Element[*]:fİrst-child]'
+ assert test_pseudo_repr(':scope') == 'Pseudo[Element[*]:scope]'
+
def test_specificity(self):
def specificity(css):
selectors = parse(css)
From 4b966853c84f44c8fb079213337e36d4992dd7f0 Mon Sep 17 00:00:00 2001
From: sortafreel
Date: Tue, 18 Jun 2019 21:39:06 +0300
Subject: [PATCH 107/192] Edit docs.
---
docs/index.rst | 2 ++
1 file changed, 2 insertions(+)
diff --git a/docs/index.rst b/docs/index.rst
index fe473f7..c7f0c1a 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -108,8 +108,10 @@ in the Level 3 specification:
* ``:not()`` accepts a *sequence of simple selectors*, not just single
*simple selector*. For example, ``:not(a.important[rel])`` is allowed,
even though the negation contains 3 *simple selectors*.
+* ``:scope`` allows accessing the immediate children of a selector: ``product.css(':scope > div::text')``, similar to XPath ``child::div``. Must be used at the start of a selector. Simplified version of `level 4 reference`_.
.. _an early draft: http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
+.. _level 4 reference: https://developer.mozilla.org/en-US/docs/Web/CSS/:scope
..
The following claim was copied from lxml:
From 81c8dab8a17e389be9390260e8e22b5c0ef4df4c Mon Sep 17 00:00:00 2001
From: Simon Potter
Date: Thu, 11 Jul 2019 19:23:43 +1200
Subject: [PATCH 108/192] Parse |ident as ident. No longer an error.
---
cssselect/parser.py | 3 +++
tests/test_cssselect.py | 1 +
2 files changed, 4 insertions(+)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 3be71bb..b96d26a 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -430,6 +430,9 @@ def parse_simple_selector(stream, inside_negation=False):
elif peek == ('DELIM', '.'):
stream.next()
result = Class(result, stream.next_ident())
+ elif peek == ('DELIM', '|'):
+ stream.next()
+ result = Element(None, stream.next_ident())
elif peek == ('DELIM', '['):
stream.next()
result = parse_attrib(result, stream)
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index 8b562da..d2432ab 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -81,6 +81,7 @@ def parse_many(first, *others):
assert parse_many('*') == ['Element[*]']
assert parse_many('*|*') == ['Element[*]']
assert parse_many('*|foo') == ['Element[foo]']
+ assert parse_many('|foo') == ['Element[foo]']
assert parse_many('foo|*') == ['Element[foo|*]']
assert parse_many('foo|bar') == ['Element[foo|bar]']
# This will never match, but it is valid:
From 6d758551c700c784c690ff59c8ccf679c573d506 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Fri, 9 Aug 2019 10:31:51 +0200
Subject: [PATCH 109/192] Enforce a working lxml version on the Python 3.4 CI
environment
---
.travis.yml | 3 +--
tests/requirements.txt | 5 +++++
tox.ini | 4 +---
3 files changed, 7 insertions(+), 5 deletions(-)
create mode 100644 tests/requirements.txt
diff --git a/.travis.yml b/.travis.yml
index b76297f..bfc557f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,8 +5,7 @@ python:
- '3.5'
- '3.6'
install:
-- pip install lxml -e .
-- pip install -U codecov pytest-cov
+- pip install -r tests/requirements.txt -e .
script:
- py.test --cov-report term --cov=cssselect
after_success:
diff --git a/tests/requirements.txt b/tests/requirements.txt
new file mode 100644
index 0000000..5232e84
--- /dev/null
+++ b/tests/requirements.txt
@@ -0,0 +1,5 @@
+codecov
+lxml;python_version!="3.4"
+lxml<=4.3.5;python_version=="3.4"
+pytest
+pytest-cov
\ No newline at end of file
diff --git a/tox.ini b/tox.ini
index 194490a..49a1dda 100644
--- a/tox.ini
+++ b/tox.ini
@@ -3,9 +3,7 @@ envlist = py27, py34, py35, py36
[testenv]
deps=
- lxml
- pytest<3
- pytest-cov
+ -r tests/requirements.txt
commands =
py.test --cov-report term --cov=cssselect
From cff38f1f00972b9851ff64fa8380022aa0d76b9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Thu, 1 Aug 2019 18:30:00 +0200
Subject: [PATCH 110/192] Cover cssselect 1.1.0 in the CHANGES file
---
CHANGES | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/CHANGES b/CHANGES
index 0a0e137..a6c5233 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,20 @@
Changelog
=========
+Version 1.1.0
+-------------
+
+Released on 2019-08-09.
+
+* Support for the ``:scope`` selector, which allows accessing the immediate
+ children of a selector.
+
+* Support for the ``|E`` syntax for type selectors without a namespace.
+
+* A new selector method, ``canonical``, returns the CSS expression of the
+ selector, as a string.
+
+
Version 1.0.3
-------------
@@ -8,6 +22,7 @@ Released on 2017-12-27.
* Fix artifact uploads to pypi
+
Version 1.0.2
-------------
@@ -17,6 +32,7 @@ Released on 2017-12-26.
* Fix deprecation warning in Python 3.6.
* Minor cleanups.
+
Version 1.0.1
-------------
@@ -25,6 +41,7 @@ Released on 2017-01-10.
* Add support for Python 3.6.
* Documentation hosted `on Read the Docs `_
+
Version 1.0.0
-------------
From 518e3e1babcc3db38ab8afe948c05a4799693108 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Fri, 9 Aug 2019 11:17:29 +0200
Subject: [PATCH 111/192] =?UTF-8?q?Bump=20version:=201.0.3=20=E2=86=92=201?=
=?UTF-8?q?.1.0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.bumpversion.cfg | 2 +-
cssselect/__init__.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index acb5a66..122d3d4 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 1.0.3
+current_version = 1.1.0
commit = True
tag = True
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index e9f9ce1..b41cef9 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -18,5 +18,5 @@
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '1.0.3'
+VERSION = '1.1.0'
__version__ = VERSION
From 91822333b7a2ddbb1f11b624b304c2563be2d0ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Tue, 17 Sep 2019 10:58:16 +0200
Subject: [PATCH 112/192] Package tests
Fixes #92
---
MANIFEST.in | 1 +
1 file changed, 1 insertion(+)
diff --git a/MANIFEST.in b/MANIFEST.in
index e98d213..a367dc0 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,4 @@
include AUTHORS CHANGES LICENSE README.rst tox.ini .coveragerc
recursive-include docs *
+recursive-include tests *
prune docs/_build
From c909f051d0034171c0658e25aa3ee4d1b745f8a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Thu, 19 Sep 2019 11:50:53 +0200
Subject: [PATCH 113/192] Support :scope after a comma delimiter
---
cssselect/parser.py | 9 +++++++--
tests/test_cssselect.py | 10 +++++++++-
tox.ini | 2 +-
3 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 7125030..0185cb2 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -517,8 +517,13 @@ def parse_simple_selector(stream, inside_negation=False):
result = Pseudo(result, ident)
if result.__repr__() == 'Pseudo[Element[*]:scope]':
if not (len(stream.used) == 2 or
- (len(stream.used) == 3
- and stream.used[0].type == 'S')):
+ (len(stream.used) == 3 and
+ stream.used[0].type == 'S') or
+ (len(stream.used) >= 3 and
+ stream.used[-3].is_delim(',')) or
+ (len(stream.used) >= 4 and
+ stream.used[-3].type == 'S' and
+ stream.used[-4].is_delim(','))):
raise SelectorSyntaxError(
'Got immediate child pseudo-element ":scope" '
'not at the start of a selector')
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index 320736c..b81f8c5 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -214,7 +214,15 @@ def test_pseudo_repr(css):
'selection')
assert parse_pseudo(':scope > div, foo bar') == [
('CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]', None),
- ('CombinedSelector[Element[foo] Element[bar]]', None)
+ ('CombinedSelector[Element[foo] Element[bar]]', None),
+ ]
+ assert parse_pseudo('foo bar, :scope > div') == [
+ ('CombinedSelector[Element[foo] Element[bar]]', None),
+ ('CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]', None),
+ ]
+ assert parse_pseudo('foo bar,:scope > div') == [
+ ('CombinedSelector[Element[foo] Element[bar]]', None),
+ ('CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]', None),
]
assert parse_pseudo('foo:before, bar, baz:after') == [
('Element[foo]', 'before'), ('Element[bar]', None),
diff --git a/tox.ini b/tox.ini
index 49a1dda..6a09b07 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,4 +6,4 @@ deps=
-r tests/requirements.txt
commands =
- py.test --cov-report term --cov=cssselect
+ py.test --cov-report term --cov=cssselect {posargs}
From 928ad922ddf3701bef5dc178a485b0d0246b784e Mon Sep 17 00:00:00 2001
From: Akshita Agarwal
Date: Wed, 16 Oct 2019 17:46:06 +0530
Subject: [PATCH 114/192] add 3.7 version after running tests
---
setup.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/setup.py b/setup.py
index 243927d..de7128d 100644
--- a/setup.py
+++ b/setup.py
@@ -40,6 +40,7 @@
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
+ 'Programming Language :: Python :: 3.7'
],
**extra_kwargs
)
From 24eb0952eaf4c1f1ed86c123840e606959962953 Mon Sep 17 00:00:00 2001
From: Akshita Agarwal
Date: Wed, 16 Oct 2019 21:14:46 +0530
Subject: [PATCH 115/192] address comments
---
.travis.yml | 3 ++-
tox.ini | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index bfc557f..69ecf93 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,6 +4,7 @@ python:
- '3.4'
- '3.5'
- '3.6'
+- '3.7'
install:
- pip install -r tests/requirements.txt -e .
script:
@@ -20,4 +21,4 @@ deploy:
on:
tags: true
repo: scrapy/cssselect
- condition: "$TRAVIS_PYTHON_VERSION == '3.6'"
+ condition: "$TRAVIS_PYTHON_VERSION == '3.7'"
diff --git a/tox.ini b/tox.ini
index 49a1dda..32136a0 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = py27, py34, py35, py36
+envlist = py27, py34, py35, py36, py37
[testenv]
deps=
From c05327240d73beda2132a1d3fcf0d33317738a58 Mon Sep 17 00:00:00 2001
From: whybin <31753349+whybin@users.noreply.github.com>
Date: Thu, 31 May 2018 16:24:45 -0700
Subject: [PATCH 116/192] Add XPath tests for operator precedence
---
tests/test_cssselect.py | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index 320736c..94da2e1 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -622,6 +622,11 @@ def xpath_attr_href_simple_pseudo_element(self, xpath):
other = XPathExpr('@href', '', )
return xpath.join('/', other)
+ # pseudo-element:
+ # used to demonstrate operator precedence
+ def xpath_first_or_second_pseudo(self, xpath):
+ return xpath.add_condition("@id = 'first' or @id = 'second'")
+
def xpath(css):
return _unicode(CustomTranslator().css_to_xpath(css))
@@ -633,6 +638,25 @@ def xpath(css):
assert xpath('p img::attr(src)') == (
"descendant-or-self::p/descendant-or-self::*/img/@src")
assert xpath(':scope') == "descendant-or-self::*[1]"
+ assert xpath(':first-or-second[href]') == (
+ "descendant-or-self::*[(@id = 'first' or @id = 'second') "
+ "and (@href)]")
+
+ assert str(XPathExpr('', '', condition='@href')) == "[(@href)]"
+
+ document = etree.fromstring(OPERATOR_PRECEDENCE_IDS)
+ sort_key = dict(
+ (el, count) for count, el in enumerate(document.getiterator())
+ ).__getitem__
+ def operator_id(selector):
+ xpath = CustomTranslator().css_to_xpath(selector)
+ items = document.xpath(xpath)
+ items.sort(key=sort_key)
+ return [element.get('id', 'nil') for element in items]
+
+ assert operator_id(':first-or-second') == ['first', 'second']
+ assert operator_id(':first-or-second[href]') == ['second']
+ assert operator_id('[href]:first-or-second') == ['second']
def test_series(self):
def series(css):
@@ -935,6 +959,14 @@ def count(selector):
assert count(':scope > div > div[class=dialog]') == 1
assert count(':scope > div div') == 242
+OPERATOR_PRECEDENCE_IDS = '''
+
+
+
+
+
+'''
+
XMLLANG_IDS = '''
a
From 754b701bc26dcd239ae1d9813774f75f78ed2dc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Mon, 21 Oct 2019 16:43:10 +0200
Subject: [PATCH 117/192] Use parentheses when joining with AND
potentially-complex expressions
---
cssselect/xpath.py | 17 +++++++++++------
tests/test_cssselect.py | 12 ++++++------
2 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index db50c77..14e9b80 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -56,7 +56,7 @@ def __repr__(self):
def add_condition(self, condition):
if self.condition:
- self.condition = '%s and (%s)' % (self.condition, condition)
+ self.condition = '(%s) and (%s)' % (self.condition, condition)
else:
self.condition = condition
return self
@@ -457,19 +457,19 @@ def xpath_nth_child_function(self, xpath, function, last=False,
if a == 0:
return xpath.add_condition('%s = %s' % (siblings_count, b_min_1))
- expr = []
+ expressions = []
if a > 0:
# siblings count, an+b-1, is always >= 0,
# so if a>0, and (b-1)<=0, an "n" exists to satisfy this,
# therefore, the predicate is only interesting if (b-1)>0
if b_min_1 > 0:
- expr.append('%s >= %s' % (siblings_count, b_min_1))
+ expressions.append('%s >= %s' % (siblings_count, b_min_1))
else:
# if a<0, and (b-1)<0, no "n" satisfies this,
# this is tested above as an early exit condition
# otherwise,
- expr.append('%s <= %s' % (siblings_count, b_min_1))
+ expressions.append('%s <= %s' % (siblings_count, b_min_1))
# operations modulo 1 or -1 are simpler, one only needs to verify:
#
@@ -495,9 +495,14 @@ def xpath_nth_child_function(self, xpath, function, last=False,
b_neg = '+%s' % b_neg
left = '(%s %s)' % (left, b_neg)
- expr.append('%s mod %s = 0' % (left, a))
+ expressions.append('%s mod %s = 0' % (left, a))
- xpath.add_condition(' and '.join(expr))
+ if len(expressions) > 1:
+ template = '(%s)'
+ else:
+ template = '%s'
+ xpath.add_condition(' and '.join(template % expression
+ for expression in expressions))
return xpath
def xpath_nth_last_child_function(self, xpath, function):
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index 94da2e1..d6969f2 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -428,8 +428,8 @@ def xpath(css):
"e[count(preceding-sibling::*) <= 0]")
assert xpath('e:nth-child(3n+2)') == (
- "e[count(preceding-sibling::*) >= 1 and "
- "(count(preceding-sibling::*) +2) mod 3 = 0]")
+ "e[(count(preceding-sibling::*) >= 1) and "
+ "((count(preceding-sibling::*) +2) mod 3 = 0)]")
assert xpath('e:nth-child(3n-2)') == (
"e[count(preceding-sibling::*) mod 3 = 0]")
assert xpath('e:nth-child(-n+6)') == (
@@ -442,8 +442,8 @@ def xpath(css):
assert xpath('e:nth-last-child(2n+1)') == (
"e[count(following-sibling::*) mod 2 = 0]")
assert xpath('e:nth-last-child(2n+2)') == (
- "e[count(following-sibling::*) >= 1 and "
- "(count(following-sibling::*) +1) mod 2 = 0]")
+ "e[(count(following-sibling::*) >= 1) and "
+ "((count(following-sibling::*) +1) mod 2 = 0)]")
assert xpath('e:nth-last-child(3n+1)') == (
"e[count(following-sibling::*) mod 3 = 0]")
# represents the two last e elements
@@ -497,7 +497,7 @@ def xpath(css):
assert xpath('e > f') == (
"e/f")
assert xpath('e + f') == (
- "e/following-sibling::*[name() = 'f' and (position() = 1)]")
+ "e/following-sibling::*[(name() = 'f') and (position() = 1)]")
assert xpath('e ~ f') == (
"e/following-sibling::f")
assert xpath('e ~ f:nth-child(3)') == (
@@ -642,7 +642,7 @@ def xpath(css):
"descendant-or-self::*[(@id = 'first' or @id = 'second') "
"and (@href)]")
- assert str(XPathExpr('', '', condition='@href')) == "[(@href)]"
+ assert str(XPathExpr('', '', condition='@href')) == "[@href]"
document = etree.fromstring(OPERATOR_PRECEDENCE_IDS)
sort_key = dict(
From dde3b5e68ba2e49ec4552a75a805536c7dcdc896 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Tue, 22 Oct 2019 16:55:17 +0200
Subject: [PATCH 118/192] Enable nitpicky Sphinx warnings, fix issues and fail
on new issues
---
.travis.yml | 24 ++++++++++++++++++------
CHANGES | 4 ++--
docs/conf.py | 5 +++++
tox.ini | 10 +++++++++-
4 files changed, 34 insertions(+), 9 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index 69ecf93..bd043e1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,16 +1,28 @@
language: python
-python:
-- '2.7'
-- '3.4'
-- '3.5'
-- '3.6'
-- '3.7'
+matrix:
+ include:
+ - python: 2.7
+ env: TOXENV=py27
+ - python: 3.4
+ env: TOXENV=py34
+ - python: 3.5
+ env: TOXENV=py35
+ - python: 3.6
+ env: TOXENV=py36
+ - python: 3.7
+ env: TOXENV=py37
+ - python: 3.7
+ env: TOXENV=docs
+
install:
- pip install -r tests/requirements.txt -e .
+
script:
- py.test --cov-report term --cov=cssselect
+
after_success:
- codecov
+
deploy:
provider: pypi
distributions: sdist bdist_wheel
diff --git a/CHANGES b/CHANGES
index a6c5233..4e7185f 100644
--- a/CHANGES
+++ b/CHANGES
@@ -199,14 +199,14 @@ Version 0.3
Released on 2012-04-17.
* Fix many parsing bugs.
-* Rename the :class:`Translator` class to :class:`GenericTranslator`
+* Rename the ``Translator`` class to :class:`GenericTranslator`
* There, implement ``:target``, ``:hover``, ``:focus``, ``:active``
``:checked``, ``:enabled``, ``:disabled``, ``:link`` and ``:visited``
as never matching.
* Make a new HTML-specific ``HTMLTranslator`` subclass. There, implement
``:checked``, ``:enabled``, ``:disabled``, ``:link`` and ``:visited``
as appropriate for HTML, with all links "not visited".
-* Remove the :func:`css_to_xpath` function. The translator classes
+* Remove the ``css_to_xpath`` function. The translator classes
are the new API.
* Add support for ``:contains()`` back, but case-sensitive. lxml will
override it to be case-insensitive for backward-compatibility.
diff --git a/docs/conf.py b/docs/conf.py
index aa897ef..86898c2 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -248,3 +248,8 @@
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'http://docs.python.org/': None}
+
+
+# --- Nitpicking options ------------------------------------------------------
+
+nitpicky = True
diff --git a/tox.ini b/tox.ini
index 32136a0..1d50b69 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = py27, py34, py35, py36, py37
+envlist = py27, py34, py35, py36, py37, docs
[testenv]
deps=
@@ -7,3 +7,11 @@ deps=
commands =
py.test --cov-report term --cov=cssselect
+
+[testenv:docs]
+changedir = docs
+deps =
+ sphinx
+ sphinx_rtd_theme
+commands =
+ sphinx-build -W -b html . {envtmpdir}/html
\ No newline at end of file
From f4a04641c6ff66aec9e4247be079b02942c81b65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Tue, 22 Oct 2019 18:13:54 +0200
Subject: [PATCH 119/192] Remove unused FunctionalPseudoElement methods
---
cssselect/parser.py | 8 --------
1 file changed, 8 deletions(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 7125030..b63e3df 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -165,18 +165,10 @@ def __repr__(self):
self.__class__.__name__, self.name,
[token.value for token in self.arguments])
- def argument_types(self):
- return [token.type for token in self.arguments]
-
def canonical(self):
args = ''.join(token.css() for token in self.arguments)
return '%s(%s)' % (self.name, args)
- def specificity(self):
- a, b, c = self.selector.specificity()
- b += 1
- return a, b, c
-
class Function(object):
"""
From b4efd7f0e61324be6fe8d25d268628490dda82ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Tue, 22 Oct 2019 18:37:35 +0200
Subject: [PATCH 120/192] Revert "Remove unused FunctionalPseudoElement
methods"
This reverts commit f4a04641c6ff66aec9e4247be079b02942c81b65.
---
cssselect/parser.py | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index b63e3df..7125030 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -165,10 +165,18 @@ def __repr__(self):
self.__class__.__name__, self.name,
[token.value for token in self.arguments])
+ def argument_types(self):
+ return [token.type for token in self.arguments]
+
def canonical(self):
args = ''.join(token.css() for token in self.arguments)
return '%s(%s)' % (self.name, args)
+ def specificity(self):
+ a, b, c = self.selector.specificity()
+ b += 1
+ return a, b, c
+
class Function(object):
"""
From 9c1fbc9c194c312077a9f82fab0b0c2e57a22e77 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Tue, 22 Oct 2019 19:03:04 +0200
Subject: [PATCH 121/192] Cover all FunctionalPseudoElement methods with tests
---
tests/test_cssselect.py | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index 320736c..e4bd74e 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -258,6 +258,7 @@ def specificity(css):
assert specificity('[baz="4"]') == (0, 1, 0)
assert specificity('[baz^="4"]') == (0, 1, 0)
assert specificity('#lipsum') == (1, 0, 0)
+ assert specificity('::attr(name)') == (0, 0, 1)
assert specificity(':not(*)') == (0, 0, 0)
assert specificity(':not(foo)') == (0, 0, 1)
@@ -686,6 +687,31 @@ def langid(selector):
'first', 'second', 'third', 'fourth', 'eighth']
assert langid(':lang(es)') == []
+ def test_argument_types(self):
+
+ class CustomTranslator(GenericTranslator):
+
+ def __init__(self):
+ self.argument_types = []
+
+ def xpath_pseudo_element(self, xpath, function):
+ self.argument_types += function.argument_types()
+
+ def argument_types(css):
+ translator = CustomTranslator()
+ translator.css_to_xpath(css)
+ return translator.argument_types
+
+ mappings = (
+ ('', []),
+ ('ident', ['IDENT']),
+ ('"string"', ['STRING']),
+ ('1', ['NUMBER']),
+ )
+ for argument_string, argument_list in mappings:
+ css = '::pseudo_element({})'.format(argument_string)
+ assert argument_types(css) == argument_list
+
def test_select(self):
document = etree.fromstring(HTML_IDS)
sort_key = dict(
From 98019114d6b01f64cdcf38ad34abd5cc63e2accd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Thu, 24 Oct 2019 13:44:49 +0200
Subject: [PATCH 122/192] Add a PyLint CI pipeline
---
.travis.yml | 23 ++++++++++++++++-------
pylintrc | 33 +++++++++++++++++++++++++++++++++
tox.ini | 8 ++++++++
3 files changed, 57 insertions(+), 7 deletions(-)
create mode 100644 pylintrc
diff --git a/.travis.yml b/.travis.yml
index 69ecf93..63d3ef2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,10 +1,19 @@
language: python
-python:
-- '2.7'
-- '3.4'
-- '3.5'
-- '3.6'
-- '3.7'
+sudo: false
+matrix:
+ include:
+ - python: 3.7
+ env: TOXENV=pylint
+ - python: 2.7
+ env: TOXENV=py27
+ - python: 3.4
+ env: TOXENV=py34
+ - python: 3.5
+ env: TOXENV=py35
+ - python: 3.6
+ env: TOXENV=py36
+ - python: 3.7
+ env: TOXENV=py37
install:
- pip install -r tests/requirements.txt -e .
script:
@@ -21,4 +30,4 @@ deploy:
on:
tags: true
repo: scrapy/cssselect
- condition: "$TRAVIS_PYTHON_VERSION == '3.7'"
+ condition: "$TOXENV == py37"
diff --git a/pylintrc b/pylintrc
new file mode 100644
index 0000000..b6972ec
--- /dev/null
+++ b/pylintrc
@@ -0,0 +1,33 @@
+[MASTER]
+persistent=no
+
+[MESSAGES CONTROL]
+disable=assignment-from-no-return,
+ bad-continuation,
+ bad-whitespace,
+ c-extension-no-member,
+ consider-using-in,
+ fixme,
+ inconsistent-return-statements,
+ invalid-name,
+ missing-class-docstring,
+ missing-function-docstring,
+ missing-module-docstring,
+ multiple-imports,
+ no-else-return,
+ no-member,
+ no-self-use,
+ redefined-builtin,
+ redefined-outer-name,
+ too-few-public-methods,
+ too-many-arguments,
+ too-many-branches,
+ too-many-function-args,
+ too-many-lines,
+ too-many-public-methods,
+ too-many-statements,
+ undefined-variable,
+ unidiomatic-typecheck,
+ unused-argument,
+ unused-import,
+ useless-object-inheritance # Required for Python 2 support
diff --git a/tox.ini b/tox.ini
index 32136a0..430720a 100644
--- a/tox.ini
+++ b/tox.ini
@@ -7,3 +7,11 @@ deps=
commands =
py.test --cov-report term --cov=cssselect
+
+[testenv:pylint]
+basepython = python3.7
+deps =
+ {[testenv]deps}
+ pylint
+commands =
+ pylint cssselect docs setup.py tests
From cc573dfd2f83266c35014682e0fd16727b24f2fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Mon, 11 Nov 2019 12:30:25 +0100
Subject: [PATCH 123/192] Also run tests from the documentation
---
docs/conftest.py | 16 ++++++++++++++++
tests/requirements.txt | 3 ++-
tox.ini | 2 +-
3 files changed, 19 insertions(+), 2 deletions(-)
create mode 100644 docs/conftest.py
diff --git a/docs/conftest.py b/docs/conftest.py
new file mode 100644
index 0000000..a98f9e5
--- /dev/null
+++ b/docs/conftest.py
@@ -0,0 +1,16 @@
+from doctest import ELLIPSIS, NORMALIZE_WHITESPACE
+
+from sybil import Sybil
+from sybil.parsers.codeblock import CodeBlockParser
+from sybil.parsers.doctest import DocTestParser
+from sybil.parsers.skip import skip
+
+
+pytest_collect_file = Sybil(
+ parsers=[
+ DocTestParser(optionflags=ELLIPSIS | NORMALIZE_WHITESPACE),
+ CodeBlockParser(future_imports=['print_function']),
+ skip,
+ ],
+ pattern='*.rst',
+).pytest()
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 5232e84..00f8f94 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -2,4 +2,5 @@ codecov
lxml;python_version!="3.4"
lxml<=4.3.5;python_version=="3.4"
pytest
-pytest-cov
\ No newline at end of file
+pytest-cov
+sybil
\ No newline at end of file
diff --git a/tox.ini b/tox.ini
index 32136a0..ad6780d 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,4 +6,4 @@ deps=
-r tests/requirements.txt
commands =
- py.test --cov-report term --cov=cssselect
+ py.test --cov-report term --cov=cssselect docs tests
From 05c0e76dc68ac0a62ef4ba47c1e1ace855053a1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Thu, 14 Nov 2019 12:24:42 +0100
Subject: [PATCH 124/192] Add bandit to CI
---
.bandit.yml | 2 ++
.travis.yml | 15 +++++++++------
tox.ini | 6 ++++++
3 files changed, 17 insertions(+), 6 deletions(-)
create mode 100644 .bandit.yml
diff --git a/.bandit.yml b/.bandit.yml
new file mode 100644
index 0000000..7fcde04
--- /dev/null
+++ b/.bandit.yml
@@ -0,0 +1,2 @@
+skips:
+- B101
diff --git a/.travis.yml b/.travis.yml
index 69ecf93..dbf5885 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,10 +1,13 @@
language: python
-python:
-- '2.7'
-- '3.4'
-- '3.5'
-- '3.6'
-- '3.7'
+matrix:
+ include:
+ - env: TOXENV=security
+ python: 3.8
+ - python: 2.7
+ - python: 3.4
+ - python: 3.5
+ - python: 3.6
+ - python: 3.7
install:
- pip install -r tests/requirements.txt -e .
script:
diff --git a/tox.ini b/tox.ini
index 32136a0..4db8e7c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -7,3 +7,9 @@ deps=
commands =
py.test --cov-report term --cov=cssselect
+
+[testenv:security]
+deps =
+ bandit
+commands =
+ bandit -r -c .bandit.yml {posargs:cssselect}
From b26932d66cd629dbe491b31f2ac5462d2311db14 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Sun, 11 Oct 2020 20:12:40 +0200
Subject: [PATCH 125/192] Fix class reference (#110)
---
cssselect/xpath.py | 2 +-
tests/requirements.txt | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index db50c77..eb8be92 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -180,7 +180,7 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'):
This string is prepended to the XPath expression for each selector.
The default makes selectors scoped to the context node’s subtree.
:raises:
- :class:`SelectorSyntaxError` on invalid selectors,
+ :class:`~cssselect.SelectorSyntaxError` on invalid selectors,
:class:`ExpressionError` on unknown/unsupported selectors,
including pseudo-elements.
:returns:
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 5232e84..000d5f2 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,5 +1,5 @@
codecov
lxml;python_version!="3.4"
lxml<=4.3.5;python_version=="3.4"
-pytest
+pytest >=4.6, <4.7 # 4.7 drops support for Python 2.7 and 3.4
pytest-cov
\ No newline at end of file
From 163404122e5a05afe71dba59d808d7afd9726344 Mon Sep 17 00:00:00 2001
From: KOLANICH
Date: Sat, 20 Feb 2021 17:20:36 +0300
Subject: [PATCH 126/192] Added .editorconfig according to PEP 8
---
.editorconfig | 11 +++++++++++
1 file changed, 11 insertions(+)
create mode 100644 .editorconfig
diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..38558bf
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,11 @@
+root = true
+
+[*]
+charset = utf-8
+indent_style = space
+indent_size = 4
+insert_final_newline = true
+end_of_line = lf
+
+[*.{yml,yaml}]
+indent_size = 2
From 1f643a84d651ebd3075c2f61e30f46bf90451b46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Fri, 5 Mar 2021 16:50:19 +0100
Subject: [PATCH 127/192] =?UTF-8?q?Travis=20CI=20=E2=86=92=20GitHub=20Acti?=
=?UTF-8?q?ons?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.github/workflows/publish.yml | 31 ++++++++++++++++++++++++++
.github/workflows/tests.yml | 41 +++++++++++++++++++++++++++++++++++
.travis.yml | 24 --------------------
tox.ini | 2 +-
4 files changed, 73 insertions(+), 25 deletions(-)
create mode 100644 .github/workflows/publish.yml
create mode 100644 .github/workflows/tests.yml
delete mode 100644 .travis.yml
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..7c0f8d0
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,31 @@
+name: Publish
+on: [push]
+
+jobs:
+ publish:
+ runs-on: ubuntu-latest
+ if: startsWith(github.event.ref, 'refs/tags/')
+
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: Set up Python 3.8
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3
+
+ - name: Check Tag
+ id: check-release-tag
+ run: |
+ if [[ ${{ github.event.ref }} =~ ^refs/tags/[0-9]+[.][0-9]+[.][0-9]+(rc[0-9]+|[.]dev[0-9]+)?$ ]]; then
+ echo ::set-output name=release_tag::true
+ fi
+
+ - name: Publish to PyPI
+ if: steps.check-release-tag.outputs.release_tag == 'true'
+ run: |
+ pip install --upgrade setuptools wheel twine
+ python setup.py sdist bdist_wheel
+ export TWINE_USERNAME=__token__
+ export TWINE_PASSWORD=${{ secrets.PYPI_TOKEN }}
+ twine upload dist/*
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 0000000..1a0cf65
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,41 @@
+name: Tests
+on: [push, pull_request]
+
+jobs:
+ tests:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ include:
+ - python-version: 2.7
+ env:
+ TOXENV: py
+ - python-version: 3.4
+ env:
+ TOXENV: py
+ - python-version: 3.5
+ env:
+ TOXENV: py
+ - python-version: 3.6
+ env:
+ TOXENV: py
+ - python-version: 3.7
+ env:
+ TOXENV: py
+
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Run tests
+ env: ${{ matrix.env }}
+ run: |
+ pip install -U tox
+ tox
+
+ - name: Upload coverage report
+ run: bash <(curl -s https://codecov.io/bash)
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 69ecf93..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-language: python
-python:
-- '2.7'
-- '3.4'
-- '3.5'
-- '3.6'
-- '3.7'
-install:
-- pip install -r tests/requirements.txt -e .
-script:
-- py.test --cov-report term --cov=cssselect
-after_success:
-- codecov
-deploy:
- provider: pypi
- distributions: sdist bdist_wheel
- skip_upload_docs: true
- user: scrapy
- password:
- secure: UjCXD1ZfqgFcCs4ciPMJDaOQefV3ZOKZ8/dTZxcoaQlE1lr6CkaN6CfTdD50SX2M9uCNWvEcYnvs6U4SizgZ27MYzFWuHonED2alHKy4AtrxCEHD/+lGo9d18cNjLMPDZateX/lITjGiZ4rmYZNuA6wmA4P/bTmdazbSufcmMqY=
- on:
- tags: true
- repo: scrapy/cssselect
- condition: "$TRAVIS_PYTHON_VERSION == '3.7'"
diff --git a/tox.ini b/tox.ini
index 32136a0..4fb1d7c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = py27, py34, py35, py36, py37
+envlist = py
[testenv]
deps=
From b9506ce52a622b001d965c99e10d8deaf25e8bd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Fri, 5 Mar 2021 16:51:31 +0100
Subject: [PATCH 128/192] Remove end-of-life Python 3.4 from CI
---
.github/workflows/tests.yml | 3 ---
1 file changed, 3 deletions(-)
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 1a0cf65..817d824 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -10,9 +10,6 @@ jobs:
- python-version: 2.7
env:
TOXENV: py
- - python-version: 3.4
- env:
- TOXENV: py
- python-version: 3.5
env:
TOXENV: py
From a2e2894bd79457fed402b91440b63f0b28692b02 Mon Sep 17 00:00:00 2001
From: Eugenio Lacuesta
Date: Wed, 23 Jun 2021 10:02:12 -0300
Subject: [PATCH 129/192] Update CI badge
---
README.rst | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/README.rst b/README.rst
index 9bcd648..c6d387f 100644
--- a/README.rst
+++ b/README.rst
@@ -10,9 +10,9 @@ cssselect: CSS Selectors for Python
:target: https://pypi.python.org/pypi/cssselect
:alt: Supported Python Versions
-.. image:: https://img.shields.io/travis/scrapy/cssselect/master.svg
- :target: https://travis-ci.org/scrapy/cssselect
- :alt: Build Status
+.. image:: https://github.com/scrapy/cssselect/actions/workflows/tests.yml/badge.svg
+ :target: https://github.com/scrapy/cssselect/actions/workflows/tests.yml
+ :alt: Tests
.. image:: https://img.shields.io/codecov/c/github/scrapy/cssselect/master.svg
:target: https://codecov.io/github/scrapy/cssselect?branch=master
From 4bf687a167e5abd1e50f65b1749baa7634767665 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Wed, 23 Jun 2021 15:43:46 +0200
Subject: [PATCH 130/192] Add flake8 (#105)
---
.flake8 | 15 +++++++++++++++
.github/workflows/checks.yml | 3 +++
tox.ini | 12 +++++++++---
3 files changed, 27 insertions(+), 3 deletions(-)
create mode 100644 .flake8
diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..89e6e07
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,15 @@
+[flake8]
+max-line-length = 99
+ignore = W503
+exclude =
+ .git
+ .tox
+ venv*
+
+ # pending revision
+ cssselect/__init__.py
+ cssselect/parser.py
+ cssselect/xpath.py
+ docs/conf.py
+ setup.py
+ tests/test_cssselect.py
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index 99fff74..2f38d19 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -7,6 +7,9 @@ jobs:
strategy:
matrix:
include:
+ - python-version: 3
+ env:
+ TOXENV: flake8
- python-version: 3
env:
TOXENV: security
diff --git a/tox.ini b/tox.ini
index eabac24..5ae98ce 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,15 +1,21 @@
[tox]
-envlist = security,py
+envlist = flake8,security,py
[testenv]
+basepython = python3
deps=
-r tests/requirements.txt
-
commands =
py.test --cov-report term --cov=cssselect
+[testenv:flake8]
+deps =
+ flake8==3.9.2
+commands =
+ flake8 {posargs: cssselect setup.py tests docs/conf.py}
+
[testenv:security]
deps =
bandit
commands =
- bandit -r -c .bandit.yml {posargs:cssselect}
+ bandit -r -c .bandit.yml {posargs: cssselect}
From 5399d4194e14ad79247bc589cb777b5a547ac149 Mon Sep 17 00:00:00 2001
From: Eugenio Lacuesta
Date: Wed, 23 Jun 2021 11:21:22 -0300
Subject: [PATCH 131/192] Add black check
---
.github/workflows/checks.yml | 3 +++
pyproject.toml | 3 +++
setup.py | 50 +++++++++++++++++++-----------------
tox.ini | 8 +++++-
4 files changed, 39 insertions(+), 25 deletions(-)
create mode 100644 pyproject.toml
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index 2f38d19..bb50590 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -7,6 +7,9 @@ jobs:
strategy:
matrix:
include:
+ - python-version: 3
+ env:
+ TOXENV: black
- python-version: 3
env:
TOXENV: flake8
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..b409f47
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
+[tool.black]
+line-length = 99
+exclude = 'cssselect/|tests/'
diff --git a/setup.py b/setup.py
index de7128d..3782534 100644
--- a/setup.py
+++ b/setup.py
@@ -2,45 +2,47 @@
import re
import os.path
+
try:
from setuptools import setup
- extra_kwargs = {'test_suite': 'cssselect.tests'}
+
+ extra_kwargs = {"test_suite": "cssselect.tests"}
except ImportError:
from distutils.core import setup
+
extra_kwargs = {}
ROOT = os.path.dirname(__file__)
-README = open(os.path.join(ROOT, 'README.rst')).read()
-INIT_PY = open(os.path.join(ROOT, 'cssselect', '__init__.py')).read()
+README = open(os.path.join(ROOT, "README.rst")).read()
+INIT_PY = open(os.path.join(ROOT, "cssselect", "__init__.py")).read()
VERSION = re.search("VERSION = '([^']+)'", INIT_PY).group(1)
setup(
- name='cssselect',
+ name="cssselect",
version=VERSION,
- author='Ian Bicking',
- author_email='ianb@colorstudy.com',
- maintainer='Paul Tremberth',
- maintainer_email='paul.tremberth@gmail.com',
- description=
- 'cssselect parses CSS3 Selectors and translates them to XPath 1.0',
+ author="Ian Bicking",
+ author_email="ianb@colorstudy.com",
+ maintainer="Paul Tremberth",
+ maintainer_email="paul.tremberth@gmail.com",
+ description="cssselect parses CSS3 Selectors and translates them to XPath 1.0",
long_description=README,
- url='https://github.com/scrapy/cssselect',
- license='BSD',
- packages=['cssselect'],
- python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*',
+ url="https://github.com/scrapy/cssselect",
+ license="BSD",
+ packages=["cssselect"],
+ python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*",
classifiers=[
- 'Development Status :: 4 - Beta',
- 'Intended Audience :: Developers',
- 'License :: OSI Approved :: BSD License',
- 'Programming Language :: Python :: 2',
- 'Programming Language :: Python :: 2.7',
- 'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.4',
- 'Programming Language :: Python :: 3.5',
- 'Programming Language :: Python :: 3.6',
- 'Programming Language :: Python :: 3.7'
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Developers",
+ "License :: OSI Approved :: BSD License",
+ "Programming Language :: Python :: 2",
+ "Programming Language :: Python :: 2.7",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.4",
+ "Programming Language :: Python :: 3.5",
+ "Programming Language :: Python :: 3.6",
+ "Programming Language :: Python :: 3.7",
],
**extra_kwargs
)
diff --git a/tox.ini b/tox.ini
index 5ae98ce..1d94302 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = flake8,security,py
+envlist = black,flake8,security,py
[testenv]
basepython = python3
@@ -8,6 +8,12 @@ deps=
commands =
py.test --cov-report term --cov=cssselect
+[testenv:black]
+deps =
+ black==21.6b0
+commands =
+ black --check {posargs: cssselect setup.py tests}
+
[testenv:flake8]
deps =
flake8==3.9.2
From 79c341b15930b6c5ec811a4f7953719722148e3a Mon Sep 17 00:00:00 2001
From: Eugenio Lacuesta <1731933+elacuesta@users.noreply.github.com>
Date: Thu, 24 Jun 2021 10:42:33 -0300
Subject: [PATCH 132/192] Remove support for py2, py34, py35 (#116)
---
.github/workflows/tests.yml | 20 ++++----------------
.gitignore | 4 +++-
README.rst | 2 +-
setup.py | 10 ++++------
tests/requirements.txt | 5 -----
tox.ini | 11 +++++++----
6 files changed, 19 insertions(+), 33 deletions(-)
delete mode 100644 tests/requirements.txt
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 817d824..799f52f 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -6,19 +6,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- include:
- - python-version: 2.7
- env:
- TOXENV: py
- - python-version: 3.5
- env:
- TOXENV: py
- - python-version: 3.6
- env:
- TOXENV: py
- - python-version: 3.7
- env:
- TOXENV: py
+ python-version: [3.6, 3.7, 3.8, 3.9]
steps:
- uses: actions/checkout@v2
@@ -29,10 +17,10 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Run tests
- env: ${{ matrix.env }}
run: |
+ pip install -U pip
pip install -U tox
- tox
+ tox -e py
- name: Upload coverage report
- run: bash <(curl -s https://codecov.io/bash)
\ No newline at end of file
+ run: bash <(curl -s https://codecov.io/bash)
diff --git a/.gitignore b/.gitignore
index b0ab86a..c276bd1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,6 @@
/dist
/docs/_build
/.coverage
-.idea
\ No newline at end of file
+.idea
+htmlcov/
+coverage.xml
diff --git a/README.rst b/README.rst
index c6d387f..dfeedae 100644
--- a/README.rst
+++ b/README.rst
@@ -33,7 +33,7 @@ extracted as a stand-alone project.
Quick facts:
* Free software: BSD licensed
-* Compatible with Python 2.7 and 3.4+
+* Compatible with Python 3.6+
* Latest documentation `on Read the Docs `_
* Source, issues and pull requests `on GitHub
`_
diff --git a/setup.py b/setup.py
index 3782534..bddda2e 100644
--- a/setup.py
+++ b/setup.py
@@ -31,18 +31,16 @@
url="https://github.com/scrapy/cssselect",
license="BSD",
packages=["cssselect"],
- python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*",
+ python_requires=">=3.6",
classifiers=[
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"License :: OSI Approved :: BSD License",
- "Programming Language :: Python :: 2",
- "Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.4",
- "Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
],
- **extra_kwargs
+ **extra_kwargs,
)
diff --git a/tests/requirements.txt b/tests/requirements.txt
deleted file mode 100644
index 000d5f2..0000000
--- a/tests/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-codecov
-lxml;python_version!="3.4"
-lxml<=4.3.5;python_version=="3.4"
-pytest >=4.6, <4.7 # 4.7 drops support for Python 2.7 and 3.4
-pytest-cov
\ No newline at end of file
diff --git a/tox.ini b/tox.ini
index 8cbafdf..a9d39b8 100644
--- a/tox.ini
+++ b/tox.ini
@@ -2,11 +2,14 @@
envlist = black,flake8,security,py
[testenv]
-basepython = python3
-deps=
- -r tests/requirements.txt
+deps =
+ lxml>=4.4
+ pytest-cov>=2.8
+ pytest>=5.4
commands =
- py.test --cov-report term --cov=cssselect
+ pytest --cov=cssselect \
+ --cov-report=term-missing --cov-report=html --cov-report=xml \
+ --verbose {posargs: cssselect tests}
[testenv:black]
deps =
From 7bc326df9ceda7073c75f1cb636183daf38694cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Wed, 30 Jun 2021 14:05:18 +0200
Subject: [PATCH 133/192] Simplify the README.rst file (#103)
---
README.rst | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/README.rst b/README.rst
index dfeedae..9708616 100644
--- a/README.rst
+++ b/README.rst
@@ -18,24 +18,22 @@ cssselect: CSS Selectors for Python
:target: https://codecov.io/github/scrapy/cssselect?branch=master
:alt: Coverage report
-*cssselect* parses `CSS3 Selectors`_ and translate them to `XPath 1.0`_
-expressions. Such expressions can be used in lxml_ or another XPath engine
-to find the matching elements in an XML or HTML document.
+**cssselect** is a BSD-licensed Python library to parse `CSS3 selectors`_ and
+translate them to `XPath 1.0`_ expressions.
-This module used to live inside of lxml as ``lxml.cssselect`` before it was
-extracted as a stand-alone project.
-
-.. _CSS3 Selectors: https://www.w3.org/TR/css3-selectors/
-.. _XPath 1.0: https://www.w3.org/TR/xpath/
-.. _lxml: http://lxml.de/
+`XPath 1.0`_ expressions can be used in lxml_ or another XPath engine to find
+the matching elements in an XML or HTML document.
+Find the cssselect online documentation at https://cssselect.readthedocs.io.
Quick facts:
-* Free software: BSD licensed
-* Compatible with Python 3.6+
-* Latest documentation `on Read the Docs `_
* Source, issues and pull requests `on GitHub
`_
-* Releases `on PyPI `_
+* Releases `on PyPI `_
* Install with ``pip install cssselect``
+
+
+.. _CSS3 selectors: https://www.w3.org/TR/selectors-3/
+.. _XPath 1.0: https://www.w3.org/TR/xpath/all/
+.. _lxml: https://lxml.de/
From 577ca9c1c8f0286b7f34d5bee8192eed6219b677 Mon Sep 17 00:00:00 2001
From: Eugenio Lacuesta
Date: Wed, 7 Jul 2021 07:58:03 -0300
Subject: [PATCH 134/192] Add pylint to tox's envlist
---
tox.ini | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tox.ini b/tox.ini
index a9d39b8..f260626 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = black,flake8,security,py
+envlist = black,flake8,pylint,security,py
[testenv]
deps =
From 599cbb50866ba4ea4211426a1d30de32e48df5a5 Mon Sep 17 00:00:00 2001
From: Julius Kibunjia
Date: Wed, 14 Jul 2021 11:19:34 +0300
Subject: [PATCH 135/192] Add matches-any pseudo-class: ':is()' (#109)
---
cssselect/parser.py | 51 +++++++++++++++++++++++++++++++++++++++++
cssselect/xpath.py | 13 +++++++++--
tests/test_cssselect.py | 19 +++++++++++++++
3 files changed, 81 insertions(+), 2 deletions(-)
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 7125030..5494bd4 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -250,6 +250,30 @@ def specificity(self):
return a1 + a2, b1 + b2, c1 + c2
+class Matching(object):
+ """
+ Represents selector:is(selector_list)
+ """
+ def __init__(self, selector, selector_list):
+ self.selector = selector
+ self.selector_list = selector_list
+
+ def __repr__(self):
+ return '%s[%r:is(%s)]' % (
+ self.__class__.__name__, self.selector, ", ".join(
+ map(repr, self.selector_list)))
+
+ def canonical(self):
+ selector_arguments = []
+ for s in self.selector_list:
+ selarg = s.canonical()
+ selector_arguments.append(selarg.lstrip('*'))
+ return '%s:is(%s)' % (self.selector.canonical(),
+ ", ".join(map(str, selector_arguments)))
+
+ def specificity(self):
+ return max([x.specificity() for x in self.selector_list])
+
class Attrib(object):
"""
Represents selector[namespace|attrib operator value]
@@ -432,6 +456,7 @@ def parse_selector_group(stream):
else:
break
+
def parse_selector(stream):
result, pseudo_element = parse_simple_selector(stream)
while 1:
@@ -538,6 +563,9 @@ def parse_simple_selector(stream, inside_negation=False):
if next != ('DELIM', ')'):
raise SelectorSyntaxError("Expected ')', got %s" % (next,))
result = Negation(result, argument)
+ elif ident.lower() in ('matches', 'is'):
+ selectors = parse_simple_selector_arguments(stream)
+ result = Matching(result, selectors)
else:
result = Function(result, ident, parse_arguments(stream))
else:
@@ -564,6 +592,29 @@ def parse_arguments(stream):
"Expected an argument, got %s" % (next,))
+def parse_simple_selector_arguments(stream):
+ arguments = []
+ while 1:
+ result, pseudo_element = parse_simple_selector(stream, True)
+ if pseudo_element:
+ raise SelectorSyntaxError(
+ 'Got pseudo-element ::%s inside function'
+ % (pseudo_element, ))
+ stream.skip_whitespace()
+ next = stream.next()
+ if next in (('EOF', None), ('DELIM', ',')):
+ stream.next()
+ stream.skip_whitespace()
+ arguments.append(result)
+ elif next == ('DELIM', ')'):
+ arguments.append(result)
+ break
+ else:
+ raise SelectorSyntaxError(
+ "Expected an argument, got %s" % (next,))
+ return arguments
+
+
def parse_attrib(selector, stream):
stream.skip_whitespace()
attrib = stream.next_ident_or_star()
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index a8722bb..db44d42 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -54,9 +54,9 @@ def __str__(self):
def __repr__(self):
return '%s[%s]' % (self.__class__.__name__, self)
- def add_condition(self, condition):
+ def add_condition(self, condition, conjuction='and'):
if self.condition:
- self.condition = '(%s) and (%s)' % (self.condition, condition)
+ self.condition = '(%s) %s (%s)' % (self.condition, conjuction, condition)
else:
self.condition = condition
return self
@@ -272,6 +272,15 @@ def xpath_negation(self, negation):
else:
return xpath.add_condition('0')
+ def xpath_matching(self, matching):
+ xpath = self.xpath(matching.selector)
+ exprs = [self.xpath(selector) for selector in matching.selector_list]
+ for e in exprs:
+ e.add_name_test()
+ if e.condition:
+ xpath.add_condition(e.condition, 'or')
+ return xpath
+
def xpath_function(self, function):
"""Translate a functional pseudo-class."""
method = 'xpath_%s_function' % function.name.replace('-', '_')
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index d6969f2..bd37875 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -145,6 +145,10 @@ def parse_many(first, *others):
'Hash[Element[div]#foobar]']
assert parse_many('div:not(div.foo)') == [
'Negation[Element[div]:not(Class[Element[div].foo])]']
+ assert parse_many('div:is(.foo, #bar)') == [
+ 'Matching[Element[div]:is(Class[Element[*].foo], Hash[Element[*]#bar])]']
+ assert parse_many(':is(:hover, :visited)') == [
+ 'Matching[Element[*]:is(Pseudo[Element[*]:hover], Pseudo[Element[*]:visited])]']
assert parse_many('td ~ th') == [
'CombinedSelector[Element[td] ~ Element[th]]']
assert parse_many(':scope > foo') == [
@@ -266,6 +270,9 @@ def specificity(css):
assert specificity(':not(:empty)') == (0, 1, 0)
assert specificity(':not(#foo)') == (1, 0, 0)
+ assert specificity(':is(.foo, #bar)') == (1, 0, 0)
+ assert specificity(':is(:hover, :visited)') == (0, 1, 0)
+
assert specificity('foo:empty') == (0, 1, 1)
assert specificity('foo:before') == (0, 0, 2)
assert specificity('foo::before') == (0, 0, 2)
@@ -300,6 +307,8 @@ def css2css(css, res=None):
css2css(':not(*[foo])', ':not([foo])')
css2css(':not(:empty)')
css2css(':not(#foo)')
+ css2css(':is(#bar, .foo)')
+ css2css(':is(:focused, :visited)')
css2css('foo:empty')
css2css('foo::before')
css2css('foo:empty::before')
@@ -373,6 +382,10 @@ def get_error(css):
"Got pseudo-element ::before inside :not() at 12")
assert get_error(':not(:not(a))') == (
"Got nested :not()")
+ assert get_error(':is(:before)') == (
+ "Got pseudo-element ::before inside function")
+ assert get_error(':is(a b)') == (
+ "Expected an argument, got ")
assert get_error(':scope > div :scope header') == (
'Got immediate child pseudo-element ":scope" not at the start of a selector'
)
@@ -863,6 +876,12 @@ def pcss(main, *selectors, **kwargs):
assert pcss('ol :Not(li[class])') == [
'first-li', 'second-li', 'li-div',
'fifth-li', 'sixth-li', 'seventh-li']
+ assert pcss(':is(#first-li, #second-li)') == [
+ 'first-li', 'second-li']
+ assert pcss('a:is(#name-anchor, #tag-anchor)') == [
+ 'name-anchor', 'tag-anchor']
+ assert pcss(':is(.c)') == [
+ 'first-ol', 'third-li', 'fourth-li']
assert pcss('ol.a.b.c > li.c:nth-child(3)') == ['third-li']
# Invalid characters in XPath element names, should not crash
From b06a7fcb4da29b150abd4bc7d642de2aa1d34db1 Mon Sep 17 00:00:00 2001
From: Pascal Corpet
Date: Wed, 21 Jul 2021 23:32:06 +0200
Subject: [PATCH 136/192] Update to pylint 2.9.5
---
docs/conf.py | 4 ++--
setup.py | 6 ++++--
tox.ini | 2 +-
3 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/docs/conf.py b/docs/conf.py
index aa897ef..62b5202 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -50,8 +50,8 @@
# built documents.
#
# The full version, including alpha/beta/rc tags.
-init_py = open(os.path.join(os.path.dirname(__file__),
- '..', 'cssselect', '__init__.py')).read()
+with open(os.path.join(os.path.dirname(__file__), '..', 'cssselect', '__init__.py')) as init_file:
+ init_py = init_file.read()
release = re.search("VERSION = '([^']+)'", init_py).group(1)
# The short X.Y version.
version = release.rstrip('dev')
diff --git a/setup.py b/setup.py
index bddda2e..3a0bea0 100644
--- a/setup.py
+++ b/setup.py
@@ -14,8 +14,10 @@
ROOT = os.path.dirname(__file__)
-README = open(os.path.join(ROOT, "README.rst")).read()
-INIT_PY = open(os.path.join(ROOT, "cssselect", "__init__.py")).read()
+with open(os.path.join(ROOT, "README.rst")) as readme_file:
+ README = readme_file.read()
+with open(os.path.join(ROOT, "cssselect", "__init__.py")) as init_file:
+ INIT_PY = init_file.read()
VERSION = re.search("VERSION = '([^']+)'", INIT_PY).group(1)
diff --git a/tox.ini b/tox.ini
index f260626..372ecb9 100644
--- a/tox.ini
+++ b/tox.ini
@@ -26,7 +26,7 @@ commands =
[testenv:pylint]
deps =
{[testenv]deps}
- pylint==2.8.3
+ pylint==2.9.5
commands =
pylint {posargs: cssselect setup.py tests docs}
From 9edc6c3f5cf558bd99fa9e584c6832fabe24b942 Mon Sep 17 00:00:00 2001
From: Pascal Corpet
Date: Mon, 26 Jul 2021 15:55:37 +0200
Subject: [PATCH 137/192] Apply black formatting (#122)
---
cssselect/__init__.py | 11 +-
cssselect/parser.py | 407 ++++++------
cssselect/xpath.py | 327 +++++-----
docs/conf.py | 2 +-
pyproject.toml | 1 -
setup.py | 2 +-
tests/test_cssselect.py | 1295 +++++++++++++++++++--------------------
7 files changed, 1028 insertions(+), 1017 deletions(-)
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index b41cef9..2e4f824 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -13,10 +13,15 @@
"""
-from cssselect.parser import (parse, Selector, FunctionalPseudoElement,
- SelectorError, SelectorSyntaxError)
+from cssselect.parser import (
+ parse,
+ Selector,
+ FunctionalPseudoElement,
+ SelectorError,
+ SelectorSyntaxError,
+)
from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-VERSION = '1.1.0'
+VERSION = "1.1.0"
__version__ = VERSION
diff --git a/cssselect/parser.py b/cssselect/parser.py
index 5494bd4..a27ece5 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -27,7 +27,7 @@
def ascii_lower(string):
"""Lower-case, but only in the ASCII range."""
- return string.encode('utf8').lower().decode('utf8')
+ return string.encode("utf8").lower().decode("utf8")
class SelectorError(Exception):
@@ -39,12 +39,14 @@ class SelectorError(Exception):
"""
+
class SelectorSyntaxError(SelectorError, SyntaxError):
"""Parsing a selector that does not match the grammar."""
#### Parsed objects
+
class Selector(object):
"""
Represents a parsed selector.
@@ -55,10 +57,10 @@ class Selector(object):
or unsupported pseudo-elements.
"""
+
def __init__(self, tree, pseudo_element=None):
self.parsed_tree = tree
- if pseudo_element is not None and not isinstance(
- pseudo_element, FunctionalPseudoElement):
+ if pseudo_element is not None and not isinstance(pseudo_element, FunctionalPseudoElement):
pseudo_element = ascii_lower(pseudo_element)
#: A :class:`FunctionalPseudoElement`,
#: or the identifier for the pseudo-element as a string,
@@ -86,24 +88,22 @@ def __repr__(self):
if isinstance(self.pseudo_element, FunctionalPseudoElement):
pseudo_element = repr(self.pseudo_element)
elif self.pseudo_element:
- pseudo_element = '::%s' % self.pseudo_element
+ pseudo_element = "::%s" % self.pseudo_element
else:
- pseudo_element = ''
- return '%s[%r%s]' % (
- self.__class__.__name__, self.parsed_tree, pseudo_element)
+ pseudo_element = ""
+ return "%s[%r%s]" % (self.__class__.__name__, self.parsed_tree, pseudo_element)
def canonical(self):
- """Return a CSS representation for this selector (a string)
- """
+ """Return a CSS representation for this selector (a string)"""
if isinstance(self.pseudo_element, FunctionalPseudoElement):
- pseudo_element = '::%s' % self.pseudo_element.canonical()
+ pseudo_element = "::%s" % self.pseudo_element.canonical()
elif self.pseudo_element:
- pseudo_element = '::%s' % self.pseudo_element
+ pseudo_element = "::%s" % self.pseudo_element
else:
- pseudo_element = ''
- res = '%s%s' % (self.parsed_tree.canonical(), pseudo_element)
+ pseudo_element = ""
+ res = "%s%s" % (self.parsed_tree.canonical(), pseudo_element)
if len(res) > 1:
- res = res.lstrip('*')
+ res = res.lstrip("*")
return res
def specificity(self):
@@ -122,16 +122,16 @@ class Class(object):
"""
Represents selector.class_name
"""
+
def __init__(self, selector, class_name):
self.selector = selector
self.class_name = class_name
def __repr__(self):
- return '%s[%r.%s]' % (
- self.__class__.__name__, self.selector, self.class_name)
+ return "%s[%r.%s]" % (self.__class__.__name__, self.selector, self.class_name)
def canonical(self):
- return '%s.%s' % (self.selector.canonical(), self.class_name)
+ return "%s.%s" % (self.selector.canonical(), self.class_name)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -156,21 +156,24 @@ class FunctionalPseudoElement(object):
Use at your own risks.
"""
+
def __init__(self, name, arguments):
self.name = ascii_lower(name)
self.arguments = arguments
def __repr__(self):
- return '%s[::%s(%r)]' % (
- self.__class__.__name__, self.name,
- [token.value for token in self.arguments])
+ return "%s[::%s(%r)]" % (
+ self.__class__.__name__,
+ self.name,
+ [token.value for token in self.arguments],
+ )
def argument_types(self):
return [token.type for token in self.arguments]
def canonical(self):
- args = ''.join(token.css() for token in self.arguments)
- return '%s(%s)' % (self.name, args)
+ args = "".join(token.css() for token in self.arguments)
+ return "%s(%s)" % (self.name, args)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -182,22 +185,26 @@ class Function(object):
"""
Represents selector:name(expr)
"""
+
def __init__(self, selector, name, arguments):
self.selector = selector
self.name = ascii_lower(name)
self.arguments = arguments
def __repr__(self):
- return '%s[%r:%s(%r)]' % (
- self.__class__.__name__, self.selector, self.name,
- [token.value for token in self.arguments])
+ return "%s[%r:%s(%r)]" % (
+ self.__class__.__name__,
+ self.selector,
+ self.name,
+ [token.value for token in self.arguments],
+ )
def argument_types(self):
return [token.type for token in self.arguments]
def canonical(self):
- args = ''.join(token.css() for token in self.arguments)
- return '%s:%s(%s)' % (self.selector.canonical(), self.name, args)
+ args = "".join(token.css() for token in self.arguments)
+ return "%s:%s(%s)" % (self.selector.canonical(), self.name, args)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -209,16 +216,16 @@ class Pseudo(object):
"""
Represents selector:ident
"""
+
def __init__(self, selector, ident):
self.selector = selector
self.ident = ascii_lower(ident)
def __repr__(self):
- return '%s[%r:%s]' % (
- self.__class__.__name__, self.selector, self.ident)
+ return "%s[%r:%s]" % (self.__class__.__name__, self.selector, self.ident)
def canonical(self):
- return '%s:%s' % (self.selector.canonical(), self.ident)
+ return "%s:%s" % (self.selector.canonical(), self.ident)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -230,19 +237,19 @@ class Negation(object):
"""
Represents selector:not(subselector)
"""
+
def __init__(self, selector, subselector):
self.selector = selector
self.subselector = subselector
def __repr__(self):
- return '%s[%r:not(%r)]' % (
- self.__class__.__name__, self.selector, self.subselector)
+ return "%s[%r:not(%r)]" % (self.__class__.__name__, self.selector, self.subselector)
def canonical(self):
subsel = self.subselector.canonical()
if len(subsel) > 1:
- subsel = subsel.lstrip('*')
- return '%s:not(%s)' % (self.selector.canonical(), subsel)
+ subsel = subsel.lstrip("*")
+ return "%s:not(%s)" % (self.selector.canonical(), subsel)
def specificity(self):
a1, b1, c1 = self.selector.specificity()
@@ -254,30 +261,34 @@ class Matching(object):
"""
Represents selector:is(selector_list)
"""
+
def __init__(self, selector, selector_list):
self.selector = selector
self.selector_list = selector_list
def __repr__(self):
- return '%s[%r:is(%s)]' % (
- self.__class__.__name__, self.selector, ", ".join(
- map(repr, self.selector_list)))
+ return "%s[%r:is(%s)]" % (
+ self.__class__.__name__,
+ self.selector,
+ ", ".join(map(repr, self.selector_list)),
+ )
def canonical(self):
selector_arguments = []
for s in self.selector_list:
selarg = s.canonical()
- selector_arguments.append(selarg.lstrip('*'))
- return '%s:is(%s)' % (self.selector.canonical(),
- ", ".join(map(str, selector_arguments)))
+ selector_arguments.append(selarg.lstrip("*"))
+ return "%s:is(%s)" % (self.selector.canonical(), ", ".join(map(str, selector_arguments)))
def specificity(self):
return max([x.specificity() for x in self.selector_list])
+
class Attrib(object):
"""
Represents selector[namespace|attrib operator value]
"""
+
def __init__(self, selector, namespace, attrib, operator, value):
self.selector = selector
self.namespace = namespace
@@ -287,29 +298,32 @@ def __init__(self, selector, namespace, attrib, operator, value):
def __repr__(self):
if self.namespace:
- attrib = '%s|%s' % (self.namespace, self.attrib)
+ attrib = "%s|%s" % (self.namespace, self.attrib)
else:
attrib = self.attrib
- if self.operator == 'exists':
- return '%s[%r[%s]]' % (
- self.__class__.__name__, self.selector, attrib)
+ if self.operator == "exists":
+ return "%s[%r[%s]]" % (self.__class__.__name__, self.selector, attrib)
else:
- return '%s[%r[%s %s %r]]' % (
- self.__class__.__name__, self.selector, attrib,
- self.operator, self.value.value)
+ return "%s[%r[%s %s %r]]" % (
+ self.__class__.__name__,
+ self.selector,
+ attrib,
+ self.operator,
+ self.value.value,
+ )
def canonical(self):
if self.namespace:
- attrib = '%s|%s' % (self.namespace, self.attrib)
+ attrib = "%s|%s" % (self.namespace, self.attrib)
else:
attrib = self.attrib
- if self.operator == 'exists':
+ if self.operator == "exists":
op = attrib
else:
- op = '%s%s%s' % (attrib, self.operator, self.value.css())
+ op = "%s%s%s" % (attrib, self.operator, self.value.css())
- return '%s[%s]' % (self.selector.canonical(), op)
+ return "%s[%s]" % (self.selector.canonical(), op)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -324,17 +338,18 @@ class Element(object):
`None` is for the universal selector '*'
"""
+
def __init__(self, namespace=None, element=None):
self.namespace = namespace
self.element = element
def __repr__(self):
- return '%s[%s]' % (self.__class__.__name__, self.canonical())
+ return "%s[%s]" % (self.__class__.__name__, self.canonical())
def canonical(self):
- element = self.element or '*'
+ element = self.element or "*"
if self.namespace:
- element = '%s|%s' % (self.namespace, element)
+ element = "%s|%s" % (self.namespace, element)
return element
def specificity(self):
@@ -348,16 +363,16 @@ class Hash(object):
"""
Represents selector#id
"""
+
def __init__(self, selector, id):
self.selector = selector
self.id = id
def __repr__(self):
- return '%s[%r#%s]' % (
- self.__class__.__name__, self.selector, self.id)
+ return "%s[%r#%s]" % (self.__class__.__name__, self.selector, self.id)
def canonical(self):
- return '%s#%s' % (self.selector.canonical(), self.id)
+ return "%s#%s" % (self.selector.canonical(), self.id)
def specificity(self):
a, b, c = self.selector.specificity()
@@ -373,19 +388,17 @@ def __init__(self, selector, combinator, subselector):
self.subselector = subselector
def __repr__(self):
- if self.combinator == ' ':
- comb = ''
+ if self.combinator == " ":
+ comb = ""
else:
comb = self.combinator
- return '%s[%r %s %r]' % (
- self.__class__.__name__, self.selector, comb, self.subselector)
+ return "%s[%r %s %r]" % (self.__class__.__name__, self.selector, comb, self.subselector)
def canonical(self):
subsel = self.subselector.canonical()
if len(subsel) > 1:
- subsel = subsel.lstrip('*')
- return '%s %s %s' % (
- self.selector.canonical(), self.combinator, subsel)
+ subsel = subsel.lstrip("*")
+ return "%s %s %s" % (self.selector.canonical(), self.combinator, subsel)
def specificity(self):
a1, b1, c1 = self.selector.specificity()
@@ -396,14 +409,13 @@ def specificity(self):
#### Parser
# foo
-_el_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$')
+_el_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$")
# foo#bar or #bar
-_id_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$')
+_id_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$")
# foo.bar or .bar
-_class_re = re.compile(
- r'^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$')
+_class_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$")
def parse(css):
@@ -427,16 +439,16 @@ def parse(css):
return [Selector(Element(element=match.group(1)))]
match = _id_re.match(css)
if match is not None:
- return [Selector(Hash(Element(element=match.group(1) or None),
- match.group(2)))]
+ return [Selector(Hash(Element(element=match.group(1) or None), match.group(2)))]
match = _class_re.match(css)
if match is not None:
- return [Selector(Class(Element(element=match.group(1) or None),
- match.group(2)))]
+ return [Selector(Class(Element(element=match.group(1) or None), match.group(2)))]
stream = TokenStream(tokenize(css))
stream.source = css
return list(parse_selector_group(stream))
+
+
# except SelectorSyntaxError:
# e = sys.exc_info()[1]
# message = "%s at %s -> %r" % (
@@ -450,7 +462,7 @@ def parse_selector_group(stream):
stream.skip_whitespace()
while 1:
yield Selector(*parse_selector(stream))
- if stream.peek() == ('DELIM', ','):
+ if stream.peek() == ("DELIM", ","):
stream.next()
stream.skip_whitespace()
else:
@@ -462,20 +474,20 @@ def parse_selector(stream):
while 1:
stream.skip_whitespace()
peek = stream.peek()
- if peek in (('EOF', None), ('DELIM', ',')):
+ if peek in (("EOF", None), ("DELIM", ",")):
break
if pseudo_element:
raise SelectorSyntaxError(
- 'Got pseudo-element ::%s not at the end of a selector'
- % pseudo_element)
- if peek.is_delim('+', '>', '~'):
+ "Got pseudo-element ::%s not at the end of a selector" % pseudo_element
+ )
+ if peek.is_delim("+", ">", "~"):
# A combinator
combinator = stream.next().value
stream.skip_whitespace()
else:
# By exclusion, the last parse_simple_selector() ended
# at peek == ' '
- combinator = ' '
+ combinator = " "
next_selector, pseudo_element = parse_simple_selector(stream)
result = CombinedSelector(result, combinator, next_selector)
return result, pseudo_element
@@ -485,13 +497,13 @@ def parse_simple_selector(stream, inside_negation=False):
stream.skip_whitespace()
selector_start = len(stream.used)
peek = stream.peek()
- if peek.type == 'IDENT' or peek == ('DELIM', '*'):
- if peek.type == 'IDENT':
+ if peek.type == "IDENT" or peek == ("DELIM", "*"):
+ if peek.type == "IDENT":
namespace = stream.next().value
else:
stream.next()
namespace = None
- if stream.peek() == ('DELIM', '|'):
+ if stream.peek() == ("DELIM", "|"):
stream.next()
element = stream.next_ident_or_star()
else:
@@ -503,77 +515,82 @@ def parse_simple_selector(stream, inside_negation=False):
pseudo_element = None
while 1:
peek = stream.peek()
- if peek.type in ('S', 'EOF') or peek.is_delim(',', '+', '>', '~') or (
- inside_negation and peek == ('DELIM', ')')):
+ if (
+ peek.type in ("S", "EOF")
+ or peek.is_delim(",", "+", ">", "~")
+ or (inside_negation and peek == ("DELIM", ")"))
+ ):
break
if pseudo_element:
raise SelectorSyntaxError(
- 'Got pseudo-element ::%s not at the end of a selector'
- % pseudo_element)
- if peek.type == 'HASH':
+ "Got pseudo-element ::%s not at the end of a selector" % pseudo_element
+ )
+ if peek.type == "HASH":
result = Hash(result, stream.next().value)
- elif peek == ('DELIM', '.'):
+ elif peek == ("DELIM", "."):
stream.next()
result = Class(result, stream.next_ident())
- elif peek == ('DELIM', '|'):
+ elif peek == ("DELIM", "|"):
stream.next()
result = Element(None, stream.next_ident())
- elif peek == ('DELIM', '['):
+ elif peek == ("DELIM", "["):
stream.next()
result = parse_attrib(result, stream)
- elif peek == ('DELIM', ':'):
+ elif peek == ("DELIM", ":"):
stream.next()
- if stream.peek() == ('DELIM', ':'):
+ if stream.peek() == ("DELIM", ":"):
stream.next()
pseudo_element = stream.next_ident()
- if stream.peek() == ('DELIM', '('):
+ if stream.peek() == ("DELIM", "("):
stream.next()
pseudo_element = FunctionalPseudoElement(
- pseudo_element, parse_arguments(stream))
+ pseudo_element, parse_arguments(stream)
+ )
continue
ident = stream.next_ident()
- if ident.lower() in ('first-line', 'first-letter',
- 'before', 'after'):
+ if ident.lower() in ("first-line", "first-letter", "before", "after"):
# Special case: CSS 2.1 pseudo-elements can have a single ':'
# Any new pseudo-element must have two.
pseudo_element = _unicode(ident)
continue
- if stream.peek() != ('DELIM', '('):
+ if stream.peek() != ("DELIM", "("):
result = Pseudo(result, ident)
- if result.__repr__() == 'Pseudo[Element[*]:scope]':
- if not (len(stream.used) == 2 or
- (len(stream.used) == 3
- and stream.used[0].type == 'S')):
+ if result.__repr__() == "Pseudo[Element[*]:scope]":
+ if not (
+ len(stream.used) == 2
+ or (len(stream.used) == 3 and stream.used[0].type == "S")
+ ):
raise SelectorSyntaxError(
'Got immediate child pseudo-element ":scope" '
- 'not at the start of a selector')
+ "not at the start of a selector"
+ )
continue
stream.next()
stream.skip_whitespace()
- if ident.lower() == 'not':
+ if ident.lower() == "not":
if inside_negation:
- raise SelectorSyntaxError('Got nested :not()')
+ raise SelectorSyntaxError("Got nested :not()")
argument, argument_pseudo_element = parse_simple_selector(
- stream, inside_negation=True)
+ stream, inside_negation=True
+ )
next = stream.next()
if argument_pseudo_element:
raise SelectorSyntaxError(
- 'Got pseudo-element ::%s inside :not() at %s'
- % (argument_pseudo_element, next.pos))
- if next != ('DELIM', ')'):
+ "Got pseudo-element ::%s inside :not() at %s"
+ % (argument_pseudo_element, next.pos)
+ )
+ if next != ("DELIM", ")"):
raise SelectorSyntaxError("Expected ')', got %s" % (next,))
result = Negation(result, argument)
- elif ident.lower() in ('matches', 'is'):
+ elif ident.lower() in ("matches", "is"):
selectors = parse_simple_selector_arguments(stream)
result = Matching(result, selectors)
else:
result = Function(result, ident, parse_arguments(stream))
else:
- raise SelectorSyntaxError(
- "Expected selector, got %s" % (peek,))
+ raise SelectorSyntaxError("Expected selector, got %s" % (peek,))
if len(stream.used) == selector_start:
- raise SelectorSyntaxError(
- "Expected selector, got %s" % (stream.peek(),))
+ raise SelectorSyntaxError("Expected selector, got %s" % (stream.peek(),))
return result, pseudo_element
@@ -582,14 +599,12 @@ def parse_arguments(stream):
while 1:
stream.skip_whitespace()
next = stream.next()
- if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [
- ('DELIM', '+'), ('DELIM', '-')]:
+ if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "+"), ("DELIM", "-")]:
arguments.append(next)
- elif next == ('DELIM', ')'):
+ elif next == ("DELIM", ")"):
return arguments
else:
- raise SelectorSyntaxError(
- "Expected an argument, got %s" % (next,))
+ raise SelectorSyntaxError("Expected an argument, got %s" % (next,))
def parse_simple_selector_arguments(stream):
@@ -598,35 +613,33 @@ def parse_simple_selector_arguments(stream):
result, pseudo_element = parse_simple_selector(stream, True)
if pseudo_element:
raise SelectorSyntaxError(
- 'Got pseudo-element ::%s inside function'
- % (pseudo_element, ))
+ "Got pseudo-element ::%s inside function" % (pseudo_element,)
+ )
stream.skip_whitespace()
next = stream.next()
- if next in (('EOF', None), ('DELIM', ',')):
+ if next in (("EOF", None), ("DELIM", ",")):
stream.next()
stream.skip_whitespace()
arguments.append(result)
- elif next == ('DELIM', ')'):
+ elif next == ("DELIM", ")"):
arguments.append(result)
break
else:
- raise SelectorSyntaxError(
- "Expected an argument, got %s" % (next,))
+ raise SelectorSyntaxError("Expected an argument, got %s" % (next,))
return arguments
def parse_attrib(selector, stream):
stream.skip_whitespace()
attrib = stream.next_ident_or_star()
- if attrib is None and stream.peek() != ('DELIM', '|'):
- raise SelectorSyntaxError(
- "Expected '|', got %s" % (stream.peek(),))
- if stream.peek() == ('DELIM', '|'):
+ if attrib is None and stream.peek() != ("DELIM", "|"):
+ raise SelectorSyntaxError("Expected '|', got %s" % (stream.peek(),))
+ if stream.peek() == ("DELIM", "|"):
stream.next()
- if stream.peek() == ('DELIM', '='):
+ if stream.peek() == ("DELIM", "="):
namespace = None
stream.next()
- op = '|='
+ op = "|="
else:
namespace = attrib
attrib = stream.next_ident()
@@ -636,27 +649,23 @@ def parse_attrib(selector, stream):
if op is None:
stream.skip_whitespace()
next = stream.next()
- if next == ('DELIM', ']'):
- return Attrib(selector, namespace, attrib, 'exists', None)
- elif next == ('DELIM', '='):
- op = '='
- elif next.is_delim('^', '$', '*', '~', '|', '!') and (
- stream.peek() == ('DELIM', '=')):
- op = next.value + '='
+ if next == ("DELIM", "]"):
+ return Attrib(selector, namespace, attrib, "exists", None)
+ elif next == ("DELIM", "="):
+ op = "="
+ elif next.is_delim("^", "$", "*", "~", "|", "!") and (stream.peek() == ("DELIM", "=")):
+ op = next.value + "="
stream.next()
else:
- raise SelectorSyntaxError(
- "Operator expected, got %s" % (next,))
+ raise SelectorSyntaxError("Operator expected, got %s" % (next,))
stream.skip_whitespace()
value = stream.next()
- if value.type not in ('IDENT', 'STRING'):
- raise SelectorSyntaxError(
- "Expected string or ident, got %s" % (value,))
+ if value.type not in ("IDENT", "STRING"):
+ raise SelectorSyntaxError("Expected string or ident, got %s" % (value,))
stream.skip_whitespace()
next = stream.next()
- if next != ('DELIM', ']'):
- raise SelectorSyntaxError(
- "Expected ']', got %s" % (next,))
+ if next != ("DELIM", "]"):
+ raise SelectorSyntaxError("Expected ']', got %s" % (next,))
return Attrib(selector, namespace, attrib, op, value)
@@ -669,23 +678,23 @@ def parse_series(tokens):
"""
for token in tokens:
- if token.type == 'STRING':
- raise ValueError('String tokens not allowed in series.')
- s = ''.join(token.value for token in tokens).strip()
- if s == 'odd':
+ if token.type == "STRING":
+ raise ValueError("String tokens not allowed in series.")
+ s = "".join(token.value for token in tokens).strip()
+ if s == "odd":
return 2, 1
- elif s == 'even':
+ elif s == "even":
return 2, 0
- elif s == 'n':
+ elif s == "n":
return 1, 0
- if 'n' not in s:
+ if "n" not in s:
# Just b
return 0, int(s)
- a, b = s.split('n', 1)
+ a, b = s.split("n", 1)
if not a:
a = 1
- elif a == '-' or a == '+':
- a = int(a+'1')
+ elif a == "-" or a == "+":
+ a = int(a + "1")
else:
a = int(a)
if not b:
@@ -697,6 +706,7 @@ def parse_series(tokens):
#### Token objects
+
class Token(tuple):
def __new__(cls, type_, value, pos):
obj = tuple.__new__(cls, (type_, value))
@@ -707,13 +717,13 @@ def __repr__(self):
return "<%s '%s' at %i>" % (self.type, self.value, self.pos)
def is_delim(self, *values):
- return self.type == 'DELIM' and self.value in values
+ return self.type == "DELIM" and self.value in values
type = property(operator.itemgetter(0))
value = property(operator.itemgetter(1))
def css(self):
- if self.type == 'STRING':
+ if self.type == "STRING":
return repr(self.value)
else:
return self.value
@@ -721,41 +731,44 @@ def css(self):
class EOFToken(Token):
def __new__(cls, pos):
- return Token.__new__(cls, 'EOF', None, pos)
+ return Token.__new__(cls, "EOF", None, pos)
def __repr__(self):
- return '<%s at %i>' % (self.type, self.pos)
+ return "<%s at %i>" % (self.type, self.pos)
#### Tokenizer
class TokenMacros:
- unicode_escape = r'\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?'
- escape = unicode_escape + r'|\\[^\n\r\f0-9a-f]'
- string_escape = r'\\(?:\n|\r\n|\r|\f)|' + escape
- nonascii = r'[^\0-\177]'
- nmchar = '[_a-z0-9-]|%s|%s' % (escape, nonascii)
- nmstart = '[_a-z]|%s|%s' % (escape, nonascii)
+ unicode_escape = r"\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?"
+ escape = unicode_escape + r"|\\[^\n\r\f0-9a-f]"
+ string_escape = r"\\(?:\n|\r\n|\r|\f)|" + escape
+ nonascii = r"[^\0-\177]"
+ nmchar = "[_a-z0-9-]|%s|%s" % (escape, nonascii)
+ nmstart = "[_a-z]|%s|%s" % (escape, nonascii)
+
def _compile(pattern):
return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match
-_match_whitespace = _compile(r'[ \t\r\n\f]+')
-_match_number = _compile(r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
-_match_hash = _compile('#(?:%(nmchar)s)+')
-_match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*')
+
+_match_whitespace = _compile(r"[ \t\r\n\f]+")
+_match_number = _compile(r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)")
+_match_hash = _compile("#(?:%(nmchar)s)+")
+_match_ident = _compile("-?(?:%(nmstart)s)(?:%(nmchar)s)*")
_match_string_by_quote = {
"'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"),
'"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'),
}
-_sub_simple_escape = re.compile(r'\\(.)').sub
+_sub_simple_escape = re.compile(r"\\(.)").sub
_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub
-_sub_newline_escape =re.compile(r'\\(?:\n|\r\n|\r|\f)').sub
+_sub_newline_escape = re.compile(r"\\(?:\n|\r\n|\r|\f)").sub
# Same as r'\1', but faster on CPython
-_replace_simple = operator.methodcaller('group', 1)
+_replace_simple = operator.methodcaller("group", 1)
+
def _replace_unicode(match):
codepoint = int(match.group(1), 16)
@@ -776,59 +789,62 @@ def tokenize(s):
while pos < len_s:
match = _match_whitespace(s, pos=pos)
if match:
- yield Token('S', ' ', pos)
+ yield Token("S", " ", pos)
pos = match.end()
continue
match = _match_ident(s, pos=pos)
if match:
- value = _sub_simple_escape(_replace_simple,
- _sub_unicode_escape(_replace_unicode, match.group()))
- yield Token('IDENT', value, pos)
+ value = _sub_simple_escape(
+ _replace_simple, _sub_unicode_escape(_replace_unicode, match.group())
+ )
+ yield Token("IDENT", value, pos)
pos = match.end()
continue
match = _match_hash(s, pos=pos)
if match:
- value = _sub_simple_escape(_replace_simple,
- _sub_unicode_escape(_replace_unicode, match.group()[1:]))
- yield Token('HASH', value, pos)
+ value = _sub_simple_escape(
+ _replace_simple, _sub_unicode_escape(_replace_unicode, match.group()[1:])
+ )
+ yield Token("HASH", value, pos)
pos = match.end()
continue
quote = s[pos]
if quote in _match_string_by_quote:
match = _match_string_by_quote[quote](s, pos=pos + 1)
- assert match, 'Should have found at least an empty match'
+ assert match, "Should have found at least an empty match"
end_pos = match.end()
if end_pos == len_s:
- raise SelectorSyntaxError('Unclosed string at %s' % pos)
+ raise SelectorSyntaxError("Unclosed string at %s" % pos)
if s[end_pos] != quote:
- raise SelectorSyntaxError('Invalid string at %s' % pos)
- value = _sub_simple_escape(_replace_simple,
- _sub_unicode_escape(_replace_unicode,
- _sub_newline_escape('', match.group())))
- yield Token('STRING', value, pos)
+ raise SelectorSyntaxError("Invalid string at %s" % pos)
+ value = _sub_simple_escape(
+ _replace_simple,
+ _sub_unicode_escape(_replace_unicode, _sub_newline_escape("", match.group())),
+ )
+ yield Token("STRING", value, pos)
pos = end_pos + 1
continue
match = _match_number(s, pos=pos)
if match:
value = match.group()
- yield Token('NUMBER', value, pos)
+ yield Token("NUMBER", value, pos)
pos = match.end()
continue
pos2 = pos + 2
- if s[pos:pos2] == '/*':
- pos = s.find('*/', pos2)
+ if s[pos:pos2] == "/*":
+ pos = s.find("*/", pos2)
if pos == -1:
pos = len_s
else:
pos += 2
continue
- yield Token('DELIM', s[pos], pos)
+ yield Token("DELIM", s[pos], pos)
pos += 1
assert pos == len_s
@@ -866,21 +882,20 @@ def peek(self):
def next_ident(self):
next = self.next()
- if next.type != 'IDENT':
- raise SelectorSyntaxError('Expected ident, got %s' % (next,))
+ if next.type != "IDENT":
+ raise SelectorSyntaxError("Expected ident, got %s" % (next,))
return next.value
def next_ident_or_star(self):
next = self.next()
- if next.type == 'IDENT':
+ if next.type == "IDENT":
return next.value
- elif next == ('DELIM', '*'):
+ elif next == ("DELIM", "*"):
return None
else:
- raise SelectorSyntaxError(
- "Expected ident or '*', got %s" % (next,))
+ raise SelectorSyntaxError("Expected ident or '*', got %s" % (next,))
def skip_whitespace(self):
peek = self.peek()
- if peek.type == 'S':
+ if peek.type == "S":
self.next()
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index db44d42..f80e629 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -28,7 +28,7 @@
def _unicode_safe_getattr(obj, name, default=None):
# getattr() with a non-ASCII name fails on Python 2.x
- name = name.encode('ascii', 'replace').decode('ascii')
+ name = name.encode("ascii", "replace").decode("ascii")
return getattr(obj, name, default)
@@ -38,48 +38,47 @@ class ExpressionError(SelectorError, RuntimeError):
#### XPath Helpers
-class XPathExpr(object):
- def __init__(self, path='', element='*', condition='', star_prefix=False):
+class XPathExpr(object):
+ def __init__(self, path="", element="*", condition="", star_prefix=False):
self.path = path
self.element = element
self.condition = condition
def __str__(self):
- path = _unicode(self.path) + _unicode(self.element)
+ path = _unicode(self.path) + _unicode(self.element)
if self.condition:
- path += '[%s]' % self.condition
+ path += "[%s]" % self.condition
return path
def __repr__(self):
- return '%s[%s]' % (self.__class__.__name__, self)
+ return "%s[%s]" % (self.__class__.__name__, self)
- def add_condition(self, condition, conjuction='and'):
+ def add_condition(self, condition, conjuction="and"):
if self.condition:
- self.condition = '(%s) %s (%s)' % (self.condition, conjuction, condition)
+ self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition)
else:
self.condition = condition
return self
def add_name_test(self):
- if self.element == '*':
+ if self.element == "*":
# We weren't doing a test anyway
return
- self.add_condition(
- "name() = %s" % GenericTranslator.xpath_literal(self.element))
- self.element = '*'
+ self.add_condition("name() = %s" % GenericTranslator.xpath_literal(self.element))
+ self.element = "*"
def add_star_prefix(self):
"""
Append '*/' to the path to keep the context constrained
to a single parent.
"""
- self.path += '*/'
+ self.path += "*/"
def join(self, combiner, other):
path = _unicode(self) + combiner
# Any "star prefix" is redundant when joining.
- if other.path != '*/':
+ if other.path != "*/":
path += other.path
self.path = path
self.element = other.element
@@ -92,14 +91,15 @@ def join(self, combiner, other):
# The spec is actually more permissive than that, but don’t bother.
# This is just for the fast path.
# http://www.w3.org/TR/REC-xml/#NT-NameStartChar
-is_safe_name = re.compile('^[a-zA-Z_][a-zA-Z0-9_.-]*$').match
+is_safe_name = re.compile("^[a-zA-Z_][a-zA-Z0-9_.-]*$").match
# Test that the string is not empty and does not contain whitespace
-is_non_whitespace = re.compile(r'^[^ \t\r\n\f]+$').match
+is_non_whitespace = re.compile(r"^[^ \t\r\n\f]+$").match
#### Translation
+
class GenericTranslator(object):
"""
Translator for "generic" XML documents.
@@ -122,30 +122,30 @@ class GenericTranslator(object):
####
combinator_mapping = {
- ' ': 'descendant',
- '>': 'child',
- '+': 'direct_adjacent',
- '~': 'indirect_adjacent',
+ " ": "descendant",
+ ">": "child",
+ "+": "direct_adjacent",
+ "~": "indirect_adjacent",
}
attribute_operator_mapping = {
- 'exists': 'exists',
- '=': 'equals',
- '~=': 'includes',
- '|=': 'dashmatch',
- '^=': 'prefixmatch',
- '$=': 'suffixmatch',
- '*=': 'substringmatch',
- '!=': 'different', # XXX Not in Level 3 but meh
+ "exists": "exists",
+ "=": "equals",
+ "~=": "includes",
+ "|=": "dashmatch",
+ "^=": "prefixmatch",
+ "$=": "suffixmatch",
+ "*=": "substringmatch",
+ "!=": "different", # XXX Not in Level 3 but meh
}
#: The attribute used for ID selectors depends on the document language:
#: http://www.w3.org/TR/selectors/#id-selectors
- id_attribute = 'id'
+ id_attribute = "id"
#: The attribute used for ``:lang()`` depends on the document language:
#: http://www.w3.org/TR/selectors/#lang-pseudo
- lang_attribute = 'xml:lang'
+ lang_attribute = "xml:lang"
#: The case sensitivity of document language element names,
#: attribute names, and attribute values in selectors depends
@@ -168,7 +168,7 @@ class GenericTranslator(object):
# class used to represent and xpath expression
xpathexpr_cls = XPathExpr
- def css_to_xpath(self, css, prefix='descendant-or-self::'):
+ def css_to_xpath(self, css, prefix="descendant-or-self::"):
"""Translate a *group of selectors* to XPath.
Pseudo-elements are not supported here since XPath only knows
@@ -187,12 +187,14 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'):
The equivalent XPath 1.0 expression as an Unicode string.
"""
- return ' | '.join(self.selector_to_xpath(selector, prefix,
- translate_pseudo_elements=True)
- for selector in parse(css))
+ return " | ".join(
+ self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
+ for selector in parse(css)
+ )
- def selector_to_xpath(self, selector, prefix='descendant-or-self::',
- translate_pseudo_elements=False):
+ def selector_to_xpath(
+ self, selector, prefix="descendant-or-self::", translate_pseudo_elements=False
+ ):
"""Translate a parsed selector to XPath.
@@ -213,14 +215,14 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::',
The equivalent XPath 1.0 expression as an Unicode string.
"""
- tree = getattr(selector, 'parsed_tree', None)
+ tree = getattr(selector, "parsed_tree", None)
if not tree:
- raise TypeError('Expected a parsed selector, got %r' % (selector,))
+ raise TypeError("Expected a parsed selector, got %r" % (selector,))
xpath = self.xpath(tree)
assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return'
if translate_pseudo_elements and selector.pseudo_element:
xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
- return (prefix or '') + _unicode(xpath)
+ return (prefix or "") + _unicode(xpath)
def xpath_pseudo_element(self, xpath, pseudo_element):
"""Translate a pseudo-element.
@@ -229,7 +231,7 @@ def xpath_pseudo_element(self, xpath, pseudo_element):
but can be overridden by sub-classes.
"""
- raise ExpressionError('Pseudo-elements are not supported.')
+ raise ExpressionError("Pseudo-elements are not supported.")
@staticmethod
def xpath_literal(s):
@@ -239,38 +241,39 @@ def xpath_literal(s):
elif '"' not in s:
s = '"%s"' % s
else:
- s = "concat(%s)" % ','.join([
- (("'" in part) and '"%s"' or "'%s'") % part
- for part in split_at_single_quotes(s) if part
- ])
+ s = "concat(%s)" % ",".join(
+ [
+ (("'" in part) and '"%s"' or "'%s'") % part
+ for part in split_at_single_quotes(s)
+ if part
+ ]
+ )
return s
def xpath(self, parsed_selector):
"""Translate any parsed selector object."""
type_name = type(parsed_selector).__name__
- method = getattr(self, 'xpath_%s' % type_name.lower(), None)
+ method = getattr(self, "xpath_%s" % type_name.lower(), None)
if method is None:
- raise ExpressionError('%s is not supported.' % type_name)
+ raise ExpressionError("%s is not supported." % type_name)
return method(parsed_selector)
-
# Dispatched by parsed object type
def xpath_combinedselector(self, combined):
"""Translate a combined selector."""
combinator = self.combinator_mapping[combined.combinator]
- method = getattr(self, 'xpath_%s_combinator' % combinator)
- return method(self.xpath(combined.selector),
- self.xpath(combined.subselector))
+ method = getattr(self, "xpath_%s_combinator" % combinator)
+ return method(self.xpath(combined.selector), self.xpath(combined.subselector))
def xpath_negation(self, negation):
xpath = self.xpath(negation.selector)
sub_xpath = self.xpath(negation.subselector)
sub_xpath.add_name_test()
if sub_xpath.condition:
- return xpath.add_condition('not(%s)' % sub_xpath.condition)
+ return xpath.add_condition("not(%s)" % sub_xpath.condition)
else:
- return xpath.add_condition('0')
+ return xpath.add_condition("0")
def xpath_matching(self, matching):
xpath = self.xpath(matching.selector)
@@ -278,45 +281,42 @@ def xpath_matching(self, matching):
for e in exprs:
e.add_name_test()
if e.condition:
- xpath.add_condition(e.condition, 'or')
+ xpath.add_condition(e.condition, "or")
return xpath
def xpath_function(self, function):
"""Translate a functional pseudo-class."""
- method = 'xpath_%s_function' % function.name.replace('-', '_')
+ method = "xpath_%s_function" % function.name.replace("-", "_")
method = _unicode_safe_getattr(self, method, None)
if not method:
- raise ExpressionError(
- "The pseudo-class :%s() is unknown" % function.name)
+ raise ExpressionError("The pseudo-class :%s() is unknown" % function.name)
return method(self.xpath(function.selector), function)
def xpath_pseudo(self, pseudo):
"""Translate a pseudo-class."""
- method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_')
+ method = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_")
method = _unicode_safe_getattr(self, method, None)
if not method:
# TODO: better error message for pseudo-elements?
- raise ExpressionError(
- "The pseudo-class :%s is unknown" % pseudo.ident)
+ raise ExpressionError("The pseudo-class :%s is unknown" % pseudo.ident)
return method(self.xpath(pseudo.selector))
-
def xpath_attrib(self, selector):
"""Translate an attribute selector."""
operator = self.attribute_operator_mapping[selector.operator]
- method = getattr(self, 'xpath_attrib_%s' % operator)
+ method = getattr(self, "xpath_attrib_%s" % operator)
if self.lower_case_attribute_names:
name = selector.attrib.lower()
else:
name = selector.attrib
safe = is_safe_name(name)
if selector.namespace:
- name = '%s:%s' % (selector.namespace, name)
+ name = "%s:%s" % (selector.namespace, name)
safe = safe and is_safe_name(selector.namespace)
if safe:
- attrib = '@' + name
+ attrib = "@" + name
else:
- attrib = 'attribute::*[name() = %s]' % self.xpath_literal(name)
+ attrib = "attribute::*[name() = %s]" % self.xpath_literal(name)
if selector.value is None:
value = None
elif self.lower_case_attribute_values:
@@ -329,19 +329,18 @@ def xpath_class(self, class_selector):
"""Translate a class selector."""
# .foo is defined as [class~=foo] in the spec.
xpath = self.xpath(class_selector.selector)
- return self.xpath_attrib_includes(
- xpath, '@class', class_selector.class_name)
+ return self.xpath_attrib_includes(xpath, "@class", class_selector.class_name)
def xpath_hash(self, id_selector):
"""Translate an ID selector."""
xpath = self.xpath(id_selector.selector)
- return self.xpath_attrib_equals(xpath, '@id', id_selector.id)
+ return self.xpath_attrib_equals(xpath, "@id", id_selector.id)
def xpath_element(self, selector):
"""Translate a type or universal selector."""
element = selector.element
if not element:
- element = '*'
+ element = "*"
safe = True
else:
safe = is_safe_name(element)
@@ -350,39 +349,36 @@ def xpath_element(self, selector):
if selector.namespace:
# Namespace prefixes are case-sensitive.
# http://www.w3.org/TR/css3-namespace/#prefixes
- element = '%s:%s' % (selector.namespace, element)
+ element = "%s:%s" % (selector.namespace, element)
safe = safe and is_safe_name(selector.namespace)
xpath = self.xpathexpr_cls(element=element)
if not safe:
xpath.add_name_test()
return xpath
-
# CombinedSelector: dispatch by combinator
def xpath_descendant_combinator(self, left, right):
"""right is a child, grand-child or further descendant of left"""
- return left.join('/descendant-or-self::*/', right)
+ return left.join("/descendant-or-self::*/", right)
def xpath_child_combinator(self, left, right):
"""right is an immediate child of left"""
- return left.join('/', right)
+ return left.join("/", right)
def xpath_direct_adjacent_combinator(self, left, right):
"""right is a sibling immediately after left"""
- xpath = left.join('/following-sibling::', right)
+ xpath = left.join("/following-sibling::", right)
xpath.add_name_test()
- return xpath.add_condition('position() = 1')
+ return xpath.add_condition("position() = 1")
def xpath_indirect_adjacent_combinator(self, left, right):
"""right is a sibling after left, immediately or not"""
- return left.join('/following-sibling::', right)
-
+ return left.join("/following-sibling::", right)
# Function: dispatch by function/pseudo-class name
- def xpath_nth_child_function(self, xpath, function, last=False,
- add_name_test=True):
+ def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=True):
try:
a, b = parse_series(function.arguments)
except ValueError:
@@ -436,35 +432,35 @@ def xpath_nth_child_function(self, xpath, function, last=False,
# for a == 1, nth-*(an+b) means n+b-1 siblings before/after,
# and since n ∈ {0, 1, 2, ...}, if b-1<=0,
# there is always an "n" matching any number of siblings (maybe none)
- if a == 1 and b_min_1 <=0:
+ if a == 1 and b_min_1 <= 0:
return xpath
# early-exit condition 2:
# ~~~~~~~~~~~~~~~~~~~~~~~
# an+b-1 siblings with a<0 and (b-1)<0 is not possible
if a < 0 and b_min_1 < 0:
- return xpath.add_condition('0')
+ return xpath.add_condition("0")
# `add_name_test` boolean is inverted and somewhat counter-intuitive:
#
# nth_of_type() calls nth_child(add_name_test=False)
if add_name_test:
- nodetest = '*'
+ nodetest = "*"
else:
- nodetest = '%s' % xpath.element
+ nodetest = "%s" % xpath.element
# count siblings before or after the element
if not last:
- siblings_count = 'count(preceding-sibling::%s)' % nodetest
+ siblings_count = "count(preceding-sibling::%s)" % nodetest
else:
- siblings_count = 'count(following-sibling::%s)' % nodetest
+ siblings_count = "count(following-sibling::%s)" % nodetest
# special case of fixed position: nth-*(0n+b)
# if a == 0:
# ~~~~~~~~~~
# count(***-sibling::***) = b-1
if a == 0:
- return xpath.add_condition('%s = %s' % (siblings_count, b_min_1))
+ return xpath.add_condition("%s = %s" % (siblings_count, b_min_1))
expressions = []
@@ -473,12 +469,12 @@ def xpath_nth_child_function(self, xpath, function, last=False,
# so if a>0, and (b-1)<=0, an "n" exists to satisfy this,
# therefore, the predicate is only interesting if (b-1)>0
if b_min_1 > 0:
- expressions.append('%s >= %s' % (siblings_count, b_min_1))
+ expressions.append("%s >= %s" % (siblings_count, b_min_1))
else:
# if a<0, and (b-1)<0, no "n" satisfies this,
# this is tested above as an early exist condition
# otherwise,
- expressions.append('%s <= %s' % (siblings_count, b_min_1))
+ expressions.append("%s <= %s" % (siblings_count, b_min_1))
# operations modulo 1 or -1 are simpler, one only needs to verify:
#
@@ -501,56 +497,48 @@ def xpath_nth_child_function(self, xpath, function, last=False,
b_neg = (-b_min_1) % abs(a)
if b_neg != 0:
- b_neg = '+%s' % b_neg
- left = '(%s %s)' % (left, b_neg)
+ b_neg = "+%s" % b_neg
+ left = "(%s %s)" % (left, b_neg)
- expressions.append('%s mod %s = 0' % (left, a))
+ expressions.append("%s mod %s = 0" % (left, a))
if len(expressions) > 1:
- template = '(%s)'
+ template = "(%s)"
else:
- template = '%s'
- xpath.add_condition(' and '.join(template % expression
- for expression in expressions))
+ template = "%s"
+ xpath.add_condition(" and ".join(template % expression for expression in expressions))
return xpath
def xpath_nth_last_child_function(self, xpath, function):
return self.xpath_nth_child_function(xpath, function, last=True)
def xpath_nth_of_type_function(self, xpath, function):
- if xpath.element == '*':
- raise ExpressionError(
- "*:nth-of-type() is not implemented")
- return self.xpath_nth_child_function(xpath, function,
- add_name_test=False)
+ if xpath.element == "*":
+ raise ExpressionError("*:nth-of-type() is not implemented")
+ return self.xpath_nth_child_function(xpath, function, add_name_test=False)
def xpath_nth_last_of_type_function(self, xpath, function):
- if xpath.element == '*':
- raise ExpressionError(
- "*:nth-of-type() is not implemented")
- return self.xpath_nth_child_function(xpath, function, last=True,
- add_name_test=False)
+ if xpath.element == "*":
+ raise ExpressionError("*:nth-of-type() is not implemented")
+ return self.xpath_nth_child_function(xpath, function, last=True, add_name_test=False)
def xpath_contains_function(self, xpath, function):
# Defined there, removed in later drafts:
# http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
- if function.argument_types() not in (['STRING'], ['IDENT']):
+ if function.argument_types() not in (["STRING"], ["IDENT"]):
raise ExpressionError(
- "Expected a single string or ident for :contains(), got %r"
- % function.arguments)
+ "Expected a single string or ident for :contains(), got %r" % function.arguments
+ )
value = function.arguments[0].value
- return xpath.add_condition(
- 'contains(., %s)' % self.xpath_literal(value))
+ return xpath.add_condition("contains(., %s)" % self.xpath_literal(value))
def xpath_lang_function(self, xpath, function):
- if function.argument_types() not in (['STRING'], ['IDENT']):
+ if function.argument_types() not in (["STRING"], ["IDENT"]):
raise ExpressionError(
- "Expected a single string or ident for :lang(), got %r"
- % function.arguments)
+ "Expected a single string or ident for :lang(), got %r" % function.arguments
+ )
value = function.arguments[0].value
- return xpath.add_condition(
- "lang(%s)" % (self.xpath_literal(value)))
-
+ return xpath.add_condition("lang(%s)" % (self.xpath_literal(value)))
# Pseudo: dispatch by pseudo-class name
@@ -566,31 +554,28 @@ def xpath_scope_pseudo(self, xpath):
return xpath.add_condition("1")
def xpath_first_child_pseudo(self, xpath):
- return xpath.add_condition('count(preceding-sibling::*) = 0')
+ return xpath.add_condition("count(preceding-sibling::*) = 0")
def xpath_last_child_pseudo(self, xpath):
- return xpath.add_condition('count(following-sibling::*) = 0')
+ return xpath.add_condition("count(following-sibling::*) = 0")
def xpath_first_of_type_pseudo(self, xpath):
- if xpath.element == '*':
- raise ExpressionError(
- "*:first-of-type is not implemented")
- return xpath.add_condition('count(preceding-sibling::%s) = 0' % xpath.element)
+ if xpath.element == "*":
+ raise ExpressionError("*:first-of-type is not implemented")
+ return xpath.add_condition("count(preceding-sibling::%s) = 0" % xpath.element)
def xpath_last_of_type_pseudo(self, xpath):
- if xpath.element == '*':
- raise ExpressionError(
- "*:last-of-type is not implemented")
- return xpath.add_condition('count(following-sibling::%s) = 0' % xpath.element)
+ if xpath.element == "*":
+ raise ExpressionError("*:last-of-type is not implemented")
+ return xpath.add_condition("count(following-sibling::%s) = 0" % xpath.element)
def xpath_only_child_pseudo(self, xpath):
- return xpath.add_condition('count(parent::*/child::*) = 1')
+ return xpath.add_condition("count(parent::*/child::*) = 1")
def xpath_only_of_type_pseudo(self, xpath):
- if xpath.element == '*':
- raise ExpressionError(
- "*:only-of-type is not implemented")
- return xpath.add_condition('count(parent::*/child::%s) = 1' % xpath.element)
+ if xpath.element == "*":
+ raise ExpressionError("*:only-of-type is not implemented")
+ return xpath.add_condition("count(parent::*/child::%s) = 1" % xpath.element)
def xpath_empty_pseudo(self, xpath):
return xpath.add_condition("not(*) and not(string-length())")
@@ -617,61 +602,63 @@ def xpath_attrib_exists(self, xpath, name, value):
return xpath
def xpath_attrib_equals(self, xpath, name, value):
- xpath.add_condition('%s = %s' % (name, self.xpath_literal(value)))
+ xpath.add_condition("%s = %s" % (name, self.xpath_literal(value)))
return xpath
def xpath_attrib_different(self, xpath, name, value):
# FIXME: this seems like a weird hack...
if value:
- xpath.add_condition('not(%s) or %s != %s'
- % (name, name, self.xpath_literal(value)))
+ xpath.add_condition("not(%s) or %s != %s" % (name, name, self.xpath_literal(value)))
else:
- xpath.add_condition('%s != %s'
- % (name, self.xpath_literal(value)))
+ xpath.add_condition("%s != %s" % (name, self.xpath_literal(value)))
return xpath
def xpath_attrib_includes(self, xpath, name, value):
if is_non_whitespace(value):
xpath.add_condition(
"%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
- % (name, name, self.xpath_literal(' '+value+' ')))
+ % (name, name, self.xpath_literal(" " + value + " "))
+ )
else:
- xpath.add_condition('0')
+ xpath.add_condition("0")
return xpath
def xpath_attrib_dashmatch(self, xpath, name, value):
# Weird, but true...
- xpath.add_condition('%s and (%s = %s or starts-with(%s, %s))' % (
- name,
- name, self.xpath_literal(value),
- name, self.xpath_literal(value + '-')))
+ xpath.add_condition(
+ "%s and (%s = %s or starts-with(%s, %s))"
+ % (name, name, self.xpath_literal(value), name, self.xpath_literal(value + "-"))
+ )
return xpath
def xpath_attrib_prefixmatch(self, xpath, name, value):
if value:
- xpath.add_condition('%s and starts-with(%s, %s)' % (
- name, name, self.xpath_literal(value)))
+ xpath.add_condition(
+ "%s and starts-with(%s, %s)" % (name, name, self.xpath_literal(value))
+ )
else:
- xpath.add_condition('0')
+ xpath.add_condition("0")
return xpath
def xpath_attrib_suffixmatch(self, xpath, name, value):
if value:
# Oddly there is a starts-with in XPath 1.0, but not ends-with
xpath.add_condition(
- '%s and substring(%s, string-length(%s)-%s) = %s'
- % (name, name, name, len(value)-1, self.xpath_literal(value)))
+ "%s and substring(%s, string-length(%s)-%s) = %s"
+ % (name, name, name, len(value) - 1, self.xpath_literal(value))
+ )
else:
- xpath.add_condition('0')
+ xpath.add_condition("0")
return xpath
def xpath_attrib_substringmatch(self, xpath, name, value):
if value:
# Attribute selectors are case sensitive
- xpath.add_condition('%s and contains(%s, %s)' % (
- name, name, self.xpath_literal(value)))
+ xpath.add_condition(
+ "%s and contains(%s, %s)" % (name, name, self.xpath_literal(value))
+ )
else:
- xpath.add_condition('0')
+ xpath.add_condition("0")
return xpath
@@ -692,7 +679,7 @@ class HTMLTranslator(GenericTranslator):
"""
- lang_attribute = 'lang'
+ lang_attribute = "lang"
def __init__(self, xhtml=False):
self.xhtml = xhtml # Might be useful for sub-classes?
@@ -706,33 +693,36 @@ def xpath_checked_pseudo(self, xpath):
return xpath.add_condition(
"(@selected and name(.) = 'option') or "
"(@checked "
- "and (name(.) = 'input' or name(.) = 'command')"
- "and (@type = 'checkbox' or @type = 'radio'))")
+ "and (name(.) = 'input' or name(.) = 'command')"
+ "and (@type = 'checkbox' or @type = 'radio'))"
+ )
def xpath_lang_function(self, xpath, function):
- if function.argument_types() not in (['STRING'], ['IDENT']):
+ if function.argument_types() not in (["STRING"], ["IDENT"]):
raise ExpressionError(
- "Expected a single string or ident for :lang(), got %r"
- % function.arguments)
+ "Expected a single string or ident for :lang(), got %r" % function.arguments
+ )
value = function.arguments[0].value
return xpath.add_condition(
"ancestor-or-self::*[@lang][1][starts-with(concat("
- # XPath 1.0 has no lower-case function...
- "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
- "'abcdefghijklmnopqrstuvwxyz'), "
- "'-'), %s)]"
- % (self.lang_attribute, self.xpath_literal(value.lower() + '-')))
+ # XPath 1.0 has no lower-case function...
+ "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
+ "'abcdefghijklmnopqrstuvwxyz'), "
+ "'-'), %s)]" % (self.lang_attribute, self.xpath_literal(value.lower() + "-"))
+ )
def xpath_link_pseudo(self, xpath):
- return xpath.add_condition("@href and "
- "(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')")
+ return xpath.add_condition(
+ "@href and " "(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')"
+ )
# Links are never visited, the implementation for :visited is the same
# as in GenericTranslator
def xpath_disabled_pseudo(self, xpath):
# http://www.w3.org/TR/html5/section-index.html#attributes-1
- return xpath.add_condition('''
+ return xpath.add_condition(
+ """
(
@disabled and
(
@@ -754,13 +744,15 @@ def xpath_disabled_pseudo(self, xpath):
)
and ancestor::fieldset[@disabled]
)
- ''')
+ """
+ )
# FIXME: in the second half, add "and is not a descendant of that
# fieldset element's first legend element child, if any."
def xpath_enabled_pseudo(self, xpath):
# http://www.w3.org/TR/html5/section-index.html#attributes-1
- return xpath.add_condition('''
+ return xpath.add_condition(
+ """
(
@href and (
name(.) = 'a' or
@@ -788,7 +780,8 @@ def xpath_enabled_pseudo(self, xpath):
@disabled or ancestor::optgroup[@disabled]
)
)
- ''')
+ """
+ )
# FIXME: ... or "li elements that are children of menu elements,
# and that have a child element that defines a command, if the first
# such element's Disabled State facet is false (not disabled)".
diff --git a/docs/conf.py b/docs/conf.py
index 62b5202..9dc2575 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -52,7 +52,7 @@
# The full version, including alpha/beta/rc tags.
with open(os.path.join(os.path.dirname(__file__), '..', 'cssselect', '__init__.py')) as init_file:
init_py = init_file.read()
-release = re.search("VERSION = '([^']+)'", init_py).group(1)
+release = re.search('VERSION = "([^"]+)"', init_py).group(1)
# The short X.Y version.
version = release.rstrip('dev')
diff --git a/pyproject.toml b/pyproject.toml
index b409f47..57a5583 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,2 @@
[tool.black]
line-length = 99
-exclude = 'cssselect/|tests/'
diff --git a/setup.py b/setup.py
index 3a0bea0..f95721d 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
README = readme_file.read()
with open(os.path.join(ROOT, "cssselect", "__init__.py")) as init_file:
INIT_PY = init_file.read()
-VERSION = re.search("VERSION = '([^']+)'", INIT_PY).group(1)
+VERSION = re.search('VERSION = "([^"]+)"', INIT_PY).group(1)
setup(
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index bd37875..ba46d8a 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -21,17 +21,23 @@
import unittest
from lxml import etree, html
-from cssselect import (parse, GenericTranslator, HTMLTranslator,
- SelectorSyntaxError, ExpressionError)
-from cssselect.parser import (tokenize, parse_series, _unicode,
- FunctionalPseudoElement)
+from cssselect import (
+ parse,
+ GenericTranslator,
+ HTMLTranslator,
+ SelectorSyntaxError,
+ ExpressionError,
+)
+from cssselect.parser import tokenize, parse_series, _unicode, FunctionalPseudoElement
from cssselect.xpath import _unicode_safe_getattr, XPathExpr
if sys.version_info[0] < 3:
# Python 2
def u(text):
- return text.decode('utf8')
+ return text.decode("utf8")
+
+
else:
# Python 3
def u(text):
@@ -41,8 +47,8 @@ def u(text):
class TestCssselect(unittest.TestCase):
def test_tokenizer(self):
tokens = [
- _unicode(item) for item in tokenize(
- u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)'))]
+ _unicode(item) for item in tokenize(u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)'))
+ ]
assert tokens == [
u(""),
"",
@@ -69,8 +75,7 @@ def repr_parse(css):
selectors = parse(css)
for selector in selectors:
assert selector.pseudo_element is None
- return [repr(selector.parsed_tree).replace("(u'", "('")
- for selector in selectors]
+ return [repr(selector.parsed_tree).replace("(u'", "('") for selector in selectors]
def parse_many(first, *others):
result = repr_parse(first)
@@ -78,92 +83,91 @@ def parse_many(first, *others):
assert repr_parse(other) == result
return result
- assert parse_many('*') == ['Element[*]']
- assert parse_many('*|*') == ['Element[*]']
- assert parse_many('*|foo') == ['Element[foo]']
- assert parse_many('|foo') == ['Element[foo]']
- assert parse_many('foo|*') == ['Element[foo|*]']
- assert parse_many('foo|bar') == ['Element[foo|bar]']
+ assert parse_many("*") == ["Element[*]"]
+ assert parse_many("*|*") == ["Element[*]"]
+ assert parse_many("*|foo") == ["Element[foo]"]
+ assert parse_many("|foo") == ["Element[foo]"]
+ assert parse_many("foo|*") == ["Element[foo|*]"]
+ assert parse_many("foo|bar") == ["Element[foo|bar]"]
# This will never match, but it is valid:
- assert parse_many('#foo#bar') == ['Hash[Hash[Element[*]#foo]#bar]']
- assert parse_many(
- 'div>.foo',
- 'div> .foo',
- 'div >.foo',
- 'div > .foo',
- 'div \n> \t \t .foo', 'div\r>\n\n\n.foo', 'div\f>\f.foo'
- ) == ['CombinedSelector[Element[div] > Class[Element[*].foo]]']
- assert parse_many('td.foo,.bar',
- 'td.foo, .bar',
- 'td.foo\t\r\n\f ,\t\r\n\f .bar'
- ) == [
- 'Class[Element[td].foo]',
- 'Class[Element[*].bar]'
+ assert parse_many("#foo#bar") == ["Hash[Hash[Element[*]#foo]#bar]"]
+ assert (
+ parse_many(
+ "div>.foo",
+ "div> .foo",
+ "div >.foo",
+ "div > .foo",
+ "div \n> \t \t .foo",
+ "div\r>\n\n\n.foo",
+ "div\f>\f.foo",
+ )
+ == ["CombinedSelector[Element[div] > Class[Element[*].foo]]"]
+ )
+ assert parse_many("td.foo,.bar", "td.foo, .bar", "td.foo\t\r\n\f ,\t\r\n\f .bar") == [
+ "Class[Element[td].foo]",
+ "Class[Element[*].bar]",
+ ]
+ assert parse_many("div, td.foo, div.bar span") == [
+ "Element[div]",
+ "Class[Element[td].foo]",
+ "CombinedSelector[Class[Element[div].bar] Element[span]]",
+ ]
+ assert parse_many("div > p") == ["CombinedSelector[Element[div] > Element[p]]"]
+ assert parse_many("td:first") == ["Pseudo[Element[td]:first]"]
+ assert parse_many("td:first") == ["Pseudo[Element[td]:first]"]
+ assert parse_many("td :first") == [
+ "CombinedSelector[Element[td]