From a08663df3f94a14d4a1c03429649f5ad2e430b7c Mon Sep 17 00:00:00 2001 From: Varialus Date: Wed, 18 Apr 2012 18:28:20 -0600 Subject: [PATCH 001/208] Fixed case sensitive matching on lxml stable, but patched for external cssselect, on Windows with Python 2.7 64-bit. --- cssselect/xpath.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 23a165c..aa8d376 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -249,7 +249,14 @@ def xpath_hash(self, id_selector): def xpath_element(self, selector): """Translate a type or universal selector.""" if selector.namespace == '*': - element = selector.element.lower() + # Fixed case sensitive matching on lxml 2.3.4 patched for external cssselect with Python 2.7 64-bit on Windows. + # Case insensitive matching is not working unless source elements are lower case. + # For HTMLTranslator, I kept the existing behavior of setting the element to lower case. + # "...in HTML, element names are case-insensitive, but in XML they are case-sensitive." + # http://www.w3.org/TR/CSS2/selector.html#pattern-matching + element = selector.element + if isinstance(self, HTMLTranslator): + element = element.lower() else: # FIXME: Should we lowercase here? element = '%s:%s' % (selector.namespace, selector.element) From 35a2f57fefcbc92ecc56719eda59f82b00fc1238 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 19 Apr 2012 15:40:46 +0200 Subject: [PATCH 002/208] Fix case-sensitivity issues * (Functional) pseudo-classes are always case-insensitive * Add the 'xhtml' flag * Element names and attribute names are case-insensitive for HTML, but not XHTML or XML. 
--- CHANGES | 9 +++++ cssselect/__init__.py | 2 +- cssselect/parser.py | 5 +-- cssselect/tests.py | 80 ++++++++++++++++++++++++------------------- cssselect/xpath.py | 80 ++++++++++++++++++++++++++++++++----------- docs/index.rst | 3 -- 6 files changed, 117 insertions(+), 62 deletions(-) diff --git a/CHANGES b/CHANGES index 4583cef..1ffae2d 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,15 @@ Changelog ========= +Version 0.5 +----------- + +Not released yet. + +* Fix case sensitivity issues. +* Add the ``xhtml`` parameter for :class:`HTMLTranslator`. + + Version 0.4 ----------- diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 3129a42..4e044f0 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -17,5 +17,5 @@ from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '0.4' +VERSION = '0.5' __version__ = VERSION diff --git a/cssselect/parser.py b/cssselect/parser.py index f6b42c8..11ff6be 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -376,7 +376,8 @@ def parse_simple_selector(stream, inside_negation=False): if stream.peek() == '(': stream.next() stream.skip_whitespace() - if ident == 'not': + is_negation = ident.lower() == 'not' + if is_negation: if inside_negation: raise SelectorSyntaxError('Got nested :not()') argument, argument_pseudo_element = parse_simple_selector( @@ -396,7 +397,7 @@ def parse_simple_selector(stream, inside_negation=False): if not next == ')': raise SelectorSyntaxError( "Expected ')', got '%s'" % next) - if ident == 'not': + if is_negation: result = Negation(result, argument) else: result = Function(result, ident, argument) diff --git a/cssselect/tests.py b/cssselect/tests.py index 086f01f..2ee1ef9 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -284,72 +284,72 @@ def xpath(css): return str(GenericTranslator().css_to_xpath(css, prefix='')) assert xpath('*') == "*" - assert xpath('E') == "e" - assert xpath('E[foo]') == "e[@foo]" - assert xpath('E[foo="bar"]') 
== "e[@foo = 'bar']" - assert xpath('E[foo~="bar"]') == ( + assert xpath('e') == "e" + assert xpath('e[foo]') == "e[@foo]" + assert xpath('e[foo="bar"]') == "e[@foo = 'bar']" + assert xpath('e[foo~="bar"]') == ( "e[@foo and contains(" "concat(' ', normalize-space(@foo), ' '), ' bar ')]") - assert xpath('E[foo^="bar"]') == ( + assert xpath('e[foo^="bar"]') == ( "e[@foo and starts-with(@foo, 'bar')]") - assert xpath('E[foo$="bar"]') == ( + assert xpath('e[foo$="bar"]') == ( "e[@foo and substring(@foo, string-length(@foo)-2) = 'bar']") - assert xpath('E[foo*="bar"]') == ( + assert xpath('e[foo*="bar"]') == ( "e[@foo and contains(@foo, 'bar')]") - assert xpath('E[hreflang|="en"]') == ( + assert xpath('e[hreflang|="en"]') == ( "e[@hreflang and (" "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]") - assert xpath('E:nth-child(1)') == ( + assert xpath('e:nth-child(1)') == ( "*/*[name() = 'e' and (position() = 1)]") - assert xpath('E:nth-last-child(1)') == ( + assert xpath('e:nth-last-child(1)') == ( "*/*[name() = 'e' and (position() = last() - 1)]") - assert xpath('E:nth-last-child(2n+2)') == ( + assert xpath('e:nth-last-child(2n+2)') == ( "*/*[name() = 'e' and (" "(position() +2) mod -2 = 0 and position() < (last() -2))]") - assert xpath('E:nth-of-type(1)') == ( + assert xpath('e:nth-of-type(1)') == ( "*/e[position() = 1]") - assert xpath('E:nth-last-of-type(1)') == ( + assert xpath('e:nth-last-of-type(1)') == ( "*/e[position() = last() - 1]") - assert xpath('E:nth-last-of-type(1)') == ( + assert xpath('e:nth-last-of-type(1)') == ( "*/e[position() = last() - 1]") - assert xpath('div E:nth-last-of-type(1) .aclass') == ( + assert xpath('div e:nth-last-of-type(1) .aclass') == ( "div/descendant-or-self::*/e[position() = last() - 1]" "/descendant-or-self::*/*[@class and contains(" "concat(' ', normalize-space(@class), ' '), ' aclass ')]") - assert xpath('E:first-child') == ( + assert xpath('e:first-child') == ( "*/*[name() = 'e' and (position() = 1)]") - assert 
xpath('E:last-child') == ( + assert xpath('e:last-child') == ( "*/*[name() = 'e' and (position() = last())]") - assert xpath('E:first-of-type') == ( + assert xpath('e:first-of-type') == ( "*/e[position() = 1]") - assert xpath('E:last-of-type') == ( + assert xpath('e:last-of-type') == ( "*/e[position() = last()]") - assert xpath('E:only-child') == ( + assert xpath('e:only-child') == ( "*/*[name() = 'e' and (last() = 1)]") - assert xpath('E:only-of-type') == ( + assert xpath('e:only-of-type') == ( "e[last() = 1]") - assert xpath('E:empty') == ( + assert xpath('e:empty') == ( "e[not(*) and not(normalize-space())]") - assert xpath('E:root') == ( + assert xpath('e:root') == ( "e[not(parent::*)]") - assert xpath('E:contains("foo")') == ( + assert xpath('e:contains("foo")') == ( "e[contains(string(.), 'foo')]") - assert xpath('E:contains(foo)') == ( + assert xpath('e:contains(foo)') == ( "e[contains(string(.), 'foo')]") - assert xpath('E.warning') == ( + assert xpath('e.warning') == ( "e[@class and contains(" "concat(' ', normalize-space(@class), ' '), ' warning ')]") - assert xpath('E#myid') == ( + assert xpath('e#myid') == ( "e[@id = 'myid']") - assert xpath('E:not(:nth-child(odd))') == ( + assert xpath('e:not(:nth-child(odd))') == ( "e[not((position() -1) mod 2 = 0 and position() >= 1)]") - assert xpath('E F') == ( + assert xpath('e f') == ( "e/descendant-or-self::*/f") - assert xpath('E > F') == ( + assert xpath('e > f') == ( "e/f") - assert xpath('E + F') == ( + assert xpath('e + f') == ( "e/following-sibling::*[name() = 'f' and (position() = 1)]") - assert xpath('E ~ F') == ( + assert xpath('e ~ f') == ( "e/following-sibling::f") assert xpath('div#container p') == ( "div[@id = 'container']/descendant-or-self::*/p") @@ -426,12 +426,17 @@ def pcss(main, *selectors, **kwargs): return result all_ids = pcss('*') + assert len(all_ids) == 27 assert all_ids[:4] == ['html', 'nil', 'nil', 'outer-div'] assert all_ids[-1:] == ['foobar-span'] assert pcss('div') == ['outer-div', 
'li-div', 'foobar-div'] + assert pcss('DIV', html_only=True) == [ + 'outer-div', 'li-div', 'foobar-div'] # case-insensitive in HTML assert pcss('div div') == ['li-div'] assert pcss('div, div div') == ['outer-div', 'li-div', 'foobar-div'] assert pcss('a[name]') == ['name-anchor'] + assert pcss('a[NAme]', html_only=True) == [ + 'name-anchor'] # case-insensitive in HTML: assert pcss('a[rel]') == ['tag-anchor', 'nofollow-anchor'] assert pcss('a[rel="tag"]') == ['tag-anchor'] assert pcss('a[href*="localhost"]') == ['tag-anchor'] @@ -441,7 +446,7 @@ def pcss(main, *selectors, **kwargs): assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == [ 'foobar-div'] assert pcss('div[foobar~="cd"]') == [] - assert pcss('*[lang|="en"]', '*[lang|="en-US"]') == ['second-li'] + assert pcss('*[lang|="en"]', '[lang|="en-US"]') == ['second-li'] assert pcss('*[lang|="e"]') == [] assert pcss('li:nth-child(3)') == ['third-li'] assert pcss('li:nth-child(10)') == [] @@ -471,12 +476,12 @@ def pcss(main, *selectors, **kwargs): self.assertRaises(ExpressionError, pcss, 'p *:only-of-type') self.assertRaises(ExpressionError, pcss, 'p:lang(fr)') assert pcss('p:only-of-type') == ['paragraph'] - assert pcss('a:empty') == ['name-anchor'] + assert pcss('a:empty', 'a:EMpty') == ['name-anchor'] assert pcss('li:empty') == [ 'third-li', 'fourth-li', 'fifth-li', 'sixth-li', 'seventh-li'] assert pcss(':root', 'html:root') == ['html'] assert pcss('li:root', '* :root') == [] - assert pcss('*:contains("link")') == [ + assert pcss('*:contains("link")', ':CONtains("link")') == [ 'html', 'nil', 'outer-div', 'tag-anchor', 'nofollow-anchor'] assert pcss('*:contains("LInk")') == [] # case sensitive assert pcss('*:contains("e")') == [ @@ -488,7 +493,6 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c') == [ 'third-li', 'fourth-li'] assert pcss('#first-li', 'li#first-li', '*#first-li') == ['first-li'] - # Need some tests of :not()'] assert pcss('li div', 'li > div', 'div 
div') == ['li-div'] assert pcss('div > div') == [] assert pcss('div>.c', 'div > .c') == ['first-ol'] @@ -507,6 +511,10 @@ def pcss(main, *selectors, **kwargs): 'fieldset', 'checkbox-disabled'] assert pcss(':enabled', html_only=True) == [ 'checkbox-unchecked', 'checkbox-checked'] + assert pcss('a:not([href])') == ['name-anchor'] + assert pcss('ol :Not(li[class])') == [ + 'first-li', 'second-li', 'li-div', + 'fifth-li', 'sixth-li', 'seventh-li'] def test_select_shakespeare(self): document = html.document_fromstring(HTML_SHAKESPEARE) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index aa8d376..e31b037 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -93,6 +93,10 @@ def join(self, combiner, other): class GenericTranslator(object): """ Translator for "generic" XML documents. + + Everything is case-sensitive, no assumption is made on the meaning + of element names and attribute names. + """ combinator_mapping = { ' ': 'descendant', @@ -116,6 +120,24 @@ class GenericTranslator(object): #: http://www.w3.org/TR/selectors/#id-selectors id_attribute = 'id' + #: The case sensitivity of document language element names, + #: attribute names, and attribute values in selectors depends + #: on the document language. + #: http://www.w3.org/TR/selectors/#casesens + #: + #: When a document language defines one of these as case-insensitive, + #: cssselect assumes that the document parser makes the parsed values + #: lower-case. Making the selector lower-case too makes the comparaison + #: case-insensitive. + #: + #: In HTML, element names and attributes names (but not attribute values) + #: are case-insensitive. All of lxml.html, html5lib, BeautifulSoup4 + #: and HTMLParser make them lower-case in their parse result, so + #: the assumption holds. + lower_case_element_names = False + lower_case_attribute_names = False + lower_case_attribute_values = False + def css_to_xpath(self, css, prefix='descendant-or-self::'): """Translate a *group of selectors* to XPath. 
@@ -201,7 +223,7 @@ def xpath_negation(self, negation): def xpath_function(self, function): """Translate a functional pseudo-class.""" - method = 'xpath_%s_function' % function.name.replace('-', '_') + method = 'xpath_%s_function' % function.name.replace('-', '_').lower() method = getattr(self, method, None) if not method: raise ExpressionError( @@ -210,7 +232,7 @@ def xpath_function(self, function): def xpath_pseudo(self, pseudo): """Translate a pseudo-class.""" - method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_') + method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_').lower() method = getattr(self, method, None) if not method: # TODO: better error message for pseudo-elements? @@ -226,12 +248,19 @@ def xpath_attrib(self, selector): raise ExpressionError( "Unknown attribute operator: %r" % selector.operator) method = getattr(self, 'xpath_attrib_%s' % operator) - # FIXME: what if attrib is *? + if self.lower_case_attribute_names: + name = selector.attrib.lower() + else: + name = selector.attrib if selector.namespace == '*': - name = '@' + selector.attrib + name = '@' + name + else: + name = '@%s:%s' % (selector.namespace, name) + if self.lower_case_attribute_values: + value = selector.value.lower() else: - name = '@%s:%s' % (selector.namespace, selector.attrib) - return method(self.xpath(selector.selector), name, selector.value) + value = selector.value + return method(self.xpath(selector.selector), name, value) def xpath_class(self, class_selector): """Translate a class selector.""" @@ -243,23 +272,18 @@ def xpath_class(self, class_selector): def xpath_hash(self, id_selector): """Translate an ID selector.""" xpath = self.xpath(id_selector.selector) - return xpath.add_condition('@%s = %s' % ( - self.id_attribute, self.xpath_literal(id_selector.id))) + return self.xpath_attrib_equals(xpath, '@id', id_selector.id) def xpath_element(self, selector): """Translate a type or universal selector.""" - if selector.namespace == '*': - # Fixed case sensitive 
matching on lxml 2.3.4 patched for external cssselect with Python 2.7 64-bit on Windows. - # Case insensitive matching is not working unless source elements are lower case. - # For HTMLTranslator, I kept the existing behavior of setting the element to lower case. - # "...in HTML, element names are case-insensitive, but in XML they are case-sensitive." - # http://www.w3.org/TR/CSS2/selector.html#pattern-matching - element = selector.element - if isinstance(self, HTMLTranslator): - element = element.lower() + if self.lower_case_element_names: + element = selector.element.lower() else: - # FIXME: Should we lowercase here? - element = '%s:%s' % (selector.namespace, selector.element) + element = selector.element + if selector.namespace != '*': + # Namespace prefixes are case-sensitive. + # http://www.w3.org/TR/css3-namespace/#prefixes + element = '%s:%s' % (selector.namespace, element) return XPathExpr(element=element) @@ -465,8 +489,24 @@ def xpath_attrib_substringmatch(self, xpath, name, value): class HTMLTranslator(GenericTranslator): """ - Translator for HTML documents. + Translator for (X)HTML documents. + + Has a more useful implementation of some pseudo-classes, based on + HTML-specific element names and attribute names. + The API is the same as :class:`GenericTranslator`. + + :param xhtml: + If false (the default), element names and attribute names + are case-insensitive. + """ + def __init__(self, xhtml=False): + self.xhtml = xhtml # Might be useful for sub-classes? + if not xhtml: + # See their definition in GenericTranslator. + self.lower_case_element_names = True + self.lower_case_attribute_names = True + def xpath_checked_pseudo(self, xpath): # FIXME: is this really all the elements? return xpath.add_condition( diff --git a/docs/index.rst b/docs/index.rst index 0c060fc..4aed4c2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -57,9 +57,6 @@ selectors. For example, ``div, h1.title + p`` is a group of 2 selectors. 
:members: css_to_xpath, selector_to_xpath .. autoclass:: HTMLTranslator - - The API is the same as :class:`GenericTranslator`. - .. autoexception:: SelectorError .. autoexception:: SelectorSyntaxError .. autoexception:: ExpressionError From 95e655dfcf4454ab9140ff2decd9c1ad28c38d70 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 19 Apr 2012 19:51:13 +0200 Subject: [PATCH 003/208] Reduce nesting level in the parser. --- cssselect/parser.py | 53 +++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 11ff6be..31b086b 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -373,36 +373,33 @@ def parse_simple_selector(stream, inside_negation=False): # Any new pseudo-element must have two. pseudo_element = ident continue - if stream.peek() == '(': - stream.next() - stream.skip_whitespace() - is_negation = ident.lower() == 'not' - if is_negation: - if inside_negation: - raise SelectorSyntaxError('Got nested :not()') - argument, argument_pseudo_element = parse_simple_selector( - stream, inside_negation=True) - if argument_pseudo_element: - raise SelectorSyntaxError( - 'Pseudo-elements are not allowed inside :not()') - else: - peek = stream.peek() - if isinstance(peek, (Symbol, String)): - argument = stream.next() - else: - raise SelectorSyntaxError( - "Expected argument, got '%s'" % peek) - stream.skip_whitespace() - next = stream.next() - if not next == ')': + if stream.peek() != '(': + result = Pseudo(result, ident) + continue + stream.next() + stream.skip_whitespace() + if ident.lower() == 'not': + if inside_negation: + raise SelectorSyntaxError('Got nested :not()') + argument, argument_pseudo_element = parse_simple_selector( + stream, inside_negation=True) + if argument_pseudo_element: raise SelectorSyntaxError( - "Expected ')', got '%s'" % next) - if is_negation: - result = Negation(result, argument) - else: - result = Function(result, ident, argument) + 
'Pseudo-elements are not allowed inside :not()') + result = Negation(result, argument) else: - result = Pseudo(result, ident) + peek = stream.peek() + if isinstance(peek, (Symbol, String)): + argument = stream.next() + else: + raise SelectorSyntaxError( + "Expected argument, got '%s'" % peek) + result = Function(result, ident, argument) + stream.skip_whitespace() + next = stream.next() + if not next == ')': + raise SelectorSyntaxError( + "Expected ')', got '%s'" % next) continue else: raise SelectorSyntaxError( From c6137ceed4e596c313f18a46d9b194fa73a8270d Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 19 Apr 2012 19:54:51 +0200 Subject: [PATCH 004/208] Aesthetics. --- docs/index.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 4aed4c2..079d2d7 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -45,7 +45,7 @@ User API ======== In CSS3 terms, a `group of selectors`_ is a sequence of comma-separated -selectors. For example, ``div, h1.title + p`` is a group of 2 selectors. +selectors. For example, ``div, h1.title + p`` is a group of 2 selectors. .. _group of selectors: http://www.w3.org/TR/selectors/#grouping @@ -57,6 +57,10 @@ selectors. For example, ``div, h1.title + p`` is a group of 2 selectors. :members: css_to_xpath, selector_to_xpath .. autoclass:: HTMLTranslator + +Exceptions +---------- + .. autoexception:: SelectorError .. autoexception:: SelectorSyntaxError .. autoexception:: ExpressionError From 3d8fd09aac1578497de0bec7b9fdc164f88af62c Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 20 Apr 2012 11:07:25 +0200 Subject: [PATCH 005/208] Implement :link per the HTML5 spec. --- CHANGES | 5 ++++- cssselect/tests.py | 35 +++++++++++++++++++++++++---------- cssselect/xpath.py | 10 +++++++--- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/CHANGES b/CHANGES index 1ffae2d..f7d0c00 100644 --- a/CHANGES +++ b/CHANGES @@ -7,7 +7,10 @@ Version 0.5 Not released yet. 
* Fix case sensitivity issues. -* Add the ``xhtml`` parameter for :class:`HTMLTranslator`. +* Implement :class:`HTMLTranslator` based on the `HTML5 specification`_ + rather than guessing; add the ``xhtml`` parameter. + +.. _HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors Version 0.4 diff --git a/cssselect/tests.py b/cssselect/tests.py index 2ee1ef9..b60acf0 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -20,7 +20,7 @@ import operator import unittest -from lxml import html +from lxml import etree, html from cssselect import (parse, GenericTranslator, HTMLTranslator, SelectorSyntaxError, ExpressionError) from cssselect.parser import tokenize, parse_series @@ -401,7 +401,7 @@ def test_series(self): assert parse_series('5') == (0, 5) def test_select(self): - document = html.document_fromstring(HTML_IDS) + document = etree.fromstring(HTML_IDS) sort_key = dict( (el, count) for count, el in enumerate(document.getiterator()) ).__getitem__ @@ -426,8 +426,9 @@ def pcss(main, *selectors, **kwargs): return result all_ids = pcss('*') - assert len(all_ids) == 27 - assert all_ids[:4] == ['html', 'nil', 'nil', 'outer-div'] + assert len(all_ids) == 32 + assert all_ids[:6] == [ + 'html', 'nil', 'link-href', 'link-nohref', 'nil', 'outer-div'] assert all_ids[-1:] == ['foobar-span'] assert pcss('div') == ['outer-div', 'li-div', 'foobar-div'] assert pcss('DIV', html_only=True) == [ @@ -503,9 +504,16 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol#first-ol *:last-child') == ['li-div', 'seventh-li'] assert pcss('#outer-div:first-child') == ['outer-div'] assert pcss('#outer-div :first-child') == [ - 'name-anchor', 'first-li', 'li-div', 'p-b', 'checkbox-disabled'] + 'name-anchor', 'first-li', 'li-div', 'p-b', 'checkbox-disabled', + 'area-href'] assert pcss('a[href]') == ['tag-anchor', 'nofollow-anchor'] - assert pcss(':link', html_only=True) == pcss('a[href]') + + + assert pcss(':link', html_only=True) == [ + 'link-href', 'tag-anchor', 
'nofollow-anchor', 'area-href'] + assert pcss(':visited', html_only=True) == [] + + assert pcss(':checked', html_only=True) == ['checkbox-checked'] assert pcss(':disabled', html_only=True) == [ 'fieldset', 'checkbox-disabled'] @@ -590,7 +598,10 @@ def count(selector): assert count('div[class~=dialog]') == 51 # ? Seems right HTML_IDS = ''' - + + + +

diff --git a/cssselect/xpath.py b/cssselect/xpath.py index e31b037..0d8a3f6 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -491,10 +491,13 @@ class HTMLTranslator(GenericTranslator): """ Translator for (X)HTML documents. - Has a more useful implementation of some pseudo-classes, based on - HTML-specific element names and attribute names. + Has a more useful implementation of some pseudo-classes based on + HTML-specific element names and attribute names, as described in + the `HTML5 specification`_. It assumes no-quirks mode. The API is the same as :class:`GenericTranslator`. + .. _HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors + :param xhtml: If false (the default), element names and attribute names are case-insensitive. @@ -514,7 +517,8 @@ def xpath_checked_pseudo(self, xpath): "(@checked and name(.) = 'input')") def xpath_link_pseudo(self, xpath): - return xpath.add_condition("@href and name(.) = 'a'") + return xpath.add_condition("@href and " + "(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')") # Links are never visited, the implementation for :visited is the same # as in GenericTranslator From 1c12e2aa0b57984f8889fb8de617d5461eb88221 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 20 Apr 2012 11:44:46 +0200 Subject: [PATCH 006/208] Implement :enabled/:disabled per the HTML5 spec. 
(Almost, see #6) --- cssselect/tests.py | 34 ++++++++++++++++++---------------- cssselect/xpath.py | 31 +++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 26 deletions(-) diff --git a/cssselect/tests.py b/cssselect/tests.py index b60acf0..79d5f19 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -426,7 +426,6 @@ def pcss(main, *selectors, **kwargs): return result all_ids = pcss('*') - assert len(all_ids) == 32 assert all_ids[:6] == [ 'html', 'nil', 'link-href', 'link-nohref', 'nil', 'outer-div'] assert all_ids[-1:] == ['foobar-span'] @@ -472,8 +471,7 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol:nth-last-of-type(1)') == ['first-ol'] assert pcss('span:only-child') == ['foobar-span'] assert pcss('li div:only-child') == ['li-div'] - assert pcss('div *:only-child') == [ - 'li-div', 'checkbox-disabled', 'foobar-span'] + assert pcss('div *:only-child') == ['li-div', 'foobar-span'] self.assertRaises(ExpressionError, pcss, 'p *:only-of-type') self.assertRaises(ExpressionError, pcss, 'p:lang(fr)') assert pcss('p:only-of-type') == ['paragraph'] @@ -504,25 +502,25 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol#first-ol *:last-child') == ['li-div', 'seventh-li'] assert pcss('#outer-div:first-child') == ['outer-div'] assert pcss('#outer-div :first-child') == [ - 'name-anchor', 'first-li', 'li-div', 'p-b', 'checkbox-disabled', - 'area-href'] + 'name-anchor', 'first-li', 'li-div', 'p-b', + 'checkbox-fieldset-disabled', 'area-href'] assert pcss('a[href]') == ['tag-anchor', 'nofollow-anchor'] + assert pcss('a:not([href])') == ['name-anchor'] + assert pcss('ol :Not(li[class])') == [ + 'first-li', 'second-li', 'li-div', + 'fifth-li', 'sixth-li', 'seventh-li'] - + # HTML-specific assert pcss(':link', html_only=True) == [ 'link-href', 'tag-anchor', 'nofollow-anchor', 'area-href'] assert pcss(':visited', html_only=True) == [] - + assert pcss(':enabled', html_only=True) == [ + 'link-href', 'tag-anchor', 'nofollow-anchor', + 
'checkbox-unchecked', 'checkbox-checked', 'area-href'] + assert pcss(':disabled', html_only=True) == [ + 'checkbox-disabled', 'fieldset', 'checkbox-fieldset-disabled'] assert pcss(':checked', html_only=True) == ['checkbox-checked'] - assert pcss(':disabled', html_only=True) == [ - 'fieldset', 'checkbox-disabled'] - assert pcss(':enabled', html_only=True) == [ - 'checkbox-unchecked', 'checkbox-checked'] - assert pcss('a:not([href])') == ['name-anchor'] - assert pcss('ol :Not(li[class])') == [ - 'first-li', 'second-li', 'li-div', - 'fifth-li', 'sixth-li', 'seventh-li'] def test_select_shakespeare(self): document = html.document_fromstring(HTML_SHAKESPEARE) @@ -624,9 +622,13 @@ def count(selector): hi there guy + + +

- + +

Date: Fri, 20 Apr 2012 11:52:03 +0200 Subject: [PATCH 007/208] Implement :checked per the HTML5 spec. --- cssselect/tests.py | 13 +++++++++---- cssselect/xpath.py | 4 +++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/cssselect/tests.py b/cssselect/tests.py index 79d5f19..c8a0bf4 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -516,11 +516,13 @@ def pcss(main, *selectors, **kwargs): assert pcss(':visited', html_only=True) == [] assert pcss(':enabled', html_only=True) == [ 'link-href', 'tag-anchor', 'nofollow-anchor', - 'checkbox-unchecked', 'checkbox-checked', 'area-href'] + 'checkbox-unchecked', 'text-checked', 'checkbox-checked', + 'area-href'] assert pcss(':disabled', html_only=True) == [ - 'checkbox-disabled', 'fieldset', 'checkbox-fieldset-disabled'] - - assert pcss(':checked', html_only=True) == ['checkbox-checked'] + 'checkbox-disabled', 'checkbox-disabled-checked', 'fieldset', + 'checkbox-fieldset-disabled'] + assert pcss(':checked', html_only=True) == [ + 'checkbox-checked', 'checkbox-disabled-checked'] def test_select_shakespeare(self): document = html.document_fromstring(HTML_SHAKESPEARE) @@ -623,9 +625,12 @@ def count(selector): guy + +

diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 5e25cea..773368d 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -514,7 +514,9 @@ def xpath_checked_pseudo(self, xpath): # FIXME: is this really all the elements? return xpath.add_condition( "(@selected and name(.) = 'option') or " - "(@checked and name(.) = 'input')") + "(@checked " + "and (name(.) = 'input' or name(.) = 'command')" + "and (@type = 'checkbox' or @type = 'radio'))") def xpath_link_pseudo(self, xpath): return xpath.add_condition("@href and " From 0e55bd6a0540125028e828370863675ba0edfdbc Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 20 Apr 2012 14:55:34 +0200 Subject: [PATCH 008/208] Fix specificity for :not() --- .coveragerc | 7 +++++++ .gitignore | 1 + CHANGES | 1 + cssselect/parser.py | 4 ++-- cssselect/tests.py | 7 +++++++ 5 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..3481e01 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,7 @@ +[run] +branch = True + +[report] +exclude_lines = + pragma: no cover + def __repr__ diff --git a/.gitignore b/.gitignore index 36120ab..4c89f4c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ /MANIFEST /dist /docs/_build +/.coverage diff --git a/CHANGES b/CHANGES index f7d0c00..3808dbf 100644 --- a/CHANGES +++ b/CHANGES @@ -9,6 +9,7 @@ Not released yet. * Fix case sensitivity issues. * Implement :class:`HTMLTranslator` based on the `HTML5 specification`_ rather than guessing; add the ``xhtml`` parameter. +* Bug fix: specificity for :not() raised an exception .. 
_HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors diff --git a/cssselect/parser.py b/cssselect/parser.py index 31b086b..285e3eb 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -14,7 +14,7 @@ import re -try: +try: # pragma: no cover _unicode = unicode _unichr = unichr except NameError: @@ -140,7 +140,7 @@ def __repr__(self): def specificity(self): a1, b1, c1 = self.selector.specificity() - a2, b2, c2 = self.sub_selector.specificity() + a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 diff --git a/cssselect/tests.py b/cssselect/tests.py index c8a0bf4..b2ac856 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -187,6 +187,13 @@ def specificity(css): assert specificity('[baz^="4"]') == (0, 1, 0) assert specificity('#lipsum') == (1, 0, 0) + assert specificity(':not(*)') == (0, 0, 0) + assert specificity(':not(foo)') == (0, 0, 1) + assert specificity(':not(.foo)') == (0, 1, 0) + assert specificity(':not([foo])') == (0, 1, 0) + assert specificity(':not(:empty)') == (0, 1, 0) + assert specificity(':not(#foo)') == (1, 0, 0) + assert specificity('foo:empty') == (0, 1, 1) assert specificity('foo:before') == (0, 0, 2) assert specificity('foo::before') == (0, 0, 2) From 5bc70909690185a17c6b7d763068c52c7443eb7f Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 20 Apr 2012 15:08:34 +0200 Subject: [PATCH 009/208] Fix the translation of :not(*) --- .coveragerc | 1 + CHANGES | 3 ++- cssselect/parser.py | 10 ++++------ cssselect/tests.py | 13 +++++++++++++ cssselect/xpath.py | 5 ++++- 5 files changed, 24 insertions(+), 8 deletions(-) diff --git a/.coveragerc b/.coveragerc index 3481e01..e4ab9fb 100644 --- a/.coveragerc +++ b/.coveragerc @@ -5,3 +5,4 @@ branch = True exclude_lines = pragma: no cover def __repr__ + if sys.version_info diff --git a/CHANGES b/CHANGES index 3808dbf..f170138 100644 --- a/CHANGES +++ b/CHANGES @@ -9,7 +9,8 @@ Not released yet. * Fix case sensitivity issues. 
* Implement :class:`HTMLTranslator` based on the `HTML5 specification`_ rather than guessing; add the ``xhtml`` parameter. -* Bug fix: specificity for :not() raised an exception +* Bug fixes: specificity for ``:not()`` (any argument), translation for + ``:not(*)`` .. _HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors diff --git a/cssselect/parser.py b/cssselect/parser.py index 285e3eb..822f028 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -11,14 +11,14 @@ """ +import sys import re -try: # pragma: no cover +if sys.version_info[0] < 3: _unicode = unicode _unichr = unichr -except NameError: - # Python 3 +else: _unicode = str _unichr = chr @@ -276,7 +276,6 @@ def parse(css): try: return list(parse_selector_group(stream)) except SelectorSyntaxError: - import sys e = sys.exc_info()[1] message = "%s at %s -> %r" % ( e, stream.used, stream.peek()) @@ -359,7 +358,7 @@ def parse_simple_selector(stream, inside_negation=False): next = stream.next() if next != ']': raise SelectorSyntaxError( - "] expected, got '%s'" % next) + "Expected ']', got '%s'" % next) continue elif peek == '::': stream.next() @@ -609,7 +608,6 @@ def tokenize_symbol(s, pos): try: result = result.encode('ASCII', 'backslashreplace').decode('unicode_escape') except UnicodeDecodeError: - import sys e = sys.exc_info()[1] raise SelectorSyntaxError( "Bad symbol %r: %s" % (result, e)) diff --git a/cssselect/tests.py b/cssselect/tests.py index b2ac856..77f8658 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -270,6 +270,10 @@ def get_error(css): "Expected selector, got 'a' at " "[Token('[', 0), Symbol('href', 1), Token(']', 5)]" " -> Symbol('a', 6)") + assert get_error('[rel=stylesheet]') == None + assert get_error('[rel=stylesheet') == ( + "Expected ']', got 'None' at [Token('[', 0), Symbol('rel', 1), " + "Token('=', 4), Symbol('stylesheet', 5)] -> None") # Mis-placed pseudo-elements assert get_error('a:before:empty') == ( @@ -284,6 +288,10 @@ def get_error(css): 
"Pseudo-elements are not allowed inside :not() at " "[Token(':', 0), Symbol('not', 1), Token('(', 4), Token(':', 5)," " Symbol('before', 6)] -> Token(')', 12)") + assert get_error(':not(:not(a))') == ( + "Got nested :not() at [Token(':', 0), Symbol('not', 1), " + "Token('(', 4), Token(':', 5), Symbol('not', 6), Token('(', 9)]" + " -> Symbol('a', 10)") def test_translation(self): @@ -339,6 +347,8 @@ def xpath(css): "e[not(*) and not(normalize-space())]") assert xpath('e:root') == ( "e[not(parent::*)]") + assert xpath('e:hover') == ( + "e[0]") # never matches assert xpath('e:contains("foo")') == ( "e[contains(string(.), 'foo')]") assert xpath('e:contains(foo)') == ( @@ -350,6 +360,8 @@ def xpath(css): "e[@id = 'myid']") assert xpath('e:not(:nth-child(odd))') == ( "e[not((position() -1) mod 2 = 0 and position() >= 1)]") + assert xpath('e:not(*)') == ( + "e[0]") # never matches assert xpath('e f') == ( "e/descendant-or-self::*/f") assert xpath('e > f') == ( @@ -512,6 +524,7 @@ def pcss(main, *selectors, **kwargs): 'name-anchor', 'first-li', 'li-div', 'p-b', 'checkbox-fieldset-disabled', 'area-href'] assert pcss('a[href]') == ['tag-anchor', 'nofollow-anchor'] + assert pcss(':not(*)') == [] assert pcss('a:not([href])') == ['name-anchor'] assert pcss('ol :Not(li[class])') == [ 'first-li', 'second-li', 'li-div', diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 773368d..6fbdbcf 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -219,7 +219,10 @@ def xpath_negation(self, negation): xpath = self.xpath(negation.selector) sub_xpath = self.xpath(negation.subselector) sub_xpath.add_name_test() - return xpath.add_condition('not(%s)' % sub_xpath.condition) + if sub_xpath.condition: + return xpath.add_condition('not(%s)' % sub_xpath.condition) + else: + return xpath.add_condition('0') def xpath_function(self, function): """Translate a functional pseudo-class.""" From ab9992212b9620de03028afe29e404852bad42b3 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 20 
Apr 2012 15:26:45 +0200 Subject: [PATCH 010/208] Fix some cases for the parsing of series (see #7) --- cssselect/parser.py | 28 ++++++++++++---------------- cssselect/tests.py | 14 +++++++++++++- cssselect/xpath.py | 5 ++++- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 822f028..ae4a155 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -356,10 +356,11 @@ def parse_simple_selector(stream, inside_negation=False): stream.next() result = parse_attrib(result, stream) next = stream.next() - if next != ']': + if next == ']': + continue + else: raise SelectorSyntaxError( "Expected ']', got '%s'" % next) - continue elif peek == '::': stream.next() pseudo_element = stream.next_symbol() @@ -396,10 +397,11 @@ def parse_simple_selector(stream, inside_negation=False): result = Function(result, ident, argument) stream.skip_whitespace() next = stream.next() - if not next == ')': + if next == ')': + continue + else: raise SelectorSyntaxError( "Expected ')', got '%s'" % next) - continue else: raise SelectorSyntaxError( "Expected selector, got '%s'" % peek) @@ -439,16 +441,12 @@ def parse_attrib(selector, stream): def parse_series(s): """ - Parses things like '1n+2', or 'an+b' generally, returning (a, b) + Parses things like '1n+2', or 'an+b' generally + + :raises: :class:`ValueError` + :returns: :``(a, b)`` + """ - if isinstance(s, Element): - s = s._format_element() - if not s or s == '*': - # Happens when there's nothing, which the CSS parser thinks of as * - return (0, 0) - if isinstance(s, int): - # Happens when you just get a number - return (0, s) if s == 'odd': return (2, 1) elif s == 'even': @@ -456,7 +454,7 @@ def parse_series(s): elif s == 'n': return (1, 0) if 'n' not in s: - # Just a b + # Just b return (0, int(s)) a, b = s.split('n', 1) if not a: @@ -467,8 +465,6 @@ def parse_series(s): a = int(a) if not b: b = 0 - elif b == '-' or b == '+': - b = int(b+'1') else: b = int(b) return (a, b) diff 
--git a/cssselect/tests.py b/cssselect/tests.py index 77f8658..7e24ee9 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -271,9 +271,16 @@ def get_error(css): "[Token('[', 0), Symbol('href', 1), Token(']', 5)]" " -> Symbol('a', 6)") assert get_error('[rel=stylesheet]') == None + assert get_error('[rel:stylesheet]') == ( + "Operator expected, got ':' at [Token('[', 0), Symbol('rel', 1), " + "Token(':', 4)] -> Symbol('stylesheet', 5)") assert get_error('[rel=stylesheet') == ( "Expected ']', got 'None' at [Token('[', 0), Symbol('rel', 1), " "Token('=', 4), Symbol('stylesheet', 5)] -> None") + assert get_error(':lang(fr)') == None + assert get_error(':lang(fr') == ( + "Expected ')', got 'None' at [Token(':', 0), Symbol('lang', 1), " + "Token('(', 5), Symbol('fr', 6)] -> None") # Mis-placed pseudo-elements assert get_error('a:before:empty') == ( @@ -372,7 +379,10 @@ def xpath(css): "e/following-sibling::f") assert xpath('div#container p') == ( "div[@id = 'container']/descendant-or-self::*/p") - self.assertRaises(ExpressionError, xpath, 'p *:only-of-type') + self.assertRaises(ExpressionError, xpath, 'p :only-of-type') + self.assertRaises(ExpressionError, xpath, ':lang(fr)') + self.assertRaises(ExpressionError, xpath, ':nth-child(n-)') + def test_unicode(self): if sys.version_info[0] >= 3: @@ -417,6 +427,8 @@ def test_series(self): assert parse_series('even') == (2, 0) assert parse_series('3n') == (3, 0) assert parse_series('n') == (1, 0) + assert parse_series('+n') == (1, 0) + assert parse_series('-n') == (-1, 0) assert parse_series('5') == (0, 5) def test_select(self): diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 6fbdbcf..de930d7 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -315,7 +315,10 @@ def xpath_indirect_adjacent_combinator(self, left, right): def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=True): - a, b = parse_series(function.arguments) + try: + a, b = parse_series(function.arguments) + except 
ValueError: + raise ExpressionError("Invalid series: '%r'" % function.arguments) if not a and not b and not last: # a=0 means nothing is returned... return xpath.add_condition('false() and position() = 0') From b6730b5ab440a06fe8b4d4a897b7988f84760590 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 20 Apr 2012 15:39:08 +0200 Subject: [PATCH 011/208] Remove dead code. --- cssselect/parser.py | 20 +++++++------------- cssselect/tests.py | 7 +++++-- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index ae4a155..19447d3 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -589,18 +589,15 @@ def tokenize_escaped_string(s, pos): def tokenize_symbol(s, pos): start = pos match = _illegal_symbol.search(s, pos=pos) - if not match: - # Goes to end of s - return s[start:], len(s) - if match.start() == pos: - raise SelectorSyntaxError( - "Unexpected symbol: %r" % s[pos]) - if not match: - result = s[start:] - pos = len(s) - else: + if match: + if match.start() == pos: + raise SelectorSyntaxError( + "Unexpected symbol: %r" % s[pos]) result = s[start:match.start()] pos = match.start() + else: + result = s[start:] + pos = len(s) try: result = result.encode('ASCII', 'backslashreplace').decode('unicode_escape') except UnicodeDecodeError: @@ -636,9 +633,6 @@ def next(self): except StopIteration: return None - def __iter__(self): - return iter(self.next, None) - def peek(self): if not self._peeking: try: diff --git a/cssselect/tests.py b/cssselect/tests.py index 7e24ee9..0c3a534 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -281,6 +281,11 @@ def get_error(css): assert get_error(':lang(fr') == ( "Expected ')', got 'None' at [Token(':', 0), Symbol('lang', 1), " "Token('(', 5), Symbol('fr', 6)] -> None") + assert get_error(':contains("foo') == ( + "Expected closing \" for string in: 'foo' at " + "[Token(':', 0), Symbol('contains', 1), Token('(', 9)] -> None") + assert get_error('foo!') == ( + 
"Unexpected symbol: '!' at [Symbol('foo', 0)] -> None") # Mis-placed pseudo-elements assert get_error('a:before:empty') == ( @@ -300,7 +305,6 @@ def get_error(css): "Token('(', 4), Token(':', 5), Symbol('not', 6), Token('(', 9)]" " -> Symbol('a', 10)") - def test_translation(self): def xpath(css): return str(GenericTranslator().css_to_xpath(css, prefix='')) @@ -383,7 +387,6 @@ def xpath(css): self.assertRaises(ExpressionError, xpath, ':lang(fr)') self.assertRaises(ExpressionError, xpath, ':nth-child(n-)') - def test_unicode(self): if sys.version_info[0] >= 3: css = '.a\xc1b' From b551594ebf75064a7db299a9e61a6ae47d9d4190 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 20 Apr 2012 16:10:11 +0200 Subject: [PATCH 012/208] Better test coverage. --- .coveragerc | 1 + CHANGES | 3 +-- cssselect/parser.py | 6 +++--- cssselect/tests.py | 30 +++++++++++++++++++++++------- cssselect/xpath.py | 45 ++++++++++++++++----------------------------- 5 files changed, 44 insertions(+), 41 deletions(-) diff --git a/.coveragerc b/.coveragerc index e4ab9fb..2ee5ff3 100644 --- a/.coveragerc +++ b/.coveragerc @@ -6,3 +6,4 @@ exclude_lines = pragma: no cover def __repr__ if sys.version_info + if __name__ == '__main__': diff --git a/CHANGES b/CHANGES index f170138..196da78 100644 --- a/CHANGES +++ b/CHANGES @@ -9,8 +9,7 @@ Not released yet. * Fix case sensitivity issues. * Implement :class:`HTMLTranslator` based on the `HTML5 specification`_ rather than guessing; add the ``xhtml`` parameter. -* Bug fixes: specificity for ``:not()`` (any argument), translation for - ``:not(*)`` +* Several bug fixes and better test coverage. .. _HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors diff --git a/cssselect/parser.py b/cssselect/parser.py index 19447d3..9139459 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -43,7 +43,7 @@ class Selector(object): Represents a selector with an optional pseudo element. 
""" def __init__(self, tree, pseudo_element=None): - self._tree = tree + self.parsed_tree = tree #: If the selector has a pseudo-element: a string like ``'after'``. #: Otherwise, ``None``. #: Any identifier preceded by ``::`` is accepted as a pseudo-element. @@ -57,7 +57,7 @@ def __repr__(self): else: pseudo_element = '' return '%s[%r%s]' % ( - self.__class__.__name__, self._tree, pseudo_element) + self.__class__.__name__, self.parsed_tree, pseudo_element) def specificity(self): """Return the specificity_ of this selector as a tuple of 3 integers. @@ -65,7 +65,7 @@ def specificity(self): .. _specificity: http://www.w3.org/TR/selectors/#specificity """ - a, b, c = self._tree.specificity() + a, b, c = self.parsed_tree.specificity() if self.pseudo_element: c += 1 return a, b, c diff --git a/cssselect/tests.py b/cssselect/tests.py index 0c3a534..4cb22b3 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -47,7 +47,7 @@ def repr_parse(css): selectors = parse(css) for selector in selectors: assert selector.pseudo_element is None - return [repr(selector._tree).replace("(u'", "('") + return [repr(selector.parsed_tree).replace("(u'", "('") for selector in selectors] def parse_many(first, *others): @@ -132,7 +132,7 @@ def parse_pseudo(css): result = [] for selector in parse(css): result.append(( - repr(selector._tree).replace("(u'", "('"), + repr(selector.parsed_tree).replace("(u'", "('"), selector.pseudo_element)) return result @@ -311,7 +311,10 @@ def xpath(css): assert xpath('*') == "*" assert xpath('e') == "e" + assert xpath('*|e') == "e" + assert xpath('e|f') == "e:f" assert xpath('e[foo]') == "e[@foo]" + assert xpath('e[foo|bar]') == "e[@foo:bar]" assert xpath('e[foo="bar"]') == "e[@foo = 'bar']" assert xpath('e[foo~="bar"]') == ( "e[@foo and contains(" @@ -383,15 +386,26 @@ def xpath(css): "e/following-sibling::f") assert xpath('div#container p') == ( "div[@id = 'container']/descendant-or-self::*/p") - self.assertRaises(ExpressionError, xpath, 'p 
:only-of-type') + self.assertRaises(ExpressionError, xpath, ':first-of-type') + self.assertRaises(ExpressionError, xpath, ':only-of-type') + self.assertRaises(ExpressionError, xpath, ':last-of-type') + self.assertRaises(ExpressionError, xpath, ':nth-of-type(1)') + self.assertRaises(ExpressionError, xpath, ':nth-last-of-type(1)') self.assertRaises(ExpressionError, xpath, ':lang(fr)') self.assertRaises(ExpressionError, xpath, ':nth-child(n-)') + self.assertRaises(ExpressionError, xpath, ':after') + self.assertRaises(ExpressionError, xpath, ':lorem-ipsum') + self.assertRaises(ExpressionError, xpath, ':lorem(ipsum)') + self.assertRaises(ExpressionError, xpath, '::lorem-ipsum') + self.assertRaises(TypeError, GenericTranslator().css_to_xpath, 4) + self.assertRaises(TypeError, GenericTranslator().selector_to_xpath, + 'foo') def test_unicode(self): - if sys.version_info[0] >= 3: - css = '.a\xc1b' - else: + if sys.version_info[0] < 3: css = '.a\xc1b'.decode('ISO-8859-1') + else: + css = '.a\xc1b' xpath = GenericTranslator().css_to_xpath(css) assert css[1:] in xpath @@ -433,6 +447,8 @@ def test_series(self): assert parse_series('+n') == (1, 0) assert parse_series('-n') == (-1, 0) assert parse_series('5') == (0, 5) + self.assertRaises(ValueError, parse_series, 'foo') + self.assertRaises(ValueError, parse_series, 'n+') def test_select(self): document = etree.fromstring(HTML_IDS) @@ -659,7 +675,7 @@ def count(selector): hi there guy - + diff --git a/cssselect/xpath.py b/cssselect/xpath.py index de930d7..0e684d7 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -11,15 +11,16 @@ """ +import sys import re + from cssselect.parser import parse, parse_series, SelectorError -try: +if sys.version_info[0] < 3: _basestring = basestring _unicode = unicode -except NameError: - # Python 3 +else: _basestring = str _unicode = str @@ -36,7 +37,6 @@ def __init__(self, path='', element='*', condition='', star_prefix=False): self.path = path self.element = element self.condition = 
condition - self.star_prefix = star_prefix def __str__(self): path = _unicode(self.path) + _unicode(self.element) @@ -64,20 +64,15 @@ def add_name_test(self): def add_star_prefix(self): """ - Adds a /* prefix if there is no prefix. This is when you need - to keep context's constrained to a single parent. + Append '*/' to the path to keep the context constrained + to a single parent. """ - if self.path: - self.path += '*/' - else: - self.path = '*/' - self.star_prefix = True + self.path += '*/' def join(self, combiner, other): path = _unicode(self) + combiner - # We don't need a star prefix if we are joining to this other - # prefix; so we'll get rid of it - if not(other.star_prefix and other.path == '*/'): + # Any "star prefix" is redundant when joining. + if other.path != '*/': path += other.path self.path = path self.element = other.element @@ -178,7 +173,10 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::'): The equivalent XPath 1.0 expression as an Unicode string. 
""" - return (prefix or '') + _unicode(self.xpath(selector._tree)) + tree = getattr(selector, 'parsed_tree', None) + if not tree: + raise TypeError('Expected a parsed selector, got %r' % (selector,)) + return (prefix or '') + _unicode(self.xpath(tree)) @staticmethod def xpath_literal(s): @@ -197,9 +195,7 @@ def xpath_literal(s): def xpath(self, parsed_selector): """Translate any parsed selector object.""" type_name = type(parsed_selector).__name__ - method = getattr(self, 'xpath_%s' % type_name.lower(), None) - if not method: - raise TypeError('Expected a parsed selector, got %s' % type_name) + method = getattr(self, 'xpath_%s' % type_name.lower()) return method(parsed_selector) @@ -207,10 +203,7 @@ def xpath(self, parsed_selector): def xpath_combinedselector(self, combined): """Translate a combined selector.""" - combinator = self.combinator_mapping.get(combined.combinator) - if not combinator: - raise ExpressionError( - "Unknown combinator: %r" % combined.combinator) + combinator = self.combinator_mapping[combined.combinator] method = getattr(self, 'xpath_%s_combinator' % combinator) return method(self.xpath(combined.selector), self.xpath(combined.subselector)) @@ -246,10 +239,7 @@ def xpath_pseudo(self, pseudo): def xpath_attrib(self, selector): """Translate an attribute selector.""" - operator = self.attribute_operator_mapping.get(selector.operator) - if not operator: - raise ExpressionError( - "Unknown attribute operator: %r" % selector.operator) + operator = self.attribute_operator_mapping[selector.operator] method = getattr(self, 'xpath_attrib_%s' % operator) if self.lower_case_attribute_names: name = selector.attrib.lower() @@ -319,9 +309,6 @@ def xpath_nth_child_function(self, xpath, function, last=False, a, b = parse_series(function.arguments) except ValueError: raise ExpressionError("Invalid series: '%r'" % function.arguments) - if not a and not b and not last: - # a=0 means nothing is returned... 
- return xpath.add_condition('false() and position() = 0') if add_name_test: xpath.add_name_test() xpath.add_star_prefix() From 444884e71309e0de390ebeac86492dd980bffaba Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 20 Apr 2012 16:15:07 +0200 Subject: [PATCH 013/208] Tag v0.5 --- CHANGES | 2 +- MANIFEST.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index 196da78..d4ac88b 100644 --- a/CHANGES +++ b/CHANGES @@ -4,7 +4,7 @@ Changelog Version 0.5 ----------- -Not released yet. +Released on 2012-04-20. * Fix case sensitivity issues. * Implement :class:`HTMLTranslator` based on the `HTML5 specification`_ diff --git a/MANIFEST.in b/MANIFEST.in index c8f5dc3..e98d213 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,3 @@ -include AUTHORS CHANGES LICENSE README.rst tox.ini +include AUTHORS CHANGES LICENSE README.rst tox.ini .coveragerc recursive-include docs * prune docs/_build From 4667083bc91a3642453e700daf22195d7e508bc1 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 20 Apr 2012 17:37:37 +0200 Subject: [PATCH 014/208] Small doc rewrites on selector groups and pseudo-elements. --- cssselect/parser.py | 29 +++++++++++++++++++++++------ docs/index.rst | 5 +++-- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 9139459..83a5258 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -40,15 +40,32 @@ class SelectorSyntaxError(SelectorError, SyntaxError): class Selector(object): """ - Represents a selector with an optional pseudo element. + Represents a parsed selector. + + :meth:`~GenericTranslator.selector_to_xpath` accepts this object, + but ignores :attr:`pseudo_element`. It is the user’s responsibility + to account for pseudo-elements and reject selectors with unknown + or unsupported pseudo-elements. + """ def __init__(self, tree, pseudo_element=None): self.parsed_tree = tree - #: If the selector has a pseudo-element: a string like ``'after'``. 
- #: Otherwise, ``None``. - #: Any identifier preceded by ``::`` is accepted as a pseudo-element. - #: It is the user’s responsibility to reject selectors with - #: unknown or unsupported pseudo-elements. + #: The identifier for the pseudo-element as a string, or ``None``. + #: + #: +-------------------------+----------------+----------------+ + #: | | Selector | Pseudo-element | + #: +=========================+================+================+ + #: | CSS3 syntax | ``a::before`` | ``'before'`` | + #: +-------------------------+----------------+----------------+ + #: | Older syntax | ``a:before`` | ``'before'`` | + #: +-------------------------+----------------+----------------+ + #: | From the Lists3_ draft, | ``li::marker`` | ``'marker'`` | + #: | not in Selectors3 | | | + #: +-------------------------+----------------+----------------+ + #: | Invalid pseudo-class | ``li:marker`` | ``None`` | + #: +-------------------------+----------------+----------------+ + #: + #: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement self.pseudo_element = pseudo_element def __repr__(self): diff --git a/docs/index.rst b/docs/index.rst index 079d2d7..5cf8743 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -44,8 +44,9 @@ The resulting expression can be used with lxml's `XPath engine`_: User API ======== -In CSS3 terms, a `group of selectors`_ is a sequence of comma-separated -selectors. For example, ``div, h1.title + p`` is a group of 2 selectors. +In CSS3 Selectors terms, the top-level object is a `group of selectors`_, a +sequence of comma-separated selectors. For example, ``div, h1.title + p`` +is a group of two selectors. .. 
_group of selectors: http://www.w3.org/TR/selectors/#grouping From 7b3903207ad9621d11807a381242279a9a0a6b88 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 20 Apr 2012 18:02:47 +0200 Subject: [PATCH 015/208] Declare all source files as UTF-8 --- cssselect/__init__.py | 1 + cssselect/parser.py | 1 + cssselect/tests.py | 1 + cssselect/xpath.py | 1 + setup.py | 2 ++ 5 files changed, 6 insertions(+) diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 4e044f0..9f17ddb 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -1,3 +1,4 @@ +# coding: utf8 """ CSS Selectors based on XPath ============================ diff --git a/cssselect/parser.py b/cssselect/parser.py index 83a5258..e38ae07 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -1,3 +1,4 @@ +# coding: utf8 """ cssselect.parser ================ diff --git a/cssselect/tests.py b/cssselect/tests.py index 4cb22b3..94e8f79 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# coding: utf8 """ Tests for cssselect =================== diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 0e684d7..7a865c3 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -1,3 +1,4 @@ +# coding: utf8 """ cssselect.xpath =============== diff --git a/setup.py b/setup.py index df95379..af0f004 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,5 @@ +# coding: utf8 + import re import os.path from setuptools import setup From b0cacc2a497b86da80b9533826d9ec809f3d682a Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Sat, 21 Apr 2012 10:45:36 +0200 Subject: [PATCH 016/208] fix setup.py to work with plain distutils (makes it work in Py3.3) --- setup.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index af0f004..4f9b076 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,12 @@ import re import os.path -from setuptools import setup +try: + from setuptools import setup + extra_kwargs = {'test_suite': 
'cssselect.tests'} +except ImportError: + from distutils.core import setup + extra_kwargs = {} ROOT = os.path.dirname(__file__) @@ -24,7 +29,6 @@ url='http://packages.python.org/cssselect/', license='BSD', packages=['cssselect'], - test_suite='cssselect.tests', classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', @@ -38,4 +42,5 @@ 'Programming Language :: Python :: 3.1', 'Programming Language :: Python :: 3.2', ], + **extra_kwargs ) From 856621a21479fd51daebfce42d5d94ddabef13e3 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 24 Apr 2012 17:03:29 +0200 Subject: [PATCH 017/208] Add support for :lang(), close #3 --- CHANGES | 11 +++++++++++ cssselect/__init__.py | 2 +- cssselect/tests.py | 13 ++++++++----- cssselect/xpath.py | 21 ++++++++++++++++----- docs/index.rst | 3 +-- 5 files changed, 37 insertions(+), 13 deletions(-) diff --git a/CHANGES b/CHANGES index d4ac88b..4df6f02 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,17 @@ Changelog ========= +Version 0.6 +----------- + +* In ``setup.py`` use setuptools/distribute if available, but fall back + on distutils. +* Implement the ``:lang()`` pseudo-class, although it is only based on + ``xml:lang`` or ``lang`` attributes. If the document language is known from + some other meta-data (like a ``Content-Language`` HTTP header or ```` + element), a workaround is to set a lang attribute on the root element. 
+ + Version 0.5 ----------- diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 9f17ddb..5c70835 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -18,5 +18,5 @@ from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '0.5' +VERSION = '0.6' __version__ = VERSION diff --git a/cssselect/tests.py b/cssselect/tests.py index 94e8f79..12b43aa 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -18,7 +18,6 @@ """ import sys -import operator import unittest from lxml import etree, html @@ -392,7 +391,6 @@ def xpath(css): self.assertRaises(ExpressionError, xpath, ':last-of-type') self.assertRaises(ExpressionError, xpath, ':nth-of-type(1)') self.assertRaises(ExpressionError, xpath, ':nth-last-of-type(1)') - self.assertRaises(ExpressionError, xpath, ':lang(fr)') self.assertRaises(ExpressionError, xpath, ':nth-child(n-)') self.assertRaises(ExpressionError, xpath, ':after') self.assertRaises(ExpressionError, xpath, ':lorem-ipsum') @@ -497,8 +495,14 @@ def pcss(main, *selectors, **kwargs): assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == [ 'foobar-div'] assert pcss('div[foobar~="cd"]') == [] - assert pcss('*[lang|="en"]', '[lang|="en-US"]') == ['second-li'] + assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ['second-li'] + # Attribute values are case sensitive + assert pcss('*[lang|="en"]', '[lang|="en-US"]') == [] assert pcss('*[lang|="e"]') == [] + # ... :lang() is not. 
+ assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [ + 'second-li', 'li-div'] + assert pcss(':lang("e")', html_only=True) == [] assert pcss('li:nth-child(3)') == ['third-li'] assert pcss('li:nth-child(10)') == [] assert pcss('li:nth-child(2n)', 'li:nth-child(even)', @@ -524,7 +528,6 @@ def pcss(main, *selectors, **kwargs): assert pcss('li div:only-child') == ['li-div'] assert pcss('div *:only-child') == ['li-div', 'foobar-span'] self.assertRaises(ExpressionError, pcss, 'p *:only-of-type') - self.assertRaises(ExpressionError, pcss, 'p:lang(fr)') assert pcss('p:only-of-type') == ['paragraph'] assert pcss('a:empty', 'a:EMpty') == ['name-anchor'] assert pcss('li:empty') == [ @@ -661,7 +664,7 @@ def count(selector): link

content
+

Date: Tue, 24 Apr 2012 17:03:53 +0200 Subject: [PATCH 018/208] Tag v0.6 --- CHANGES | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES b/CHANGES index 4df6f02..c4d3968 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,8 @@ Changelog Version 0.6 ----------- +Released on 2012-04-24. + * In ``setup.py`` use setuptools/distribute if available, but fall back on distutils. * Implement the ``:lang()`` pseudo-class, although it is only based on From 19443c77dd923f91902fe7a18fe5cea07ce80a43 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 25 Apr 2012 13:14:28 +0200 Subject: [PATCH 019/208] Make sure Selector.pseudo_element is unicode, not a Symbol. --- CHANGES | 9 +++++++++ cssselect/__init__.py | 2 +- cssselect/parser.py | 4 ++-- cssselect/tests.py | 10 ++++++---- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/CHANGES b/CHANGES index c4d3968..c72c2ec 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,15 @@ Changelog ========= +Version 0.6.1 +------------- + +Released on 2012-04-25. + +Make sure that internal token objects do not "leak" into the public API and +:attr:`Selector.pseudo_element` is an unicode string. 
+ + Version 0.6 ----------- diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 5c70835..08b529e 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -18,5 +18,5 @@ from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '0.6' +VERSION = '0.6.1' __version__ = VERSION diff --git a/cssselect/parser.py b/cssselect/parser.py index e38ae07..d76742d 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -381,7 +381,7 @@ def parse_simple_selector(stream, inside_negation=False): "Expected ']', got '%s'" % next) elif peek == '::': stream.next() - pseudo_element = stream.next_symbol() + pseudo_element = _unicode(stream.next_symbol()) continue elif peek == ':': stream.next() @@ -389,7 +389,7 @@ def parse_simple_selector(stream, inside_negation=False): if ident in ('first-line', 'first-letter', 'before', 'after'): # Special case: CSS 2.1 pseudo-elements can have a single ':' # Any new pseudo-element must have two. - pseudo_element = ident + pseudo_element = _unicode(ident) continue if stream.peek() != '(': result = Pseudo(result, ident) diff --git a/cssselect/tests.py b/cssselect/tests.py index 12b43aa..fea4b67 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -23,7 +23,7 @@ from lxml import etree, html from cssselect import (parse, GenericTranslator, HTMLTranslator, SelectorSyntaxError, ExpressionError) -from cssselect.parser import tokenize, parse_series +from cssselect.parser import tokenize, parse_series, _unicode class TestCssselect(unittest.TestCase): @@ -131,9 +131,11 @@ def test_pseudo_elements(self): def parse_pseudo(css): result = [] for selector in parse(css): - result.append(( - repr(selector.parsed_tree).replace("(u'", "('"), - selector.pseudo_element)) + pseudo = selector.pseudo_element + # No Symbol here + assert pseudo is None or type(pseudo) is _unicode + selector = repr(selector.parsed_tree).replace("(u'", "('") + result.append((selector, pseudo)) return result def parse_one(css): 
From 7189f52304f82c10d755b56012a2e61e98e6806b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 7 Jun 2012 17:46:30 +0200 Subject: [PATCH 020/208] Rewrite the tokenizer to conform to the spec grammar. --- cssselect/parser.py | 484 +++++++++++++++++++++++--------------------- cssselect/tests.py | 157 +++++++------- cssselect/xpath.py | 40 ++-- 3 files changed, 355 insertions(+), 326 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index d76742d..9ccd66f 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -14,6 +14,7 @@ import sys import re +import operator if sys.version_info[0] < 3: @@ -118,7 +119,11 @@ def __init__(self, selector, name, arguments): def __repr__(self): return '%s[%r:%s(%r)]' % ( - self.__class__.__name__, self.selector, self.name, self.arguments) + self.__class__.__name__, self.selector, self.name, + [token.value for token in self.arguments]) + + def argument_types(self): + return [token.type for token in self.arguments] def specificity(self): a, b, c = self.selector.specificity() @@ -174,10 +179,10 @@ def __init__(self, selector, namespace, attrib, operator, value): self.value = value def __repr__(self): - if self.namespace == '*': - attrib = self.attrib - else: + if self.namespace: attrib = '%s|%s' % (self.namespace, self.attrib) + else: + attrib = self.attrib if self.operator == 'exists': return '%s[%r[%s]]' % ( self.__class__.__name__, self.selector, attrib) @@ -195,24 +200,25 @@ def specificity(self): class Element(object): """ Represents namespace|element + + `None` is for the universal selector '*' + """ - def __init__(self, namespace, element): + def __init__(self, namespace=None, element=None): self.namespace = namespace self.element = element def __repr__(self): - if self.namespace == '*': - element = self.element - else: - element = '%s|%s' % (self.namespace, self.element) - return '%s[%s]' % ( - self.__class__.__name__, element) + element = self.element or '*' + if self.namespace: + element = '%s|%s' % 
(self.namespace, element) + return '%s[%s]' % (self.__class__.__name__, element) def specificity(self): - if self.element == '*': - return 0, 0, 0 - else: + if self.element: return 0, 0, 1 + else: + return 0, 0, 0 class Hash(object): @@ -256,9 +262,15 @@ def specificity(self): #### Parser -_el_re = re.compile(r'^\s*(\w+)$') -_id_re = re.compile(r'^\s*(\w*)#(\w+)\s*$') -_class_re = re.compile(r'^\s*(\w*)\.(\w+)\s*$') +# foo +_el_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$') + +# foo#bar or #bar +_id_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$') + +# foo.bar or .bar +_class_re = re.compile( + r'^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$') def parse(css): @@ -279,36 +291,35 @@ def parse(css): # Fast path for simple cases match = _el_re.match(css) if match: - return [Selector(Element('*', match.group(1)))] + return [Selector(Element(element=match.group(1)))] match = _id_re.match(css) if match is not None: - return [Selector(Hash(Element( - '*', match.group(1) or '*'), match.group(2)))] + return [Selector(Hash(Element(element=match.group(1) or None), + match.group(2)))] match = _class_re.match(css) if match is not None: - return [Selector(Class(Element( - '*', match.group(1) or '*'), match.group(2)))] + return [Selector(Class(Element(element=match.group(1) or None), + match.group(2)))] stream = TokenStream(tokenize(css)) stream.source = css - try: - return list(parse_selector_group(stream)) - except SelectorSyntaxError: - e = sys.exc_info()[1] - message = "%s at %s -> %r" % ( - e, stream.used, stream.peek()) - e.msg = message - if sys.version_info < (2,6): - e.message = message - e.args = tuple([message]) - raise + return list(parse_selector_group(stream)) +# except SelectorSyntaxError: +# e = sys.exc_info()[1] +# message = "%s at %s -> %r" % ( +# e, stream.used, stream.peek()) +# e.msg = message +# if sys.version_info < (2,6): +# e.message = message +# e.args = tuple([message]) +# raise def 
parse_selector_group(stream): stream.skip_whitespace() while 1: yield Selector(*parse_selector(stream)) - if stream.peek() == ',': + if stream.peek() == ('DELIM', ','): stream.next() stream.skip_whitespace() else: @@ -319,14 +330,15 @@ def parse_selector(stream): while 1: stream.skip_whitespace() peek = stream.peek() - if peek == ',' or peek is None: + if peek in (('EOF', None), ('DELIM', ',')): break if pseudo_element: raise SelectorSyntaxError( - 'A pseudo-element must be at the end of a selector') - if peek in ('+', '>', '~'): + 'Got pseudo-element ::%s not at the end of a selector' + % pseudo_element) + if peek.is_delim('+', '>', '~'): # A combinator - combinator = stream.next() + combinator = stream.next().value stream.skip_whitespace() else: # By exclusion, the last parse_simple_selector() ended @@ -339,59 +351,54 @@ def parse_selector(stream): def parse_simple_selector(stream, inside_negation=False): stream.skip_whitespace() + selector_start = len(stream.used) peek = stream.peek() - consumed = len(stream.used) - if peek == '*' or isinstance(peek, Symbol): - next = stream.next() - if stream.peek() == '|': - namespace = next + if peek.type == 'IDENT' or peek == ('DELIM', '*'): + if peek.type == 'IDENT': + namespace = stream.next().value + else: stream.next() - element = stream.next_symbol_or_star() + namespace = None + if stream.peek() == ('DELIM', '|'): + stream.next() + element = stream.next_ident_or_star() else: - namespace = '*' - element = next + element = namespace + namespace = None else: - element = namespace = '*' + element = namespace = None result = Element(namespace, element) pseudo_element = None while 1: peek = stream.peek() - if peek in (None, ' ', ',', '+', '>', '~') or ( - inside_negation and peek == ')'): + if peek.type in ('S', 'EOF') or peek.is_delim(',', '+', '>', '~') or ( + inside_negation and peek == ('DELIM', ')')): break if pseudo_element: raise SelectorSyntaxError( - 'A pseudo-element must be at the end of a selector') - if peek == 
'#': - stream.next() - result = Hash(result, stream.next_symbol()) - continue - elif peek == '.': + 'Got pseudo-element ::%s not at the end of a selector' + % pseudo_element) + if peek.type == 'HASH': + result = Hash(result, stream.next().value) + elif peek == ('DELIM', '.'): stream.next() - result = Class(result, stream.next_symbol()) - continue - elif peek == '[': + result = Class(result, stream.next_ident()) + elif peek == ('DELIM', '['): stream.next() result = parse_attrib(result, stream) - next = stream.next() - if next == ']': - continue - else: - raise SelectorSyntaxError( - "Expected ']', got '%s'" % next) - elif peek == '::': + elif peek == ('DELIM', ':'): stream.next() - pseudo_element = _unicode(stream.next_symbol()) - continue - elif peek == ':': - stream.next() - ident = stream.next_symbol() + if stream.peek() == ('DELIM', ':'): + stream.next() + pseudo_element = stream.next_ident() + continue + ident = stream.next_ident() if ident in ('first-line', 'first-letter', 'before', 'after'): # Special case: CSS 2.1 pseudo-elements can have a single ':' # Any new pseudo-element must have two. 
pseudo_element = _unicode(ident) continue - if stream.peek() != '(': + if stream.peek() != ('DELIM', '('): result = Pseudo(result, ident) continue stream.next() @@ -401,60 +408,90 @@ def parse_simple_selector(stream, inside_negation=False): raise SelectorSyntaxError('Got nested :not()') argument, argument_pseudo_element = parse_simple_selector( stream, inside_negation=True) + next = stream.next() if argument_pseudo_element: raise SelectorSyntaxError( - 'Pseudo-elements are not allowed inside :not()') + 'Got pseudo-element ::%s inside :not() at %s' + % (argument_pseudo_element, next.pos)) + if next != ('DELIM', ')'): + raise SelectorSyntaxError("Expected ')', got %s" % (next,)) result = Negation(result, argument) else: - peek = stream.peek() - if isinstance(peek, (Symbol, String)): - argument = stream.next() - else: + arguments = [] + while 1: + stream.skip_whitespace() + next = stream.next() + if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [ + ('DELIM', '+'), ('DELIM', '-')]: + arguments.append(next) + elif next == ('DELIM', ')'): + break + else: + raise SelectorSyntaxError( + "Expected an argument, got %s" % (next,)) + if not arguments: raise SelectorSyntaxError( - "Expected argument, got '%s'" % peek) - result = Function(result, ident, argument) - stream.skip_whitespace() - next = stream.next() - if next == ')': - continue - else: - raise SelectorSyntaxError( - "Expected ')', got '%s'" % next) + "Expected at least one argument, got %s" % (next,)) + result = Function(result, ident, arguments) else: raise SelectorSyntaxError( - "Expected selector, got '%s'" % peek) - if consumed == len(stream.used): + "Expected selector, got %s" % (peek,)) + if len(stream.used) == selector_start: raise SelectorSyntaxError( - "Expected selector, got '%s'" % stream.peek()) + "Expected selector, got %s" % (stream.peek(),)) return result, pseudo_element def parse_attrib(selector, stream): stream.skip_whitespace() - attrib = stream.next_symbol_or_star() - if attrib == '*' and 
stream.peek() != '|': + attrib = stream.next_ident_or_star() + if attrib is None and stream.peek() != ('DELIM', '|'): raise SelectorSyntaxError( - "Expected '|', got '%s'" % stream.peek()) - if stream.peek() == '|': - namespace = attrib + "Expected '|', got %s" % (stream.peek(),)) + if stream.peek() == ('DELIM', '|'): stream.next() - attrib = stream.next_symbol() + if stream.peek() == ('DELIM', '='): + namespace = None + stream.next() + op = '|=' + else: + namespace = attrib + attrib = stream.next_ident() + op = None else: - namespace = '*' - stream.skip_whitespace() - if stream.peek() == ']': - return Attrib(selector, namespace, attrib, 'exists', None) - op = stream.next() - if not op in ('^=', '$=', '*=', '=', '~=', '|=', '!='): - raise SelectorSyntaxError( - "Operator expected, got '%s'" % op) + namespace = op = None + if op is None: + stream.skip_whitespace() + next = stream.next() + if next == ('DELIM', ']'): + return Attrib(selector, namespace, attrib, 'exists', None) + elif next == ('DELIM', '='): + op = '=' + elif next.is_delim('^', '$', '*', '~', '|', '!') and ( + stream.peek() == ('DELIM', '=')): + op = next.value + '=' + stream.next() + else: + raise SelectorSyntaxError( + "Operator expected, got %s" % (next,)) stream.skip_whitespace() value = stream.next() - if not isinstance(value, (Symbol, String)): + if value.type not in ('IDENT', 'STRING'): raise SelectorSyntaxError( - "Expected string or symbol, got '%s'" % value) + "Expected string or ident, got %s" % (value,)) stream.skip_whitespace() - return Attrib(selector, namespace, attrib, op, value) + next = stream.next() + if next != ('DELIM', ']'): + raise SelectorSyntaxError( + "Expected ']', got %s" % (next,)) + return Attrib(selector, namespace, attrib, op, value.value) + + +def parse_series_from_tokens(tokens): + for token in tokens: + if token.type == 'STRING': + raise ValueError('String tokens not allowed in series.') + return parse_series(''.join(token.value for token in tokens)) def 
parse_series(s): @@ -465,6 +502,7 @@ def parse_series(s): :returns: :``(a, b)`` """ + s = s.strip() if s == 'odd': return (2, 1) elif s == 'even': @@ -490,139 +528,136 @@ def parse_series(s): #### Token objects -class _UniToken(_unicode): - def __new__(cls, contents, pos): - obj = _unicode.__new__(cls, contents) +class Token(tuple): + def __new__(cls, type_, value, pos): + obj = tuple.__new__(cls, (type_, value)) obj.pos = pos return obj def __repr__(self): - return '%s(%s, %r)' % ( - self.__class__.__name__, - _unicode.__repr__(self), - self.pos) + return '<%s %r at %i>' % (self.type, self.value, self.pos) -class Symbol(_UniToken): - pass + def is_delim(self, *values): + return self.type == 'DELIM' and self.value in values -class String(_UniToken): - pass + type = property(operator.itemgetter(0)) + value = property(operator.itemgetter(1)) -class Token(_UniToken): - pass + +class EOFToken(Token): + def __new__(cls, pos): + return Token.__new__(cls, 'EOF', None, pos) + + def __repr__(self): + return '<%s at %i>' % (self.type, self.pos) #### Tokenizer -_match_whitespace = re.compile(r'\s+', re.UNICODE).match -_replace_comments = re.compile(r'/\*.*?\*/', re.DOTALL).sub +class TokenMacros: + unicode_escape = r'\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?' 
+ escape = unicode_escape + r'|\\[^\n\r\f0-9a-f]' + string_escape = r'\\(?:\n|\r\n|\r|\f)|' + escape + nonascii = r'[^\0-\177]' + nmchar = '[_a-z0-9-]|%s|%s' % (escape, nonascii) + nmstart = '[_a-z]|%s|%s' % (escape, nonascii) + +def _compile(pattern): + return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match + +_match_whitespace = _compile(r'[ \t\r\n\f]+') +_match_number = _compile('[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)') +_match_hash = _compile('#(?:%(nmchar)s)+') +_match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*') +_match_string_by_quote = { + "'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"), + '"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'), +} + +_sub_simple_escape = re.compile(r'\\(.)').sub +_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub +_sub_newline_escape =re.compile(r'\\(?:\n|\r\n|\r|\f)').sub + +# Same as r'\1', but faster on CPython +_replace_simple = operator.methodcaller('group', 1) + +def _replace_unicode(match): + codepoint = int(match.group(1), 16) + if codepoint > sys.maxunicode: + codepoint = 0xFFFD + return _unichr(codepoint) + + +def unescape_ident(value): + value = _sub_unicode_escape(_replace_unicode, value) + value = _sub_simple_escape(_replace_simple, value) + return value -_match_count_number = re.compile(r'[+-]?\d*n(?:[+-]\d+)?').match def tokenize(s): pos = 0 - s = _replace_comments('', s) len_s = len(s) while pos < len_s: match = _match_whitespace(s, pos=pos) if match: - yield Token(' ', pos) + yield Token('S', ' ', pos) pos = match.end() continue - match = _match_count_number(s, pos=pos) - if match and match.group() != 'n': - sym = s[pos:match.end()] - yield Symbol(sym, pos) + + match = _match_ident(s, pos=pos) + if match: + value = _sub_simple_escape(_replace_simple, + _sub_unicode_escape(_replace_unicode, match.group())) + yield Token('IDENT', value, pos) pos = match.end() continue - c = s[pos] - c2 = s[pos:pos+2] - if c2 in ('~=', '|=', '^=', '$=', '*=', '::', '!='): - yield Token(c2, 
pos) - pos += 2 - continue - if c in '>+~,.*=[]()|:#': - yield Token(c, pos) - pos += 1 - continue - if c == '"' or c == "'": - # Quoted string - old_pos = pos - sym, pos = tokenize_escaped_string(s, pos) - yield String(sym, old_pos) - continue - old_pos = pos - sym, pos = tokenize_symbol(s, pos) - yield Symbol(sym, old_pos) - continue -split_at_string_escapes = re.compile(r'(\\(?:%s))' - % '|'.join(['[A-Fa-f0-9]{1,6}(?:\r\n|\s)?', - '[^A-Fa-f0-9]'])).split + match = _match_hash(s, pos=pos) + if match: + value = _sub_simple_escape(_replace_simple, + _sub_unicode_escape(_replace_unicode, match.group()[1:])) + yield Token('HASH', value, pos) + pos = match.end() + continue + quote = s[pos] + if quote in _match_string_by_quote: + match = _match_string_by_quote[quote](s, pos=pos + 1) + assert match, 'Should have found at least an empty match' + end_pos = match.end() + if end_pos == len_s: + raise SelectorSyntaxError('Unclosed string at %s' % pos) + if s[end_pos] != quote: + raise SelectorSyntaxError('Invalid string at %s' % next_pos) + value = _sub_simple_escape(_replace_simple, + _sub_unicode_escape(_replace_unicode, + _sub_newline_escape('', match.group()))) + yield Token('STRING', value, pos) + pos = end_pos + 1 + continue -def unescape_string_literal(literal): - substrings = [] - for substring in split_at_string_escapes(literal): - if not substring: + match = _match_number(s, pos=pos) + if match: + value = match.group() + yield Token('NUMBER', value, pos) + pos = match.end() continue - elif '\\' in substring: - if substring[0] == '\\' and len(substring) > 1: - substring = substring[1:] - if substring[0] in '0123456789ABCDEFabcdef': - # int() correctly ignores the potentially trailing whitespace - substring = _unichr(int(substring, 16)) + + pos2 = pos + 2 + if s[pos:pos2] == '/*': + pos = s.find('*/', pos2) + if pos == -1: + pos = len_s else: - raise SelectorSyntaxError( - "Invalid escape sequence %r in string %r" - % (substring.split('\\')[1], literal)) - 
substrings.append(substring) - return ''.join(substrings) - - -def tokenize_escaped_string(s, pos): - quote = s[pos] - assert quote in ('"', "'") - pos = pos+1 - start = pos - while 1: - next = s.find(quote, pos) - if next == -1: - raise SelectorSyntaxError( - "Expected closing %s for string in: %r" - % (quote, s[start:])) - result = s[start:next] - if result.endswith('\\'): - # next quote character is escaped - pos = next+1 + pos += 2 continue - if '\\' in result: - result = unescape_string_literal(result) - return result, next+1 - -_illegal_symbol = re.compile(r'[^\w\\-]', re.UNICODE) + yield Token('DELIM', s[pos], pos) + pos += 1 -def tokenize_symbol(s, pos): - start = pos - match = _illegal_symbol.search(s, pos=pos) - if match: - if match.start() == pos: - raise SelectorSyntaxError( - "Unexpected symbol: %r" % s[pos]) - result = s[start:match.start()] - pos = match.start() - else: - result = s[start:] - pos = len(s) - try: - result = result.encode('ASCII', 'backslashreplace').decode('unicode_escape') - except UnicodeDecodeError: - e = sys.exc_info()[1] - raise SelectorSyntaxError( - "Bad symbol %r: %s" % (result, e)) - return result, pos + assert pos == len_s + yield EOFToken(pos) class TokenStream(object): @@ -644,36 +679,33 @@ def next(self): self.used.append(self.peeked) return self.peeked else: - try: - next = self.next_token() - self.used.append(next) - return next - except StopIteration: - return None + next = self.next_token() + self.used.append(next) + return next def peek(self): if not self._peeking: - try: - self.peeked = self.next_token() - except StopIteration: - return None + self.peeked = self.next_token() self._peeking = True return self.peeked - def next_symbol(self): + def next_ident(self): next = self.next() - if not isinstance(next, Symbol): - raise SelectorSyntaxError( - "Expected symbol, got '%s'" % next) - return next + if next.type != 'IDENT': + raise SelectorSyntaxError('Expected ident, got %s' % (next,)) + return next.value - def 
next_symbol_or_star(self): + def next_ident_or_star(self): next = self.next() - if next != '*' and not isinstance(next, Symbol): + if next.type == 'IDENT': + return next.value + elif next == ('DELIM', '*'): + return None + else: raise SelectorSyntaxError( - "Expected symbol or '*', got '%s'" % next) - return next + "Expected ident or '*', got %s" % (next,)) def skip_whitespace(self): - if self.peek() == ' ': + peek = self.peek() + if peek.type == 'S': self.next() diff --git a/cssselect/tests.py b/cssselect/tests.py index fea4b67..ff0effa 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -26,21 +26,42 @@ from cssselect.parser import tokenize, parse_series, _unicode +if sys.version_info[0] < 3: + # Python 2 + def u(text): + return text.decode('utf8') +else: + # Python 3 + def u(text): + return text + + class TestCssselect(unittest.TestCase): def test_tokenizer(self): - tokens = [repr(item).replace("u'", "'") - for item in tokenize('E > f[a~="y\\"x"]')] + tokens = [ + repr(item).replace("u'", "'") # Py 2/3 + for item in tokenize( + u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)'))] assert tokens == [ - "Symbol('E', 0)", - "Token(' ', 1)", - "Token('>', 2)", - "Token(' ', 3)", - "Symbol('f', 4)", - "Token('[', 5)", - "Symbol('a', 6)", - "Token('~=', 7)", - "String('y\"x', 9)", - "Token(']', 15)"] + "", + "", + "' at 5>", + "", + # the no-break space is not whitespace in CSS + r"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + ] def test_parser(self): def repr_parse(css): @@ -98,28 +119,26 @@ def parse_many(first, *others): 'Attrib[Element[a][name]]'] assert parse_many('a [name]') == [ 'CombinedSelector[Element[a] Attrib[Element[*][name]]]'] - assert parse_many('a[rel="include"]') == [ - "Attrib[Element[a][rel = String('include', 6)]]"] - assert parse_many('a[rel = include]') == [ - "Attrib[Element[a][rel = Symbol('include', 8)]]"] - assert parse_many("a[hreflang |= 'en']") == [ - "Attrib[Element[a][hreflang |= String('en', 14)]]"] + 
assert parse_many('a[rel="include"]', 'a[rel = include]') == [ + "Attrib[Element[a][rel = 'include']]"] + assert parse_many("a[hreflang |= 'en']", "a[hreflang|=en]") == [ + "Attrib[Element[a][hreflang |= 'en']]"] assert parse_many('div:nth-child(10)') == [ - "Function[Element[div]:nth-child(Symbol('10', 14))]"] + "Function[Element[div]:nth-child(['10'])]"] assert parse_many(':nth-child(2n+2)') == [ - "Function[Element[*]:nth-child(Symbol('2n+2', 11))]"] + "Function[Element[*]:nth-child(['2', 'n', '+2'])]"] assert parse_many('div:nth-of-type(10)') == [ - "Function[Element[div]:nth-of-type(Symbol('10', 16))]"] + "Function[Element[div]:nth-of-type(['10'])]"] assert parse_many('div div:nth-of-type(10) .aclass') == [ 'CombinedSelector[CombinedSelector[Element[div] ' - "Function[Element[div]:nth-of-type(Symbol('10', 20))]] " + "Function[Element[div]:nth-of-type(['10'])]] " ' Class[Element[*].aclass]]'] assert parse_many('label:only') == [ 'Pseudo[Element[label]:only]'] assert parse_many('a:lang(fr)') == [ - "Function[Element[a]:lang(Symbol('fr', 7))]"] + "Function[Element[a]:lang(['fr'])]"] assert parse_many('div:contains("foo")') == [ - "Function[Element[div]:contains(String('foo', 13))]"] + "Function[Element[div]:contains(['foo'])]"] assert parse_many('div#foobar') == [ 'Hash[Element[div]#foobar]'] assert parse_many('div:not(div.foo)') == [ @@ -213,99 +232,65 @@ def get_error(css): return str(sys.exc_info()[1]).replace("(u'", "('") assert get_error('attributes(href)/html/body/a') == ( - "Expected selector, got '(' at " - "[Symbol('attributes', 0)] -> Token('(', 10)") + "Expected selector, got ") assert get_error('attributes(href)') == ( - "Expected selector, got '(' at " - "[Symbol('attributes', 0)] -> Token('(', 10)") + "Expected selector, got ") assert get_error('html/body/a') == ( - "Unexpected symbol: '/' at [Symbol('html', 0)] -> None") + "Expected selector, got ") assert get_error(' ') == ( - "Expected selector, got 'None' at [Token(' ', 0)] -> None") + "Expected 
selector, got ") assert get_error('div, ') == ( - "Expected selector, got 'None' at " - "[Symbol('div', 0), Token(',', 3), Token(' ', 4)] -> None") + "Expected selector, got ") assert get_error(' , div') == ( - "Expected selector, got ',' at " - "[Token(' ', 0)] -> Token(',', 1)") + "Expected selector, got ") assert get_error('p, , div') == ( - "Expected selector, got ',' at " - "[Symbol('p', 0), Token(',', 1), Token(' ', 2)] -> Token(',', 3)") + "Expected selector, got ") assert get_error('div > ') == ( - "Expected selector, got 'None' at " - "[Symbol('div', 0), Token(' ', 3), Token('>', 4), Token(' ', 5)]" - " -> None") + "Expected selector, got ") assert get_error(' > div') == ( - "Expected selector, got '>' at [Token(' ', 0)] -> Token('>', 2)") + "Expected selector, got ' at 2>") assert get_error('foo|#bar') == ( - "Expected symbol or '*', got '#' at " - "[Symbol('foo', 0), Token('|', 3), " - "Token('#', 4)] -> Symbol('bar', 5)") + "Expected ident or '*', got ") assert get_error('#.foo') == ( - "Expected symbol, got '.' 
at " - "[Token('#', 0), Token('.', 1)] -> Symbol('foo', 2)") + "Expected selector, got ") assert get_error('.#foo') == ( - "Expected symbol, got '#' at " - "[Token('.', 0), Token('#', 1)] -> Symbol('foo', 2)") + "Expected ident, got ") assert get_error(':#foo') == ( - "Expected symbol, got '#' at " - "[Token(':', 0), Token('#', 1)] -> Symbol('foo', 2)") + "Expected ident, got ") assert get_error('[*]') == ( - "Expected '|', got ']' at " - "[Token('[', 0), Token('*', 1)] -> Token(']', 2)") + "Expected '|', got ") assert get_error('[foo|]') == ( - "Expected symbol, got ']' at " - "[Token('[', 0), Symbol('foo', 1), Token('|', 4), Token(']', 5)]" - " -> None") + "Expected ident, got ") assert get_error('[#]') == ( - "Expected symbol or '*', got '#' at " - "[Token('[', 0), Token('#', 1)] -> Token(']', 2)") + "Expected ident or '*', got ") assert get_error('[foo=#]') == ( - "Expected string or symbol, got '#' at " - "[Token('[', 0), Symbol('foo', 1), Token('=', 4), Token('#', 5)]" - " -> Token(']', 6)") + "Expected string or ident, got ") assert get_error(':nth-child()') == ( - "Expected argument, got ')' at " - "[Token(':', 0), Symbol('nth-child', 1), Token('(', 10)]" - " -> Token(')', 11)") + "Expected at least one argument, got ") assert get_error('[href]a') == ( - "Expected selector, got 'a' at " - "[Token('[', 0), Symbol('href', 1), Token(']', 5)]" - " -> Symbol('a', 6)") + "Expected selector, got ") assert get_error('[rel=stylesheet]') == None assert get_error('[rel:stylesheet]') == ( - "Operator expected, got ':' at [Token('[', 0), Symbol('rel', 1), " - "Token(':', 4)] -> Symbol('stylesheet', 5)") + "Operator expected, got ") assert get_error('[rel=stylesheet') == ( - "Expected ']', got 'None' at [Token('[', 0), Symbol('rel', 1), " - "Token('=', 4), Symbol('stylesheet', 5)] -> None") + "Expected ']', got ") assert get_error(':lang(fr)') == None assert get_error(':lang(fr') == ( - "Expected ')', got 'None' at [Token(':', 0), Symbol('lang', 1), " - "Token('(', 5), 
Symbol('fr', 6)] -> None") + "Expected an argument, got ") assert get_error(':contains("foo') == ( - "Expected closing \" for string in: 'foo' at " - "[Token(':', 0), Symbol('contains', 1), Token('(', 9)] -> None") + "Unclosed string at 10") assert get_error('foo!') == ( - "Unexpected symbol: '!' at [Symbol('foo', 0)] -> None") + "Expected selector, got ") # Mis-placed pseudo-elements assert get_error('a:before:empty') == ( - "A pseudo-element must be at the end of a selector at " - "[Symbol('a', 0), Token(':', 1), Symbol('before', 2)] " - "-> Token(':', 8)") + "Got pseudo-element ::before not at the end of a selector") assert get_error('li:before a') == ( - "A pseudo-element must be at the end of a selector at " - "[Symbol('li', 0), Token(':', 2), Symbol('before', 3), " - "Token(' ', 9)] -> Symbol('a', 10)") + "Got pseudo-element ::before not at the end of a selector") assert get_error(':not(:before)') == ( - "Pseudo-elements are not allowed inside :not() at " - "[Token(':', 0), Symbol('not', 1), Token('(', 4), Token(':', 5)," - " Symbol('before', 6)] -> Token(')', 12)") + "Got pseudo-element ::before inside :not() at 12") assert get_error(':not(:not(a))') == ( - "Got nested :not() at [Token(':', 0), Symbol('not', 1), " - "Token('(', 4), Token(':', 5), Symbol('not', 6), Token('(', 9)]" - " -> Symbol('a', 10)") + "Got nested :not()") def test_translation(self): def xpath(css): diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 53ba40b..2c11fc5 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -15,7 +15,7 @@ import sys import re -from cssselect.parser import parse, parse_series, SelectorError +from cssselect.parser import parse, parse_series_from_tokens, SelectorError if sys.version_info[0] < 3: @@ -250,10 +250,10 @@ def xpath_attrib(self, selector): name = selector.attrib.lower() else: name = selector.attrib - if selector.namespace == '*': - name = '@' + name - else: + if selector.namespace: name = '@%s:%s' % (selector.namespace, name) + else: + 
name = '@' + name if self.lower_case_attribute_values: value = selector.value.lower() else: @@ -274,11 +274,12 @@ def xpath_hash(self, id_selector): def xpath_element(self, selector): """Translate a type or universal selector.""" - if self.lower_case_element_names: - element = selector.element.lower() - else: - element = selector.element - if selector.namespace != '*': + element = selector.element + if not element: + element = '*' + elif self.lower_case_element_names: + element = element.lower() + if selector.namespace: # Namespace prefixes are case-sensitive. # http://www.w3.org/TR/css3-namespace/#prefixes element = '%s:%s' % (selector.namespace, element) @@ -311,7 +312,7 @@ def xpath_indirect_adjacent_combinator(self, left, right): def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=True): try: - a, b = parse_series(function.arguments) + a, b = parse_series_from_tokens(function.arguments) except ValueError: raise ExpressionError("Invalid series: '%r'" % function.arguments) if add_name_test: @@ -367,18 +368,29 @@ def xpath_nth_last_of_type_function(self, xpath, function): add_name_test=False) def xpath_contains_function(self, xpath, function): - return xpath.add_condition('contains(string(.), %s)' - % self.xpath_literal(function.arguments)) + # Defined there, removed in later drafts: + # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors + if function.argument_types() not in (['STRING'], ['IDENT']): + raise ExpressionError( + "Expected a single string or ident for :contains(), got %r" + % function.arguments) + value = function.arguments[0].value + return xpath.add_condition( + 'contains(string(.), %s)' % self.xpath_literal(value)) def xpath_lang_function(self, xpath, function): + if function.argument_types() not in (['STRING'], ['IDENT']): + raise ExpressionError( + "Expected a single string or ident for :lang(), got %r" + % function.arguments) + value = function.arguments[0].value return xpath.add_condition( 
"ancestor-or-self::*[@lang][1][starts-with(concat(" # XPath 1.0 has no lower-case function... "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " "'abcdefghijklmnopqrstuvwxyz'), " "'-'), %s)]" - % (self.lang_attribute, self.xpath_literal( - function.arguments.lower() + '-'))) + % (self.lang_attribute, self.xpath_literal(value.lower() + '-'))) # Pseudo: dispatch by pseudo-class name From c221b7bdc5328368279732ad5e82736639a1c066 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 14 Jun 2012 10:37:54 +0200 Subject: [PATCH 021/208] Workaround element/attribute names with special characters For element names, these are equivalent in XPath: foo *[name() = "foo"] And for attribute names: @foo attribute:*[name() = "foo"] The former is faster but some characters are not allowed in it. Since I am not sure which characters, only use it for "safe" names that match ^[a-zA-Z_][a-zA-Z0-9_.-]*$ This is overly restrictive, but should cover every name actually used in XML, HTML, SVG, etc. --- cssselect/tests.py | 14 ++++++++++++++ cssselect/xpath.py | 32 +++++++++++++++++++++++++------- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/cssselect/tests.py b/cssselect/tests.py index ff0effa..ab3c558 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -373,6 +373,17 @@ def xpath(css): "e/following-sibling::f") assert xpath('div#container p') == ( "div[@id = 'container']/descendant-or-self::*/p") + + # Invalid characters in XPath element names + assert xpath(r'di\a0 v') == ( + "*[name() = 'di\xa0v']") + assert xpath(r'di\[v') == ( + "*[name() = 'di[v']") + assert xpath(r'[h\a0 ref]') == ( + "*[attribute::*[name() = 'h\xa0ref']]") + assert xpath(r'[h\]ref]') == ( + "*[attribute::*[name() = 'h]ref']]") + self.assertRaises(ExpressionError, xpath, ':first-of-type') self.assertRaises(ExpressionError, xpath, ':only-of-type') self.assertRaises(ExpressionError, xpath, ':last-of-type') @@ -551,6 +562,9 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol :Not(li[class])') 
== [ 'first-li', 'second-li', 'li-div', 'fifth-li', 'sixth-li', 'seventh-li'] + # Invalid characters in XPath element names, should not crash + assert pcss(r'di\a0 v', r'div\[') == [] + assert pcss(r'[h\a0 ref]', r'[h\]ref]') == [] # HTML-specific assert pcss(':link', html_only=True) == [ diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 2c11fc5..d479510 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -83,6 +83,11 @@ def join(self, combiner, other): split_at_single_quotes = re.compile("('+)").split +# The spec is actually more permissive than that, but don’t bother. +# This is just for the fast path. +# http://www.w3.org/TR/REC-xml/#NT-NameStartChar +is_safe_name = re.compile('^[a-zA-Z_][a-zA-Z0-9_.-]*$').match + #### Translation @@ -181,7 +186,9 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::'): tree = getattr(selector, 'parsed_tree', None) if not tree: raise TypeError('Expected a parsed selector, got %r' % (selector,)) - return (prefix or '') + _unicode(self.xpath(tree)) + xpath = self.xpath(tree) + assert isinstance(xpath, XPathExpr) # help debug a missing 'return' + return (prefix or '') + _unicode(xpath) @staticmethod def xpath_literal(s): @@ -250,15 +257,19 @@ def xpath_attrib(self, selector): name = selector.attrib.lower() else: name = selector.attrib + safe = is_safe_name(name) if selector.namespace: - name = '@%s:%s' % (selector.namespace, name) + name = '%s:%s' % (selector.namespace, name) + safe = safe and is_safe_name(selector.namespace) + if safe: + attrib = '@' + name else: - name = '@' + name + attrib = 'attribute::*[name() = %s]' % self.xpath_literal(name) if self.lower_case_attribute_values: value = selector.value.lower() else: value = selector.value - return method(self.xpath(selector.selector), name, value) + return method(self.xpath(selector.selector), attrib, value) def xpath_class(self, class_selector): """Translate a class selector.""" @@ -277,13 +288,20 @@ def xpath_element(self, selector): element 
= selector.element if not element: element = '*' - elif self.lower_case_element_names: - element = element.lower() + safe = True + else: + safe = is_safe_name(element) + if self.lower_case_element_names: + element = element.lower() if selector.namespace: # Namespace prefixes are case-sensitive. # http://www.w3.org/TR/css3-namespace/#prefixes element = '%s:%s' % (selector.namespace, element) - return XPathExpr(element=element) + safe = safe and is_safe_name(selector.namespace) + xpath = XPathExpr(element=element) + if not safe: + xpath.add_name_test() + return xpath # CombinedSelector: dispatch by combinator From d405f8930b76d39e8f70f6394575f889343b5477 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 14 Jun 2012 12:22:10 +0200 Subject: [PATCH 022/208] Add tests for series with whitespace Together with the previous 2 commits, this fixes #2 and #7 --- cssselect/parser.py | 18 +++++++----------- cssselect/tests.py | 37 ++++++++++++++++++++++++++----------- cssselect/xpath.py | 4 ++-- 3 files changed, 35 insertions(+), 24 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 9ccd66f..a7d9889 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -487,22 +487,18 @@ def parse_attrib(selector, stream): return Attrib(selector, namespace, attrib, op, value.value) -def parse_series_from_tokens(tokens): - for token in tokens: - if token.type == 'STRING': - raise ValueError('String tokens not allowed in series.') - return parse_series(''.join(token.value for token in tokens)) - - -def parse_series(s): +def parse_series(tokens): """ - Parses things like '1n+2', or 'an+b' generally + Parses the arguments for :nth-child() and friends. 
- :raises: :class:`ValueError` + :raises: A list of tokens :returns: :``(a, b)`` """ - s = s.strip() + for token in tokens: + if token.type == 'STRING': + raise ValueError('String tokens not allowed in series.') + s = ''.join(token.value for token in tokens).strip() if s == 'odd': return (2, 1) elif s == 'even': diff --git a/cssselect/tests.py b/cssselect/tests.py index ab3c558..796537b 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -435,17 +435,32 @@ def test_unicode_escapes(self): '''descendant-or-self::*[@aval = "' '"]''') def test_series(self): - assert parse_series('1n+3') == (1, 3) - assert parse_series('n-5') == (1, -5) - assert parse_series('odd') == (2, 1) - assert parse_series('even') == (2, 0) - assert parse_series('3n') == (3, 0) - assert parse_series('n') == (1, 0) - assert parse_series('+n') == (1, 0) - assert parse_series('-n') == (-1, 0) - assert parse_series('5') == (0, 5) - self.assertRaises(ValueError, parse_series, 'foo') - self.assertRaises(ValueError, parse_series, 'n+') + def series(css): + selector, = parse(':nth-child(%s)' % css) + args = selector.parsed_tree.arguments + try: + return parse_series(args) + except ValueError: + return None + + assert series('1n+3') == (1, 3) + assert series('1n +3') == (1, 3) + assert series('1n + 3') == (1, 3) + assert series('1n+ 3') == (1, 3) + assert series('1n-3') == (1, -3) + assert series('1n -3') == (1, -3) + assert series('1n - 3') == (1, -3) + assert series('1n- 3') == (1, -3) + assert series('n-5') == (1, -5) + assert series('odd') == (2, 1) + assert series('even') == (2, 0) + assert series('3n') == (3, 0) + assert series('n') == (1, 0) + assert series('+n') == (1, 0) + assert series('-n') == (-1, 0) + assert series('5') == (0, 5) + assert series('foo') == None + assert series('n+') == None def test_select(self): document = etree.fromstring(HTML_IDS) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index d479510..bc42077 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ 
-15,7 +15,7 @@ import sys import re -from cssselect.parser import parse, parse_series_from_tokens, SelectorError +from cssselect.parser import parse, parse_series, SelectorError if sys.version_info[0] < 3: @@ -330,7 +330,7 @@ def xpath_indirect_adjacent_combinator(self, left, right): def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=True): try: - a, b = parse_series_from_tokens(function.arguments) + a, b = parse_series(function.arguments) except ValueError: raise ExpressionError("Invalid series: '%r'" % function.arguments) if add_name_test: From e48ecc642d48cf545a7169ec20b211cdc105694e Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 14 Jun 2012 16:52:19 +0200 Subject: [PATCH 023/208] Fix #10: '~=', '^=' and '*=' attribute operators with an empty string --- cssselect/tests.py | 5 +++++ cssselect/xpath.py | 42 ++++++++++++++++++++++++++++++------------ 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/cssselect/tests.py b/cssselect/tests.py index 796537b..42312f9 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -502,11 +502,16 @@ def pcss(main, *selectors, **kwargs): assert pcss('a[rel]') == ['tag-anchor', 'nofollow-anchor'] assert pcss('a[rel="tag"]') == ['tag-anchor'] assert pcss('a[href*="localhost"]') == ['tag-anchor'] + assert pcss('a[href*=""]') == [] assert pcss('a[href^="http"]') == ['tag-anchor', 'nofollow-anchor'] assert pcss('a[href^="http:"]') == ['tag-anchor'] + assert pcss('a[href^=""]') == [] assert pcss('a[href$="org"]') == ['nofollow-anchor'] + assert pcss('a[href$=""]') == [] assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == [ 'foobar-div'] + assert pcss('[foobar~="ab bc"]', + '[foobar~=""]', '[foobar~=" \t"]') == [] assert pcss('div[foobar~="cd"]') == [] assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ['second-li'] # Attribute values are case sensitive diff --git a/cssselect/xpath.py b/cssselect/xpath.py index bc42077..efdc1e1 100644 --- a/cssselect/xpath.py +++ 
b/cssselect/xpath.py @@ -88,6 +88,9 @@ def join(self, combiner, other): # http://www.w3.org/TR/REC-xml/#NT-NameStartChar is_safe_name = re.compile('^[a-zA-Z_][a-zA-Z0-9_.-]*$').match +# Test that the string is not empty and does not contain whitespace +is_non_whitespace = re.compile(r'^[^ \t\r\n\f]+$').match + #### Translation @@ -490,9 +493,12 @@ def xpath_attrib_different(self, xpath, name, value): return xpath def xpath_attrib_includes(self, xpath, name, value): - xpath.add_condition( - "%s and contains(concat(' ', normalize-space(%s), ' '), %s)" - % (name, name, self.xpath_literal(' '+value+' '))) + if is_non_whitespace(value): + xpath.add_condition( + "%s and contains(concat(' ', normalize-space(%s), ' '), %s)" + % (name, name, self.xpath_literal(' '+value+' '))) + else: + xpath.add_condition('0') return xpath def xpath_attrib_dashmatch(self, xpath, name, value): @@ -504,19 +510,31 @@ def xpath_attrib_dashmatch(self, xpath, name, value): return xpath def xpath_attrib_prefixmatch(self, xpath, name, value): - return xpath.add_condition('%s and starts-with(%s, %s)' % ( - name, name, self.xpath_literal(value))) + if value: + xpath.add_condition('%s and starts-with(%s, %s)' % ( + name, name, self.xpath_literal(value))) + else: + xpath.add_condition('0') + return xpath def xpath_attrib_suffixmatch(self, xpath, name, value): - # Oddly there is a starts-with in XPath 1.0, but not ends-with - return xpath.add_condition( - '%s and substring(%s, string-length(%s)-%s) = %s' - % (name, name, name, len(value)-1, self.xpath_literal(value))) + if value: + # Oddly there is a starts-with in XPath 1.0, but not ends-with + xpath.add_condition( + '%s and substring(%s, string-length(%s)-%s) = %s' + % (name, name, name, len(value)-1, self.xpath_literal(value))) + else: + xpath.add_condition('0') + return xpath def xpath_attrib_substringmatch(self, xpath, name, value): - # Attribute selectors are case sensitive - return xpath.add_condition('%s and contains(%s, %s)' % ( - name, name, 
self.xpath_literal(value))) + if value: + # Attribute selectors are case sensitive + xpath.add_condition('%s and contains(%s, %s)' % ( + name, name, self.xpath_literal(value))) + else: + xpath.add_condition('0') + return xpath class HTMLTranslator(GenericTranslator): From 13023edb0d2dac9fa3bca2db54877b5fab57862b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 14 Jun 2012 17:00:15 +0200 Subject: [PATCH 024/208] Changelog for 0.7 --- CHANGES | 16 ++++++++++++++++ cssselect/__init__.py | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index c72c2ec..908787b 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,22 @@ Changelog ========= +Version 0.7 +----------- + +Released on 2012-06-14. + +Bug fix release: see #2, #7 and #10 on GitHub. + +* The tokenizer and parser have been rewritten to be much closer to the + specified grammar. In particular, non-ASCII characters and backslash-escapes + are now handled correctly. +* Special characters are protected in the output so that generated XPath + exrpessions should always be valid +* The ``~=``, ``^=`` and ``*=`` attribute operators now correctly never match + when used with an empty string. + + Version 0.6.1 ------------- diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 08b529e..2ea6409 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -18,5 +18,5 @@ from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '0.6.1' +VERSION = '0.7' __version__ = VERSION From c192fcb38d4a147b83cc46e32cfad4ba800ed180 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 14 Jun 2012 17:33:25 +0200 Subject: [PATCH 025/208] Make pseudo-elements lower-case in the ASCII range. See http://www.w3.org/TR/selectors/#casesens Pseudo-classes were already case-insensitive, but the lower-casing was moved to the parser. 
--- cssselect/parser.py | 14 +++++++++++--- cssselect/tests.py | 24 +++++++++++++----------- cssselect/xpath.py | 4 ++-- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index a7d9889..46c1c98 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -25,6 +25,11 @@ _unichr = chr +def ascii_lower(string): + """Lower-case, but only in the ASCII range.""" + return string.encode('utf8').lower().decode('utf8') + + class SelectorError(Exception): """Common parent for :class:`SelectorSyntaxError` and :class:`ExpressionError`. @@ -52,6 +57,8 @@ class Selector(object): """ def __init__(self, tree, pseudo_element=None): self.parsed_tree = tree + if pseudo_element is not None: + pseudo_element = ascii_lower(pseudo_element) #: The identifier for the pseudo-element as a string, or ``None``. #: #: +-------------------------+----------------+----------------+ @@ -114,7 +121,7 @@ class Function(object): """ def __init__(self, selector, name, arguments): self.selector = selector - self.name = name + self.name = ascii_lower(name) self.arguments = arguments def __repr__(self): @@ -137,7 +144,7 @@ class Pseudo(object): """ def __init__(self, selector, ident): self.selector = selector - self.ident = ident + self.ident = ascii_lower(ident) def __repr__(self): return '%s[%r:%s]' % ( @@ -393,7 +400,8 @@ def parse_simple_selector(stream, inside_negation=False): pseudo_element = stream.next_ident() continue ident = stream.next_ident() - if ident in ('first-line', 'first-letter', 'before', 'after'): + if ident.lower() in ('first-line', 'first-letter', + 'before', 'after'): # Special case: CSS 2.1 pseudo-elements can have a single ':' # Any new pseudo-element must have two. 
pseudo_element = _unicode(ident) diff --git a/cssselect/tests.py b/cssselect/tests.py index 42312f9..7170c2a 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -167,17 +167,17 @@ def parse_one(css): assert parse_one(':empty') == ('Pseudo[Element[*]:empty]', None) # Special cases for CSS 2.1 pseudo-elements - assert parse_one(':before') == ('Element[*]', 'before') - assert parse_one(':after') == ('Element[*]', 'after') - assert parse_one(':first-line') == ('Element[*]', 'first-line') - assert parse_one(':first-letter') == ('Element[*]', 'first-letter') + assert parse_one(':BEfore') == ('Element[*]', 'before') + assert parse_one(':aftER') == ('Element[*]', 'after') + assert parse_one(':First-Line') == ('Element[*]', 'first-line') + assert parse_one(':First-Letter') == ('Element[*]', 'first-letter') - assert parse_one('::before') == ('Element[*]', 'before') - assert parse_one('::after') == ('Element[*]', 'after') - assert parse_one('::first-line') == ('Element[*]', 'first-line') - assert parse_one('::first-letter') == ('Element[*]', 'first-letter') + assert parse_one('::befoRE') == ('Element[*]', 'before') + assert parse_one('::AFter') == ('Element[*]', 'after') + assert parse_one('::firsT-linE') == ('Element[*]', 'first-line') + assert parse_one('::firsT-letteR') == ('Element[*]', 'first-letter') - assert parse_one('::selection') == ('Element[*]', 'selection') + assert parse_one('::Selection') == ('Element[*]', 'selection') assert parse_one('foo:after') == ('Element[foo]', 'after') assert parse_one('foo::selection') == ('Element[foo]', 'selection') assert parse_one('lorem#ipsum ~ a#b.c[href]:empty::selection') == ( @@ -346,13 +346,15 @@ def xpath(css): "e[last() = 1]") assert xpath('e:empty') == ( "e[not(*) and not(normalize-space())]") + assert xpath('e:EmPTY') == ( + "e[not(*) and not(normalize-space())]") assert xpath('e:root') == ( "e[not(parent::*)]") assert xpath('e:hover') == ( "e[0]") # never matches assert xpath('e:contains("foo")') == ( 
"e[contains(string(.), 'foo')]") - assert xpath('e:contains(foo)') == ( + assert xpath('e:ConTains(foo)') == ( "e[contains(string(.), 'foo')]") assert xpath('e.warning') == ( "e[@class and contains(" @@ -361,7 +363,7 @@ def xpath(css): "e[@id = 'myid']") assert xpath('e:not(:nth-child(odd))') == ( "e[not((position() -1) mod 2 = 0 and position() >= 1)]") - assert xpath('e:not(*)') == ( + assert xpath('e:nOT(*)') == ( "e[0]") # never matches assert xpath('e f') == ( "e/descendant-or-self::*/f") diff --git a/cssselect/xpath.py b/cssselect/xpath.py index efdc1e1..8f77c86 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -234,7 +234,7 @@ def xpath_negation(self, negation): def xpath_function(self, function): """Translate a functional pseudo-class.""" - method = 'xpath_%s_function' % function.name.replace('-', '_').lower() + method = 'xpath_%s_function' % function.name.replace('-', '_') method = getattr(self, method, None) if not method: raise ExpressionError( @@ -243,7 +243,7 @@ def xpath_function(self, function): def xpath_pseudo(self, pseudo): """Translate a pseudo-class.""" - method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_').lower() + method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_') method = getattr(self, method, None) if not method: # TODO: better error message for pseudo-elements? 
From 12e04521b5615cb22e9fe5966b2e243c62e319eb Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 14 Jun 2012 23:33:56 +0200 Subject: [PATCH 026/208] Fix lack for operator.methodcaller in Python <2.6 --- cssselect/parser.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 46c1c98..67da69c 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -584,7 +584,12 @@ def _compile(pattern): _sub_newline_escape =re.compile(r'\\(?:\n|\r\n|\r|\f)').sub # Same as r'\1', but faster on CPython -_replace_simple = operator.methodcaller('group', 1) +if hasattr(operator, 'methodcaller'): + # Python 2.6+ + _replace_simple = operator.methodcaller('group', 1) +else: + def _replace_simple(match): + return match.group(1) def _replace_unicode(match): codepoint = int(match.group(1), 16) From 06da45755992e1ea80364f254c064e4a6e36af07 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 14 Jun 2012 23:34:41 +0200 Subject: [PATCH 027/208] Fix unicode in tests with Python 2.x --- cssselect/parser.py | 2 +- cssselect/tests.py | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 67da69c..f423f30 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -539,7 +539,7 @@ def __new__(cls, type_, value, pos): return obj def __repr__(self): - return '<%s %r at %i>' % (self.type, self.value, self.pos) + return "<%s '%s' at %i>" % (self.type, self.value, self.pos) def is_delim(self, *values): return self.type == 'DELIM' and self.value in values diff --git a/cssselect/tests.py b/cssselect/tests.py index 7170c2a..eaf165c 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -39,16 +39,15 @@ def u(text): class TestCssselect(unittest.TestCase): def test_tokenizer(self): tokens = [ - repr(item).replace("u'", "'") # Py 2/3 - for item in tokenize( + _unicode(item) for item in tokenize( u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)'))] assert 
tokens == [ - "", + u(""), "", "' at 5>", "", # the no-break space is not whitespace in CSS - r"", + u(""), # f\xa0 "", "", "", @@ -294,7 +293,7 @@ def get_error(css): def test_translation(self): def xpath(css): - return str(GenericTranslator().css_to_xpath(css, prefix='')) + return _unicode(GenericTranslator().css_to_xpath(css, prefix='')) assert xpath('*') == "*" assert xpath('e') == "e" @@ -378,11 +377,11 @@ def xpath(css): # Invalid characters in XPath element names assert xpath(r'di\a0 v') == ( - "*[name() = 'di\xa0v']") + u("*[name() = 'di v']")) # di\xa0v assert xpath(r'di\[v') == ( "*[name() = 'di[v']") assert xpath(r'[h\a0 ref]') == ( - "*[attribute::*[name() = 'h\xa0ref']]") + u("*[attribute::*[name() = 'h ref']]")) # h\xa0ref assert xpath(r'[h\]ref]') == ( "*[attribute::*[name() = 'h]ref']]") From 4f1fb912e808ca8838eaa6d7d51dbcdef23d651b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 14 Jun 2012 23:36:49 +0200 Subject: [PATCH 028/208] Changelog for 0.7.1 --- CHANGES | 12 ++++++++++++ cssselect/__init__.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 908787b..c278667 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,18 @@ Changelog ========= +Version 0.7.1 +------------- + +Released on 2012-06-14. Code name *remember-to-test-with-tox*. + +0.7 broke the parser in Python 2.4 and 2.5; the tests in 2.x. +Now all is well again. + +Also, pseudo-elements are now correctly made lower-case. (They are supposed +to be case-insensitive.) 
+ + Version 0.7 ----------- diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 2ea6409..f1c00b0 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -18,5 +18,5 @@ from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '0.7' +VERSION = '0.7.1' __version__ = VERSION From b0b462453c002c55dfd4cca7dd6ebbb53fc339e7 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 14 Jun 2012 23:40:40 +0200 Subject: [PATCH 029/208] Add a config file for Travis CI. --- .travis.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..487a6ee --- /dev/null +++ b/.travis.yml @@ -0,0 +1,13 @@ +language: python + +python: + - "2.5" + - "2.6" + - "2.7" + - "3.1" + - "3.2" + +install: + - pip install --use-mirrors lxml -e . + +script: py.test From 3e5abd8e33db470edc487a518a06f44478b7c9c1 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 29 Jun 2012 15:13:58 +0200 Subject: [PATCH 030/208] Do the right thing with non-ASCII pseudo-classes. Fix #14 Make sure that getattr() with a default does not raise a UnicodeError or a TypeError on either Py2 or 3. Instead, all non-ASCII pseudo-classes are invalid selectors (as a nonexistent pseudo-class should be.) 
--- cssselect/tests.py | 1 + cssselect/xpath.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/cssselect/tests.py b/cssselect/tests.py index eaf165c..8fce905 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -385,6 +385,7 @@ def xpath(css): assert xpath(r'[h\]ref]') == ( "*[attribute::*[name() = 'h]ref']]") + self.assertRaises(ExpressionError, xpath, u(':fİrst-child')) self.assertRaises(ExpressionError, xpath, ':first-of-type') self.assertRaises(ExpressionError, xpath, ':only-of-type') self.assertRaises(ExpressionError, xpath, ':last-of-type') diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 8f77c86..e48dc52 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -235,6 +235,8 @@ def xpath_negation(self, negation): def xpath_function(self, function): """Translate a functional pseudo-class.""" method = 'xpath_%s_function' % function.name.replace('-', '_') + # getattr() with a non-ASCII name fails on Python 2.x + method = method.encode('ascii', 'replace').decode('ascii') method = getattr(self, method, None) if not method: raise ExpressionError( @@ -244,6 +246,8 @@ def xpath_function(self, function): def xpath_pseudo(self, pseudo): """Translate a pseudo-class.""" method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_') + # getattr() with a non-ASCII name fails on Python 2.x + method = method.encode('ascii', 'replace').decode('ascii') method = getattr(self, method, None) if not method: # TODO: better error message for pseudo-elements? From 9846271380f1187221d5b404409a54e81a7e84bf Mon Sep 17 00:00:00 2001 From: Simon Potter Date: Thu, 15 Nov 2012 21:44:32 +1300 Subject: [PATCH 031/208] Use XPath 'lang()' in XML docs. 
--- cssselect/tests.py | 38 ++++++++++++++++++++++++++++++++++++++ cssselect/xpath.py | 21 +++++++++++++++------ 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/cssselect/tests.py b/cssselect/tests.py index 8fce905..c89e8ce 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -464,6 +464,30 @@ def series(css): assert series('foo') == None assert series('n+') == None + def test_lang(self): + document = etree.fromstring(XMLLANG_IDS) + sort_key = dict( + (el, count) for count, el in enumerate(document.getiterator()) + ).__getitem__ + css_to_xpath = GenericTranslator().css_to_xpath + + def langid(selector): + xpath = css_to_xpath(selector) + items = document.xpath(xpath) + items.sort(key=sort_key) + return [element.get('id', 'nil') for element in items] + + assert langid(':lang("EN")') == ['first', 'second', 'third', 'fourth'] + assert langid(':lang("en-us")') == ['second', 'fourth'] + assert langid(':lang(en-nz)') == ['third'] + assert langid(':lang(fr)') == ['fifth'] + assert langid(':lang(ru)') == ['sixth'] + assert langid(":lang('ZH')") == ['eighth'] + assert langid(':lang(de) :lang(zh)') == ['eighth'] + assert langid(':lang(en), :lang(zh)') == [ + 'first', 'second', 'third', 'fourth', 'eighth'] + assert langid(':lang(es)') == [] + def test_select(self): document = etree.fromstring(HTML_IDS) sort_key = dict( @@ -675,6 +699,20 @@ def count(selector): assert count('div[class!=madeup]') == 243 # ? Seems right assert count('div[class~=dialog]') == 51 # ? Seems right +XMLLANG_IDS = ''' + + a + b + c + d + e + f + + + + +''' + HTML_IDS = ''' diff --git a/cssselect/xpath.py b/cssselect/xpath.py index e48dc52..c67c0e6 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -410,12 +410,7 @@ def xpath_lang_function(self, xpath, function): % function.arguments) value = function.arguments[0].value return xpath.add_condition( - "ancestor-or-self::*[@lang][1][starts-with(concat(" - # XPath 1.0 has no lower-case function... 
- "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " - "'abcdefghijklmnopqrstuvwxyz'), " - "'-'), %s)]" - % (self.lang_attribute, self.xpath_literal(value.lower() + '-'))) + "lang(%s)" % (self.xpath_literal(value))) # Pseudo: dispatch by pseudo-class name @@ -575,6 +570,20 @@ def xpath_checked_pseudo(self, xpath): "and (name(.) = 'input' or name(.) = 'command')" "and (@type = 'checkbox' or @type = 'radio'))") + def xpath_lang_function(self, xpath, function): + if function.argument_types() not in (['STRING'], ['IDENT']): + raise ExpressionError( + "Expected a single string or ident for :lang(), got %r" + % function.arguments) + value = function.arguments[0].value + return xpath.add_condition( + "ancestor-or-self::*[@lang][1][starts-with(concat(" + # XPath 1.0 has no lower-case function... + "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " + "'abcdefghijklmnopqrstuvwxyz'), " + "'-'), %s)]" + % (self.lang_attribute, self.xpath_literal(value.lower() + '-'))) + def xpath_link_pseudo(self, xpath): return xpath.add_condition("@href and " "(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')") From 7712c2a124910bfb9126f64aad4f2f84e0ee6e58 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 15 Nov 2012 09:58:18 +0100 Subject: [PATCH 032/208] Add Simon Potter to authors. --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 8c69e8f..a4ae5f1 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,4 +1,5 @@ Ian Bicking Laurence Rowe +Simon Potter Simon Sapin Stefan Behnel From ac10a368052b2975384f65541d4473c46769b96e Mon Sep 17 00:00:00 2001 From: Simon Potter Date: Thu, 15 Nov 2012 23:07:06 +1300 Subject: [PATCH 033/208] Using string-length() to test for emptiness of text nodes. Whitespace is *not* empty. 
--- cssselect/tests.py | 6 +++--- cssselect/xpath.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cssselect/tests.py b/cssselect/tests.py index c89e8ce..4c64275 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -344,9 +344,9 @@ def xpath(css): assert xpath('e:only-of-type') == ( "e[last() = 1]") assert xpath('e:empty') == ( - "e[not(*) and not(normalize-space())]") + "e[not(*) and not(string-length())]") assert xpath('e:EmPTY') == ( - "e[not(*) and not(normalize-space())]") + "e[not(*) and not(string-length())]") assert xpath('e:root') == ( "e[not(parent::*)]") assert xpath('e:hover') == ( @@ -575,7 +575,7 @@ def pcss(main, *selectors, **kwargs): assert pcss('p:only-of-type') == ['paragraph'] assert pcss('a:empty', 'a:EMpty') == ['name-anchor'] assert pcss('li:empty') == [ - 'third-li', 'fourth-li', 'fifth-li', 'sixth-li', 'seventh-li'] + 'third-li', 'fourth-li', 'fifth-li', 'sixth-li'] assert pcss(':root', 'html:root') == ['html'] assert pcss('li:root', '* :root') == [] assert pcss('*:contains("link")', ':CONtains("link")') == [ diff --git a/cssselect/xpath.py b/cssselect/xpath.py index c67c0e6..a56b697 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -454,7 +454,7 @@ def xpath_only_of_type_pseudo(self, xpath): return xpath.add_condition('last() = 1') def xpath_empty_pseudo(self, xpath): - return xpath.add_condition("not(*) and not(normalize-space())") + return xpath.add_condition("not(*) and not(string-length())") def pseudo_never_matches(self, xpath): """Common implementation for pseudo-classes that never match.""" From 91e752d4994f3d95b89b850b96672f47418623d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gra=C3=B1a?= Date: Thu, 10 Jan 2013 14:27:46 -0200 Subject: [PATCH 034/208] Let extended translators override what XPathExpr class is used GenericTranslator offers an excellent way to support custom selectors through method hooks, and allows these hooks to return a *new* XPathExpr. 
The main problem is that returning extended `XPathExpr` instances fails for combiners, because `XPathExpr.join()` assumes a fixed set of XPathExpr instance attributes (element, path and condition) to copy from `other` to `self`. `XPathExpr.join()` can be extended in a subclass, but that requires the `left` xpath instance to be of the extended class too, and right now we can only control the type of the `right` xpath. The problem can be mitigated by recasting all xpaths returned from `GenericTranslator.xpath_element()`; that only works because it is the only hook that creates `XPathExpr` instances. The proposed change allows projects extending GenericTranslator to also safely extend `XPathExpr` to correctly support combiners in extended features. --- cssselect/xpath.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index a56b697..4b74997 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -146,6 +146,9 @@ class GenericTranslator(object): lower_case_attribute_names = False lower_case_attribute_values = False + # class used to represent and xpath expression + xpathexpr_cls = XPathExpr + def css_to_xpath(self, css, prefix='descendant-or-self::'): """Translate a *group of selectors* to XPath. 
@@ -190,7 +193,7 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::'): if not tree: raise TypeError('Expected a parsed selector, got %r' % (selector,)) xpath = self.xpath(tree) - assert isinstance(xpath, XPathExpr) # help debug a missing 'return' + assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return' return (prefix or '') + _unicode(xpath) @staticmethod @@ -305,7 +308,7 @@ def xpath_element(self, selector): # http://www.w3.org/TR/css3-namespace/#prefixes element = '%s:%s' % (selector.namespace, element) safe = safe and is_safe_name(selector.namespace) - xpath = XPathExpr(element=element) + xpath = self.xpathexpr_cls(element=element) if not safe: xpath.add_name_test() return xpath From eac05a4743a52a4e09de181b3db362bb21daa672 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 10 Jan 2013 19:07:24 +0100 Subject: [PATCH 035/208] =?UTF-8?q?HERE=C2=A0BE=C2=A0DRAGONS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cssselect/xpath.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 4b74997..69e636d 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -102,6 +102,19 @@ class GenericTranslator(object): of element names and attribute names. """ + + #### + #### HERE BE DRAGONS + #### + #### You are welcome to hook into this to change some behavior, + #### but do so at your own risks. + #### Until is has recieved a lot more work and review, + #### I reserve the right to change this API in backward-incompatible ways + #### with any minor version of cssselect. + #### See https://github.com/SimonSapin/cssselect/pull/22 + #### -- Simon Sapin. 
+ #### + combinator_mapping = { ' ': 'descendant', '>': 'child', From 1b95a44cd990abe3682b8c2ec250478d3673fd97 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 15 Mar 2013 16:53:28 +0100 Subject: [PATCH 036/208] Changelog for 0.8 --- CHANGES | 24 ++++++++++++++++++++++++ cssselect/__init__.py | 2 +- tox.ini | 2 +- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index c278667..fdc1615 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,30 @@ Changelog ========= +Version 0.8 +----------- + +Released on 2013-03-15. + +Improvements: + +* `#22 `_ + Let extended translators override what XPathExpr class is used +* `#19 `_ + Use the built-in ``lang()`` XPath function + for implementing the ``:lang()`` pseudo-class + with XML documents. + This is probably faster than ``ancestor-or-self::``. + +Bug fixes: + +* `#14 `_ + Fix non-ASCII pseudo-classes. (Invalid selector instead of crash.) +* `#20 `_ + As per the spec, elements containing only whitespace are not considered empty + for the ``:empty`` pseudo-class. 
+ + Version 0.7.1 ------------- diff --git a/cssselect/__init__.py b/cssselect/__init__.py index f1c00b0..fd341ab 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -18,5 +18,5 @@ from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '0.7.1' +VERSION = '0.8' __version__ = VERSION diff --git a/tox.ini b/tox.ini index 9a552c2..ad83007 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py24,py25,py26,py27,py31,py32 +envlist = py24,py25,py26,py27,py31,py32,py33 [testenv] deps=lxml From e6de035685c666eda774deda8db4b69a43cca64d Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Sun, 15 Sep 2013 11:10:42 +0200 Subject: [PATCH 037/208] Travis-CI config: remove Python 3.1, add Python 3.3 Python 3.1 not supported anymore in Travis http://about.travis-ci.org/docs/user/ci-environment/#Python-VM-images --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 487a6ee..4c5fdf7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,8 +4,8 @@ python: - "2.5" - "2.6" - "2.7" - - "3.1" - "3.2" + - "3.3" install: - pip install --use-mirrors lxml -e . 
From 9fff95b04e89d2afa12cd1c73a0bc7da475ccff2 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Sun, 15 Sep 2013 11:02:09 +0200 Subject: [PATCH 038/208] Remove string() conversion of element in "contains" function translation --- cssselect/tests.py | 4 ++-- cssselect/xpath.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cssselect/tests.py b/cssselect/tests.py index 4c64275..851cc85 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -352,9 +352,9 @@ def xpath(css): assert xpath('e:hover') == ( "e[0]") # never matches assert xpath('e:contains("foo")') == ( - "e[contains(string(.), 'foo')]") + "e[contains(., 'foo')]") assert xpath('e:ConTains(foo)') == ( - "e[contains(string(.), 'foo')]") + "e[contains(., 'foo')]") assert xpath('e.warning') == ( "e[@class and contains(" "concat(' ', normalize-space(@class), ' '), ' warning ')]") diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 69e636d..4821099 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -417,7 +417,7 @@ def xpath_contains_function(self, xpath, function): % function.arguments) value = function.arguments[0].value return xpath.add_condition( - 'contains(string(.), %s)' % self.xpath_literal(value)) + 'contains(., %s)' % self.xpath_literal(value)) def xpath_lang_function(self, xpath, function): if function.argument_types() not in (['STRING'], ['IDENT']): From ecda4b9a1d55102f09eb27606b8ff04f6b4a2854 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 15 Sep 2013 10:32:27 +0200 Subject: [PATCH 039/208] Nicer exception on unknown node type in the parsed tree --- cssselect/xpath.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 69e636d..4b42614 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -226,7 +226,9 @@ def xpath_literal(s): def xpath(self, parsed_selector): """Translate any parsed selector object.""" type_name = type(parsed_selector).__name__ - method = getattr(self, 'xpath_%s' % 
type_name.lower()) + method = getattr(self, 'xpath_%s' % type_name.lower(), None) + if method is None: + raise ExpressionError('%s is not supported.' % type_name) return method(parsed_selector) From 039a844bceb1584306d28647b4bf6170a324b69b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 15 Sep 2013 22:58:45 +0200 Subject: [PATCH 040/208] Add Paul Tremberth to authors. --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index a4ae5f1..43be02e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,5 +1,6 @@ Ian Bicking Laurence Rowe +Paul Tremberth Simon Potter Simon Sapin Stefan Behnel From a4b12ae07c1d7ef71b7aae15034b259d209d7960 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 15 Sep 2013 23:54:54 +0200 Subject: [PATCH 041/208] Add parser support for functional pseudo-elements. See #29. --- cssselect/parser.py | 63 +++++++++++++++++++++++++++---------- cssselect/tests.py | 76 +++++++++++++++++++++++++++++++++++++++++++-- cssselect/xpath.py | 34 +++++++++++++------- 3 files changed, 141 insertions(+), 32 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index f423f30..217ecd5 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -57,7 +57,8 @@ class Selector(object): """ def __init__(self, tree, pseudo_element=None): self.parsed_tree = tree - if pseudo_element is not None: + if pseudo_element is not None and not isinstance( + pseudo_element, FunctionalPseudoElement): pseudo_element = ascii_lower(pseudo_element) #: The identifier for the pseudo-element as a string, or ``None``. 
#: @@ -78,6 +79,8 @@ def __init__(self, tree, pseudo_element=None): self.pseudo_element = pseudo_element def __repr__(self): + if isinstance(self.pseudo_element, FunctionalPseudoElement): + pseudo_element = repr(self.pseudo_element) if self.pseudo_element: pseudo_element = '::%s' % self.pseudo_element else: @@ -115,6 +118,28 @@ def specificity(self): return a, b, c +class FunctionalPseudoElement(object): + """ + Represents selector::name(expr) + """ + def __init__(self, name, arguments): + self.name = ascii_lower(name) + self.arguments = arguments + + def __repr__(self): + return '%s[::%s(%r)]' % ( + self.__class__.__name__, self.name, + [token.value for token in self.arguments]) + + def argument_types(self): + return [token.type for token in self.arguments] + + def specificity(self): + a, b, c = self.selector.specificity() + b += 1 + return a, b, c + + class Function(object): """ Represents selector:name(expr) @@ -398,6 +423,10 @@ def parse_simple_selector(stream, inside_negation=False): if stream.peek() == ('DELIM', ':'): stream.next() pseudo_element = stream.next_ident() + if stream.peek() == ('DELIM', '('): + stream.next() + pseudo_element = FunctionalPseudoElement( + pseudo_element, parse_arguments(stream)) continue ident = stream.next_ident() if ident.lower() in ('first-line', 'first-letter', @@ -425,22 +454,7 @@ def parse_simple_selector(stream, inside_negation=False): raise SelectorSyntaxError("Expected ')', got %s" % (next,)) result = Negation(result, argument) else: - arguments = [] - while 1: - stream.skip_whitespace() - next = stream.next() - if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [ - ('DELIM', '+'), ('DELIM', '-')]: - arguments.append(next) - elif next == ('DELIM', ')'): - break - else: - raise SelectorSyntaxError( - "Expected an argument, got %s" % (next,)) - if not arguments: - raise SelectorSyntaxError( - "Expected at least one argument, got %s" % (next,)) - result = Function(result, ident, arguments) + result = Function(result, 
ident, parse_arguments(stream)) else: raise SelectorSyntaxError( "Expected selector, got %s" % (peek,)) @@ -450,6 +464,21 @@ def parse_simple_selector(stream, inside_negation=False): return result, pseudo_element +def parse_arguments(stream): + arguments = [] + while 1: + stream.skip_whitespace() + next = stream.next() + if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [ + ('DELIM', '+'), ('DELIM', '-')]: + arguments.append(next) + elif next == ('DELIM', ')'): + return arguments + else: + raise SelectorSyntaxError( + "Expected an argument, got %s" % (next,)) + + def parse_attrib(selector, stream): stream.skip_whitespace() attrib = stream.next_ident_or_star() diff --git a/cssselect/tests.py b/cssselect/tests.py index 851cc85..7665733 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -23,7 +23,9 @@ from lxml import etree, html from cssselect import (parse, GenericTranslator, HTMLTranslator, SelectorSyntaxError, ExpressionError) -from cssselect.parser import tokenize, parse_series, _unicode +from cssselect.parser import (tokenize, parse_series, _unicode, + FunctionalPseudoElement) +from cssselect.xpath import _unicode_safe_getattr, XPathExpr if sys.version_info[0] < 3: @@ -150,6 +152,7 @@ def parse_pseudo(css): result = [] for selector in parse(css): pseudo = selector.pseudo_element + pseudo = _unicode(pseudo) if pseudo else pseudo # No Symbol here assert pseudo is None or type(pseudo) is _unicode selector = repr(selector.parsed_tree).replace("(u'", "('") @@ -176,6 +179,10 @@ def parse_one(css): assert parse_one('::firsT-linE') == ('Element[*]', 'first-line') assert parse_one('::firsT-letteR') == ('Element[*]', 'first-letter') + assert parse_one('::text-content') == ('Element[*]', 'text-content') + assert parse_one('::attr(name)') == ( + "Element[*]", "FunctionalPseudoElement[::attr(['name'])]") + assert parse_one('::Selection') == ('Element[*]', 'selection') assert parse_one('foo:after') == ('Element[foo]', 'after') assert 
parse_one('foo::selection') == ('Element[foo]', 'selection') @@ -264,8 +271,6 @@ def get_error(css): "Expected ident or '*', got ") assert get_error('[foo=#]') == ( "Expected string or ident, got ") - assert get_error(':nth-child()') == ( - "Expected at least one argument, got ") assert get_error('[href]a') == ( "Expected selector, got ") assert get_error('[rel=stylesheet]') == None @@ -436,6 +441,71 @@ def test_unicode_escapes(self): assert css_to_xpath('*[aval="\'\\20\r\n \'"]') == ( '''descendant-or-self::*[@aval = "' '"]''') + def test_xpath_pseudo_elements(self): + class CustomTranslator(GenericTranslator): + def xpath_pseudo_element(self, xpath, pseudo_element): + if isinstance(pseudo_element, FunctionalPseudoElement): + method = 'xpath_%s_functional_pseudo_element' % ( + pseudo_element.name.replace('-', '_')) + method = _unicode_safe_getattr(self, method, None) + if not method: + raise ExpressionError( + "The functional pseudo-element ::%s() is unknown" + % functional.name) + xpath = method(xpath, pseudo_element.arguments) + else: + method = 'xpath_%s_simple_pseudo_element' % ( + pseudo_element.replace('-', '_')) + method = _unicode_safe_getattr(self, method, None) + if not method: + raise ExpressionError( + "The pseudo-element ::%s is unknown" + % pseudo_element) + xpath = method(xpath) + return xpath + + # functional pseudo-class: + # elements that have a certain number of attributes + def xpath_nb_attr_function(self, xpath, function): + nb_attributes = int(function.arguments[0].value) + return xpath.add_condition( + "count(@*)=%d" % nb_attributes) + + # pseudo-class: + # elements that have 5 attributes + def xpath_five_attributes_pseudo(self, xpath): + return xpath.add_condition("count(@*)=5") + + # functional pseudo-element: + # element's attribute by name + def xpath_attr_functional_pseudo_element(self, xpath, arguments): + attribute_name = arguments[0].value + other = XPathExpr('@%s' % attribute_name, '', ) + return xpath.join('/', other) + + # 
pseudo-element: + # element's text() nodes + def xpath_text_node_simple_pseudo_element(self, xpath): + other = XPathExpr('text()', '', ) + return xpath.join('/', other) + + # pseudo-element: + # element's href attribute + def xpath_attr_href_simple_pseudo_element(self, xpath): + other = XPathExpr('@href', '', ) + return xpath.join('/', other) + + def xpath(css): + return _unicode(CustomTranslator().css_to_xpath(css)) + + assert xpath(':five-attributes') == "descendant-or-self::*[count(@*)=5]" + assert xpath(':nb-attr(3)') == "descendant-or-self::*[count(@*)=3]" + assert xpath('::attr(href)') == "descendant-or-self::*/@href" + assert xpath('::text-node') == "descendant-or-self::*/text()" + assert xpath('::attr-href') == "descendant-or-self::*/@href" + assert xpath('p img::attr(src)') == ( + "descendant-or-self::p/descendant-or-self::*/img/@src") + def test_series(self): def series(css): selector, = parse(':nth-child(%s)' % css) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 3e742bf..e37a742 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -26,6 +26,12 @@ _unicode = str +def _unicode_safe_getattr(obj, name, default=None): + # getattr() with a non-ASCII name fails on Python 2.x + name = name.encode('ascii', 'replace').decode('ascii') + return getattr(obj, name, default) + + class ExpressionError(SelectorError, RuntimeError): """Unknown or unsupported selector (eg. pseudo-class).""" @@ -178,14 +184,9 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'): The equivalent XPath 1.0 expression as an Unicode string. """ - selectors = parse(css) - for selector in selectors: - if selector.pseudo_element: - raise ExpressionError('Pseudo-elements are not supported.') - return ' | '.join( self.selector_to_xpath(selector, prefix) - for selector in selectors) + for selector in parse(css)) def selector_to_xpath(self, selector, prefix='descendant-or-self::'): """Translate a parsed selector to XPath. 
@@ -207,8 +208,21 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::'): raise TypeError('Expected a parsed selector, got %r' % (selector,)) xpath = self.xpath(tree) assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return' + if selector.pseudo_element: + xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) return (prefix or '') + _unicode(xpath) + def xpath_pseudo_element(self, xpath, pseudo_element): + """Translate a pseudo-element. + + Defaults to not supporting pseudo-elements at all, + but can be overridden by sub-classes. + + """ + if pseudo_element: + raise ExpressionError('Pseudo-elements are not supported.') + return xpath + @staticmethod def xpath_literal(s): s = _unicode(s) @@ -253,9 +267,7 @@ def xpath_negation(self, negation): def xpath_function(self, function): """Translate a functional pseudo-class.""" method = 'xpath_%s_function' % function.name.replace('-', '_') - # getattr() with a non-ASCII name fails on Python 2.x - method = method.encode('ascii', 'replace').decode('ascii') - method = getattr(self, method, None) + method = _unicode_safe_getattr(self, method, None) if not method: raise ExpressionError( "The pseudo-class :%s() is unknown" % function.name) @@ -264,9 +276,7 @@ def xpath_function(self, function): def xpath_pseudo(self, pseudo): """Translate a pseudo-class.""" method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_') - # getattr() with a non-ASCII name fails on Python 2.x - method = method.encode('ascii', 'replace').decode('ascii') - method = getattr(self, method, None) + method = _unicode_safe_getattr(self, method, None) if not method: # TODO: better error message for pseudo-elements? raise ExpressionError( From f8a89bfae5f76499aa8795fe97b7fff8841ed729 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 15 Sep 2013 23:58:21 +0200 Subject: [PATCH 042/208] Document functional pseudo-elements. 
--- cssselect/parser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 217ecd5..1f9e7df 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -60,7 +60,9 @@ def __init__(self, tree, pseudo_element=None): if pseudo_element is not None and not isinstance( pseudo_element, FunctionalPseudoElement): pseudo_element = ascii_lower(pseudo_element) - #: The identifier for the pseudo-element as a string, or ``None``. + #: A :class:`FunctionalPseudoElement`, + #: or the identifier for the pseudo-element as a string, + # or ``None``. #: #: +-------------------------+----------------+----------------+ #: | | Selector | Pseudo-element | From e1a0f0def44aff9c4769cbd2fe924e80e2c45c1b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 16 Sep 2013 00:01:48 +0200 Subject: [PATCH 043/208] Remove unnecessary check. --- cssselect/xpath.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index e37a742..a5d3b9b 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -219,9 +219,7 @@ def xpath_pseudo_element(self, xpath, pseudo_element): but can be overridden by sub-classes. 
""" - if pseudo_element: - raise ExpressionError('Pseudo-elements are not supported.') - return xpath + raise ExpressionError('Pseudo-elements are not supported.') @staticmethod def xpath_literal(s): From d7e78ee9359407bdc96417d6ecf730f7af02889b Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Mon, 16 Sep 2013 13:36:04 +0200 Subject: [PATCH 044/208] Fix exception message in functional pseudo-element test example --- cssselect/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cssselect/tests.py b/cssselect/tests.py index 7665733..e7c0193 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -451,7 +451,7 @@ def xpath_pseudo_element(self, xpath, pseudo_element): if not method: raise ExpressionError( "The functional pseudo-element ::%s() is unknown" - % functional.name) + % pseudo_element.name) xpath = method(xpath, pseudo_element.arguments) else: method = 'xpath_%s_simple_pseudo_element' % ( From 06ca3147aedd9d4b1d01aac9e2b1de627dbe1827 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 11 Oct 2013 16:16:29 +0100 Subject: [PATCH 045/208] Document FunctionalPseudoElement. 
--- cssselect/__init__.py | 4 ++-- cssselect/parser.py | 41 ++++++++++++++++++++++++++++------------- docs/index.rst | 2 ++ 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/cssselect/__init__.py b/cssselect/__init__.py index fd341ab..bb0d59d 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -13,8 +13,8 @@ """ -from cssselect.parser import (parse, Selector, SelectorError, - SelectorSyntaxError) +from cssselect.parser import (parse, Selector, FunctionalPseudoElement, + SelectorError, SelectorSyntaxError) from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError diff --git a/cssselect/parser.py b/cssselect/parser.py index 1f9e7df..f2b32b7 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -64,18 +64,20 @@ def __init__(self, tree, pseudo_element=None): #: or the identifier for the pseudo-element as a string, # or ``None``. #: - #: +-------------------------+----------------+----------------+ - #: | | Selector | Pseudo-element | - #: +=========================+================+================+ - #: | CSS3 syntax | ``a::before`` | ``'before'`` | - #: +-------------------------+----------------+----------------+ - #: | Older syntax | ``a:before`` | ``'before'`` | - #: +-------------------------+----------------+----------------+ - #: | From the Lists3_ draft, | ``li::marker`` | ``'marker'`` | - #: | not in Selectors3 | | | - #: +-------------------------+----------------+----------------+ - #: | Invalid pseudo-class | ``li:marker`` | ``None`` | - #: +-------------------------+----------------+----------------+ + #: +-------------------------+----------------+--------------------------------+ + #: | | Selector | Pseudo-element | + #: +=========================+================+================================+ + #: | CSS3 syntax | ``a::before`` | ``'before'`` | + #: +-------------------------+----------------+--------------------------------+ + #: | Older syntax | ``a:before`` | ``'before'`` | + #: 
+-------------------------+----------------+--------------------------------+ + #: | From the Lists3_ draft, | ``li::marker`` | ``'marker'`` | + #: | not in Selectors3 | | | + #: +-------------------------+----------------+--------------------------------+ + #: | Invalid pseudo-class | ``li:marker`` | ``None`` | + #: +-------------------------+----------------+--------------------------------+ + #: | Functinal | ``a::foo(2)`` | ``FunctionalPseudoElement(…)`` | + #: +-------------------------+----------------+--------------------------------+ #: #: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement self.pseudo_element = pseudo_element @@ -122,7 +124,20 @@ def specificity(self): class FunctionalPseudoElement(object): """ - Represents selector::name(expr) + Represents selector::name(arguments) + + .. attribute:: name + + The name (identifier) of the pseudo-element, as a string. + + .. attribute:: arguments + + The arguments of the pseudo-element, as a list of tokens. + + **Note:** tokens are not part of the public API, + and may change between cssselect versions. + Use at your own risks. + """ def __init__(self, name, arguments): self.name = ascii_lower(name) diff --git a/docs/index.rst b/docs/index.rst index 9aec19e..4ac7401 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -54,6 +54,8 @@ is a group of two selectors. .. autoclass:: Selector() :members: +.. autoclass:: FunctionalPseudoElement + .. autoclass:: GenericTranslator :members: css_to_xpath, selector_to_xpath From 12c00f5a14f3e68064bd15af15466fa8a27e0026 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 11 Oct 2013 16:17:00 +0100 Subject: [PATCH 046/208] Changelog for 0.9 Releasing on PyPI fixes #33. 
--- CHANGES | 8 ++++++++ cssselect/__init__.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index fdc1615..5527a2e 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,14 @@ Changelog ========= +Version 0.9 +----------- + +Released on 2013-10-11. + +Add parser support for :attr:`functional pseudo-elements `. + + Version 0.8 ----------- diff --git a/cssselect/__init__.py b/cssselect/__init__.py index bb0d59d..1d0438b 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -18,5 +18,5 @@ from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '0.8' +VERSION = '0.9' __version__ = VERSION From 4230c8d210c6ff30e27869a75bc39524f8d4e246 Mon Sep 17 00:00:00 2001 From: Mikhail Korobov Date: Tue, 15 Oct 2013 17:15:30 +0600 Subject: [PATCH 047/208] Fix tox.ini for Python 2.5 See https://bitbucket.org/hpk42/tox/issue/117/tox-160-breaks-when-running-tests-under --- tox.ini | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tox.ini b/tox.ini index ad83007..dde0507 100644 --- a/tox.ini +++ b/tox.ini @@ -4,3 +4,7 @@ envlist = py24,py25,py26,py27,py31,py32,py33 [testenv] deps=lxml commands = python cssselect/tests.py + +[testenv:py25] +setenv = + PIP_INSECURE = 1 From c4adf6424d243b5141bc62f920ff078fead4939e Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 15 Oct 2013 15:56:53 +0100 Subject: [PATCH 048/208] Drop official support for Python 2.4 and 3.1. Fix #35 Fix #36 --- CHANGES | 10 ++++++++++ README.rst | 2 +- setup.py | 2 -- tox.ini | 2 +- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/CHANGES b/CHANGES index 5527a2e..d8aadeb 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,16 @@ Changelog ========= +Version 0.10 +------------ + +Not released yet. + +Drop official support for Python 2.4 and 3.1, as testing was becoming difficult. +Nothing will break overnight, but future releases may on may not work on these versions. +Older releases will remain available on PyPI. 
+ + Version 0.9 ----------- diff --git a/README.rst b/README.rst index fa53a5b..f523c7f 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ extracted as a stand-alone project. Quick facts: * Free software: BSD licensed -* Compatible with Python 2.4+ and 3.x +* Compatible with Python 2.5+ and 3.2+ * Latest documentation `on python.org `_ * Source, issues and pull requests `on Github `_ diff --git a/setup.py b/setup.py index 4f9b076..bd1e385 100644 --- a/setup.py +++ b/setup.py @@ -34,12 +34,10 @@ 'Intended Audience :: Developers', 'License :: OSI Approved :: BSD License', 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.4', 'Programming Language :: Python :: 2.5', 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.1', 'Programming Language :: Python :: 3.2', ], **extra_kwargs diff --git a/tox.ini b/tox.ini index dde0507..ca053d8 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py24,py25,py26,py27,py31,py32,py33 +envlist = py25,py26,py27,py32,py33 [testenv] deps=lxml From efc1f7c2485eba5355c0e3c2662a7ba9a5f39c93 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 15 Oct 2013 15:58:39 +0100 Subject: [PATCH 049/208] Ack some contributors. --- AUTHORS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/AUTHORS b/AUTHORS index 43be02e..bf826b9 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,6 +1,9 @@ +Daniel Graña Ian Bicking Laurence Rowe +Mikhail Korobov Paul Tremberth Simon Potter Simon Sapin Stefan Behnel +Varialus From d29ac49aa67fa24ada2ffeacb9ccc6e3d56e0c27 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 17 Oct 2013 14:28:02 +0100 Subject: [PATCH 050/208] Switch back to default to ignoring pseudo-elements ... rather than rejecting them. 
Fix Kozea/WeasyPrint#128 --- cssselect/tests.py | 5 +++++ cssselect/xpath.py | 20 ++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/cssselect/tests.py b/cssselect/tests.py index e7c0193..8b69740 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -380,6 +380,11 @@ def xpath(css): assert xpath('div#container p') == ( "div[@id = 'container']/descendant-or-self::*/p") + selector, = parse('e:after') + assert selector.pseudo_element == 'after' + # Pseudo-element is ignored: + assert GenericTranslator().selector_to_xpath(selector, prefix='') == "e" + # Invalid characters in XPath element names assert xpath(r'di\a0 v') == ( u("*[name() = 'di v']")) # di\xa0v diff --git a/cssselect/xpath.py b/cssselect/xpath.py index a5d3b9b..8d8b1d3 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -184,19 +184,23 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'): The equivalent XPath 1.0 expression as an Unicode string. """ - return ' | '.join( - self.selector_to_xpath(selector, prefix) - for selector in parse(css)) + return ' | '.join(self.selector_to_xpath(selector, prefix, + translate_pseudo_elements=True) + for selector in parse(css)) - def selector_to_xpath(self, selector, prefix='descendant-or-self::'): + def selector_to_xpath(self, selector, prefix='descendant-or-self::', + translate_pseudo_elements=False): """Translate a parsed selector to XPath. - The :attr:`~Selector.pseudo_element` attribute of the selector - is ignored. It is the caller's responsibility to reject selectors - with pseudo-elements, or to account for them somehow. :param selector: A parsed :class:`Selector` object. + :param translate_pseudo_elements: + Unless this is set to ``True`` (as :meth:`css_to_xpath` does), + the :attr:`~Selector.pseudo_element` attribute of the selector + is ignored. + It is the caller's responsibility to reject selectors + with pseudo-elements, or to account for them somehow. 
:raises: :class:`ExpressionError` on unknown/unsupported selectors. :returns: @@ -208,7 +212,7 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::'): raise TypeError('Expected a parsed selector, got %r' % (selector,)) xpath = self.xpath(tree) assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return' - if selector.pseudo_element: + if translate_pseudo_elements and selector.pseudo_element: xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) return (prefix or '') + _unicode(xpath) From 070cc0dfb266f96d9cfa5b75f13949f38fa7b7e3 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 17 Oct 2013 14:38:45 +0100 Subject: [PATCH 051/208] Document the 'prefix' parameter. --- cssselect/xpath.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 8d8b1d3..e5e74b9 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -176,6 +176,9 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'): :param css: A *group of selectors* as an Unicode string. + :param prefix: + This string is prepended to the XPath expression for each selector. + The default makes selectors scoped to the context node’s subtree. :raises: :class:`SelectorSyntaxError` on invalid selectors, :class:`ExpressionError` on unknown/unsupported selectors, @@ -195,6 +198,9 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::', :param selector: A parsed :class:`Selector` object. + :param prefix: + This string is prepended to the resulting XPath expression. + The default makes selectors scoped to the context node’s subtree. 
:param translate_pseudo_elements: Unless this is set to ``True`` (as :meth:`css_to_xpath` does), the :attr:`~Selector.pseudo_element` attribute of the selector From 2bec9474eca74c4e1ca62a5ba1ca3bf781eda482 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 17 Oct 2013 14:38:59 +0100 Subject: [PATCH 052/208] Changelog for 0.9.1 --- CHANGES | 27 ++++++++++++++++++++------- cssselect/__init__.py | 2 +- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/CHANGES b/CHANGES index d8aadeb..edbbaca 100644 --- a/CHANGES +++ b/CHANGES @@ -1,14 +1,21 @@ Changelog ========= -Version 0.10 ------------- +Version 0.9.1 +------------- -Not released yet. +Released on 2013-10-17. -Drop official support for Python 2.4 and 3.1, as testing was becoming difficult. -Nothing will break overnight, but future releases may on may not work on these versions. -Older releases will remain available on PyPI. +* **Backward incompatible change from 0.9**: + :meth:`~GenericTranslator.selector_to_xpath` defaults to + ignoring pseudo-elements, + as it did in 0.8 and previous versions. + (:meth:`~GenericTranslator.css_to_xpath` doesn’t change.) +* Drop official support for Python 2.4 and 3.1, + as testing was becoming difficult. + Nothing will break overnight, + but future releases may on may not work on these versions. + Older releases will remain available on PyPI. Version 0.9 @@ -16,7 +23,13 @@ Version 0.9 Released on 2013-10-11. -Add parser support for :attr:`functional pseudo-elements `. +Add parser support for :attr:`functional +pseudo-elements `. + +*Update:* +This version accidentally introduced a **backward incompatible** change: +:meth:`~GenericTranslator.selector_to_xpath` defaults to +rejecting pseudo-elements instead of ignoring them. 
Version 0.8 diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 1d0438b..871f1b2 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -18,5 +18,5 @@ from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '0.9' +VERSION = '0.9.1' __version__ = VERSION From 2db1cd30a2e8ff9e6f53963b2cc4f98a12ba3a2b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 17 Oct 2013 17:54:15 +0100 Subject: [PATCH 053/208] Fix #39: Selector.__repr__ with functional pseudo-elements --- cssselect/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index f2b32b7..8426ab0 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -85,7 +85,7 @@ def __init__(self, tree, pseudo_element=None): def __repr__(self): if isinstance(self.pseudo_element, FunctionalPseudoElement): pseudo_element = repr(self.pseudo_element) - if self.pseudo_element: + elif self.pseudo_element: pseudo_element = '::%s' % self.pseudo_element else: pseudo_element = '' From fd5944a9490f50c94385d6c949fc1d6f39070d18 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 17 Oct 2013 17:58:28 +0100 Subject: [PATCH 054/208] Fix #40: broken reporting on selector syntax error --- cssselect/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 8426ab0..d71fdda 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -684,7 +684,7 @@ def tokenize(s): if end_pos == len_s: raise SelectorSyntaxError('Unclosed string at %s' % pos) if s[end_pos] != quote: - raise SelectorSyntaxError('Invalid string at %s' % next_pos) + raise SelectorSyntaxError('Invalid string at %s' % pos) value = _sub_simple_escape(_replace_simple, _sub_unicode_escape(_replace_unicode, _sub_newline_escape('', match.group()))) From 38e2edb4a2bebbce1972b24d9cabaedfa14459b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gra=C3=B1a?= Date: Thu, 17 Oct 2013 15:21:59 -0200 
Subject: [PATCH 055/208] Improve selector_to_xpath tests on pseudo-elements --- cssselect/tests.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/cssselect/tests.py b/cssselect/tests.py index 8b69740..a1fdc9e 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -196,6 +196,21 @@ def parse_one(css): ('Element[bar]', None), ('Element[baz]', 'after')] + # Special cases for CSS 2.1 pseudo-elements are ignored by default + for pseudo in ('after', 'before', 'first-line', 'first-letter'): + selector, = parse('e:%s' % pseudo) + assert selector.pseudo_element == pseudo + assert GenericTranslator().selector_to_xpath(selector, prefix='') == "e" + + # Pseudo Elements are ignored by default, but if allowed they are not + # supported by GenericTranslator + tr = GenericTranslator() + selector, = parse('e::foo') + assert selector.pseudo_element == 'foo' + assert tr.selector_to_xpath(selector, prefix='') == "e" + self.assertRaises(ExpressionError, tr.selector_to_xpath, selector, + translate_pseudo_elements=True) + def test_specificity(self): def specificity(css): selectors = parse(css) @@ -380,11 +395,6 @@ def xpath(css): assert xpath('div#container p') == ( "div[@id = 'container']/descendant-or-self::*/p") - selector, = parse('e:after') - assert selector.pseudo_element == 'after' - # Pseudo-element is ignored: - assert GenericTranslator().selector_to_xpath(selector, prefix='') == "e" - # Invalid characters in XPath element names assert xpath(r'di\a0 v') == ( u("*[name() = 'di v']")) # di\xa0v From 9c9aa138b874eec92808fbe4018322bd4c127367 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Fri, 25 Jul 2014 10:02:05 +0200 Subject: [PATCH 056/208] Remove Python 2.5 as Travis CI Py2.5 support ended See http://blog.travis-ci.com/2013-11-18-upcoming-build-environment-updates/ and https://github.com/travis-ci/travis-ci/issues/1668#issuecomment-29151484 Current supported versions: - 2.6 - 2.7 - 3.2 - 3.3 - 3.4 - pypy (Source: 
http://docs.travis-ci.com/user/ci-environment/#Python-VM-images) --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4c5fdf7..93ad08a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ language: python python: - - "2.5" - "2.6" - "2.7" - "3.2" From 4112258c92e594fc71f4749954aef6d868e30b24 Mon Sep 17 00:00:00 2001 From: James Salter Date: Mon, 26 Jan 2015 13:57:30 +0000 Subject: [PATCH 057/208] change coding: utf8 to utf-8 --- cssselect/__init__.py | 2 +- cssselect/parser.py | 2 +- cssselect/tests.py | 2 +- cssselect/xpath.py | 2 +- setup.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 871f1b2..544a058 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -1,4 +1,4 @@ -# coding: utf8 +# coding: utf-8 """ CSS Selectors based on XPath ============================ diff --git a/cssselect/parser.py b/cssselect/parser.py index d71fdda..1383c8c 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -1,4 +1,4 @@ -# coding: utf8 +# coding: utf-8 """ cssselect.parser ================ diff --git a/cssselect/tests.py b/cssselect/tests.py index a1fdc9e..ec77c6d 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# coding: utf8 +# coding: utf-8 """ Tests for cssselect =================== diff --git a/cssselect/xpath.py b/cssselect/xpath.py index e5e74b9..1f2bdd5 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -1,4 +1,4 @@ -# coding: utf8 +# coding: utf-8 """ cssselect.xpath =============== diff --git a/setup.py b/setup.py index bd1e385..42bde1f 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -# coding: utf8 +# coding: utf-8 import re import os.path From 26e321fa1746f3ad91e1e578e6790566eed68e54 Mon Sep 17 00:00:00 2001 From: Nik Nyby Date: Fri, 19 Jun 2015 00:35:26 -0400 Subject: [PATCH 058/208] Add python 3.4 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git 
a/.travis.yml b/.travis.yml index 93ad08a..ba56d9a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,7 @@ python: - "2.7" - "3.2" - "3.3" + - "3.4" install: - pip install --use-mirrors lxml -e . From 5aceab8a2d35e2df4f6277586bceab5cbc0edf16 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Fri, 2 Oct 2015 00:53:35 +0100 Subject: [PATCH 059/208] Support universal wheels --- setup.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.cfg b/setup.cfg index ccddf11..7a3317f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,3 +8,6 @@ upload-dir = docs/_build/html [pytest] python_files=tests.py + +[bdist_wheel] +universal = 1 From 4234fa7ffe55b323a68e82b41328e90028d45b5f Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Wed, 15 Jun 2016 15:58:22 +0200 Subject: [PATCH 060/208] Amend source code encoding Following https://docs.python.org/3/tutorial/interpreter.html#source-code-encoding https://docs.python.org/2/tutorial/interpreter.html#source-code-encoding --- cssselect/__init__.py | 2 +- cssselect/parser.py | 2 +- cssselect/tests.py | 2 +- cssselect/xpath.py | 4 ++-- setup.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 544a058..d31e164 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -1,4 +1,4 @@ -# coding: utf-8 +# -*- coding: utf-8 -*- """ CSS Selectors based on XPath ============================ diff --git a/cssselect/parser.py b/cssselect/parser.py index 1383c8c..d155252 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -1,4 +1,4 @@ -# coding: utf-8 +# -*- coding: utf-8 -*- """ cssselect.parser ================ diff --git a/cssselect/tests.py b/cssselect/tests.py index ec77c6d..567e3c5 100755 --- a/cssselect/tests.py +++ b/cssselect/tests.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# coding: utf-8 +# -*- coding: utf-8 -*- """ Tests for cssselect =================== diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 1f2bdd5..7e34f7f 100644 --- 
a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -1,4 +1,4 @@ -# coding: utf-8 +# -*- coding: utf-8 -*- """ cssselect.xpath =============== @@ -108,7 +108,7 @@ class GenericTranslator(object): of element names and attribute names. """ - + #### #### HERE BE DRAGONS #### diff --git a/setup.py b/setup.py index 42bde1f..208eef6 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -# coding: utf-8 +# -*- coding: utf-8 -*- import re import os.path From 71e2bd767915faa44b39654c230ebd1c9aabc4fe Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Wed, 15 Jun 2016 16:50:52 +0200 Subject: [PATCH 061/208] Add Python 3.5 env to Travis CI --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index ba56d9a..acb3eab 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,7 @@ python: - "3.2" - "3.3" - "3.4" + - "3.5" install: - pip install --use-mirrors lxml -e . From f47fcc111ffc44970a2ca3b9403f0b495b2bc3e6 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Wed, 15 Jun 2016 17:27:12 +0200 Subject: [PATCH 062/208] Update setup.py and README with new links --- README.rst | 8 ++++---- cssselect/xpath.py | 4 ++-- docs/index.rst | 2 +- setup.py | 9 ++++++--- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index f523c7f..2e7964c 100644 --- a/README.rst +++ b/README.rst @@ -9,8 +9,8 @@ to find the matching elements in an XML or HTML document. This module used to live inside of lxml as ``lxml.cssselect`` before it was extracted as a stand-alone project. -.. _CSS3 Selectors: http://www.w3.org/TR/2011/REC-css3-selectors-20110929/ -.. _XPath 1.0: http://www.w3.org/TR/xpath/ +.. _CSS3 Selectors: https://www.w3.org/TR/css3-selectors/ +.. _XPath 1.0: https://www.w3.org/TR/xpath/ .. 
_lxml: http://lxml.de/ @@ -18,8 +18,8 @@ Quick facts: * Free software: BSD licensed * Compatible with Python 2.5+ and 3.2+ -* Latest documentation `on python.org `_ +* Latest documentation `on python.org `_ * Source, issues and pull requests `on Github - `_ + `_ * Releases `on PyPI `_ * Install with ``pip install cssselect`` diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 1f2bdd5..f387239 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -108,7 +108,7 @@ class GenericTranslator(object): of element names and attribute names. """ - + #### #### HERE BE DRAGONS #### @@ -117,7 +117,7 @@ class GenericTranslator(object): #### Until is has recieved a lot more work and review, #### I reserve the right to change this API in backward-incompatible ways #### with any minor version of cssselect. - #### See https://github.com/SimonSapin/cssselect/pull/22 + #### See https://github.com/scrapy/cssselect/pull/22 #### -- Simon Sapin. #### diff --git a/docs/index.rst b/docs/index.rst index 4ac7401..fe473f7 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -139,7 +139,7 @@ and their signature. You can look at the `source code`_ to see how it works. However, be aware that this API is not very stable yet. It might change and break your sub-class. -.. _source code: https://github.com/SimonSapin/cssselect/blob/master/cssselect/xpath.py +.. 
_source code: https://github.com/scrapy/cssselect/blob/master/cssselect/xpath.py Namespaces diff --git a/setup.py b/setup.py index 42bde1f..464d6f5 100644 --- a/setup.py +++ b/setup.py @@ -21,12 +21,12 @@ version=VERSION, author='Ian Bicking', author_email='ianb@colorstudy.com', - maintainer='Simon Sapin', - maintainer_email='simon.sapin@exyr.org', + maintainer='Paul Tremberth', + maintainer_email='paul.tremberth@gmail.com', description= 'cssselect parses CSS3 Selectors and translates them to XPath 1.0', long_description=README, - url='http://packages.python.org/cssselect/', + url='https://pythonhosted.org/cssselect/', license='BSD', packages=['cssselect'], classifiers=[ @@ -39,6 +39,9 @@ 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.2', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', ], **extra_kwargs ) From 279a361db001812a8339b543b715281312799805 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Wed, 15 Jun 2016 17:35:55 +0200 Subject: [PATCH 063/208] Use bumpversion for versioning --- .bumpversion.cfg | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .bumpversion.cfg diff --git a/.bumpversion.cfg b/.bumpversion.cfg new file mode 100644 index 0000000..a576861 --- /dev/null +++ b/.bumpversion.cfg @@ -0,0 +1,6 @@ +[bumpversion] +current_version = 0.9.1 +commit = True +tag = True + +[bumpversion:file:cssselect/__init__.py] From f4273b06ca9711a7e9e85f682fa756b73341b204 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Wed, 15 Jun 2016 18:04:42 +0200 Subject: [PATCH 064/208] Update changelog for upcoming 0.9.2 release --- CHANGES | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGES b/CHANGES index edbbaca..5ae9a39 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,17 @@ Changelog ========= +Version 0.9.2 +------------- + +Released on 2016-06-15. + +* Distribute as universal wheel. 
+* Add support for Python 3.3, 3.4 and 3.5. +* Drop support for Python 2.5 as testing is getting difficult. +* Improve tests on pseudo-elements. + + Version 0.9.1 ------------- From 46728304b93888edb672ad6bd05bccfb6b5f7124 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Wed, 15 Jun 2016 18:05:34 +0200 Subject: [PATCH 065/208] Drop Python 2.5 support --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index fc40517..b4d0941 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,6 @@ 'Intended Audience :: Developers', 'License :: OSI Approved :: BSD License', 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.5', 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', From 97e6a3a6b5a932a80de7456b4dca9ad36feabf43 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Wed, 15 Jun 2016 18:10:33 +0200 Subject: [PATCH 066/208] Update authors list --- AUTHORS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/AUTHORS b/AUTHORS index bf826b9..70ca409 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,9 +1,12 @@ Daniel Graña Ian Bicking +James Salter Laurence Rowe Mikhail Korobov +Nik Nyby Paul Tremberth Simon Potter Simon Sapin Stefan Behnel +Thomas Grainger Varialus From e687f1eeb97316bbbbdac25cd8a7bf6dfe56700f Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Wed, 15 Jun 2016 18:14:32 +0200 Subject: [PATCH 067/208] Update minimal Python 2.x version compatibility in README (now 2.6+) --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 2e7964c..155149d 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ extracted as a stand-alone project. 
Quick facts: * Free software: BSD licensed -* Compatible with Python 2.5+ and 3.2+ +* Compatible with Python 2.6+ and 3.2+ * Latest documentation `on python.org `_ * Source, issues and pull requests `on Github `_ From 07fdcccf220f8003c4ef44b898e94ecb144d3528 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Wed, 15 Jun 2016 18:26:47 +0200 Subject: [PATCH 068/208] =?UTF-8?q?Bump=20version:=200.9.1=20=E2=86=92=200?= =?UTF-8?q?.9.2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 3 ++- cssselect/__init__.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index a576861..a674e10 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,6 +1,7 @@ [bumpversion] -current_version = 0.9.1 +current_version = 0.9.2 commit = True tag = True [bumpversion:file:cssselect/__init__.py] + diff --git a/cssselect/__init__.py b/cssselect/__init__.py index d31e164..ed330ac 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -18,5 +18,5 @@ from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '0.9.1' +VERSION = '0.9.2' __version__ = VERSION From af30afc98b928b381a2885e0567c019c130db9d8 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Wed, 15 Jun 2016 22:43:09 +0200 Subject: [PATCH 069/208] Use "classic" theme for Sphinx docs --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 22e6032..b2612d0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -95,7 +95,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -#html_theme = 'agogo' +html_theme = 'classic' # Theme options are theme-specific and customize the look and feel of a theme # further. 
For a list of options available for each theme, see the From ebd6f42459e12532233d6c44c71ae6b36a582288 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Thu, 16 Jun 2016 13:16:42 +0200 Subject: [PATCH 070/208] Move tests file + add codecov on Travis --- .coveragerc | 1 + .travis.yml | 10 +++++++++- setup.cfg | 2 +- tests/__init__.py | 0 cssselect/tests.py => tests/test_cssselect.py | 0 tox.ini | 9 +++++++-- 6 files changed, 18 insertions(+), 4 deletions(-) create mode 100644 tests/__init__.py rename cssselect/tests.py => tests/test_cssselect.py (100%) mode change 100755 => 100644 diff --git a/.coveragerc b/.coveragerc index 2ee5ff3..ed1fac6 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,5 +1,6 @@ [run] branch = True +source = cssselect [report] exclude_lines = diff --git a/.travis.yml b/.travis.yml index acb3eab..8a4af19 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,5 +10,13 @@ python: install: - pip install --use-mirrors lxml -e . + - pip install -U codecov pytest-cov + - if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]]; + then pip uninstall -y coverage && pip install "coverage<4"; + fi -script: py.test +script: + py.test --cov-report term --cov=cssselect + +after_success: + codecov diff --git a/setup.cfg b/setup.cfg index 7a3317f..270daee 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,7 +7,7 @@ build-dir = docs/_build upload-dir = docs/_build/html [pytest] -python_files=tests.py +testpaths = tests [bdist_wheel] universal = 1 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cssselect/tests.py b/tests/test_cssselect.py old mode 100755 new mode 100644 similarity index 100% rename from cssselect/tests.py rename to tests/test_cssselect.py diff --git a/tox.ini b/tox.ini index ca053d8..a971384 100644 --- a/tox.ini +++ b/tox.ini @@ -2,8 +2,13 @@ envlist = py25,py26,py27,py32,py33 [testenv] -deps=lxml -commands = python cssselect/tests.py +deps= + lxml + pytest + pytest-cov + +commands = + py.test --cov-report term 
--cov=cssselect [testenv:py25] setenv = From 7c7d1a6786e3c2cb9012bb2abb1fee24a402a0bb Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Wed, 23 Jul 2014 14:32:00 +0200 Subject: [PATCH 071/208] Fix :nth-*(an+b) pseudo-classes selectors for negative a's Fix :nth-last-child(1)/:nth-last-of-type(1) translations (fixes #15) --- cssselect/xpath.py | 82 +++++++++++++++++++++++++++++++---------- tests/test_cssselect.py | 38 +++++++++++++------ 2 files changed, 89 insertions(+), 31 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index b0913ab..49e60ce 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -379,37 +379,81 @@ def xpath_nth_child_function(self, xpath, function, last=False, if add_name_test: xpath.add_name_test() xpath.add_star_prefix() - if a == 0: - if last: - b = 'last() - %s' % b - return xpath.add_condition('position() = %s' % b) + # non-last + # -------- + # position() = an+b + # -> position() - b = an + # + # if a < 0: + # position() - b <= 0 + # -> position() <= b + # + # last + # ---- + # last() - position() = an+b -1 + # -> last() - position() - b +1 = an + # + # if a < 0: + # last() - position() - b +1 <= 0 + # -> position() >= last() - b +1 + # + # -b +1 = -(b-1) if last: - # FIXME: I'm not sure if this is right - a = -a - b = -b + b = b - 1 if b > 0: b_neg = str(-b) else: b_neg = '+%s' % (-b) + if a == 0: + if last: + # http://www.w3.org/TR/selectors/#nth-last-child-pseudo + # The :nth-last-child(an+b) pseudo-class notation represents + # an element that has an+b-1 siblings after it in the document tree + # + # last() - position() = an+b-1 + # -> position() = last() -b +1 (for a==0) + # + if b == 0: + b = 'last()' + else: + b = 'last() %s' % b_neg + return xpath.add_condition('position() = %s' % b) if a != 1: - expr = ['(position() %s) mod %s = 0' % (b_neg, a)] + # last() - position() - b +1 = an + if last: + left = 'last() - position()' + # position() - b = an + else: + left = 'position()' + if b != 0: + left = '%s %s' % 
(left, b_neg) + if last or b != 0: + left = '(%s)' % left + expr = ['%s mod %s = 0' % (left, a)] else: expr = [] - if b >= 0: - expr.append('position() >= %s' % b) - elif b < 0 and last: - expr.append('position() < (last() %s)' % b) + if last: + if b == 0: + right = 'last()' + else: + right = 'last() %s' % b_neg + if a > 0: + expr.append('(position() <= %s)' % right) + else: + expr.append('(position() >= %s)' % right) + else: + # position() > 0 so if b < 0, then position() > b + # also, position() >= 1 always + if b > 1: + if a > 0: + expr.append('position() >= %s' % b) + else: + expr.append('position() <= %s' % b) + expr = ' and '.join(expr) if expr: xpath.add_condition(expr) return xpath - # FIXME: handle an+b, odd, even - # an+b means every-a, plus b, e.g., 2n+1 means odd - # 0n+b means b - # n+0 means a=1, i.e., all elements - # an means every a elements, i.e., 2n means even - # -n means -1n - # -1n+6 means elements 6 and previous def xpath_nth_last_child_function(self, xpath, function): return self.xpath_nth_child_function(xpath, function, last=True) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 567e3c5..97b9202 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -336,19 +336,30 @@ def xpath(css): "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]") assert xpath('e:nth-child(1)') == ( "*/*[name() = 'e' and (position() = 1)]") + assert xpath('e:nth-child(3n+2)') == ( + "*/*[name() = 'e' and ((position() -2) mod 3 = 0 and position() >= 2)]") + assert xpath('e:nth-child(3n-2)') == ( + "*/*[name() = 'e' and ((position() +2) mod 3 = 0)]") + assert xpath('e:nth-child(-n+6)') == ( + "*/*[name() = 'e' and ((position() -6) mod -1 = 0 and position() <= 6)]") assert xpath('e:nth-last-child(1)') == ( - "*/*[name() = 'e' and (position() = last() - 1)]") + "*/*[name() = 'e' and (position() = last())]") + assert xpath('e:nth-last-child(2n)') == ( + "*/*[name() = 'e' and (" + "(last() - position() +1) mod 2 = 0 and (position() <= 
last() +1))]") assert xpath('e:nth-last-child(2n+2)') == ( "*/*[name() = 'e' and (" - "(position() +2) mod -2 = 0 and position() < (last() -2))]") + "(last() - position() -1) mod 2 = 0 and (position() <= last() -1))]") + # represents the two last e elements + assert xpath('e:nth-last-child(-n+2)') == ( + "*/*[name() = 'e' and (" + "(last() - position() -1) mod -1 = 0 and (position() >= last() -1))]") assert xpath('e:nth-of-type(1)') == ( "*/e[position() = 1]") assert xpath('e:nth-last-of-type(1)') == ( - "*/e[position() = last() - 1]") - assert xpath('e:nth-last-of-type(1)') == ( - "*/e[position() = last() - 1]") + "*/e[position() = last()]") assert xpath('div e:nth-last-of-type(1) .aclass') == ( - "div/descendant-or-self::*/e[position() = last() - 1]" + "div/descendant-or-self::*/e[position() = last()]" "/descendant-or-self::*/*[@class and contains(" "concat(' ', normalize-space(@class), ' '), ' aclass ')]") assert xpath('e:first-child') == ( @@ -381,7 +392,7 @@ def xpath(css): assert xpath('e#myid') == ( "e[@id = 'myid']") assert xpath('e:not(:nth-child(odd))') == ( - "e[not((position() -1) mod 2 = 0 and position() >= 1)]") + "e[not((position() -1) mod 2 = 0)]") assert xpath('e:nOT(*)') == ( "e[0]") # never matches assert xpath('e f') == ( @@ -643,16 +654,19 @@ def pcss(main, *selectors, **kwargs): # FIXME: I'm not 100% sure this is right: assert pcss('li:nth-child(3n+1)') == [ 'first-li', 'fourth-li', 'seventh-li'] - assert pcss('li:nth-last-child(0)') == [ - 'seventh-li'] + assert pcss('li:nth-child(-n+3)') == [ + 'first-li', 'second-li', 'third-li'] + assert pcss('li:nth-child(-2n+4)') == ['second-li', 'fourth-li'] + assert pcss('li:nth-last-child(0)') == [] + assert pcss('li:nth-last-child(1)') == ['seventh-li'] assert pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)') == [ 'second-li', 'fourth-li', 'sixth-li'] - assert pcss('li:nth-last-child(2n+2)') == ['second-li', 'fourth-li'] + assert pcss('li:nth-last-child(2n+2)') == [ + 'second-li', 'fourth-li', 
'sixth-li'] assert pcss('ol:first-of-type') == ['first-ol'] assert pcss('ol:nth-child(1)') == [] assert pcss('ol:nth-of-type(2)') == ['second-ol'] - # FIXME: like above', '(1) or (2)? - assert pcss('ol:nth-last-of-type(1)') == ['first-ol'] + assert pcss('ol:nth-last-of-type(1)') == ['second-ol'] assert pcss('span:only-child') == ['foobar-span'] assert pcss('li div:only-child') == ['li-div'] assert pcss('div *:only-child') == ['li-div', 'foobar-span'] From fa02721c896301de4bfcfac4f705cf83f3425179 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Thu, 7 Jul 2016 23:49:26 +0200 Subject: [PATCH 072/208] Add more tests for *-last-*(an+1) --- tests/test_cssselect.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 97b9202..2203d02 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -347,9 +347,15 @@ def xpath(css): assert xpath('e:nth-last-child(2n)') == ( "*/*[name() = 'e' and (" "(last() - position() +1) mod 2 = 0 and (position() <= last() +1))]") + assert xpath('e:nth-last-child(2n+1)') == ( + "*/*[name() = 'e' and (" + "(last() - position()) mod 2 = 0 and (position() <= last()))]") assert xpath('e:nth-last-child(2n+2)') == ( "*/*[name() = 'e' and (" "(last() - position() -1) mod 2 = 0 and (position() <= last() -1))]") + assert xpath('e:nth-last-child(3n+1)') == ( + "*/*[name() = 'e' and (" + "(last() - position()) mod 3 = 0 and (position() <= last()))]") # represents the two last e elements assert xpath('e:nth-last-child(-n+2)') == ( "*/*[name() = 'e' and (" @@ -661,8 +667,12 @@ def pcss(main, *selectors, **kwargs): assert pcss('li:nth-last-child(1)') == ['seventh-li'] assert pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)') == [ 'second-li', 'fourth-li', 'sixth-li'] + assert pcss('li:nth-last-child(2n+1)') == [ + 'first-li', 'third-li', 'fifth-li', 'seventh-li'] assert pcss('li:nth-last-child(2n+2)') == [ 'second-li', 'fourth-li', 'sixth-li'] + assert 
pcss('li:nth-last-child(3n+1)') == [ + 'first-li', 'fourth-li', 'seventh-li'] assert pcss('ol:first-of-type') == ['first-ol'] assert pcss('ol:nth-child(1)') == [] assert pcss('ol:nth-of-type(2)') == ['second-ol'] From cca952942b1149075fa386f67a7f928811cf73c9 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Fri, 8 Jul 2016 00:14:58 +0200 Subject: [PATCH 073/208] Remove FIXME comment --- tests/test_cssselect.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 2203d02..2638ed6 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -657,7 +657,6 @@ def pcss(main, *selectors, **kwargs): assert pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)') == [ 'first-li', 'third-li', 'fifth-li', 'seventh-li'] assert pcss('li:nth-child(2n+4)') == ['fourth-li', 'sixth-li'] - # FIXME: I'm not 100% sure this is right: assert pcss('li:nth-child(3n+1)') == [ 'first-li', 'fourth-li', 'seventh-li'] assert pcss('li:nth-child(-n+3)') == [ From f0e100c7bfd3ae93b29a6f1af7e4388a97ddf4f8 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Tue, 12 Jul 2016 13:23:11 +0200 Subject: [PATCH 074/208] Count siblings instead of using context position() --- cssselect/xpath.py | 143 +++++++++++++++++++++++----------------- tests/test_cssselect.py | 54 +++++++++------ 2 files changed, 118 insertions(+), 79 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 49e60ce..f6ef64c 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -376,79 +376,102 @@ def xpath_nth_child_function(self, xpath, function, last=False, a, b = parse_series(function.arguments) except ValueError: raise ExpressionError("Invalid series: '%r'" % function.arguments) + + # for the siblings count node-test, + # `add_name_test` boolean is inverted and somewhat counter-intuitive: + # + # nth_of_type() calls nth_child(add_name_test=False) if add_name_test: + nodetest = '*' xpath.add_name_test() + else: + nodetest = '%s' % xpath.element + 
xpath.add_star_prefix() - # non-last - # -------- - # position() = an+b - # -> position() - b = an + + # From https://www.w3.org/TR/css3-selectors/#structural-pseudos: # - # if a < 0: - # position() - b <= 0 - # -> position() <= b + # :nth-child(an+b) + # an+b-1 siblings before + # + # :nth-last-child(an+b) + # an+b-1 siblings after + # + # :nth-of-type(an+b) + # an+b-1 siblings with the same expanded element name before + # + # :nth-last-of-type(an+b) + # an+b-1 siblings with the same expanded element name after + # + # So, + # for :nth-child and :nth-of-type + # + # count(preceding-sibling::) = an+b-1 + # + # for :nth-last-child and :nth-last-of-type + # + # count(following-sibling::) = an+b-1 # - # last - # ---- - # last() - position() = an+b -1 - # -> last() - position() - b +1 = an + # therefore, + # count(...) - (b-1) ≡ 0 (mod a) + # + # if a == 0: + # ~~~~~~~~~~ + # count(...) = b-1 # # if a < 0: - # last() - position() - b +1 <= 0 - # -> position() >= last() - b +1 + # ~~~~~~~~~ + # count(...) - b +1 <= 0 + # -> count(...) <= b-1 # - # -b +1 = -(b-1) - if last: - b = b - 1 - if b > 0: - b_neg = str(-b) + # if a > 0: + # ~~~~~~~~~ + # count(...) - b +1 >= 0 + # -> count(...) >= b-1 + + # count siblings before or after the element + if not last: + siblings_count = 'count(preceding-sibling::%s)' % nodetest else: - b_neg = '+%s' % (-b) + siblings_count = 'count(following-sibling::%s)' % nodetest + + # work with b-1 instead + b = b - 1 + + # if a == 0: + # ~~~~~~~~~~ + # count(...) 
= b-1 if a == 0: - if last: - # http://www.w3.org/TR/selectors/#nth-last-child-pseudo - # The :nth-last-child(an+b) pseudo-class notation represents - # an element that has an+b-1 siblings after it in the document tree - # - # last() - position() = an+b-1 - # -> position() = last() -b +1 (for a==0) - # - if b == 0: - b = 'last()' - else: - b = 'last() %s' % b_neg - return xpath.add_condition('position() = %s' % b) - if a != 1: - # last() - position() - b +1 = an - if last: - left = 'last() - position()' - # position() - b = an - else: - left = 'position()' - if b != 0: - left = '%s %s' % (left, b_neg) - if last or b != 0: - left = '(%s)' % left - expr = ['%s mod %s = 0' % (left, a)] - else: + return xpath.add_condition('%s = %s' % (siblings_count, b)) + + # special case for operations modulo 1 + if abs(a) == 1: expr = [] - if last: - if b == 0: - right = 'last()' - else: - right = 'last() %s' % b_neg - if a > 0: - expr.append('(position() <= %s)' % right) - else: - expr.append('(position() >= %s)' % right) else: - # position() > 0 so if b < 0, then position() > b - # also, position() >= 1 always - if b > 1: - if a > 0: - expr.append('position() >= %s' % b) + # count(...) - (b-1) ≡ 0 (mod a) + left = siblings_count + b_neg = -b + + # this is to simplify things like "(... 
+3) % -3" + if a != 0: + b_neg = b_neg % abs(a) + + if b_neg != 0: + if b_neg < 0: + b_neg = str(b_neg) else: - expr.append('position() <= %s' % b) + b_neg = '+%s' % (b_neg) + left = '(%s %s)' % (left, b_neg) + + expr = ['%s mod %s = 0' % (left, a)] + + if a > 0: + # siblings count is always > 0 + # so the following predicate only matter for b > 0 + if b > 0: + expr.append('%s >= %s' % (siblings_count, b)) + else: + expr.append('%s <= %s' % (siblings_count, b)) expr = ' and '.join(expr) if expr: diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 2638ed6..fd14934 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -335,37 +335,37 @@ def xpath(css): "e[@hreflang and (" "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]") assert xpath('e:nth-child(1)') == ( - "*/*[name() = 'e' and (position() = 1)]") + "*/*[name() = 'e' and (count(preceding-sibling::*) = 0)]") assert xpath('e:nth-child(3n+2)') == ( - "*/*[name() = 'e' and ((position() -2) mod 3 = 0 and position() >= 2)]") + "*/*[name() = 'e' and (" + "(count(preceding-sibling::*) +2) mod 3 = 0 and " + "count(preceding-sibling::*) >= 1)]") assert xpath('e:nth-child(3n-2)') == ( - "*/*[name() = 'e' and ((position() +2) mod 3 = 0)]") + "*/*[name() = 'e' and (" + "count(preceding-sibling::*) mod 3 = 0)]") assert xpath('e:nth-child(-n+6)') == ( - "*/*[name() = 'e' and ((position() -6) mod -1 = 0 and position() <= 6)]") + "*/*[name() = 'e' and (count(preceding-sibling::*) <= 5)]") assert xpath('e:nth-last-child(1)') == ( - "*/*[name() = 'e' and (position() = last())]") + "*/*[name() = 'e' and (count(following-sibling::*) = 0)]") assert xpath('e:nth-last-child(2n)') == ( - "*/*[name() = 'e' and (" - "(last() - position() +1) mod 2 = 0 and (position() <= last() +1))]") + "*/*[name() = 'e' and ((count(following-sibling::*) +1) mod 2 = 0)]") assert xpath('e:nth-last-child(2n+1)') == ( - "*/*[name() = 'e' and (" - "(last() - position()) mod 2 = 0 and (position() <= last()))]") + "*/*[name() 
= 'e' and (count(following-sibling::*) mod 2 = 0)]") assert xpath('e:nth-last-child(2n+2)') == ( "*/*[name() = 'e' and (" - "(last() - position() -1) mod 2 = 0 and (position() <= last() -1))]") + "(count(following-sibling::*) +1) mod 2 = 0 and " + "count(following-sibling::*) >= 1)]") assert xpath('e:nth-last-child(3n+1)') == ( - "*/*[name() = 'e' and (" - "(last() - position()) mod 3 = 0 and (position() <= last()))]") + "*/*[name() = 'e' and (count(following-sibling::*) mod 3 = 0)]") # represents the two last e elements assert xpath('e:nth-last-child(-n+2)') == ( - "*/*[name() = 'e' and (" - "(last() - position() -1) mod -1 = 0 and (position() >= last() -1))]") + "*/*[name() = 'e' and (count(following-sibling::*) <= 1)]") assert xpath('e:nth-of-type(1)') == ( - "*/e[position() = 1]") + "*/e[count(preceding-sibling::e) = 0]") assert xpath('e:nth-last-of-type(1)') == ( - "*/e[position() = last()]") + "*/e[count(following-sibling::e) = 0]") assert xpath('div e:nth-last-of-type(1) .aclass') == ( - "div/descendant-or-self::*/e[position() = last()]" + "div/descendant-or-self::*/e[count(following-sibling::e) = 0]" "/descendant-or-self::*/*[@class and contains(" "concat(' ', normalize-space(@class), ' '), ' aclass ')]") assert xpath('e:first-child') == ( @@ -398,7 +398,7 @@ def xpath(css): assert xpath('e#myid') == ( "e[@id = 'myid']") assert xpath('e:not(:nth-child(odd))') == ( - "e[not((position() -1) mod 2 = 0)]") + "e[not(count(preceding-sibling::*) mod 2 = 0)]") assert xpath('e:nOT(*)') == ( "e[0]") # never matches assert xpath('e f') == ( @@ -409,6 +409,8 @@ def xpath(css): "e/following-sibling::*[name() = 'f' and (position() = 1)]") assert xpath('e ~ f') == ( "e/following-sibling::f") + assert xpath('e ~ f:nth-child(3)') == ( + "e/following-sibling::*[name() = 'f' and (count(preceding-sibling::*) = 2)]") assert xpath('div#container p') == ( "div[@id = 'container']/descendant-or-self::*/p") @@ -649,7 +651,8 @@ def pcss(main, *selectors, **kwargs): assert 
pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [ 'second-li', 'li-div'] assert pcss(':lang("e")', html_only=True) == [] - assert pcss('li:nth-child(3)') == ['third-li'] + assert pcss('li:nth-child(3)', + '#first-li ~ :nth-child(3)') == ['third-li'] assert pcss('li:nth-child(10)') == [] assert pcss('li:nth-child(2n)', 'li:nth-child(even)', 'li:nth-child(2n+0)') == [ @@ -676,6 +679,17 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol:nth-child(1)') == [] assert pcss('ol:nth-of-type(2)') == ['second-ol'] assert pcss('ol:nth-last-of-type(1)') == ['second-ol'] + + # "+" and "~" tests + assert pcss('ol#first-ol li + li:nth-child(4)') == ['fourth-li'] + assert pcss('li + li:nth-child(1)') == [] + assert pcss('li ~ li:nth-child(2n+1)') == [ + 'third-li', 'fifth-li', 'seventh-li' + ] # all but the first + assert pcss('li ~ li:nth-last-child(2n+1)') == [ + 'third-li', 'fifth-li', 'seventh-li' + ] # all but the first + assert pcss('span:only-child') == ['foobar-span'] assert pcss('li div:only-child') == ['li-div'] assert pcss('div *:only-child') == ['li-div', 'foobar-span'] @@ -716,6 +730,8 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol :Not(li[class])') == [ 'first-li', 'second-li', 'li-div', 'fifth-li', 'sixth-li', 'seventh-li'] + assert pcss('ol.a.b.c > li.c:nth-child(3)') == ['third-li'] + # Invalid characters in XPath element names, should not crash assert pcss(r'di\a0 v', r'div\[') == [] assert pcss(r'[h\a0 ref]', r'[h\]ref]') == [] From d86287dc211b5b75c549aa2febb0ad4ece0ead02 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Tue, 12 Jul 2016 16:21:06 +0200 Subject: [PATCH 075/208] Remove '*/'-prefix and use siblings counts for :only-* pseudo-classes --- cssselect/xpath.py | 25 ++++++------------- tests/test_cssselect.py | 55 +++++++++++++++++++---------------------- 2 files changed, 33 insertions(+), 47 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index f6ef64c..ad6decb 100644 --- a/cssselect/xpath.py +++ 
b/cssselect/xpath.py @@ -351,7 +351,7 @@ def xpath_element(self, selector): def xpath_descendant_combinator(self, left, right): """right is a child, grand-child or further descendant of left""" - return left.join('/descendant-or-self::*/', right) + return left.join('/descendant::', right) def xpath_child_combinator(self, left, right): """right is an immediate child of left""" @@ -383,12 +383,9 @@ def xpath_nth_child_function(self, xpath, function, last=False, # nth_of_type() calls nth_child(add_name_test=False) if add_name_test: nodetest = '*' - xpath.add_name_test() else: nodetest = '%s' % xpath.element - xpath.add_star_prefix() - # From https://www.w3.org/TR/css3-selectors/#structural-pseudos: # # :nth-child(an+b) @@ -522,39 +519,31 @@ def xpath_root_pseudo(self, xpath): return xpath.add_condition("not(parent::*)") def xpath_first_child_pseudo(self, xpath): - xpath.add_star_prefix() - xpath.add_name_test() - return xpath.add_condition('position() = 1') + return xpath.add_condition('count(preceding-sibling::*) = 0') def xpath_last_child_pseudo(self, xpath): - xpath.add_star_prefix() - xpath.add_name_test() - return xpath.add_condition('position() = last()') + return xpath.add_condition('count(following-sibling::*) = 0') def xpath_first_of_type_pseudo(self, xpath): if xpath.element == '*': raise ExpressionError( "*:first-of-type is not implemented") - xpath.add_star_prefix() - return xpath.add_condition('position() = 1') + return xpath.add_condition('count(preceding-sibling::%s) = 0' % xpath.element) def xpath_last_of_type_pseudo(self, xpath): if xpath.element == '*': raise ExpressionError( "*:last-of-type is not implemented") - xpath.add_star_prefix() - return xpath.add_condition('position() = last()') + return xpath.add_condition('count(following-sibling::%s) = 0' % xpath.element) def xpath_only_child_pseudo(self, xpath): - xpath.add_name_test() - xpath.add_star_prefix() - return xpath.add_condition('last() = 1') + return 
xpath.add_condition('count(parent::*/child::*) = 1') def xpath_only_of_type_pseudo(self, xpath): if xpath.element == '*': raise ExpressionError( "*:only-of-type is not implemented") - return xpath.add_condition('last() = 1') + return xpath.add_condition('count(parent::*/child::%s) = 1' % xpath.element) def xpath_empty_pseudo(self, xpath): return xpath.add_condition("not(*) and not(string-length())") diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index fd14934..fe564c0 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -335,51 +335,48 @@ def xpath(css): "e[@hreflang and (" "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]") assert xpath('e:nth-child(1)') == ( - "*/*[name() = 'e' and (count(preceding-sibling::*) = 0)]") + "e[count(preceding-sibling::*) = 0]") assert xpath('e:nth-child(3n+2)') == ( - "*/*[name() = 'e' and (" - "(count(preceding-sibling::*) +2) mod 3 = 0 and " - "count(preceding-sibling::*) >= 1)]") + "e[(count(preceding-sibling::*) +2) mod 3 = 0 and " + "count(preceding-sibling::*) >= 1]") assert xpath('e:nth-child(3n-2)') == ( - "*/*[name() = 'e' and (" - "count(preceding-sibling::*) mod 3 = 0)]") + "e[count(preceding-sibling::*) mod 3 = 0]") assert xpath('e:nth-child(-n+6)') == ( - "*/*[name() = 'e' and (count(preceding-sibling::*) <= 5)]") + "e[count(preceding-sibling::*) <= 5]") assert xpath('e:nth-last-child(1)') == ( - "*/*[name() = 'e' and (count(following-sibling::*) = 0)]") + "e[count(following-sibling::*) = 0]") assert xpath('e:nth-last-child(2n)') == ( - "*/*[name() = 'e' and ((count(following-sibling::*) +1) mod 2 = 0)]") + "e[(count(following-sibling::*) +1) mod 2 = 0]") assert xpath('e:nth-last-child(2n+1)') == ( - "*/*[name() = 'e' and (count(following-sibling::*) mod 2 = 0)]") + "e[count(following-sibling::*) mod 2 = 0]") assert xpath('e:nth-last-child(2n+2)') == ( - "*/*[name() = 'e' and (" - "(count(following-sibling::*) +1) mod 2 = 0 and " - "count(following-sibling::*) >= 1)]") + 
"e[(count(following-sibling::*) +1) mod 2 = 0 and " + "count(following-sibling::*) >= 1]") assert xpath('e:nth-last-child(3n+1)') == ( - "*/*[name() = 'e' and (count(following-sibling::*) mod 3 = 0)]") + "e[count(following-sibling::*) mod 3 = 0]") # represents the two last e elements assert xpath('e:nth-last-child(-n+2)') == ( - "*/*[name() = 'e' and (count(following-sibling::*) <= 1)]") + "e[count(following-sibling::*) <= 1]") assert xpath('e:nth-of-type(1)') == ( - "*/e[count(preceding-sibling::e) = 0]") + "e[count(preceding-sibling::e) = 0]") assert xpath('e:nth-last-of-type(1)') == ( - "*/e[count(following-sibling::e) = 0]") + "e[count(following-sibling::e) = 0]") assert xpath('div e:nth-last-of-type(1) .aclass') == ( - "div/descendant-or-self::*/e[count(following-sibling::e) = 0]" - "/descendant-or-self::*/*[@class and contains(" + "div/descendant::e[count(following-sibling::e) = 0]" + "/descendant::*[@class and contains(" "concat(' ', normalize-space(@class), ' '), ' aclass ')]") assert xpath('e:first-child') == ( - "*/*[name() = 'e' and (position() = 1)]") + "e[count(preceding-sibling::*) = 0]") assert xpath('e:last-child') == ( - "*/*[name() = 'e' and (position() = last())]") + "e[count(following-sibling::*) = 0]") assert xpath('e:first-of-type') == ( - "*/e[position() = 1]") + "e[count(preceding-sibling::e) = 0]") assert xpath('e:last-of-type') == ( - "*/e[position() = last()]") + "e[count(following-sibling::e) = 0]") assert xpath('e:only-child') == ( - "*/*[name() = 'e' and (last() = 1)]") + "e[count(parent::*/child::*) = 1]") assert xpath('e:only-of-type') == ( - "e[last() = 1]") + "e[count(parent::*/child::e) = 1]") assert xpath('e:empty') == ( "e[not(*) and not(string-length())]") assert xpath('e:EmPTY') == ( @@ -402,7 +399,7 @@ def xpath(css): assert xpath('e:nOT(*)') == ( "e[0]") # never matches assert xpath('e f') == ( - "e/descendant-or-self::*/f") + "e/descendant::f") assert xpath('e > f') == ( "e/f") assert xpath('e + f') == ( @@ -410,9 +407,9 @@ 
def xpath(css): assert xpath('e ~ f') == ( "e/following-sibling::f") assert xpath('e ~ f:nth-child(3)') == ( - "e/following-sibling::*[name() = 'f' and (count(preceding-sibling::*) = 2)]") + "e/following-sibling::f[count(preceding-sibling::*) = 2]") assert xpath('div#container p') == ( - "div[@id = 'container']/descendant-or-self::*/p") + "div[@id = 'container']/descendant::p") # Invalid characters in XPath element names assert xpath(r'di\a0 v') == ( @@ -538,7 +535,7 @@ def xpath(css): assert xpath('::text-node') == "descendant-or-self::*/text()" assert xpath('::attr-href') == "descendant-or-self::*/@href" assert xpath('p img::attr(src)') == ( - "descendant-or-self::p/descendant-or-self::*/img/@src") + "descendant-or-self::p/descendant::img/@src") def test_series(self): def series(css): From ae09a4c409ed2003273383aabaee53e1a8515015 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Tue, 12 Jul 2016 16:59:54 +0200 Subject: [PATCH 076/208] Simplify a/b if branches --- cssselect/xpath.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index ad6decb..908b226 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -441,23 +441,22 @@ def xpath_nth_child_function(self, xpath, function, last=False, if a == 0: return xpath.add_condition('%s = %s' % (siblings_count, b)) - # special case for operations modulo 1 + # operations modulo 1 or -1 are simpler, one only needs to verify: + # count(...) - (b-1) = 0, 1, 2, 3, etc., i.e. count(...) >= (b-1) + # or + # count(...) - (b-1) = 0, -1, -2, -3, etc., , i.e. count(...) <= (b-1) if abs(a) == 1: expr = [] else: # count(...) - (b-1) ≡ 0 (mod a) left = siblings_count - b_neg = -b - # this is to simplify things like "(... +3) % -3" - if a != 0: - b_neg = b_neg % abs(a) + # use modulo on 2nd term -(b-1) to simplify things like "(... 
+6) % -3", + # and also make it positive with |a| + b_neg = (-b) % abs(a) if b_neg != 0: - if b_neg < 0: - b_neg = str(b_neg) - else: - b_neg = '+%s' % (b_neg) + b_neg = '+%s' % (b_neg) left = '(%s %s)' % (left, b_neg) expr = ['%s mod %s = 0' % (left, a)] From 7fdcf083fa8632da8a7e54ee57a22a6cabae8e30 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Tue, 12 Jul 2016 17:27:38 +0200 Subject: [PATCH 077/208] Correct comment --- cssselect/xpath.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 908b226..535b6d0 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -462,8 +462,8 @@ def xpath_nth_child_function(self, xpath, function, last=False, expr = ['%s mod %s = 0' % (left, a)] if a > 0: - # siblings count is always > 0 - # so the following predicate only matter for b > 0 + # siblings count is always >= 0, + # so the following predicate only matters for b > 0 if b > 0: expr.append('%s >= %s' % (siblings_count, b)) else: From f0c56955e11bb58d825f884a14008485ee37b998 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Sat, 16 Jul 2016 00:10:56 +0200 Subject: [PATCH 078/208] Rearrange a/b branches and add "early-exit" conditions --- cssselect/xpath.py | 94 ++++++++++++++++++++++++++--------------- tests/test_cssselect.py | 42 ++++++++++++++++-- 2 files changed, 97 insertions(+), 39 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 535b6d0..418413c 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -377,15 +377,6 @@ def xpath_nth_child_function(self, xpath, function, last=False, except ValueError: raise ExpressionError("Invalid series: '%r'" % function.arguments) - # for the siblings count node-test, - # `add_name_test` boolean is inverted and somewhat counter-intuitive: - # - # nth_of_type() calls nth_child(add_name_test=False) - if add_name_test: - nodetest = '*' - else: - nodetest = '%s' % xpath.element - # From 
https://www.w3.org/TR/css3-selectors/#structural-pseudos: # # :nth-child(an+b) @@ -426,52 +417,85 @@ def xpath_nth_child_function(self, xpath, function, last=False, # count(...) - b +1 >= 0 # -> count(...) >= b-1 + # work with b-1 instead + b_min_1 = b - 1 + + # early-exit condition 1: + # ~~~~~~~~~~~~~~~~~~~~~~~ + # for a == 1, nth-*(an+b) means n+b-1 siblings before/after, + # and since n ∈ {0, 1, 2, ...}, if b-1<=0, + # there is always an "n" matching any number of siblings (maybe none) + if a == 1 and b_min_1 <=0: + return xpath + + # early-exit condition 2: + # ~~~~~~~~~~~~~~~~~~~~~~~ + # an+b-1 siblings with a<0 and (b-1)<0 is not possible + if a < 0 and b_min_1 < 0: + return xpath.add_condition('0') + + # `add_name_test` boolean is inverted and somewhat counter-intuitive: + # + # nth_of_type() calls nth_child(add_name_test=False) + if add_name_test: + nodetest = '*' + else: + nodetest = '%s' % xpath.element + # count siblings before or after the element if not last: siblings_count = 'count(preceding-sibling::%s)' % nodetest else: siblings_count = 'count(following-sibling::%s)' % nodetest - # work with b-1 instead - b = b - 1 - + # special case of fixed position: nth-*(0n+b) # if a == 0: # ~~~~~~~~~~ - # count(...) = b-1 + # count(***-sibling::***) = b-1 if a == 0: - return xpath.add_condition('%s = %s' % (siblings_count, b)) + return xpath.add_condition('%s = %s' % (siblings_count, b_min_1)) - # operations modulo 1 or -1 are simpler, one only needs to verify: - # count(...) - (b-1) = 0, 1, 2, 3, etc., i.e. count(...) >= (b-1) - # or - # count(...) - (b-1) = 0, -1, -2, -3, etc., , i.e. count(...) <= (b-1) - if abs(a) == 1: - expr = [] + expr = [] + + if a > 0: + # siblings count, an+b-1, is always >= 0, + # so if a>0, and (b-1)<=0, an "n" exists to satisfy this, + # therefore, the predicate is only interesting if (b-1)>0 + if b_min_1 > 0: + expr.append('%s >= %s' % (siblings_count, b_min_1)) else: - # count(...) 
- (b-1) ≡ 0 (mod a) + # if a<0, and (b-1)<0, no "n" satisfies this, + # this is tested above as an early exist condition + # otherwise, + expr.append('%s <= %s' % (siblings_count, b_min_1)) + + # operations modulo 1 or -1 are simpler, one only needs to verify: + # + # - either: + # count(***-sibling::***) - (b-1) = n = 0, 1, 2, 3, etc., + # i.e. count(***-sibling::***) >= (b-1) + # + # - or: + # count(***-sibling::***) - (b-1) = -n = 0, -1, -2, -3, etc., + # i.e. count(***-sibling::***) <= (b-1) + # we we just did above. + # + if abs(a) != 1: + # count(***-sibling::***) - (b-1) ≡ 0 (mod a) left = siblings_count - # use modulo on 2nd term -(b-1) to simplify things like "(... +6) % -3", + # apply "modulo a" on 2nd term, -(b-1), + # to simplify things like "(... +6) % -3", # and also make it positive with |a| - b_neg = (-b) % abs(a) + b_neg = (-b_min_1) % abs(a) if b_neg != 0: b_neg = '+%s' % (b_neg) left = '(%s %s)' % (left, b_neg) - expr = ['%s mod %s = 0' % (left, a)] - - if a > 0: - # siblings count is always >= 0, - # so the following predicate only matters for b > 0 - if b > 0: - expr.append('%s >= %s' % (siblings_count, b)) - else: - expr.append('%s <= %s' % (siblings_count, b)) + expr.append('%s mod %s = 0' % (left, a)) - expr = ' and '.join(expr) - if expr: - xpath.add_condition(expr) + xpath.add_condition(' and '.join(expr)) return xpath def xpath_nth_last_child_function(self, xpath, function): diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index fe564c0..cdd8d8b 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -334,15 +334,37 @@ def xpath(css): assert xpath('e[hreflang|="en"]') == ( "e[@hreflang and (" "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]") + + # --- nth-* and nth-last-* ------------------------------------- assert xpath('e:nth-child(1)') == ( "e[count(preceding-sibling::*) = 0]") + + # always true + assert xpath('e:nth-child(n)') == ( + "e") + assert xpath('e:nth-child(n+1)') == ( + "e") + # always true 
too + assert xpath('e:nth-child(n-10)') == ( + "e") + # b=2 is the limit... + assert xpath('e:nth-child(n+2)') == ( + "e[count(preceding-sibling::*) >= 1]") + # always false + assert xpath('e:nth-child(-n)') == ( + "e[0]") + # equivalent to first child + assert xpath('e:nth-child(-n+1)') == ( + "e[count(preceding-sibling::*) <= 0]") + assert xpath('e:nth-child(3n+2)') == ( - "e[(count(preceding-sibling::*) +2) mod 3 = 0 and " - "count(preceding-sibling::*) >= 1]") + "e[count(preceding-sibling::*) >= 1 and " + "(count(preceding-sibling::*) +2) mod 3 = 0]") assert xpath('e:nth-child(3n-2)') == ( "e[count(preceding-sibling::*) mod 3 = 0]") assert xpath('e:nth-child(-n+6)') == ( "e[count(preceding-sibling::*) <= 5]") + assert xpath('e:nth-last-child(1)') == ( "e[count(following-sibling::*) = 0]") assert xpath('e:nth-last-child(2n)') == ( @@ -350,13 +372,14 @@ def xpath(css): assert xpath('e:nth-last-child(2n+1)') == ( "e[count(following-sibling::*) mod 2 = 0]") assert xpath('e:nth-last-child(2n+2)') == ( - "e[(count(following-sibling::*) +1) mod 2 = 0 and " - "count(following-sibling::*) >= 1]") + "e[count(following-sibling::*) >= 1 and " + "(count(following-sibling::*) +1) mod 2 = 0]") assert xpath('e:nth-last-child(3n+1)') == ( "e[count(following-sibling::*) mod 3 = 0]") # represents the two last e elements assert xpath('e:nth-last-child(-n+2)') == ( "e[count(following-sibling::*) <= 1]") + assert xpath('e:nth-of-type(1)') == ( "e[count(preceding-sibling::e) = 0]") assert xpath('e:nth-last-of-type(1)') == ( @@ -365,6 +388,7 @@ def xpath(css): "div/descendant::e[count(following-sibling::e) = 0]" "/descendant::*[@class and contains(" "concat(' ', normalize-space(@class), ' '), ' aclass ')]") + assert xpath('e:first-child') == ( "e[count(preceding-sibling::*) = 0]") assert xpath('e:last-child') == ( @@ -648,6 +672,16 @@ def pcss(main, *selectors, **kwargs): assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [ 'second-li', 'li-div'] assert pcss(':lang("e")', 
html_only=True) == [] + + # --- nth-* and nth-last-* ------------------------------------- + + # select nothing + assert pcss('li:nth-child(-n)') == [] + # select all children + assert pcss('li:nth-child(n)') == [ + 'first-li', 'second-li', 'third-li', 'fourth-li', + 'fifth-li', 'sixth-li', 'seventh-li'] + assert pcss('li:nth-child(3)', '#first-li ~ :nth-child(3)') == ['third-li'] assert pcss('li:nth-child(10)') == [] From 10dbd58073327554599cfde453a97d8b4ba2b60f Mon Sep 17 00:00:00 2001 From: Mikhail Korobov Date: Thu, 8 Sep 2016 19:34:38 +0500 Subject: [PATCH 079/208] TST don't use unsupported pip option --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8a4af19..e666cf7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ python: - "3.5" install: - - pip install --use-mirrors lxml -e . + - pip install lxml -e . - pip install -U codecov pytest-cov - if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]]; then pip uninstall -y coverage && pip install "coverage<4"; From b5d095316acd59d0364f5d8b6cd3c997bedecf36 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Fri, 9 Sep 2016 11:16:47 +0200 Subject: [PATCH 080/208] Force py.test version before 3.0 py.test 3.0 dropped support for Python 3.2 https://github.com/pytest-dev/pytest/issues/1627 --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index a971384..7a3359a 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ envlist = py25,py26,py27,py32,py33 [testenv] deps= lxml - pytest + pytest<3 pytest-cov commands = From 16ce549f59d893fd07382e48640e957639459a52 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Fri, 9 Sep 2016 11:42:47 +0200 Subject: [PATCH 081/208] Travis: uninstall pytest before forcing version < 3 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index e666cf7..a89d5b3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,7 @@ install: - pip 
install lxml -e . - pip install -U codecov pytest-cov - if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]]; - then pip uninstall -y coverage && pip install "coverage<4"; + then pip uninstall -y coverage pytest && pip install "coverage<4" && pip install "pytest<3"; fi script: From e84f1b50803522a528314d8a39fdf5c728d0e004 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Mon, 12 Sep 2016 18:17:12 +0200 Subject: [PATCH 082/208] Revert descendant combinator change --- cssselect/xpath.py | 2 +- tests/test_cssselect.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 418413c..698748a 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -351,7 +351,7 @@ def xpath_element(self, selector): def xpath_descendant_combinator(self, left, right): """right is a child, grand-child or further descendant of left""" - return left.join('/descendant::', right) + return left.join('/descendant-or-self::*/', right) def xpath_child_combinator(self, left, right): """right is an immediate child of left""" diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index cdd8d8b..4a0bd39 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -385,8 +385,8 @@ def xpath(css): assert xpath('e:nth-last-of-type(1)') == ( "e[count(following-sibling::e) = 0]") assert xpath('div e:nth-last-of-type(1) .aclass') == ( - "div/descendant::e[count(following-sibling::e) = 0]" - "/descendant::*[@class and contains(" + "div/descendant-or-self::*/e[count(following-sibling::e) = 0]" + "/descendant-or-self::*/*[@class and contains(" "concat(' ', normalize-space(@class), ' '), ' aclass ')]") assert xpath('e:first-child') == ( @@ -423,7 +423,7 @@ def xpath(css): assert xpath('e:nOT(*)') == ( "e[0]") # never matches assert xpath('e f') == ( - "e/descendant::f") + "e/descendant-or-self::*/f") assert xpath('e > f') == ( "e/f") assert xpath('e + f') == ( @@ -433,7 +433,7 @@ def xpath(css): assert xpath('e ~ f:nth-child(3)') == ( 
"e/following-sibling::f[count(preceding-sibling::*) = 2]") assert xpath('div#container p') == ( - "div[@id = 'container']/descendant::p") + "div[@id = 'container']/descendant-or-self::*/p") # Invalid characters in XPath element names assert xpath(r'di\a0 v') == ( @@ -559,7 +559,7 @@ def xpath(css): assert xpath('::text-node') == "descendant-or-self::*/text()" assert xpath('::attr-href') == "descendant-or-self::*/@href" assert xpath('p img::attr(src)') == ( - "descendant-or-self::p/descendant::img/@src") + "descendant-or-self::p/descendant-or-self::*/img/@src") def test_series(self): def series(css): From 9a1a071e0cb30193d578087d68887cff097ad1cf Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Mon, 17 Oct 2016 12:00:32 +0200 Subject: [PATCH 083/208] Update changelog for upcoming 1.0 release --- CHANGES | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGES b/CHANGES index 5ae9a39..aac466c 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,16 @@ Changelog ========= +Version 1.0.0 +------------- + +Released on YYYY-MM-DD. + +* Add code coverage reports. +* Fix ``:nth-*(an+b)`` pseudo-classes selectors. + (except ``*:nth-child()`` which looks untranslatable to XPath 1.0.) + + Version 0.9.2 ------------- From e1b501c02289fecdb1a4f17498161a49d11d1871 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Fri, 21 Oct 2016 12:35:15 +0200 Subject: [PATCH 084/208] Set date for 1.0 release --- CHANGES | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index aac466c..94abe77 100644 --- a/CHANGES +++ b/CHANGES @@ -4,7 +4,7 @@ Changelog Version 1.0.0 ------------- -Released on YYYY-MM-DD. +Released on 2016-10-21. * Add code coverage reports. * Fix ``:nth-*(an+b)`` pseudo-classes selectors. 
From 4d59c719b2eca9062dd4deccb1e985c9c182fb37 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Fri, 21 Oct 2016 12:48:44 +0200 Subject: [PATCH 085/208] =?UTF-8?q?Bump=20version:=200.9.2=20=E2=86=92=201?= =?UTF-8?q?.0.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- cssselect/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index a674e10..426ea28 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.9.2 +current_version = 1.0.0 commit = True tag = True diff --git a/cssselect/__init__.py b/cssselect/__init__.py index ed330ac..f46a0e4 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -18,5 +18,5 @@ from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '0.9.2' +VERSION = '1.0.0' __version__ = VERSION From 9c2cdb3c601eed13cc2118a0aff8c0b83b9991d7 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Fri, 9 Sep 2016 10:53:29 +0200 Subject: [PATCH 086/208] Travis: remove py32 build after py.test>=3.0 dropped support for it See http://doc.pytest.org/en/latest/changelog.html and https://github.com/pytest-dev/pytest/pull/1627 --- .travis.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index a89d5b3..bf21f78 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,6 @@ language: python python: - "2.6" - "2.7" - - "3.2" - "3.3" - "3.4" - "3.5" @@ -11,9 +10,6 @@ python: install: - pip install lxml -e . 
- pip install -U codecov pytest-cov - - if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]]; - then pip uninstall -y coverage pytest && pip install "coverage<4" && pip install "pytest<3"; - fi script: py.test --cov-report term --cov=cssselect From f01843dcedc00804f437b7dfaff5ccae34abfa5c Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Fri, 9 Sep 2016 11:06:39 +0200 Subject: [PATCH 087/208] Remove Py3.2 classifier from setup.py --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index b4d0941..5d5ec02 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,6 @@ 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', From ecbe1a86f920d83bc76966b5538ff5875ff34b3d Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Fri, 6 Jan 2017 18:17:27 +0100 Subject: [PATCH 088/208] Add Python 3.6 build on Travis CI --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index a89d5b3..a1fcdb1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,7 @@ python: - "3.3" - "3.4" - "3.5" + - "3.6" install: - pip install lxml -e . 
From a448648cf7bf69085be53f23db0cbf9ee9dce96c Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Fri, 6 Jan 2017 18:33:31 +0100 Subject: [PATCH 089/208] Add Py3.6 classifier to setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 5d5ec02..6651017 100644 --- a/setup.py +++ b/setup.py @@ -40,6 +40,7 @@ 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', ], **extra_kwargs ) From 8bfdcc65319537b74467e6d7cc2ce616edd77425 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Fri, 6 Jan 2017 18:34:07 +0100 Subject: [PATCH 090/208] Update README --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 155149d..9ccfc4d 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ extracted as a stand-alone project. Quick facts: * Free software: BSD licensed -* Compatible with Python 2.6+ and 3.2+ +* Compatible with Python 2.6+ and 3.3+ * Latest documentation `on python.org `_ * Source, issues and pull requests `on Github `_ From df496e7be1924e27df469d7a1d77525ce501ffa8 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Tue, 10 Jan 2017 15:02:15 +0100 Subject: [PATCH 091/208] Move docs to ReadTheDocs --- README.rst | 2 +- docs/conf.py | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 9ccfc4d..587c2d7 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,7 @@ Quick facts: * Free software: BSD licensed * Compatible with Python 2.6+ and 3.3+ -* Latest documentation `on python.org `_ +* Latest documentation `on Read the Docs `_ * Source, issues and pull requests `on Github `_ * Releases `on PyPI `_ diff --git a/docs/conf.py b/docs/conf.py index b2612d0..aa897ef 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -43,7 +43,7 @@ # General information about the project. 
project = 'cssselect' -copyright = '2012, Simon Sapin' +copyright = '2012-2017, Simon Sapin, Scrapy developers' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/setup.py b/setup.py index 6651017..199ffc7 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ description= 'cssselect parses CSS3 Selectors and translates them to XPath 1.0', long_description=README, - url='https://pythonhosted.org/cssselect/', + url='https://github.com/scrapy/cssselect', license='BSD', packages=['cssselect'], classifiers=[ From 3987b7c957edc105fde0b4c022a50bd060be6afe Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Tue, 10 Jan 2017 15:40:39 +0100 Subject: [PATCH 092/208] Add automatic PyPI deploy to Travis CI config --- .travis.yml | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index e9dadc2..ca91911 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,11 @@ language: python - python: - - "2.6" - - "2.7" - - "3.3" - - "3.4" - - "3.5" - - "3.6" + - '2.6' + - '2.7' + - '3.3' + - '3.4' + - '3.5' + - '3.6' install: - pip install lxml -e . 
@@ -17,3 +16,13 @@ script: after_success: codecov + +deploy: + provider: pypi + distributions: sdist bdist_wheel + user: redapple + password: + secure: T1PBD+ocIGwHMbBHPqzu7UZxpkB0w98KtEIkNzLXNQcF7JpjugZNwz4xX2xVhi8yvUQ257VtLSKpIOT2FWxrfLrgTZKbTd6Q7V5Lf3HKzLomOKUKMAd54gsOuismE27CT/SHbexskACgwVwkyG9Y3dlG6m/ZBgqoPAGaJrScjEU= + on: + tags: true + repo: scrapy/cssselect From 5824741722f841dcf2ffe5818d0b426312e857bb Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Tue, 10 Jan 2017 15:51:31 +0100 Subject: [PATCH 093/208] Only deploy from Python 3.6 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index ca91911..5ddb1fd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,3 +26,4 @@ deploy: on: tags: true repo: scrapy/cssselect + condition: "$TRAVIS_PYTHON_VERSION == '3.6'" From 18d38aefc0334918eb8fa2b896009478a7175859 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Tue, 10 Jan 2017 16:01:41 +0100 Subject: [PATCH 094/208] Update pytest section in setup.cfg --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 270daee..b8c93b1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,7 +6,7 @@ build-dir = docs/_build [upload_sphinx] # Sphinx-PyPI-upload upload-dir = docs/_build/html -[pytest] +[tool:pytest] testpaths = tests [bdist_wheel] From ed1a15d32e97b6ec11cbeffdeeaf44be8b3e35dc Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Tue, 10 Jan 2017 15:32:09 +0100 Subject: [PATCH 095/208] Update changelog for 1.0.1 --- CHANGES | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGES b/CHANGES index 94abe77..9238537 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,14 @@ Changelog ========= +Version 1.0.1 +------------- + +Released on 2017-01-XX. + +* Add support for Python 3.6. 
+* Documentation hosted `on Read the Docs `_ + Version 1.0.0 ------------- From fee89dfe7453b58b231c2fd1d37621ac30f2450d Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Tue, 10 Jan 2017 16:05:40 +0100 Subject: [PATCH 096/208] Set release date for version 1.0.1 --- CHANGES | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 9238537..92b0371 100644 --- a/CHANGES +++ b/CHANGES @@ -4,7 +4,7 @@ Changelog Version 1.0.1 ------------- -Released on 2017-01-XX. +Released on 2017-01-10. * Add support for Python 3.6. * Documentation hosted `on Read the Docs `_ From 7b40f4e59fa7fa9da0fcae29874a9a3a5e120509 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Tue, 10 Jan 2017 16:13:30 +0100 Subject: [PATCH 097/208] =?UTF-8?q?Bump=20version:=201.0.0=20=E2=86=92=201?= =?UTF-8?q?.0.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- cssselect/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 426ea28..92c7bcb 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.0.0 +current_version = 1.0.1 commit = True tag = True diff --git a/cssselect/__init__.py b/cssselect/__init__.py index f46a0e4..3b06261 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -18,5 +18,5 @@ from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '1.0.0' +VERSION = '1.0.1' __version__ = VERSION From 386afc5f001d2c4d6742ac378db25238db8cd671 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Tue, 10 Jan 2017 16:33:54 +0100 Subject: [PATCH 098/208] Do not upload docs when deploying to PyPI from Travis CI --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 5ddb1fd..cc709f1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,6 +20,7 @@ after_success: deploy: provider: pypi distributions: sdist 
bdist_wheel + skip_upload_docs: true user: redapple password: secure: T1PBD+ocIGwHMbBHPqzu7UZxpkB0w98KtEIkNzLXNQcF7JpjugZNwz4xX2xVhi8yvUQ257VtLSKpIOT2FWxrfLrgTZKbTd6Q7V5Lf3HKzLomOKUKMAd54gsOuismE27CT/SHbexskACgwVwkyG9Y3dlG6m/ZBgqoPAGaJrScjEU= From a0e47cca81cc079d26e504d51989c12a5e79d1b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Skytt=C3=A4?= Date: Thu, 2 Mar 2017 11:41:43 +0200 Subject: [PATCH 099/208] Spelling fixes --- cssselect/xpath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 698748a..d0eb2cb 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -114,7 +114,7 @@ class GenericTranslator(object): #### #### You are welcome to hook into this to change some behavior, #### but do so at your own risks. - #### Until is has recieved a lot more work and review, + #### Until it has received a lot more work and review, #### I reserve the right to change this API in backward-incompatible ways #### with any minor version of cssselect. 
#### See https://github.com/scrapy/cssselect/pull/22 From 8b3aa08f2e7e133f2a7df4c6941d843165720ac9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Skytt=C3=A4?= Date: Thu, 2 Mar 2017 11:43:06 +0200 Subject: [PATCH 100/208] Python 3.6 invalid escape sequence deprecation fix https://docs.python.org/3/whatsnew/3.6.html#deprecated-python-behavior --- cssselect/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index d155252..fe5f53c 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -617,7 +617,7 @@ def _compile(pattern): return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match _match_whitespace = _compile(r'[ \t\r\n\f]+') -_match_number = _compile('[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)') +_match_number = _compile(r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)') _match_hash = _compile('#(?:%(nmchar)s)+') _match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*') _match_string_by_quote = { From daca23b513f755298b166b328e97c0d830a9fcf1 Mon Sep 17 00:00:00 2001 From: Hugo Date: Thu, 14 Dec 2017 17:13:19 +0200 Subject: [PATCH 101/208] Ignore IDE metadata --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 4c89f4c..b0ab86a 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ /dist /docs/_build /.coverage +.idea \ No newline at end of file From 83014a796af107b6eb934e085011ecdf85dc4c42 Mon Sep 17 00:00:00 2001 From: Hugo Date: Thu, 14 Dec 2017 17:17:49 +0200 Subject: [PATCH 102/208] Drop support for EOL Python 2.6 --- .travis.yml | 1 - README.rst | 4 ++-- cssselect/parser.py | 9 +-------- setup.py | 2 +- tox.ini | 6 +----- 5 files changed, 5 insertions(+), 17 deletions(-) diff --git a/.travis.yml b/.travis.yml index cc709f1..61edf5a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,5 @@ language: python python: - - '2.6' - '2.7' - '3.3' - '3.4' diff --git a/README.rst b/README.rst index 587c2d7..972b06b 100644 --- a/README.rst +++ b/README.rst @@ -17,9 
+17,9 @@ extracted as a stand-alone project. Quick facts: * Free software: BSD licensed -* Compatible with Python 2.6+ and 3.3+ +* Compatible with Python 2.7 and 3.3+ * Latest documentation `on Read the Docs `_ -* Source, issues and pull requests `on Github +* Source, issues and pull requests `on GitHub `_ * Releases `on PyPI `_ * Install with ``pip install cssselect`` diff --git a/cssselect/parser.py b/cssselect/parser.py index fe5f53c..dd4709a 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -358,8 +358,6 @@ def parse(css): # message = "%s at %s -> %r" % ( # e, stream.used, stream.peek()) # e.msg = message -# if sys.version_info < (2,6): -# e.message = message # e.args = tuple([message]) # raise @@ -630,12 +628,7 @@ def _compile(pattern): _sub_newline_escape =re.compile(r'\\(?:\n|\r\n|\r|\f)').sub # Same as r'\1', but faster on CPython -if hasattr(operator, 'methodcaller'): - # Python 2.6+ - _replace_simple = operator.methodcaller('group', 1) -else: - def _replace_simple(match): - return match.group(1) +_replace_simple = operator.methodcaller('group', 1) def _replace_unicode(match): codepoint = int(match.group(1), 16) diff --git a/setup.py b/setup.py index 199ffc7..032aa89 100644 --- a/setup.py +++ b/setup.py @@ -29,12 +29,12 @@ url='https://github.com/scrapy/cssselect', license='BSD', packages=['cssselect'], + python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*', classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', 'License :: OSI Approved :: BSD License', 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.3', diff --git a/tox.ini b/tox.ini index 7a3359a..a019f4e 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py25,py26,py27,py32,py33 +envlist = py27,py33,py34,py35,py36 [testenv] deps= @@ -9,7 +9,3 @@ deps= commands = py.test --cov-report term 
--cov=cssselect - -[testenv:py25] -setenv = - PIP_INSECURE = 1 From c040d86c5458547bbbf80c5fd4aa9ce771f85234 Mon Sep 17 00:00:00 2001 From: Hugo Date: Thu, 14 Dec 2017 17:19:16 +0200 Subject: [PATCH 103/208] Use 'is' to compare with None --- tests/test_cssselect.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 4a0bd39..f01aa7f 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -288,12 +288,12 @@ def get_error(css): "Expected string or ident, got ") assert get_error('[href]a') == ( "Expected selector, got ") - assert get_error('[rel=stylesheet]') == None + assert get_error('[rel=stylesheet]') is None assert get_error('[rel:stylesheet]') == ( "Operator expected, got ") assert get_error('[rel=stylesheet') == ( "Expected ']', got ") - assert get_error(':lang(fr)') == None + assert get_error(':lang(fr)') is None assert get_error(':lang(fr') == ( "Expected an argument, got ") assert get_error(':contains("foo') == ( @@ -586,8 +586,8 @@ def series(css): assert series('+n') == (1, 0) assert series('-n') == (-1, 0) assert series('5') == (0, 5) - assert series('foo') == None - assert series('n+') == None + assert series('foo') is None + assert series('n+') is None def test_lang(self): document = etree.fromstring(XMLLANG_IDS) From 1060ca1f3f1746caad8673f0c99299a389f1bc7c Mon Sep 17 00:00:00 2001 From: Hugo Date: Thu, 14 Dec 2017 17:20:08 +0200 Subject: [PATCH 104/208] Remove redundant parentheses --- cssselect/parser.py | 10 +++++----- cssselect/xpath.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index dd4709a..9bb039c 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -552,14 +552,14 @@ def parse_series(tokens): raise ValueError('String tokens not allowed in series.') s = ''.join(token.value for token in tokens).strip() if s == 'odd': - return (2, 1) + return 2, 1 elif s == 'even': - return (2, 0) 
+ return 2, 0 elif s == 'n': - return (1, 0) + return 1, 0 if 'n' not in s: # Just b - return (0, int(s)) + return 0, int(s) a, b = s.split('n', 1) if not a: a = 1 @@ -571,7 +571,7 @@ def parse_series(tokens): b = 0 else: b = int(b) - return (a, b) + return a, b #### Token objects diff --git a/cssselect/xpath.py b/cssselect/xpath.py index d0eb2cb..22cd029 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -490,7 +490,7 @@ def xpath_nth_child_function(self, xpath, function, last=False, b_neg = (-b_min_1) % abs(a) if b_neg != 0: - b_neg = '+%s' % (b_neg) + b_neg = '+%s' % b_neg left = '(%s %s)' % (left, b_neg) expr.append('%s mod %s = 0' % (left, a)) From 6a53f24f3d3118d7e0ae86b2ed7521d6370608d4 Mon Sep 17 00:00:00 2001 From: Hugo Date: Fri, 22 Dec 2017 10:55:55 +0200 Subject: [PATCH 105/208] Drop support for EOL Python 3.3 --- .travis.yml | 1 - README.rst | 2 +- setup.py | 3 +-- tox.ini | 2 +- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 61edf5a..d86d0a8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ language: python python: - '2.7' - - '3.3' - '3.4' - '3.5' - '3.6' diff --git a/README.rst b/README.rst index 972b06b..c19c6b3 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ extracted as a stand-alone project. 
Quick facts: * Free software: BSD licensed -* Compatible with Python 2.7 and 3.3+ +* Compatible with Python 2.7 and 3.4+ * Latest documentation `on Read the Docs `_ * Source, issues and pull requests `on GitHub `_ diff --git a/setup.py b/setup.py index 032aa89..243927d 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ url='https://github.com/scrapy/cssselect', license='BSD', packages=['cssselect'], - python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*', + python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*', classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', @@ -37,7 +37,6 @@ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', diff --git a/tox.ini b/tox.ini index a019f4e..194490a 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py27,py33,py34,py35,py36 +envlist = py27, py34, py35, py36 [testenv] deps= From d12b1418624faf166fdeb9db31ee95430d3c37c5 Mon Sep 17 00:00:00 2001 From: Mikhail Korobov Date: Tue, 26 Dec 2017 17:37:14 +0500 Subject: [PATCH 106/208] badges in README --- README.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.rst b/README.rst index c19c6b3..9bcd648 100644 --- a/README.rst +++ b/README.rst @@ -2,6 +2,22 @@ cssselect: CSS Selectors for Python =================================== +.. image:: https://img.shields.io/pypi/v/cssselect.svg + :target: https://pypi.python.org/pypi/cssselect + :alt: PyPI Version + +.. image:: https://img.shields.io/pypi/pyversions/cssselect.svg + :target: https://pypi.python.org/pypi/cssselect + :alt: Supported Python Versions + +.. image:: https://img.shields.io/travis/scrapy/cssselect/master.svg + :target: https://travis-ci.org/scrapy/cssselect + :alt: Build Status + +.. 
image:: https://img.shields.io/codecov/c/github/scrapy/cssselect/master.svg + :target: https://codecov.io/github/scrapy/cssselect?branch=master + :alt: Coverage report + *cssselect* parses `CSS3 Selectors`_ and translate them to `XPath 1.0`_ expressions. Such expressions can be used in lxml_ or another XPath engine to find the matching elements in an XML or HTML document. From 73344698e95ce31433fad643598365f954488722 Mon Sep 17 00:00:00 2001 From: Mikhail Korobov Date: Tue, 26 Dec 2017 17:42:16 +0500 Subject: [PATCH 107/208] DOC changelog --- CHANGES | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGES b/CHANGES index 92b0371..d8b27b6 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,15 @@ Changelog ========= +Version 1.0.2 +------------- + +Released on 2017-12-26. + +* Drop support for Python 2.6 and Python 3.3. +* Fix deprecation warning in Python 3.6. +* Minor cleanups. + Version 1.0.1 ------------- From c42886850a86565a3eda081ecb9eaffdfddb29e8 Mon Sep 17 00:00:00 2001 From: Mikhail Korobov Date: Tue, 26 Dec 2017 17:44:21 +0500 Subject: [PATCH 108/208] =?UTF-8?q?Bump=20version:=201.0.1=20=E2=86=92=201?= =?UTF-8?q?.0.2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- cssselect/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 92c7bcb..e21dbfb 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.0.1 +current_version = 1.0.2 commit = True tag = True diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 3b06261..9180b72 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -18,5 +18,5 @@ from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '1.0.1' +VERSION = '1.0.2' __version__ = VERSION From 2e1234db300f4ad7f2372f15933da4f5a084b788 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gra=C3=B1a?= Date: Wed, 
27 Dec 2017 12:13:36 -0300 Subject: [PATCH 109/208] Update travis->pypi credentials --- .travis.yml | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/.travis.yml b/.travis.yml index d86d0a8..b76297f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,27 +1,23 @@ language: python python: - - '2.7' - - '3.4' - - '3.5' - - '3.6' - +- '2.7' +- '3.4' +- '3.5' +- '3.6' install: - - pip install lxml -e . - - pip install -U codecov pytest-cov - +- pip install lxml -e . +- pip install -U codecov pytest-cov script: - py.test --cov-report term --cov=cssselect - +- py.test --cov-report term --cov=cssselect after_success: - codecov - +- codecov deploy: provider: pypi distributions: sdist bdist_wheel skip_upload_docs: true - user: redapple + user: scrapy password: - secure: T1PBD+ocIGwHMbBHPqzu7UZxpkB0w98KtEIkNzLXNQcF7JpjugZNwz4xX2xVhi8yvUQ257VtLSKpIOT2FWxrfLrgTZKbTd6Q7V5Lf3HKzLomOKUKMAd54gsOuismE27CT/SHbexskACgwVwkyG9Y3dlG6m/ZBgqoPAGaJrScjEU= + secure: UjCXD1ZfqgFcCs4ciPMJDaOQefV3ZOKZ8/dTZxcoaQlE1lr6CkaN6CfTdD50SX2M9uCNWvEcYnvs6U4SizgZ27MYzFWuHonED2alHKy4AtrxCEHD/+lGo9d18cNjLMPDZateX/lITjGiZ4rmYZNuA6wmA4P/bTmdazbSufcmMqY= on: tags: true repo: scrapy/cssselect From 720126ae39316dd21a4e03e56ccc0ba2c6a0fb24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gra=C3=B1a?= Date: Wed, 27 Dec 2017 12:39:26 -0300 Subject: [PATCH 110/208] Update changelog for 1.0.3 release --- CHANGES | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGES b/CHANGES index d8b27b6..0a0e137 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,13 @@ Changelog ========= +Version 1.0.3 +------------- + +Released on 2017-12-27. 
+ +* Fix artifact uploads to pypi + Version 1.0.2 ------------- From cb7a7e21de1ba9347d58a6a14b7c78b3de1f49ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gra=C3=B1a?= Date: Wed, 27 Dec 2017 12:39:30 -0300 Subject: [PATCH 111/208] =?UTF-8?q?Bump=20version:=201.0.2=20=E2=86=92=201?= =?UTF-8?q?.0.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- cssselect/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index e21dbfb..acb5a66 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.0.2 +current_version = 1.0.3 commit = True tag = True diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 9180b72..e9f9ce1 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -18,5 +18,5 @@ from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '1.0.2' +VERSION = '1.0.3' __version__ = VERSION From 4e90061eea44515c7c1c9e48c2b67a3a8489a692 Mon Sep 17 00:00:00 2001 From: Arthur Darcet Date: Tue, 7 Mar 2017 15:29:46 +0100 Subject: [PATCH 112/208] add a method on the Selector class, to export back the selector to css --- cssselect/parser.py | 69 ++++++++++++++++++++++++++++++++++++++--- cssselect/xpath.py | 8 +++-- tests/test_cssselect.py | 31 ++++++++++++++++++ 3 files changed, 101 insertions(+), 7 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 9bb039c..53a76bc 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -76,7 +76,7 @@ def __init__(self, tree, pseudo_element=None): #: +-------------------------+----------------+--------------------------------+ #: | Invalid pseudo-class | ``li:marker`` | ``None`` | #: +-------------------------+----------------+--------------------------------+ - #: | Functinal | ``a::foo(2)`` | ``FunctionalPseudoElement(…)`` | + #: | Functional | ``a::foo(2)`` | 
``FunctionalPseudoElement(…)`` | #: +-------------------------+----------------+--------------------------------+ #: #: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement @@ -92,6 +92,20 @@ def __repr__(self): return '%s[%r%s]' % ( self.__class__.__name__, self.parsed_tree, pseudo_element) + def css(self): + """Return a CSS representation for this selector (a string) + """ + if isinstance(self.pseudo_element, FunctionalPseudoElement): + pseudo_element = '::%s' % self.pseudo_element.css() + elif self.pseudo_element: + pseudo_element = '::%s' % self.pseudo_element + else: + pseudo_element = '' + res = '%s%s' % (self.parsed_tree.css(), pseudo_element) + if len(res) > 1: + res = res.lstrip('*') + return res + def specificity(self): """Return the specificity_ of this selector as a tuple of 3 integers. @@ -116,6 +130,9 @@ def __repr__(self): return '%s[%r.%s]' % ( self.__class__.__name__, self.selector, self.class_name) + def css(self): + return '%s.%s' % (self.selector.css(), self.class_name) + def specificity(self): a, b, c = self.selector.specificity() b += 1 @@ -151,6 +168,10 @@ def __repr__(self): def argument_types(self): return [token.type for token in self.arguments] + def css(self): + args = ''.join(token.css() for token in self.arguments) + return '%s(%s)' % (self.name, args) + def specificity(self): a, b, c = self.selector.specificity() b += 1 @@ -174,6 +195,10 @@ def __repr__(self): def argument_types(self): return [token.type for token in self.arguments] + def css(self): + args = ''.join(token.css() for token in self.arguments) + return '%s:%s(%s)' % (self.selector.css(), self.name, args) + def specificity(self): a, b, c = self.selector.specificity() b += 1 @@ -192,6 +217,9 @@ def __repr__(self): return '%s[%r:%s]' % ( self.__class__.__name__, self.selector, self.ident) + def css(self): + return '%s:%s' % (self.selector.css(), self.ident) + def specificity(self): a, b, c = self.selector.specificity() b += 1 @@ -210,6 +238,10 
@@ def __repr__(self): return '%s[%r:not(%r)]' % ( self.__class__.__name__, self.selector, self.subselector) + def css(self): + return '%s:not(%s)' % (self.selector.css(), + self.subselector.css()) + def specificity(self): a1, b1, c1 = self.selector.specificity() a2, b2, c2 = self.subselector.specificity() @@ -238,7 +270,20 @@ def __repr__(self): else: return '%s[%r[%s %s %r]]' % ( self.__class__.__name__, self.selector, attrib, - self.operator, self.value) + self.operator, self.value.value) + + def css(self): + if self.namespace: + attrib = '%s|%s' % (self.namespace, self.attrib) + else: + attrib = self.attrib + + if self.operator == 'exists': + op = attrib + else: + op = '%s%s%s' % (attrib, self.operator, self.value.css()) + + return '%s[%s]' % (self.selector.css(), op) def specificity(self): a, b, c = self.selector.specificity() @@ -258,10 +303,13 @@ def __init__(self, namespace=None, element=None): self.element = element def __repr__(self): + return '%s[%s]' % (self.__class__.__name__, self.css()) + + def css(self): element = self.element or '*' if self.namespace: element = '%s|%s' % (self.namespace, element) - return '%s[%s]' % (self.__class__.__name__, element) + return element def specificity(self): if self.element: @@ -282,6 +330,9 @@ def __repr__(self): return '%s[%r#%s]' % ( self.__class__.__name__, self.selector, self.id) + def css(self): + return '%s#%s' % (self.selector.css(), self.id) + def specificity(self): a, b, c = self.selector.specificity() a += 1 @@ -303,6 +354,10 @@ def __repr__(self): return '%s[%r %s %r]' % ( self.__class__.__name__, self.selector, comb, self.subselector) + def css(self): + return '%s %s %s' % (self.selector.css(), + self.combinator, self.subselector.css()) + def specificity(self): a1, b1, c1 = self.selector.specificity() a2, b2, c2 = self.subselector.specificity() @@ -536,7 +591,7 @@ def parse_attrib(selector, stream): if next != ('DELIM', ']'): raise SelectorSyntaxError( "Expected ']', got %s" % (next,)) - return 
Attrib(selector, namespace, attrib, op, value.value) + return Attrib(selector, namespace, attrib, op, value) def parse_series(tokens): @@ -591,6 +646,12 @@ def is_delim(self, *values): type = property(operator.itemgetter(0)) value = property(operator.itemgetter(1)) + def css(self): + if self.type == 'STRING': + return repr(self.value) + else: + return self.value + class EOFToken(Token): def __new__(cls, pos): diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 22cd029..ad2ccbd 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -308,10 +308,12 @@ def xpath_attrib(self, selector): attrib = '@' + name else: attrib = 'attribute::*[name() = %s]' % self.xpath_literal(name) - if self.lower_case_attribute_values: - value = selector.value.lower() + if selector.value is None: + value = None + elif self.lower_case_attribute_values: + value = selector.value.value.lower() else: - value = selector.value + value = selector.value.value return method(self.xpath(selector.selector), attrib, value) def xpath_class(self, class_selector): diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index f01aa7f..96e63f3 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -244,6 +244,37 @@ def specificity(css): assert specificity('#lorem + foo#ipsum:first-child > bar:first-line' ) == (2, 1, 3) + def test_css_export(self): + def css2css(css, res=None): + selectors = parse(css) + assert len(selectors) == 1 + assert selectors[0].css() == (res or css) + + css2css('*') + css2css(' foo', 'foo') + css2css('Foo', 'Foo') + css2css(':empty ', ':empty') + css2css(':before', '::before') + css2css(':beFOre', '::before') + css2css('*:before', '::before') + css2css(':nth-child(2)') + css2css('.bar') + css2css('[baz]') + css2css('[baz="4"]', "[baz='4']") + css2css('[baz^="4"]', "[baz^='4']") + css2css('#lipsum') + css2css(':not(*)') + css2css(':not(foo)') + css2css(':not(*.foo)') + css2css(':not(*[foo])') + css2css(':not(*:empty)') + css2css(':not(*#foo)') + 
css2css('foo:empty') + css2css('foo::before') + css2css('foo:empty::before') + css2css('::name(arg + "val" - 3)', "::name(arg+'val'-3)") + css2css('#lorem + foo#ipsum:first-child > bar::first-line') + def test_parse_errors(self): def get_error(css): try: From 8d0ff3e39c9c4806277e00ae517ab7da3b41d8f0 Mon Sep 17 00:00:00 2001 From: Arthur Darcet Date: Mon, 11 Feb 2019 17:01:21 +0100 Subject: [PATCH 113/208] rename method to .canonical, and correctly strip extra * in :not selectors (otherwise edge ignores them) --- AUTHORS | 1 + cssselect/parser.py | 49 +++++++++++++++++++++++------------------ tests/test_cssselect.py | 12 +++++----- 3 files changed, 35 insertions(+), 27 deletions(-) diff --git a/AUTHORS b/AUTHORS index 70ca409..66dcc22 100644 --- a/AUTHORS +++ b/AUTHORS @@ -10,3 +10,4 @@ Simon Sapin Stefan Behnel Thomas Grainger Varialus +Arthur Darcet diff --git a/cssselect/parser.py b/cssselect/parser.py index 53a76bc..1aed6f8 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -92,16 +92,16 @@ def __repr__(self): return '%s[%r%s]' % ( self.__class__.__name__, self.parsed_tree, pseudo_element) - def css(self): + def canonical(self): """Return a CSS representation for this selector (a string) """ if isinstance(self.pseudo_element, FunctionalPseudoElement): - pseudo_element = '::%s' % self.pseudo_element.css() + pseudo_element = '::%s' % self.pseudo_element.canonical() elif self.pseudo_element: pseudo_element = '::%s' % self.pseudo_element else: pseudo_element = '' - res = '%s%s' % (self.parsed_tree.css(), pseudo_element) + res = '%s%s' % (self.parsed_tree.canonical(), pseudo_element) if len(res) > 1: res = res.lstrip('*') return res @@ -130,8 +130,8 @@ def __repr__(self): return '%s[%r.%s]' % ( self.__class__.__name__, self.selector, self.class_name) - def css(self): - return '%s.%s' % (self.selector.css(), self.class_name) + def canonical(self): + return '%s.%s' % (self.selector.canonical(), self.class_name) def specificity(self): a, b, c = 
self.selector.specificity() @@ -168,7 +168,7 @@ def __repr__(self): def argument_types(self): return [token.type for token in self.arguments] - def css(self): + def canonical(self): args = ''.join(token.css() for token in self.arguments) return '%s(%s)' % (self.name, args) @@ -195,9 +195,9 @@ def __repr__(self): def argument_types(self): return [token.type for token in self.arguments] - def css(self): + def canonical(self): args = ''.join(token.css() for token in self.arguments) - return '%s:%s(%s)' % (self.selector.css(), self.name, args) + return '%s:%s(%s)' % (self.selector.canonical(), self.name, args) def specificity(self): a, b, c = self.selector.specificity() @@ -217,8 +217,8 @@ def __repr__(self): return '%s[%r:%s]' % ( self.__class__.__name__, self.selector, self.ident) - def css(self): - return '%s:%s' % (self.selector.css(), self.ident) + def canonical(self): + return '%s:%s' % (self.selector.canonical(), self.ident) def specificity(self): a, b, c = self.selector.specificity() @@ -238,9 +238,11 @@ def __repr__(self): return '%s[%r:not(%r)]' % ( self.__class__.__name__, self.selector, self.subselector) - def css(self): - return '%s:not(%s)' % (self.selector.css(), - self.subselector.css()) + def canonical(self): + subsel = self.subselector.canonical() + if len(subsel) > 1: + subsel = subsel.lstrip('*') + return '%s:not(%s)' % (self.selector.canonical(), subsel) def specificity(self): a1, b1, c1 = self.selector.specificity() @@ -272,7 +274,7 @@ def __repr__(self): self.__class__.__name__, self.selector, attrib, self.operator, self.value.value) - def css(self): + def canonical(self): if self.namespace: attrib = '%s|%s' % (self.namespace, self.attrib) else: @@ -283,7 +285,7 @@ def css(self): else: op = '%s%s%s' % (attrib, self.operator, self.value.css()) - return '%s[%s]' % (self.selector.css(), op) + return '%s[%s]' % (self.selector.canonical(), op) def specificity(self): a, b, c = self.selector.specificity() @@ -303,9 +305,9 @@ def __init__(self, 
namespace=None, element=None): self.element = element def __repr__(self): - return '%s[%s]' % (self.__class__.__name__, self.css()) + return '%s[%s]' % (self.__class__.__name__, self.canonical()) - def css(self): + def canonical(self): element = self.element or '*' if self.namespace: element = '%s|%s' % (self.namespace, element) @@ -330,8 +332,8 @@ def __repr__(self): return '%s[%r#%s]' % ( self.__class__.__name__, self.selector, self.id) - def css(self): - return '%s#%s' % (self.selector.css(), self.id) + def canonical(self): + return '%s#%s' % (self.selector.canonical(), self.id) def specificity(self): a, b, c = self.selector.specificity() @@ -354,9 +356,12 @@ def __repr__(self): return '%s[%r %s %r]' % ( self.__class__.__name__, self.selector, comb, self.subselector) - def css(self): - return '%s %s %s' % (self.selector.css(), - self.combinator, self.subselector.css()) + def canonical(self): + subsel = self.subselector.canonical() + if len(subsel) > 1: + subsel = subsel.lstrip('*') + return '%s %s %s' % ( + self.selector.canonical(), self.combinator, subsel) def specificity(self): a1, b1, c1 = self.selector.specificity() diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 96e63f3..0819f25 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -248,7 +248,7 @@ def test_css_export(self): def css2css(css, res=None): selectors = parse(css) assert len(selectors) == 1 - assert selectors[0].css() == (res or css) + assert selectors[0].canonical() == (res or css) css2css('*') css2css(' foo', 'foo') @@ -262,18 +262,20 @@ def css2css(css, res=None): css2css('[baz]') css2css('[baz="4"]', "[baz='4']") css2css('[baz^="4"]', "[baz^='4']") + css2css("[ns|attr='4']") css2css('#lipsum') css2css(':not(*)') css2css(':not(foo)') - css2css(':not(*.foo)') - css2css(':not(*[foo])') - css2css(':not(*:empty)') - css2css(':not(*#foo)') + css2css(':not(*.foo)', ':not(.foo)') + css2css(':not(*[foo])', ':not([foo])') + css2css(':not(:empty)') + 
css2css(':not(#foo)') css2css('foo:empty') css2css('foo::before') css2css('foo:empty::before') css2css('::name(arg + "val" - 3)', "::name(arg+'val'-3)") css2css('#lorem + foo#ipsum:first-child > bar::first-line') + css2css('foo > *') def test_parse_errors(self): def get_error(css): From f52371a5821f6472129e4c47b4fdd54ed3a8e1f4 Mon Sep 17 00:00:00 2001 From: sortafreel Date: Sat, 15 Jun 2019 22:55:12 +0300 Subject: [PATCH 114/208] css "^" as "." xpath symbol to use css "^ >" to get immediate children --- .gitignore | 4 +++- cssselect/parser.py | 4 ++-- cssselect/xpath.py | 11 +++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index b0ab86a..4436e5d 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,6 @@ /dist /docs/_build /.coverage -.idea \ No newline at end of file +.idea +/venv +*.vscode \ No newline at end of file diff --git a/cssselect/parser.py b/cssselect/parser.py index 9bb039c..61358d3 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -400,8 +400,8 @@ def parse_simple_selector(stream, inside_negation=False): stream.skip_whitespace() selector_start = len(stream.used) peek = stream.peek() - if peek.type == 'IDENT' or peek == ('DELIM', '*'): - if peek.type == 'IDENT': + if peek.type == 'IDENT' or peek == ('DELIM', '*') or peek == ('DELIM', '^'): + if peek.type == 'IDENT' or peek == ('DELIM', '^'): namespace = stream.next().value else: stream.next() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 22cd029..4e5f85a 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -187,6 +187,14 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'): The equivalent XPath 1.0 expression as an Unicode string. 
""" + # no prefix if css immediate children (example: css "^ > div" to xpath "./div") + child_re = r'^[ \t\r\n\f]*\^[ \t\r\n\f]*>' + if re.match(child_re, css): + prefix = '' + # prefix = 'child::' + # css = re.sub(child_re, '', css) + # print('*' * 50) + # print(css) return ' | '.join(self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True) for selector in parse(css)) @@ -332,6 +340,9 @@ def xpath_element(self, selector): if not element: element = '*' safe = True + if element == '^': + element = '.' + safe = True else: safe = is_safe_name(element) if self.lower_case_element_names: From 053f2669eef8c7942346ee7ee101777f0e267cbc Mon Sep 17 00:00:00 2001 From: sortafreel Date: Sun, 16 Jun 2019 00:27:52 +0300 Subject: [PATCH 115/208] Implement CSS immediate children --- cssselect/parser.py | 4 ++-- cssselect/xpath.py | 25 ++++++++++++++----------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 61358d3..11e9ff5 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -400,8 +400,8 @@ def parse_simple_selector(stream, inside_negation=False): stream.skip_whitespace() selector_start = len(stream.used) peek = stream.peek() - if peek.type == 'IDENT' or peek == ('DELIM', '*') or peek == ('DELIM', '^'): - if peek.type == 'IDENT' or peek == ('DELIM', '^'): + if peek.type == 'IDENT' or peek == ('DELIM', '*') or peek == ('DELIM', '<'): + if peek.type == 'IDENT' or peek == ('DELIM', '<'): namespace = stream.next().value else: stream.next() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 4e5f85a..e71d21c 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -187,14 +187,6 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'): The equivalent XPath 1.0 expression as an Unicode string. 
""" - # no prefix if css immediate children (example: css "^ > div" to xpath "./div") - child_re = r'^[ \t\r\n\f]*\^[ \t\r\n\f]*>' - if re.match(child_re, css): - prefix = '' - # prefix = 'child::' - # css = re.sub(child_re, '', css) - # print('*' * 50) - # print(css) return ' | '.join(self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True) for selector in parse(css)) @@ -228,7 +220,18 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::', assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return' if translate_pseudo_elements and selector.pseudo_element: xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) - return (prefix or '') + _unicode(xpath) + + unicode_xpath = _unicode(xpath) + # CSS immediate children (CSS "<> div" to XPath "child::div" or "./div") + # Works only at the start of a selector + # Needed to get immediate children of a processed selector in Scrapy + # product = response.css('.product') + # name = product.css('<> div') + child_re = r'^[ \t\r\n\f]*\<[ \t\r\n\f]*\/' + if re.match(child_re, unicode_xpath): + prefix = 'child::' + unicode_xpath = re.sub(child_re, '', unicode_xpath) + return (prefix or '') + unicode_xpath def xpath_pseudo_element(self, xpath, pseudo_element): """Translate a pseudo-element. @@ -340,8 +343,8 @@ def xpath_element(self, selector): if not element: element = '*' safe = True - if element == '^': - element = '.' + if element == '<': + element = '<' safe = True else: safe = is_safe_name(element) From 9ec22422722561060bca1d7805556c77681d7b18 Mon Sep 17 00:00:00 2001 From: sortafreel Date: Sun, 16 Jun 2019 01:38:13 +0300 Subject: [PATCH 116/208] Add tests and errors handling. 
--- cssselect/parser.py | 15 ++++++++++++++- tests/test_cssselect.py | 31 +++++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 11e9ff5..5d9360c 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -401,8 +401,21 @@ def parse_simple_selector(stream, inside_negation=False): selector_start = len(stream.used) peek = stream.peek() if peek.type == 'IDENT' or peek == ('DELIM', '*') or peek == ('DELIM', '<'): - if peek.type == 'IDENT' or peek == ('DELIM', '<'): + if peek.type == 'IDENT': namespace = stream.next().value + elif peek == ('DELIM', '<'): + if not (len(stream.used) == 0 or + (len(stream.used) == 1 and stream.used[0].type == 'S')): + raise SelectorSyntaxError( + 'Got immediate child pseudo-element "<>" not at the start of a selector' + ) + namespace = stream.next().value + stream.skip_whitespace() + peek = stream.peek() + if not peek == ('DELIM', '>'): + raise SelectorSyntaxError( + 'Got incomplete immediate child pseudo-element "<>" (no ">")' + ) else: stream.next() namespace = None diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index f01aa7f..49bb7ba 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -42,7 +42,7 @@ class TestCssselect(unittest.TestCase): def test_tokenizer(self): tokens = [ _unicode(item) for item in tokenize( - u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)'))] + u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)<'))] assert tokens == [ u(""), "", @@ -61,7 +61,8 @@ def test_tokenizer(self): "", "", "", - "", + "<' at 42>", + "", ] def test_parser(self): @@ -146,6 +147,18 @@ def parse_many(first, *others): 'Negation[Element[div]:not(Class[Element[div].foo])]'] assert parse_many('td ~ th') == [ 'CombinedSelector[Element[td] ~ Element[th]]'] + # assert parse_many('<') == ['Element[<]'] + # assert parse_many('<> foo') == [ + # 'CombinedSelector[Element[<] > Element[foo]]' + # ] + # assert parse_many('<> foo bar > div') 
== [ + # 'CombinedSelector[CombinedSelector[CombinedSelector[Element[<] > Element[foo]] ' + # ' Element[bar]] > Element[div]]' + # ] + # assert parse_many('<> #foo #bar') == [ + # 'CombinedSelector[CombinedSelector[Element[<] > Hash[Element[*]#foo]] ' + # ' Hash[Element[*]#bar]]' + # ] def test_pseudo_elements(self): def parse_pseudo(css): @@ -310,6 +323,12 @@ def get_error(css): "Got pseudo-element ::before inside :not() at 12") assert get_error(':not(:not(a))') == ( "Got nested :not()") + assert get_error('<> div <> header') == ( + 'Got immediate child pseudo-element "<>" not at the start of a selector' + ) + assert get_error('< div p') == ( + 'Got incomplete immediate child pseudo-element "<>" (no ">")') + assert get_error('> div p') == ("Expected selector, got ' at 0>") def test_translation(self): def xpath(css): @@ -483,6 +502,8 @@ def test_quoting(self): '''descendant-or-self::*[@aval = '"']''') assert css_to_xpath('*[aval=\'"""\']') == ( '''descendant-or-self::*[@aval = '"""']''') + assert css_to_xpath('<> div[dataimg=""]') == ( + "child::div[@dataimg = '']") def test_unicode_escapes(self): # \22 == '"' \20 == ' ' @@ -672,6 +693,11 @@ def pcss(main, *selectors, **kwargs): assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [ 'second-li', 'li-div'] assert pcss(':lang("e")', html_only=True) == [] + assert pcss('<> div') == [] + assert pcss('<> body') == ['nil'] + assert pcss('<> body > div') == ['outer-div', 'foobar-div'] + assert pcss('<> head') == ['nil'] + assert pcss('<> html') == [] # --- nth-* and nth-last-* ------------------------------------- @@ -853,6 +879,7 @@ def count(selector): assert count('div[class|=dialog]') == 50 # ? Seems right assert count('div[class!=madeup]') == 243 # ? Seems right assert count('div[class~=dialog]') == 51 # ? 
Seems right + assert count('<> div') == 1 XMLLANG_IDS = ''' From 7c697daf87f1e7cea3f48a145b1cb7a5458750ad Mon Sep 17 00:00:00 2001 From: sortafreel Date: Sun, 16 Jun 2019 01:45:25 +0300 Subject: [PATCH 117/208] Add more tests. --- .gitignore | 2 +- tests/test_cssselect.py | 23 +++++++++++------------ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index 4436e5d..5c47adf 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,4 @@ /.coverage .idea /venv -*.vscode \ No newline at end of file +*.vscode diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 49bb7ba..f68893b 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -147,18 +147,17 @@ def parse_many(first, *others): 'Negation[Element[div]:not(Class[Element[div].foo])]'] assert parse_many('td ~ th') == [ 'CombinedSelector[Element[td] ~ Element[th]]'] - # assert parse_many('<') == ['Element[<]'] - # assert parse_many('<> foo') == [ - # 'CombinedSelector[Element[<] > Element[foo]]' - # ] - # assert parse_many('<> foo bar > div') == [ - # 'CombinedSelector[CombinedSelector[CombinedSelector[Element[<] > Element[foo]] ' - # ' Element[bar]] > Element[div]]' - # ] - # assert parse_many('<> #foo #bar') == [ - # 'CombinedSelector[CombinedSelector[Element[<] > Hash[Element[*]#foo]] ' - # ' Hash[Element[*]#bar]]' - # ] + assert parse_many('<> foo') == [ + 'CombinedSelector[Element[<] > Element[foo]]' + ] + assert parse_many('<> foo bar > div') == [ + 'CombinedSelector[CombinedSelector[CombinedSelector[Element[<] > Element[foo]] ' + ' Element[bar]] > Element[div]]' + ] + assert parse_many('<> #foo #bar') == [ + 'CombinedSelector[CombinedSelector[Element[<] > Hash[Element[*]#foo]] ' + ' Hash[Element[*]#bar]]' + ] def test_pseudo_elements(self): def parse_pseudo(css): From 37b3c0ffcd1db16ca240487f1e0f8bb716a3385c Mon Sep 17 00:00:00 2001 From: sortafreel Date: Sun, 16 Jun 2019 02:12:07 +0300 Subject: [PATCH 118/208] Code review fixes. 
--- .gitignore | 4 +--- cssselect/xpath.py | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 5c47adf..b0ab86a 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,4 @@ /dist /docs/_build /.coverage -.idea -/venv -*.vscode +.idea \ No newline at end of file diff --git a/cssselect/xpath.py b/cssselect/xpath.py index e71d21c..e3843b5 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -344,7 +344,6 @@ def xpath_element(self, selector): element = '*' safe = True if element == '<': - element = '<' safe = True else: safe = is_safe_name(element) From 920b3d644fa62c95db40141c5322d38e98bbe8d3 Mon Sep 17 00:00:00 2001 From: sortafreel Date: Sun, 16 Jun 2019 17:57:08 +0300 Subject: [PATCH 119/208] Change "<>" selector to ":scope" --- cssselect/parser.py | 22 ++++++++------------- cssselect/xpath.py | 23 +++++++++------------- tests/test_cssselect.py | 43 ++++++++++++++++++++++------------------- 3 files changed, 40 insertions(+), 48 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 5d9360c..99b25a3 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -400,22 +400,9 @@ def parse_simple_selector(stream, inside_negation=False): stream.skip_whitespace() selector_start = len(stream.used) peek = stream.peek() - if peek.type == 'IDENT' or peek == ('DELIM', '*') or peek == ('DELIM', '<'): + if peek.type == 'IDENT' or peek == ('DELIM', '*'): if peek.type == 'IDENT': namespace = stream.next().value - elif peek == ('DELIM', '<'): - if not (len(stream.used) == 0 or - (len(stream.used) == 1 and stream.used[0].type == 'S')): - raise SelectorSyntaxError( - 'Got immediate child pseudo-element "<>" not at the start of a selector' - ) - namespace = stream.next().value - stream.skip_whitespace() - peek = stream.peek() - if not peek == ('DELIM', '>'): - raise SelectorSyntaxError( - 'Got incomplete immediate child pseudo-element "<>" (no ">")' - ) else: stream.next() namespace = None @@ -465,6 +452,13 @@ def 
parse_simple_selector(stream, inside_negation=False): continue if stream.peek() != ('DELIM', '('): result = Pseudo(result, ident) + if result.ident == 'scope': + if not (len(stream.used) == 2 or + (len(stream.used) == 3 + and stream.used[0].type == 'S')): + raise SelectorSyntaxError( + 'Got immediate child pseudo-element ":scope" ' + 'not at the start of a selector') continue stream.next() stream.skip_whitespace() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index e3843b5..d5bbf72 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -220,18 +220,7 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::', assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return' if translate_pseudo_elements and selector.pseudo_element: xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) - - unicode_xpath = _unicode(xpath) - # CSS immediate children (CSS "<> div" to XPath "child::div" or "./div") - # Works only at the start of a selector - # Needed to get immediate children of a processed selector in Scrapy - # product = response.css('.product') - # name = product.css('<> div') - child_re = r'^[ \t\r\n\f]*\<[ \t\r\n\f]*\/' - if re.match(child_re, unicode_xpath): - prefix = 'child::' - unicode_xpath = re.sub(child_re, '', unicode_xpath) - return (prefix or '') + unicode_xpath + return (prefix or '') + _unicode(xpath) def xpath_pseudo_element(self, xpath, pseudo_element): """Translate a pseudo-element. 
@@ -343,8 +332,6 @@ def xpath_element(self, selector): if not element: element = '*' safe = True - if element == '<': - safe = True else: safe = is_safe_name(element) if self.lower_case_element_names: @@ -554,6 +541,14 @@ def xpath_lang_function(self, xpath, function): def xpath_root_pseudo(self, xpath): return xpath.add_condition("not(parent::*)") + # CSS immediate children (CSS ":scope > div" to XPath "child::div" or "./div") + # Works only at the start of a selector + # Needed to get immediate children of a processed selector in Scrapy + # for product in response.css('.product'): + # description = product.css(':scope > div::text').get() + def xpath_scope_pseudo(self, xpath): + return xpath.add_condition("1") + def xpath_first_child_pseudo(self, xpath): return xpath.add_condition('count(preceding-sibling::*) = 0') diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index f68893b..0f2a836 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -147,18 +147,19 @@ def parse_many(first, *others): 'Negation[Element[div]:not(Class[Element[div].foo])]'] assert parse_many('td ~ th') == [ 'CombinedSelector[Element[td] ~ Element[th]]'] - assert parse_many('<> foo') == [ - 'CombinedSelector[Element[<] > Element[foo]]' + assert parse_many(':scope > foo') == [ + 'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]' ] - assert parse_many('<> foo bar > div') == [ - 'CombinedSelector[CombinedSelector[CombinedSelector[Element[<] > Element[foo]] ' - ' Element[bar]] > Element[div]]' + assert parse_many(':scope > foo bar > div') == [ + 'CombinedSelector[CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > ' + 'Element[foo]] Element[bar]] > Element[div]]' ] - assert parse_many('<> #foo #bar') == [ - 'CombinedSelector[CombinedSelector[Element[<] > Hash[Element[*]#foo]] ' - ' Hash[Element[*]#bar]]' + assert parse_many(':scope > #foo #bar') == [ + 'CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > ' + 'Hash[Element[*]#foo]] 
Hash[Element[*]#bar]]' ] + # TODO ADD TESTS def test_pseudo_elements(self): def parse_pseudo(css): result = [] @@ -179,6 +180,7 @@ def parse_one(css): assert parse_one('foo') == ('Element[foo]', None) assert parse_one('*') == ('Element[*]', None) assert parse_one(':empty') == ('Pseudo[Element[*]:empty]', None) + assert parse_one(':scope') == ('Pseudo[Element[*]:scope]', None) # Special cases for CSS 2.1 pseudo-elements assert parse_one(':BEfore') == ('Element[*]', 'before') @@ -322,11 +324,9 @@ def get_error(css): "Got pseudo-element ::before inside :not() at 12") assert get_error(':not(:not(a))') == ( "Got nested :not()") - assert get_error('<> div <> header') == ( - 'Got immediate child pseudo-element "<>" not at the start of a selector' + assert get_error(':scope > div :scope header') == ( + 'Got immediate child pseudo-element ":scope" not at the start of a selector' ) - assert get_error('< div p') == ( - 'Got incomplete immediate child pseudo-element "<>" (no ">")') assert get_error('> div p') == ("Expected selector, got ' at 0>") def test_translation(self): @@ -501,8 +501,8 @@ def test_quoting(self): '''descendant-or-self::*[@aval = '"']''') assert css_to_xpath('*[aval=\'"""\']') == ( '''descendant-or-self::*[@aval = '"""']''') - assert css_to_xpath('<> div[dataimg=""]') == ( - "child::div[@dataimg = '']") + assert css_to_xpath(':scope > div[dataimg=""]') == ( + "descendant-or-self::*[1]/div[@dataimg = '']") def test_unicode_escapes(self): # \22 == '"' \20 == ' ' @@ -580,6 +580,7 @@ def xpath(css): assert xpath('::attr-href') == "descendant-or-self::*/@href" assert xpath('p img::attr(src)') == ( "descendant-or-self::p/descendant-or-self::*/img/@src") + assert xpath(':scope') == "descendant-or-self::*[1]" def test_series(self): def series(css): @@ -692,11 +693,11 @@ def pcss(main, *selectors, **kwargs): assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [ 'second-li', 'li-div'] assert pcss(':lang("e")', html_only=True) == [] - assert pcss('<> div') 
== [] - assert pcss('<> body') == ['nil'] - assert pcss('<> body > div') == ['outer-div', 'foobar-div'] - assert pcss('<> head') == ['nil'] - assert pcss('<> html') == [] + assert pcss(':scope > div') == [] + assert pcss(':scope body') == ['nil'] + assert pcss(':scope body > div') == ['outer-div', 'foobar-div'] + assert pcss(':scope head') == ['nil'] + assert pcss(':scope html') == [] # --- nth-* and nth-last-* ------------------------------------- @@ -878,7 +879,9 @@ def count(selector): assert count('div[class|=dialog]') == 50 # ? Seems right assert count('div[class!=madeup]') == 243 # ? Seems right assert count('div[class~=dialog]') == 51 # ? Seems right - assert count('<> div') == 1 + assert count(':scope > div') == 1 + assert count(':scope > div > div[class=dialog]') == 1 + assert count(':scope > div div') == 242 XMLLANG_IDS = ''' From 97ab897ce8995662517d45fe417f63e1a1dcc73b Mon Sep 17 00:00:00 2001 From: sortafreel Date: Sun, 16 Jun 2019 18:09:43 +0300 Subject: [PATCH 120/208] Add more tests. 
--- tests/test_cssselect.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 0f2a836..80dc687 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -150,6 +150,9 @@ def parse_many(first, *others): assert parse_many(':scope > foo') == [ 'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]' ] + assert parse_many(' :scope > foo') == [ + 'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]' + ] assert parse_many(':scope > foo bar > div') == [ 'CombinedSelector[CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > ' 'Element[foo]] Element[bar]] > Element[div]]' @@ -205,10 +208,14 @@ def parse_one(css): 'Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]', 'selection') - parse_pseudo('foo:before, bar, baz:after') == [ - ('Element[foo]', 'before'), - ('Element[bar]', None), - ('Element[baz]', 'after')] + assert parse_pseudo(':scope > div, foo bar') == [ + ('CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]', None), + ('CombinedSelector[Element[foo] Element[bar]]', None) + ] + assert parse_pseudo('foo:before, bar, baz:after') == [ + ('Element[foo]', 'before'), ('Element[bar]', None), + ('Element[baz]', 'after') + ] # Special cases for CSS 2.1 pseudo-elements are ignored by default for pseudo in ('after', 'before', 'first-line', 'first-letter'): From 8cc4a266f4851e3b2502e8e740af9a9af8771ac0 Mon Sep 17 00:00:00 2001 From: sortafreel Date: Sun, 16 Jun 2019 18:23:00 +0300 Subject: [PATCH 121/208] Lint --- cssselect/parser.py | 3 ++- tests/test_cssselect.py | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 99b25a3..bcd1854 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -452,7 +452,8 @@ def parse_simple_selector(stream, inside_negation=False): continue if stream.peek() != ('DELIM', '('): result = Pseudo(result, ident) - if result.ident == 'scope': + if 
result.ident == 'scope' and repr( + result) == 'Pseudo[Element[*]:scope]': if not (len(stream.used) == 2 or (len(stream.used) == 3 and stream.used[0].type == 'S')): diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 80dc687..5c97f30 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -42,7 +42,7 @@ class TestCssselect(unittest.TestCase): def test_tokenizer(self): tokens = [ _unicode(item) for item in tokenize( - u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)<'))] + u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)'))] assert tokens == [ u(""), "", @@ -61,8 +61,7 @@ def test_tokenizer(self): "", "", "", - "<' at 42>", - "", + "", ] def test_parser(self): @@ -162,7 +161,6 @@ def parse_many(first, *others): 'Hash[Element[*]#foo]] Hash[Element[*]#bar]]' ] - # TODO ADD TESTS def test_pseudo_elements(self): def parse_pseudo(css): result = [] @@ -334,6 +332,9 @@ def get_error(css): assert get_error(':scope > div :scope header') == ( 'Got immediate child pseudo-element ":scope" not at the start of a selector' ) + assert get_error('div :scope header') == ( + 'Got immediate child pseudo-element ":scope" not at the start of a selector' + ) assert get_error('> div p') == ("Expected selector, got ' at 0>") def test_translation(self): From 270f11835e81eba71441e53f4a555405df2e2a0c Mon Sep 17 00:00:00 2001 From: sortafreel Date: Mon, 17 Jun 2019 14:53:19 +0300 Subject: [PATCH 122/208] Improve test coverage. 
--- cssselect/parser.py | 3 +-- tests/test_cssselect.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index bcd1854..3be71bb 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -452,8 +452,7 @@ def parse_simple_selector(stream, inside_negation=False): continue if stream.peek() != ('DELIM', '('): result = Pseudo(result, ident) - if result.ident == 'scope' and repr( - result) == 'Pseudo[Element[*]:scope]': + if result.__repr__() == 'Pseudo[Element[*]:scope]': if not (len(stream.used) == 2 or (len(stream.used) == 3 and stream.used[0].type == 'S')): diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 5c97f30..8b562da 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -178,6 +178,12 @@ def parse_one(css): assert len(result) == 1 return result[0] + def test_pseudo_repr(css): + result = parse(css) + assert len(result) == 1 + selector = result[0] + return selector.parsed_tree.__repr__() + assert parse_one('foo') == ('Element[foo]', None) assert parse_one('*') == ('Element[*]', None) assert parse_one(':empty') == ('Pseudo[Element[*]:empty]', None) @@ -205,7 +211,6 @@ def parse_one(css): 'CombinedSelector[Hash[Element[lorem]#ipsum] ~ ' 'Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]', 'selection') - assert parse_pseudo(':scope > div, foo bar') == [ ('CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]', None), ('CombinedSelector[Element[foo] Element[bar]]', None) @@ -230,6 +235,11 @@ def parse_one(css): self.assertRaises(ExpressionError, tr.selector_to_xpath, selector, translate_pseudo_elements=True) + # Special test for the unicode symbols and ':scope' element if check + # Errors if use repr() instead of __repr__() + assert test_pseudo_repr(u':fİrst-child') == u'Pseudo[Element[*]:fİrst-child]' + assert test_pseudo_repr(':scope') == 'Pseudo[Element[*]:scope]' + def test_specificity(self): def specificity(css): selectors = parse(css) From 
4b966853c84f44c8fb079213337e36d4992dd7f0 Mon Sep 17 00:00:00 2001 From: sortafreel Date: Tue, 18 Jun 2019 21:39:06 +0300 Subject: [PATCH 123/208] Edit docs. --- docs/index.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/index.rst b/docs/index.rst index fe473f7..c7f0c1a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -108,8 +108,10 @@ in the Level 3 specification: * ``:not()`` accepts a *sequence of simple selectors*, not just single *simple selector*. For example, ``:not(a.important[rel])`` is allowed, even though the negation contains 3 *simple selectors*. +* ``:scope`` allows to access immediate children of a selector: ``product.css(':scope > div::text')``, simillar to XPath ``child::div``. Must be used at the start of a selector. Simplified version of `level 4 reference`_. .. _an early draft: http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors +.. _level 4 reference: https://developer.mozilla.org/en-US/docs/Web/CSS/:scope .. The following claim was copied from lxml: From 81c8dab8a17e389be9390260e8e22b5c0ef4df4c Mon Sep 17 00:00:00 2001 From: Simon Potter Date: Thu, 11 Jul 2019 19:23:43 +1200 Subject: [PATCH 124/208] Parse |ident as ident. No longer an error. 
--- cssselect/parser.py | 3 +++ tests/test_cssselect.py | 1 + 2 files changed, 4 insertions(+) diff --git a/cssselect/parser.py b/cssselect/parser.py index 3be71bb..b96d26a 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -430,6 +430,9 @@ def parse_simple_selector(stream, inside_negation=False): elif peek == ('DELIM', '.'): stream.next() result = Class(result, stream.next_ident()) + elif peek == ('DELIM', '|'): + stream.next() + result = Element(None, stream.next_ident()) elif peek == ('DELIM', '['): stream.next() result = parse_attrib(result, stream) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 8b562da..d2432ab 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -81,6 +81,7 @@ def parse_many(first, *others): assert parse_many('*') == ['Element[*]'] assert parse_many('*|*') == ['Element[*]'] assert parse_many('*|foo') == ['Element[foo]'] + assert parse_many('|foo') == ['Element[foo]'] assert parse_many('foo|*') == ['Element[foo|*]'] assert parse_many('foo|bar') == ['Element[foo|bar]'] # This will never match, but it is valid: From 6d758551c700c784c690ff59c8ccf679c573d506 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Fri, 9 Aug 2019 10:31:51 +0200 Subject: [PATCH 125/208] Enforce a working lxml version on the Python 3.4 CI environment --- .travis.yml | 3 +-- tests/requirements.txt | 5 +++++ tox.ini | 4 +--- 3 files changed, 7 insertions(+), 5 deletions(-) create mode 100644 tests/requirements.txt diff --git a/.travis.yml b/.travis.yml index b76297f..bfc557f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,8 +5,7 @@ python: - '3.5' - '3.6' install: -- pip install lxml -e . -- pip install -U codecov pytest-cov +- pip install -r tests/requirements.txt -e . 
script: - py.test --cov-report term --cov=cssselect after_success: diff --git a/tests/requirements.txt b/tests/requirements.txt new file mode 100644 index 0000000..5232e84 --- /dev/null +++ b/tests/requirements.txt @@ -0,0 +1,5 @@ +codecov +lxml;python_version!="3.4" +lxml<=4.3.5;python_version=="3.4" +pytest +pytest-cov \ No newline at end of file diff --git a/tox.ini b/tox.ini index 194490a..49a1dda 100644 --- a/tox.ini +++ b/tox.ini @@ -3,9 +3,7 @@ envlist = py27, py34, py35, py36 [testenv] deps= - lxml - pytest<3 - pytest-cov + -r tests/requirements.txt commands = py.test --cov-report term --cov=cssselect From cff38f1f00972b9851ff64fa8380022aa0d76b9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Thu, 1 Aug 2019 18:30:00 +0200 Subject: [PATCH 126/208] Cover cssselect 1.1.0 in the CHANGES file --- CHANGES | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CHANGES b/CHANGES index 0a0e137..a6c5233 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,20 @@ Changelog ========= +Version 1.1.0 +------------- + +Released on 2019-08-09. + +* Support for the ``:scope`` selector, which allows to access immediate + children of a selector. + +* Support for the ``|E`` syntax for type selectors without a namespace. + +* A new selector method, ``canonical``, returns the CSS expression of the + selector, as a string. + + Version 1.0.3 ------------- @@ -8,6 +22,7 @@ Released on 2017-12-27. * Fix artifact uploads to pypi + Version 1.0.2 ------------- @@ -17,6 +32,7 @@ Released on 2017-12-26. * Fix deprecation warning in Python 3.6. * Minor cleanups. + Version 1.0.1 ------------- @@ -25,6 +41,7 @@ Released on 2017-01-10. * Add support for Python 3.6. 
* Documentation hosted `on Read the Docs `_ + Version 1.0.0 ------------- From 518e3e1babcc3db38ab8afe948c05a4799693108 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Fri, 9 Aug 2019 11:17:29 +0200 Subject: [PATCH 127/208] =?UTF-8?q?Bump=20version:=201.0.3=20=E2=86=92=201?= =?UTF-8?q?.1.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- cssselect/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index acb5a66..122d3d4 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.0.3 +current_version = 1.1.0 commit = True tag = True diff --git a/cssselect/__init__.py b/cssselect/__init__.py index e9f9ce1..b41cef9 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -18,5 +18,5 @@ from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '1.0.3' +VERSION = '1.1.0' __version__ = VERSION From 91822333b7a2ddbb1f11b624b304c2563be2d0ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 17 Sep 2019 10:58:16 +0200 Subject: [PATCH 128/208] Package tests Fixes #92 --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index e98d213..a367dc0 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ include AUTHORS CHANGES LICENSE README.rst tox.ini .coveragerc recursive-include docs * +recursive-include tests * prune docs/_build From c909f051d0034171c0658e25aa3ee4d1b745f8a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Thu, 19 Sep 2019 11:50:53 +0200 Subject: [PATCH 129/208] Support :scope after a comma delimiter --- cssselect/parser.py | 9 +++++++-- tests/test_cssselect.py | 10 +++++++++- tox.ini | 2 +- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 7125030..0185cb2 100644 --- 
a/cssselect/parser.py +++ b/cssselect/parser.py @@ -517,8 +517,13 @@ def parse_simple_selector(stream, inside_negation=False): result = Pseudo(result, ident) if result.__repr__() == 'Pseudo[Element[*]:scope]': if not (len(stream.used) == 2 or - (len(stream.used) == 3 - and stream.used[0].type == 'S')): + (len(stream.used) == 3 and + stream.used[0].type == 'S') or + (len(stream.used) >= 3 and + stream.used[-3].is_delim(',')) or + (len(stream.used) >= 4 and + stream.used[-3].type == 'S' and + stream.used[-4].is_delim(','))): raise SelectorSyntaxError( 'Got immediate child pseudo-element ":scope" ' 'not at the start of a selector') diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 320736c..b81f8c5 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -214,7 +214,15 @@ def test_pseudo_repr(css): 'selection') assert parse_pseudo(':scope > div, foo bar') == [ ('CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]', None), - ('CombinedSelector[Element[foo] Element[bar]]', None) + ('CombinedSelector[Element[foo] Element[bar]]', None), + ] + assert parse_pseudo('foo bar, :scope > div') == [ + ('CombinedSelector[Element[foo] Element[bar]]', None), + ('CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]', None), + ] + assert parse_pseudo('foo bar,:scope > div') == [ + ('CombinedSelector[Element[foo] Element[bar]]', None), + ('CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]', None), ] assert parse_pseudo('foo:before, bar, baz:after') == [ ('Element[foo]', 'before'), ('Element[bar]', None), diff --git a/tox.ini b/tox.ini index 49a1dda..6a09b07 100644 --- a/tox.ini +++ b/tox.ini @@ -6,4 +6,4 @@ deps= -r tests/requirements.txt commands = - py.test --cov-report term --cov=cssselect + py.test --cov-report term --cov=cssselect {posargs} From 928ad922ddf3701bef5dc178a485b0d0246b784e Mon Sep 17 00:00:00 2001 From: Akshita Agarwal Date: Wed, 16 Oct 2019 17:46:06 +0530 Subject: [PATCH 130/208] add 3.7 version after running tests 
--- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 243927d..de7128d 100644 --- a/setup.py +++ b/setup.py @@ -40,6 +40,7 @@ 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7' ], **extra_kwargs ) From 24eb0952eaf4c1f1ed86c123840e606959962953 Mon Sep 17 00:00:00 2001 From: Akshita Agarwal Date: Wed, 16 Oct 2019 21:14:46 +0530 Subject: [PATCH 131/208] address comments --- .travis.yml | 3 ++- tox.ini | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index bfc557f..69ecf93 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ python: - '3.4' - '3.5' - '3.6' +- '3.7' install: - pip install -r tests/requirements.txt -e . script: @@ -20,4 +21,4 @@ deploy: on: tags: true repo: scrapy/cssselect - condition: "$TRAVIS_PYTHON_VERSION == '3.6'" + condition: "$TRAVIS_PYTHON_VERSION == '3.7'" diff --git a/tox.ini b/tox.ini index 49a1dda..32136a0 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py27, py34, py35, py36 +envlist = py27, py34, py35, py36, py37 [testenv] deps= From c05327240d73beda2132a1d3fcf0d33317738a58 Mon Sep 17 00:00:00 2001 From: whybin <31753349+whybin@users.noreply.github.com> Date: Thu, 31 May 2018 16:24:45 -0700 Subject: [PATCH 132/208] Add XPath tests for operator precedence --- tests/test_cssselect.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 320736c..94da2e1 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -622,6 +622,11 @@ def xpath_attr_href_simple_pseudo_element(self, xpath): other = XPathExpr('@href', '', ) return xpath.join('/', other) + # pseudo-element: + # used to demonstrate operator precedence + def xpath_first_or_second_pseudo(self, xpath): + return xpath.add_condition("@id = 'first' or @id = 'second'") + def 
xpath(css): return _unicode(CustomTranslator().css_to_xpath(css)) @@ -633,6 +638,25 @@ def xpath(css): assert xpath('p img::attr(src)') == ( "descendant-or-self::p/descendant-or-self::*/img/@src") assert xpath(':scope') == "descendant-or-self::*[1]" + assert xpath(':first-or-second[href]') == ( + "descendant-or-self::*[(@id = 'first' or @id = 'second') " + "and (@href)]") + + assert str(XPathExpr('', '', condition='@href')) == "[(@href)]" + + document = etree.fromstring(OPERATOR_PRECEDENCE_IDS) + sort_key = dict( + (el, count) for count, el in enumerate(document.getiterator()) + ).__getitem__ + def operator_id(selector): + xpath = CustomTranslator().css_to_xpath(selector) + items = document.xpath(xpath) + items.sort(key=sort_key) + return [element.get('id', 'nil') for element in items] + + assert operator_id(':first-or-second') == ['first', 'second'] + assert operator_id(':first-or-second[href]') == ['second'] + assert operator_id('[href]:first-or-second') == ['second'] def test_series(self): def series(css): @@ -935,6 +959,14 @@ def count(selector): assert count(':scope > div > div[class=dialog]') == 1 assert count(':scope > div div') == 242 +OPERATOR_PRECEDENCE_IDS = ''' + + + + + +''' + XMLLANG_IDS = ''' a From 754b701bc26dcd239ae1d9813774f75f78ed2dc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Mon, 21 Oct 2019 16:43:10 +0200 Subject: [PATCH 133/208] Use parentheses when joining with AND potentially-complex expressions --- cssselect/xpath.py | 17 +++++++++++------ tests/test_cssselect.py | 12 ++++++------ 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index db50c77..14e9b80 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -56,7 +56,7 @@ def __repr__(self): def add_condition(self, condition): if self.condition: - self.condition = '%s and (%s)' % (self.condition, condition) + self.condition = '(%s) and (%s)' % (self.condition, condition) else: self.condition = condition 
return self @@ -457,19 +457,19 @@ def xpath_nth_child_function(self, xpath, function, last=False, if a == 0: return xpath.add_condition('%s = %s' % (siblings_count, b_min_1)) - expr = [] + expressions = [] if a > 0: # siblings count, an+b-1, is always >= 0, # so if a>0, and (b-1)<=0, an "n" exists to satisfy this, # therefore, the predicate is only interesting if (b-1)>0 if b_min_1 > 0: - expr.append('%s >= %s' % (siblings_count, b_min_1)) + expressions.append('%s >= %s' % (siblings_count, b_min_1)) else: # if a<0, and (b-1)<0, no "n" satisfies this, # this is tested above as an early exist condition # otherwise, - expr.append('%s <= %s' % (siblings_count, b_min_1)) + expressions.append('%s <= %s' % (siblings_count, b_min_1)) # operations modulo 1 or -1 are simpler, one only needs to verify: # @@ -495,9 +495,14 @@ def xpath_nth_child_function(self, xpath, function, last=False, b_neg = '+%s' % b_neg left = '(%s %s)' % (left, b_neg) - expr.append('%s mod %s = 0' % (left, a)) + expressions.append('%s mod %s = 0' % (left, a)) - xpath.add_condition(' and '.join(expr)) + if len(expressions) > 1: + template = '(%s)' + else: + template = '%s' + xpath.add_condition(' and '.join(template % expression + for expression in expressions)) return xpath def xpath_nth_last_child_function(self, xpath, function): diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 94da2e1..d6969f2 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -428,8 +428,8 @@ def xpath(css): "e[count(preceding-sibling::*) <= 0]") assert xpath('e:nth-child(3n+2)') == ( - "e[count(preceding-sibling::*) >= 1 and " - "(count(preceding-sibling::*) +2) mod 3 = 0]") + "e[(count(preceding-sibling::*) >= 1) and " + "((count(preceding-sibling::*) +2) mod 3 = 0)]") assert xpath('e:nth-child(3n-2)') == ( "e[count(preceding-sibling::*) mod 3 = 0]") assert xpath('e:nth-child(-n+6)') == ( @@ -442,8 +442,8 @@ def xpath(css): assert xpath('e:nth-last-child(2n+1)') == ( 
"e[count(following-sibling::*) mod 2 = 0]") assert xpath('e:nth-last-child(2n+2)') == ( - "e[count(following-sibling::*) >= 1 and " - "(count(following-sibling::*) +1) mod 2 = 0]") + "e[(count(following-sibling::*) >= 1) and " + "((count(following-sibling::*) +1) mod 2 = 0)]") assert xpath('e:nth-last-child(3n+1)') == ( "e[count(following-sibling::*) mod 3 = 0]") # represents the two last e elements @@ -497,7 +497,7 @@ def xpath(css): assert xpath('e > f') == ( "e/f") assert xpath('e + f') == ( - "e/following-sibling::*[name() = 'f' and (position() = 1)]") + "e/following-sibling::*[(name() = 'f') and (position() = 1)]") assert xpath('e ~ f') == ( "e/following-sibling::f") assert xpath('e ~ f:nth-child(3)') == ( @@ -642,7 +642,7 @@ def xpath(css): "descendant-or-self::*[(@id = 'first' or @id = 'second') " "and (@href)]") - assert str(XPathExpr('', '', condition='@href')) == "[(@href)]" + assert str(XPathExpr('', '', condition='@href')) == "[@href]" document = etree.fromstring(OPERATOR_PRECEDENCE_IDS) sort_key = dict( From dde3b5e68ba2e49ec4552a75a805536c7dcdc896 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 22 Oct 2019 16:55:17 +0200 Subject: [PATCH 134/208] Enable nitpicky Sphinx warnings, fix issues and fail on new issues --- .travis.yml | 24 ++++++++++++++++++------ CHANGES | 4 ++-- docs/conf.py | 5 +++++ tox.ini | 10 +++++++++- 4 files changed, 34 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 69ecf93..bd043e1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,16 +1,28 @@ language: python -python: -- '2.7' -- '3.4' -- '3.5' -- '3.6' -- '3.7' +matrix: + include: + - python: 2.7 + env: TOXENV=py27 + - python: 3.4 + env: TOXENV=py34 + - python: 3.5 + env: TOXENV=py35 + - python: 3.6 + env: TOXENV=py36 + - python: 3.7 + env: TOXENV=py37 + - python: 3.7 + env: TOXENV=docs + install: - pip install -r tests/requirements.txt -e . 
+ script: - py.test --cov-report term --cov=cssselect + after_success: - codecov + deploy: provider: pypi distributions: sdist bdist_wheel diff --git a/CHANGES b/CHANGES index a6c5233..4e7185f 100644 --- a/CHANGES +++ b/CHANGES @@ -199,14 +199,14 @@ Version 0.3 Released on 2012-04-17. * Fix many parsing bugs. -* Rename the :class:`Translator` class to :class:`GenericTranslator` +* Rename the ``Translator`` class to :class:`GenericTranslator` * There, implement ``:target``, ``:hover``, ``:focus``, ``:active`` ``:checked``, ``:enabled``, ``:disabled``, ``:link`` and ``:visited`` as never matching. * Make a new HTML-specific ``HTMLTranslator`` subclass. There, implement ``:checked``, ``:enabled``, ``:disabled``, ``:link`` and ``:visited`` as appropriate for HTML, with all links "not visited". -* Remove the :func:`css_to_xpath` function. The translator classes +* Remove the ``css_to_xpath`` function. The translator classes are the new API. * Add support for ``:contains()`` back, but case-sensitive. lxml will override it to be case-insensitive for backward-compatibility. diff --git a/docs/conf.py b/docs/conf.py index aa897ef..86898c2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -248,3 +248,8 @@ # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = {'http://docs.python.org/': None} + + +# --- Nitpicking options ------------------------------------------------------ + +nitpicky = True diff --git a/tox.ini b/tox.ini index 32136a0..1d50b69 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py27, py34, py35, py36, py37 +envlist = py27, py34, py35, py36, py37, docs [testenv] deps= @@ -7,3 +7,11 @@ deps= commands = py.test --cov-report term --cov=cssselect + +[testenv:docs] +changedir = docs +deps = + sphinx + sphinx_rtd_theme +commands = + sphinx-build -W -b html . 
{envtmpdir}/html \ No newline at end of file From f4a04641c6ff66aec9e4247be079b02942c81b65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 22 Oct 2019 18:13:54 +0200 Subject: [PATCH 135/208] Remove unused FunctionalPseudoElement methods --- cssselect/parser.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 7125030..b63e3df 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -165,18 +165,10 @@ def __repr__(self): self.__class__.__name__, self.name, [token.value for token in self.arguments]) - def argument_types(self): - return [token.type for token in self.arguments] - def canonical(self): args = ''.join(token.css() for token in self.arguments) return '%s(%s)' % (self.name, args) - def specificity(self): - a, b, c = self.selector.specificity() - b += 1 - return a, b, c - class Function(object): """ From b4efd7f0e61324be6fe8d25d268628490dda82ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 22 Oct 2019 18:37:35 +0200 Subject: [PATCH 136/208] Revert "Remove unused FunctionalPseudoElement methods" This reverts commit f4a04641c6ff66aec9e4247be079b02942c81b65. 
--- cssselect/parser.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cssselect/parser.py b/cssselect/parser.py index b63e3df..7125030 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -165,10 +165,18 @@ def __repr__(self): self.__class__.__name__, self.name, [token.value for token in self.arguments]) + def argument_types(self): + return [token.type for token in self.arguments] + def canonical(self): args = ''.join(token.css() for token in self.arguments) return '%s(%s)' % (self.name, args) + def specificity(self): + a, b, c = self.selector.specificity() + b += 1 + return a, b, c + class Function(object): """ From 9c1fbc9c194c312077a9f82fab0b0c2e57a22e77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 22 Oct 2019 19:03:04 +0200 Subject: [PATCH 137/208] Cover all FunctionalPseudoElement methods with tests --- tests/test_cssselect.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 320736c..e4bd74e 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -258,6 +258,7 @@ def specificity(css): assert specificity('[baz="4"]') == (0, 1, 0) assert specificity('[baz^="4"]') == (0, 1, 0) assert specificity('#lipsum') == (1, 0, 0) + assert specificity('::attr(name)') == (0, 0, 1) assert specificity(':not(*)') == (0, 0, 0) assert specificity(':not(foo)') == (0, 0, 1) @@ -686,6 +687,31 @@ def langid(selector): 'first', 'second', 'third', 'fourth', 'eighth'] assert langid(':lang(es)') == [] + def test_argument_types(self): + + class CustomTranslator(GenericTranslator): + + def __init__(self): + self.argument_types = [] + + def xpath_pseudo_element(self, xpath, function): + self.argument_types += function.argument_types() + + def argument_types(css): + translator = CustomTranslator() + translator.css_to_xpath(css) + return translator.argument_types + + mappings = ( + ('', []), + ('ident', ['IDENT']), + ('"string"', ['STRING']), + 
('1', ['NUMBER']), + ) + for argument_string, argument_list in mappings: + css = '::pseudo_element({})'.format(argument_string) + assert argument_types(css) == argument_list + def test_select(self): document = etree.fromstring(HTML_IDS) sort_key = dict( From 98019114d6b01f64cdcf38ad34abd5cc63e2accd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Thu, 24 Oct 2019 13:44:49 +0200 Subject: [PATCH 138/208] Add a PyLint CI pipeline --- .travis.yml | 23 ++++++++++++++++------- pylintrc | 33 +++++++++++++++++++++++++++++++++ tox.ini | 8 ++++++++ 3 files changed, 57 insertions(+), 7 deletions(-) create mode 100644 pylintrc diff --git a/.travis.yml b/.travis.yml index 69ecf93..63d3ef2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,19 @@ language: python -python: -- '2.7' -- '3.4' -- '3.5' -- '3.6' -- '3.7' +sudo: false +matrix: + include: + - python: 3.7 + env: TOXENV=pylint + - python: 2.7 + env: TOXENV=py27 + - python: 3.4 + env: TOXENV=py34 + - python: 3.5 + env: TOXENV=py35 + - python: 3.6 + env: TOXENV=py36 + - python: 3.7 + env: TOXENV=py37 install: - pip install -r tests/requirements.txt -e . 
script: @@ -21,4 +30,4 @@ deploy: on: tags: true repo: scrapy/cssselect - condition: "$TRAVIS_PYTHON_VERSION == '3.7'" + condition: "$TOXENV == py37" diff --git a/pylintrc b/pylintrc new file mode 100644 index 0000000..b6972ec --- /dev/null +++ b/pylintrc @@ -0,0 +1,33 @@ +[MASTER] +persistent=no + +[MESSAGES CONTROL] +disable=assignment-from-no-return, + bad-continuation, + bad-whitespace, + c-extension-no-member, + consider-using-in, + fixme, + inconsistent-return-statements, + invalid-name, + missing-class-docstring, + missing-function-docstring, + missing-module-docstring, + multiple-imports, + no-else-return, + no-member, + no-self-use, + redefined-builtin, + redefined-outer-name, + too-few-public-methods, + too-many-arguments, + too-many-branches, + too-many-function-args, + too-many-lines, + too-many-public-methods, + too-many-statements, + undefined-variable, + unidiomatic-typecheck, + unused-argument, + unused-import, + useless-object-inheritance # Required for Python 2 support diff --git a/tox.ini b/tox.ini index 32136a0..430720a 100644 --- a/tox.ini +++ b/tox.ini @@ -7,3 +7,11 @@ deps= commands = py.test --cov-report term --cov=cssselect + +[testenv:pylint] +basepython = python3.7 +deps = + {[testenv]deps} + pylint +commands = + pylint cssselect docs setup.py tests From cc573dfd2f83266c35014682e0fd16727b24f2fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Mon, 11 Nov 2019 12:30:25 +0100 Subject: [PATCH 139/208] Also run tests from the documentation --- docs/conftest.py | 16 ++++++++++++++++ tests/requirements.txt | 3 ++- tox.ini | 2 +- 3 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 docs/conftest.py diff --git a/docs/conftest.py b/docs/conftest.py new file mode 100644 index 0000000..a98f9e5 --- /dev/null +++ b/docs/conftest.py @@ -0,0 +1,16 @@ +from doctest import ELLIPSIS, NORMALIZE_WHITESPACE + +from sybil import Sybil +from sybil.parsers.codeblock import CodeBlockParser +from sybil.parsers.doctest import 
DocTestParser +from sybil.parsers.skip import skip + + +pytest_collect_file = Sybil( + parsers=[ + DocTestParser(optionflags=ELLIPSIS | NORMALIZE_WHITESPACE), + CodeBlockParser(future_imports=['print_function']), + skip, + ], + pattern='*.rst', +).pytest() diff --git a/tests/requirements.txt b/tests/requirements.txt index 5232e84..00f8f94 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -2,4 +2,5 @@ codecov lxml;python_version!="3.4" lxml<=4.3.5;python_version=="3.4" pytest -pytest-cov \ No newline at end of file +pytest-cov +sybil \ No newline at end of file diff --git a/tox.ini b/tox.ini index 32136a0..ad6780d 100644 --- a/tox.ini +++ b/tox.ini @@ -6,4 +6,4 @@ deps= -r tests/requirements.txt commands = - py.test --cov-report term --cov=cssselect + py.test --cov-report term --cov=cssselect docs tests From 05c0e76dc68ac0a62ef4ba47c1e1ace855053a1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Thu, 14 Nov 2019 12:24:42 +0100 Subject: [PATCH 140/208] Add bandit to CI --- .bandit.yml | 2 ++ .travis.yml | 15 +++++++++------ tox.ini | 6 ++++++ 3 files changed, 17 insertions(+), 6 deletions(-) create mode 100644 .bandit.yml diff --git a/.bandit.yml b/.bandit.yml new file mode 100644 index 0000000..7fcde04 --- /dev/null +++ b/.bandit.yml @@ -0,0 +1,2 @@ +skips: +- B101 diff --git a/.travis.yml b/.travis.yml index 69ecf93..dbf5885 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,13 @@ language: python -python: -- '2.7' -- '3.4' -- '3.5' -- '3.6' -- '3.7' +matrix: + include: + - env: TOXENV=security + python: 3.8 + - python: 2.7 + - python: 3.4 + - python: 3.5 + - python: 3.6 + - python: 3.7 install: - pip install -r tests/requirements.txt -e . 
script: diff --git a/tox.ini b/tox.ini index 32136a0..4db8e7c 100644 --- a/tox.ini +++ b/tox.ini @@ -7,3 +7,9 @@ deps= commands = py.test --cov-report term --cov=cssselect + +[testenv:security] +deps = + bandit +commands = + bandit -r -c .bandit.yml {posargs:cssselect} From b26932d66cd629dbe491b31f2ac5462d2311db14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Sun, 11 Oct 2020 20:12:40 +0200 Subject: [PATCH 141/208] Fix class reference (#110) --- cssselect/xpath.py | 2 +- tests/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index db50c77..eb8be92 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -180,7 +180,7 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'): This string is prepended to the XPath expression for each selector. The default makes selectors scoped to the context node’s subtree. :raises: - :class:`SelectorSyntaxError` on invalid selectors, + :class:`~cssselect.SelectorSyntaxError` on invalid selectors, :class:`ExpressionError` on unknown/unsupported selectors, including pseudo-elements. 
:returns: diff --git a/tests/requirements.txt b/tests/requirements.txt index 5232e84..000d5f2 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,5 +1,5 @@ codecov lxml;python_version!="3.4" lxml<=4.3.5;python_version=="3.4" -pytest +pytest >=4.6, <4.7 # 4.7 drops support for Python 2.7 and 3.4 pytest-cov \ No newline at end of file From 163404122e5a05afe71dba59d808d7afd9726344 Mon Sep 17 00:00:00 2001 From: KOLANICH Date: Sat, 20 Feb 2021 17:20:36 +0300 Subject: [PATCH 142/208] Added .editorconfig according to PEP 8 --- .editorconfig | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..38558bf --- /dev/null +++ b/.editorconfig @@ -0,0 +1,11 @@ +root = true + +[*] +charset = utf-8 +indent_style = space +indent_size = 4 +insert_final_newline = true +end_of_line = lf + +[*.{yml,yaml}] +indent_size = 2 From 1f643a84d651ebd3075c2f61e30f46bf90451b46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Fri, 5 Mar 2021 16:50:19 +0100 Subject: [PATCH 143/208] =?UTF-8?q?Travis=20CI=20=E2=86=92=20GitHub=20Acti?= =?UTF-8?q?ons?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/publish.yml | 31 ++++++++++++++++++++++++++ .github/workflows/tests.yml | 41 +++++++++++++++++++++++++++++++++++ .travis.yml | 24 -------------------- tox.ini | 2 +- 4 files changed, 73 insertions(+), 25 deletions(-) create mode 100644 .github/workflows/publish.yml create mode 100644 .github/workflows/tests.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..7c0f8d0 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,31 @@ +name: Publish +on: [push] + +jobs: + publish: + runs-on: ubuntu-latest + if: startsWith(github.event.ref, 'refs/tags/') + + steps: + - uses: actions/checkout@v2 + + - name: Set 
up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3 + + - name: Check Tag + id: check-release-tag + run: | + if [[ ${{ github.event.ref }} =~ ^refs/tags/[0-9]+[.][0-9]+[.][0-9]+(rc[0-9]+|[.]dev[0-9]+)?$ ]]; then + echo ::set-output name=release_tag::true + fi + + - name: Publish to PyPI + if: steps.check-release-tag.outputs.release_tag == 'true' + run: | + pip install --upgrade setuptools wheel twine + python setup.py sdist bdist_wheel + export TWINE_USERNAME=__token__ + export TWINE_PASSWORD=${{ secrets.PYPI_TOKEN }} + twine upload dist/* diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..1a0cf65 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,41 @@ +name: Tests +on: [push, pull_request] + +jobs: + tests: + runs-on: ubuntu-latest + strategy: + matrix: + include: + - python-version: 2.7 + env: + TOXENV: py + - python-version: 3.4 + env: + TOXENV: py + - python-version: 3.5 + env: + TOXENV: py + - python-version: 3.6 + env: + TOXENV: py + - python-version: 3.7 + env: + TOXENV: py + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Run tests + env: ${{ matrix.env }} + run: | + pip install -U tox + tox + + - name: Upload coverage report + run: bash <(curl -s https://codecov.io/bash) \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 69ecf93..0000000 --- a/.travis.yml +++ /dev/null @@ -1,24 +0,0 @@ -language: python -python: -- '2.7' -- '3.4' -- '3.5' -- '3.6' -- '3.7' -install: -- pip install -r tests/requirements.txt -e . 
-script: -- py.test --cov-report term --cov=cssselect -after_success: -- codecov -deploy: - provider: pypi - distributions: sdist bdist_wheel - skip_upload_docs: true - user: scrapy - password: - secure: UjCXD1ZfqgFcCs4ciPMJDaOQefV3ZOKZ8/dTZxcoaQlE1lr6CkaN6CfTdD50SX2M9uCNWvEcYnvs6U4SizgZ27MYzFWuHonED2alHKy4AtrxCEHD/+lGo9d18cNjLMPDZateX/lITjGiZ4rmYZNuA6wmA4P/bTmdazbSufcmMqY= - on: - tags: true - repo: scrapy/cssselect - condition: "$TRAVIS_PYTHON_VERSION == '3.7'" diff --git a/tox.ini b/tox.ini index 32136a0..4fb1d7c 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py27, py34, py35, py36, py37 +envlist = py [testenv] deps= From b9506ce52a622b001d965c99e10d8deaf25e8bd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Fri, 5 Mar 2021 16:51:31 +0100 Subject: [PATCH 144/208] Remove end-of-life Python 3.4 from CI --- .github/workflows/tests.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1a0cf65..817d824 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -10,9 +10,6 @@ jobs: - python-version: 2.7 env: TOXENV: py - - python-version: 3.4 - env: - TOXENV: py - python-version: 3.5 env: TOXENV: py From a2e2894bd79457fed402b91440b63f0b28692b02 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Wed, 23 Jun 2021 10:02:12 -0300 Subject: [PATCH 145/208] Update CI badge --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 9bcd648..c6d387f 100644 --- a/README.rst +++ b/README.rst @@ -10,9 +10,9 @@ cssselect: CSS Selectors for Python :target: https://pypi.python.org/pypi/cssselect :alt: Supported Python Versions -.. image:: https://img.shields.io/travis/scrapy/cssselect/master.svg - :target: https://travis-ci.org/scrapy/cssselect - :alt: Build Status +.. 
image:: https://github.com/scrapy/cssselect/actions/workflows/tests.yml/badge.svg + :target: https://github.com/scrapy/cssselect/actions/workflows/tests.yml + :alt: Tests .. image:: https://img.shields.io/codecov/c/github/scrapy/cssselect/master.svg :target: https://codecov.io/github/scrapy/cssselect?branch=master From 4bf687a167e5abd1e50f65b1749baa7634767665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jun 2021 15:43:46 +0200 Subject: [PATCH 146/208] Add flake8 (#105) --- .flake8 | 15 +++++++++++++++ .github/workflows/checks.yml | 3 +++ tox.ini | 12 +++++++++--- 3 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 .flake8 diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..89e6e07 --- /dev/null +++ b/.flake8 @@ -0,0 +1,15 @@ +[flake8] +max-line-length = 99 +ignore = W503 +exclude = + .git + .tox + venv* + + # pending revision + cssselect/__init__.py + cssselect/parser.py + cssselect/xpath.py + docs/conf.py + setup.py + tests/test_cssselect.py diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 99fff74..2f38d19 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -7,6 +7,9 @@ jobs: strategy: matrix: include: + - python-version: 3 + env: + TOXENV: flake8 - python-version: 3 env: TOXENV: security diff --git a/tox.ini b/tox.ini index eabac24..5ae98ce 100644 --- a/tox.ini +++ b/tox.ini @@ -1,15 +1,21 @@ [tox] -envlist = security,py +envlist = flake8,security,py [testenv] +basepython = python3 deps= -r tests/requirements.txt - commands = py.test --cov-report term --cov=cssselect +[testenv:flake8] +deps = + flake8==3.9.2 +commands = + flake8 {posargs: cssselect setup.py tests docs/conf.py} + [testenv:security] deps = bandit commands = - bandit -r -c .bandit.yml {posargs:cssselect} + bandit -r -c .bandit.yml {posargs: cssselect} From 5399d4194e14ad79247bc589cb777b5a547ac149 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Wed, 23 Jun 2021 11:21:22 
-0300 Subject: [PATCH 147/208] Add black check --- .github/workflows/checks.yml | 3 +++ pyproject.toml | 3 +++ setup.py | 50 +++++++++++++++++++----------------- tox.ini | 8 +++++- 4 files changed, 39 insertions(+), 25 deletions(-) create mode 100644 pyproject.toml diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 2f38d19..bb50590 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -7,6 +7,9 @@ jobs: strategy: matrix: include: + - python-version: 3 + env: + TOXENV: black - python-version: 3 env: TOXENV: flake8 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b409f47 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[tool.black] +line-length = 99 +exclude = 'cssselect/|tests/' diff --git a/setup.py b/setup.py index de7128d..3782534 100644 --- a/setup.py +++ b/setup.py @@ -2,45 +2,47 @@ import re import os.path + try: from setuptools import setup - extra_kwargs = {'test_suite': 'cssselect.tests'} + + extra_kwargs = {"test_suite": "cssselect.tests"} except ImportError: from distutils.core import setup + extra_kwargs = {} ROOT = os.path.dirname(__file__) -README = open(os.path.join(ROOT, 'README.rst')).read() -INIT_PY = open(os.path.join(ROOT, 'cssselect', '__init__.py')).read() +README = open(os.path.join(ROOT, "README.rst")).read() +INIT_PY = open(os.path.join(ROOT, "cssselect", "__init__.py")).read() VERSION = re.search("VERSION = '([^']+)'", INIT_PY).group(1) setup( - name='cssselect', + name="cssselect", version=VERSION, - author='Ian Bicking', - author_email='ianb@colorstudy.com', - maintainer='Paul Tremberth', - maintainer_email='paul.tremberth@gmail.com', - description= - 'cssselect parses CSS3 Selectors and translates them to XPath 1.0', + author="Ian Bicking", + author_email="ianb@colorstudy.com", + maintainer="Paul Tremberth", + maintainer_email="paul.tremberth@gmail.com", + description="cssselect parses CSS3 Selectors and translates them to XPath 1.0", 
long_description=README, - url='https://github.com/scrapy/cssselect', - license='BSD', - packages=['cssselect'], - python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*', + url="https://github.com/scrapy/cssselect", + license="BSD", + packages=["cssselect"], + python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*", classifiers=[ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: BSD License', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7' + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: 2", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", ], **extra_kwargs ) diff --git a/tox.ini b/tox.ini index 5ae98ce..1d94302 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = flake8,security,py +envlist = black,flake8,security,py [testenv] basepython = python3 @@ -8,6 +8,12 @@ deps= commands = py.test --cov-report term --cov=cssselect +[testenv:black] +deps = + black==21.6b0 +commands = + black --check {posargs: cssselect setup.py tests} + [testenv:flake8] deps = flake8==3.9.2 From 79c341b15930b6c5ec811a4f7953719722148e3a Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta <1731933+elacuesta@users.noreply.github.com> Date: Thu, 24 Jun 2021 10:42:33 -0300 Subject: [PATCH 148/208] Remove support for py2, py34, py35 (#116) --- .github/workflows/tests.yml | 20 ++++---------------- .gitignore | 4 +++- README.rst | 2 +- setup.py | 10 ++++------ 
tests/requirements.txt | 5 ----- tox.ini | 11 +++++++---- 6 files changed, 19 insertions(+), 33 deletions(-) delete mode 100644 tests/requirements.txt diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 817d824..799f52f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,19 +6,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - include: - - python-version: 2.7 - env: - TOXENV: py - - python-version: 3.5 - env: - TOXENV: py - - python-version: 3.6 - env: - TOXENV: py - - python-version: 3.7 - env: - TOXENV: py + python-version: [3.6, 3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 @@ -29,10 +17,10 @@ jobs: python-version: ${{ matrix.python-version }} - name: Run tests - env: ${{ matrix.env }} run: | + pip install -U pip pip install -U tox - tox + tox -e py - name: Upload coverage report - run: bash <(curl -s https://codecov.io/bash) \ No newline at end of file + run: bash <(curl -s https://codecov.io/bash) diff --git a/.gitignore b/.gitignore index b0ab86a..c276bd1 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,6 @@ /dist /docs/_build /.coverage -.idea \ No newline at end of file +.idea +htmlcov/ +coverage.xml diff --git a/README.rst b/README.rst index c6d387f..dfeedae 100644 --- a/README.rst +++ b/README.rst @@ -33,7 +33,7 @@ extracted as a stand-alone project. 
Quick facts: * Free software: BSD licensed -* Compatible with Python 2.7 and 3.4+ +* Compatible with Python 3.6+ * Latest documentation `on Read the Docs `_ * Source, issues and pull requests `on GitHub `_ diff --git a/setup.py b/setup.py index 3782534..bddda2e 100644 --- a/setup.py +++ b/setup.py @@ -31,18 +31,16 @@ url="https://github.com/scrapy/cssselect", license="BSD", packages=["cssselect"], - python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*", + python_requires=">=3.6", classifiers=[ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", ], - **extra_kwargs + **extra_kwargs, ) diff --git a/tests/requirements.txt b/tests/requirements.txt deleted file mode 100644 index 000d5f2..0000000 --- a/tests/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -codecov -lxml;python_version!="3.4" -lxml<=4.3.5;python_version=="3.4" -pytest >=4.6, <4.7 # 4.7 drops support for Python 2.7 and 3.4 -pytest-cov \ No newline at end of file diff --git a/tox.ini b/tox.ini index 8cbafdf..a9d39b8 100644 --- a/tox.ini +++ b/tox.ini @@ -2,11 +2,14 @@ envlist = black,flake8,security,py [testenv] -basepython = python3 -deps= - -r tests/requirements.txt +deps = + lxml>=4.4 + pytest-cov>=2.8 + pytest>=5.4 commands = - py.test --cov-report term --cov=cssselect + pytest --cov=cssselect \ + --cov-report=term-missing --cov-report=html --cov-report=xml \ + --verbose {posargs: cssselect tests} [testenv:black] deps = From 7bc326df9ceda7073c75f1cb636183daf38694cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 30 Jun 2021 14:05:18 +0200 
Subject: [PATCH 149/208] Simplify the README.rst file (#103) --- README.rst | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/README.rst b/README.rst index dfeedae..9708616 100644 --- a/README.rst +++ b/README.rst @@ -18,24 +18,22 @@ cssselect: CSS Selectors for Python :target: https://codecov.io/github/scrapy/cssselect?branch=master :alt: Coverage report -*cssselect* parses `CSS3 Selectors`_ and translate them to `XPath 1.0`_ -expressions. Such expressions can be used in lxml_ or another XPath engine -to find the matching elements in an XML or HTML document. +**cssselect** is a BSD-licensed Python library to parse `CSS3 selectors`_ and +translate them to `XPath 1.0`_ expressions. -This module used to live inside of lxml as ``lxml.cssselect`` before it was -extracted as a stand-alone project. - -.. _CSS3 Selectors: https://www.w3.org/TR/css3-selectors/ -.. _XPath 1.0: https://www.w3.org/TR/xpath/ -.. _lxml: http://lxml.de/ +`XPath 1.0`_ expressions can be used in lxml_ or another XPath engine to find +the matching elements in an XML or HTML document. +Find the cssselect online documentation at https://cssselect.readthedocs.io. Quick facts: -* Free software: BSD licensed -* Compatible with Python 3.6+ -* Latest documentation `on Read the Docs `_ * Source, issues and pull requests `on GitHub `_ -* Releases `on PyPI `_ +* Releases `on PyPI `_ * Install with ``pip install cssselect`` + + +.. _CSS3 selectors: https://www.w3.org/TR/selectors-3/ +.. _XPath 1.0: https://www.w3.org/TR/xpath/all/ +.. 
_lxml: https://lxml.de/ From 577ca9c1c8f0286b7f34d5bee8192eed6219b677 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Wed, 7 Jul 2021 07:58:03 -0300 Subject: [PATCH 150/208] Add pylint to tox's envlist --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index a9d39b8..f260626 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = black,flake8,security,py +envlist = black,flake8,pylint,security,py [testenv] deps = From 599cbb50866ba4ea4211426a1d30de32e48df5a5 Mon Sep 17 00:00:00 2001 From: Julius Kibunjia Date: Wed, 14 Jul 2021 11:19:34 +0300 Subject: [PATCH 151/208] Add matches-any pseudo-class: ':is()' (#109) --- cssselect/parser.py | 51 +++++++++++++++++++++++++++++++++++++++++ cssselect/xpath.py | 13 +++++++++-- tests/test_cssselect.py | 19 +++++++++++++++ 3 files changed, 81 insertions(+), 2 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 7125030..5494bd4 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -250,6 +250,30 @@ def specificity(self): return a1 + a2, b1 + b2, c1 + c2 +class Matching(object): + """ + Represents selector:is(selector_list) + """ + def __init__(self, selector, selector_list): + self.selector = selector + self.selector_list = selector_list + + def __repr__(self): + return '%s[%r:is(%s)]' % ( + self.__class__.__name__, self.selector, ", ".join( + map(repr, self.selector_list))) + + def canonical(self): + selector_arguments = [] + for s in self.selector_list: + selarg = s.canonical() + selector_arguments.append(selarg.lstrip('*')) + return '%s:is(%s)' % (self.selector.canonical(), + ", ".join(map(str, selector_arguments))) + + def specificity(self): + return max([x.specificity() for x in self.selector_list]) + class Attrib(object): """ Represents selector[namespace|attrib operator value] @@ -432,6 +456,7 @@ def parse_selector_group(stream): else: break + def parse_selector(stream): result, pseudo_element = parse_simple_selector(stream) while 
1: @@ -538,6 +563,9 @@ def parse_simple_selector(stream, inside_negation=False): if next != ('DELIM', ')'): raise SelectorSyntaxError("Expected ')', got %s" % (next,)) result = Negation(result, argument) + elif ident.lower() in ('matches', 'is'): + selectors = parse_simple_selector_arguments(stream) + result = Matching(result, selectors) else: result = Function(result, ident, parse_arguments(stream)) else: @@ -564,6 +592,29 @@ def parse_arguments(stream): "Expected an argument, got %s" % (next,)) +def parse_simple_selector_arguments(stream): + arguments = [] + while 1: + result, pseudo_element = parse_simple_selector(stream, True) + if pseudo_element: + raise SelectorSyntaxError( + 'Got pseudo-element ::%s inside function' + % (pseudo_element, )) + stream.skip_whitespace() + next = stream.next() + if next in (('EOF', None), ('DELIM', ',')): + stream.next() + stream.skip_whitespace() + arguments.append(result) + elif next == ('DELIM', ')'): + arguments.append(result) + break + else: + raise SelectorSyntaxError( + "Expected an argument, got %s" % (next,)) + return arguments + + def parse_attrib(selector, stream): stream.skip_whitespace() attrib = stream.next_ident_or_star() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index a8722bb..db44d42 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -54,9 +54,9 @@ def __str__(self): def __repr__(self): return '%s[%s]' % (self.__class__.__name__, self) - def add_condition(self, condition): + def add_condition(self, condition, conjuction='and'): if self.condition: - self.condition = '(%s) and (%s)' % (self.condition, condition) + self.condition = '(%s) %s (%s)' % (self.condition, conjuction, condition) else: self.condition = condition return self @@ -272,6 +272,15 @@ def xpath_negation(self, negation): else: return xpath.add_condition('0') + def xpath_matching(self, matching): + xpath = self.xpath(matching.selector) + exprs = [self.xpath(selector) for selector in matching.selector_list] + for e in exprs: + 
e.add_name_test() + if e.condition: + xpath.add_condition(e.condition, 'or') + return xpath + def xpath_function(self, function): """Translate a functional pseudo-class.""" method = 'xpath_%s_function' % function.name.replace('-', '_') diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index d6969f2..bd37875 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -145,6 +145,10 @@ def parse_many(first, *others): 'Hash[Element[div]#foobar]'] assert parse_many('div:not(div.foo)') == [ 'Negation[Element[div]:not(Class[Element[div].foo])]'] + assert parse_many('div:is(.foo, #bar)') == [ + 'Matching[Element[div]:is(Class[Element[*].foo], Hash[Element[*]#bar])]'] + assert parse_many(':is(:hover, :visited)') == [ + 'Matching[Element[*]:is(Pseudo[Element[*]:hover], Pseudo[Element[*]:visited])]'] assert parse_many('td ~ th') == [ 'CombinedSelector[Element[td] ~ Element[th]]'] assert parse_many(':scope > foo') == [ @@ -266,6 +270,9 @@ def specificity(css): assert specificity(':not(:empty)') == (0, 1, 0) assert specificity(':not(#foo)') == (1, 0, 0) + assert specificity(':is(.foo, #bar)') == (1, 0, 0) + assert specificity(':is(:hover, :visited)') == (0, 1, 0) + assert specificity('foo:empty') == (0, 1, 1) assert specificity('foo:before') == (0, 0, 2) assert specificity('foo::before') == (0, 0, 2) @@ -300,6 +307,8 @@ def css2css(css, res=None): css2css(':not(*[foo])', ':not([foo])') css2css(':not(:empty)') css2css(':not(#foo)') + css2css(':is(#bar, .foo)') + css2css(':is(:focused, :visited)') css2css('foo:empty') css2css('foo::before') css2css('foo:empty::before') @@ -373,6 +382,10 @@ def get_error(css): "Got pseudo-element ::before inside :not() at 12") assert get_error(':not(:not(a))') == ( "Got nested :not()") + assert get_error(':is(:before)') == ( + "Got pseudo-element ::before inside function") + assert get_error(':is(a b)') == ( + "Expected an argument, got ") assert get_error(':scope > div :scope header') == ( 'Got immediate child 
pseudo-element ":scope" not at the start of a selector' ) @@ -863,6 +876,12 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol :Not(li[class])') == [ 'first-li', 'second-li', 'li-div', 'fifth-li', 'sixth-li', 'seventh-li'] + assert pcss(':is(#first-li, #second-li)') == [ + 'first-li', 'second-li'] + assert pcss('a:is(#name-anchor, #tag-anchor)') == [ + 'name-anchor', 'tag-anchor'] + assert pcss(':is(.c)') == [ + 'first-ol', 'third-li', 'fourth-li'] assert pcss('ol.a.b.c > li.c:nth-child(3)') == ['third-li'] # Invalid characters in XPath element names, should not crash From b06a7fcb4da29b150abd4bc7d642de2aa1d34db1 Mon Sep 17 00:00:00 2001 From: Pascal Corpet Date: Wed, 21 Jul 2021 23:32:06 +0200 Subject: [PATCH 152/208] Update to pylint 2.9.5 --- docs/conf.py | 4 ++-- setup.py | 6 ++++-- tox.ini | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index aa897ef..62b5202 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -50,8 +50,8 @@ # built documents. # # The full version, including alpha/beta/rc tags. -init_py = open(os.path.join(os.path.dirname(__file__), - '..', 'cssselect', '__init__.py')).read() +with open(os.path.join(os.path.dirname(__file__), '..', 'cssselect', '__init__.py')) as init_file: + init_py = init_file.read() release = re.search("VERSION = '([^']+)'", init_py).group(1) # The short X.Y version. 
version = release.rstrip('dev') diff --git a/setup.py b/setup.py index bddda2e..3a0bea0 100644 --- a/setup.py +++ b/setup.py @@ -14,8 +14,10 @@ ROOT = os.path.dirname(__file__) -README = open(os.path.join(ROOT, "README.rst")).read() -INIT_PY = open(os.path.join(ROOT, "cssselect", "__init__.py")).read() +with open(os.path.join(ROOT, "README.rst")) as readme_file: + README = readme_file.read() +with open(os.path.join(ROOT, "cssselect", "__init__.py")) as init_file: + INIT_PY = init_file.read() VERSION = re.search("VERSION = '([^']+)'", INIT_PY).group(1) diff --git a/tox.ini b/tox.ini index f260626..372ecb9 100644 --- a/tox.ini +++ b/tox.ini @@ -26,7 +26,7 @@ commands = [testenv:pylint] deps = {[testenv]deps} - pylint==2.8.3 + pylint==2.9.5 commands = pylint {posargs: cssselect setup.py tests docs} From 9edc6c3f5cf558bd99fa9e584c6832fabe24b942 Mon Sep 17 00:00:00 2001 From: Pascal Corpet Date: Mon, 26 Jul 2021 15:55:37 +0200 Subject: [PATCH 153/208] Apply black formatting (#122) --- cssselect/__init__.py | 11 +- cssselect/parser.py | 407 ++++++------ cssselect/xpath.py | 327 +++++----- docs/conf.py | 2 +- pyproject.toml | 1 - setup.py | 2 +- tests/test_cssselect.py | 1295 +++++++++++++++++++-------------------- 7 files changed, 1028 insertions(+), 1017 deletions(-) diff --git a/cssselect/__init__.py b/cssselect/__init__.py index b41cef9..2e4f824 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -13,10 +13,15 @@ """ -from cssselect.parser import (parse, Selector, FunctionalPseudoElement, - SelectorError, SelectorSyntaxError) +from cssselect.parser import ( + parse, + Selector, + FunctionalPseudoElement, + SelectorError, + SelectorSyntaxError, +) from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError -VERSION = '1.1.0' +VERSION = "1.1.0" __version__ = VERSION diff --git a/cssselect/parser.py b/cssselect/parser.py index 5494bd4..a27ece5 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -27,7 +27,7 @@ def 
ascii_lower(string): """Lower-case, but only in the ASCII range.""" - return string.encode('utf8').lower().decode('utf8') + return string.encode("utf8").lower().decode("utf8") class SelectorError(Exception): @@ -39,12 +39,14 @@ class SelectorError(Exception): """ + class SelectorSyntaxError(SelectorError, SyntaxError): """Parsing a selector that does not match the grammar.""" #### Parsed objects + class Selector(object): """ Represents a parsed selector. @@ -55,10 +57,10 @@ class Selector(object): or unsupported pseudo-elements. """ + def __init__(self, tree, pseudo_element=None): self.parsed_tree = tree - if pseudo_element is not None and not isinstance( - pseudo_element, FunctionalPseudoElement): + if pseudo_element is not None and not isinstance(pseudo_element, FunctionalPseudoElement): pseudo_element = ascii_lower(pseudo_element) #: A :class:`FunctionalPseudoElement`, #: or the identifier for the pseudo-element as a string, @@ -86,24 +88,22 @@ def __repr__(self): if isinstance(self.pseudo_element, FunctionalPseudoElement): pseudo_element = repr(self.pseudo_element) elif self.pseudo_element: - pseudo_element = '::%s' % self.pseudo_element + pseudo_element = "::%s" % self.pseudo_element else: - pseudo_element = '' - return '%s[%r%s]' % ( - self.__class__.__name__, self.parsed_tree, pseudo_element) + pseudo_element = "" + return "%s[%r%s]" % (self.__class__.__name__, self.parsed_tree, pseudo_element) def canonical(self): - """Return a CSS representation for this selector (a string) - """ + """Return a CSS representation for this selector (a string)""" if isinstance(self.pseudo_element, FunctionalPseudoElement): - pseudo_element = '::%s' % self.pseudo_element.canonical() + pseudo_element = "::%s" % self.pseudo_element.canonical() elif self.pseudo_element: - pseudo_element = '::%s' % self.pseudo_element + pseudo_element = "::%s" % self.pseudo_element else: - pseudo_element = '' - res = '%s%s' % (self.parsed_tree.canonical(), pseudo_element) + pseudo_element = "" + 
res = "%s%s" % (self.parsed_tree.canonical(), pseudo_element) if len(res) > 1: - res = res.lstrip('*') + res = res.lstrip("*") return res def specificity(self): @@ -122,16 +122,16 @@ class Class(object): """ Represents selector.class_name """ + def __init__(self, selector, class_name): self.selector = selector self.class_name = class_name def __repr__(self): - return '%s[%r.%s]' % ( - self.__class__.__name__, self.selector, self.class_name) + return "%s[%r.%s]" % (self.__class__.__name__, self.selector, self.class_name) def canonical(self): - return '%s.%s' % (self.selector.canonical(), self.class_name) + return "%s.%s" % (self.selector.canonical(), self.class_name) def specificity(self): a, b, c = self.selector.specificity() @@ -156,21 +156,24 @@ class FunctionalPseudoElement(object): Use at your own risks. """ + def __init__(self, name, arguments): self.name = ascii_lower(name) self.arguments = arguments def __repr__(self): - return '%s[::%s(%r)]' % ( - self.__class__.__name__, self.name, - [token.value for token in self.arguments]) + return "%s[::%s(%r)]" % ( + self.__class__.__name__, + self.name, + [token.value for token in self.arguments], + ) def argument_types(self): return [token.type for token in self.arguments] def canonical(self): - args = ''.join(token.css() for token in self.arguments) - return '%s(%s)' % (self.name, args) + args = "".join(token.css() for token in self.arguments) + return "%s(%s)" % (self.name, args) def specificity(self): a, b, c = self.selector.specificity() @@ -182,22 +185,26 @@ class Function(object): """ Represents selector:name(expr) """ + def __init__(self, selector, name, arguments): self.selector = selector self.name = ascii_lower(name) self.arguments = arguments def __repr__(self): - return '%s[%r:%s(%r)]' % ( - self.__class__.__name__, self.selector, self.name, - [token.value for token in self.arguments]) + return "%s[%r:%s(%r)]" % ( + self.__class__.__name__, + self.selector, + self.name, + [token.value for token in 
self.arguments], + ) def argument_types(self): return [token.type for token in self.arguments] def canonical(self): - args = ''.join(token.css() for token in self.arguments) - return '%s:%s(%s)' % (self.selector.canonical(), self.name, args) + args = "".join(token.css() for token in self.arguments) + return "%s:%s(%s)" % (self.selector.canonical(), self.name, args) def specificity(self): a, b, c = self.selector.specificity() @@ -209,16 +216,16 @@ class Pseudo(object): """ Represents selector:ident """ + def __init__(self, selector, ident): self.selector = selector self.ident = ascii_lower(ident) def __repr__(self): - return '%s[%r:%s]' % ( - self.__class__.__name__, self.selector, self.ident) + return "%s[%r:%s]" % (self.__class__.__name__, self.selector, self.ident) def canonical(self): - return '%s:%s' % (self.selector.canonical(), self.ident) + return "%s:%s" % (self.selector.canonical(), self.ident) def specificity(self): a, b, c = self.selector.specificity() @@ -230,19 +237,19 @@ class Negation(object): """ Represents selector:not(subselector) """ + def __init__(self, selector, subselector): self.selector = selector self.subselector = subselector def __repr__(self): - return '%s[%r:not(%r)]' % ( - self.__class__.__name__, self.selector, self.subselector) + return "%s[%r:not(%r)]" % (self.__class__.__name__, self.selector, self.subselector) def canonical(self): subsel = self.subselector.canonical() if len(subsel) > 1: - subsel = subsel.lstrip('*') - return '%s:not(%s)' % (self.selector.canonical(), subsel) + subsel = subsel.lstrip("*") + return "%s:not(%s)" % (self.selector.canonical(), subsel) def specificity(self): a1, b1, c1 = self.selector.specificity() @@ -254,30 +261,34 @@ class Matching(object): """ Represents selector:is(selector_list) """ + def __init__(self, selector, selector_list): self.selector = selector self.selector_list = selector_list def __repr__(self): - return '%s[%r:is(%s)]' % ( - self.__class__.__name__, self.selector, ", ".join( - 
map(repr, self.selector_list))) + return "%s[%r:is(%s)]" % ( + self.__class__.__name__, + self.selector, + ", ".join(map(repr, self.selector_list)), + ) def canonical(self): selector_arguments = [] for s in self.selector_list: selarg = s.canonical() - selector_arguments.append(selarg.lstrip('*')) - return '%s:is(%s)' % (self.selector.canonical(), - ", ".join(map(str, selector_arguments))) + selector_arguments.append(selarg.lstrip("*")) + return "%s:is(%s)" % (self.selector.canonical(), ", ".join(map(str, selector_arguments))) def specificity(self): return max([x.specificity() for x in self.selector_list]) + class Attrib(object): """ Represents selector[namespace|attrib operator value] """ + def __init__(self, selector, namespace, attrib, operator, value): self.selector = selector self.namespace = namespace @@ -287,29 +298,32 @@ def __init__(self, selector, namespace, attrib, operator, value): def __repr__(self): if self.namespace: - attrib = '%s|%s' % (self.namespace, self.attrib) + attrib = "%s|%s" % (self.namespace, self.attrib) else: attrib = self.attrib - if self.operator == 'exists': - return '%s[%r[%s]]' % ( - self.__class__.__name__, self.selector, attrib) + if self.operator == "exists": + return "%s[%r[%s]]" % (self.__class__.__name__, self.selector, attrib) else: - return '%s[%r[%s %s %r]]' % ( - self.__class__.__name__, self.selector, attrib, - self.operator, self.value.value) + return "%s[%r[%s %s %r]]" % ( + self.__class__.__name__, + self.selector, + attrib, + self.operator, + self.value.value, + ) def canonical(self): if self.namespace: - attrib = '%s|%s' % (self.namespace, self.attrib) + attrib = "%s|%s" % (self.namespace, self.attrib) else: attrib = self.attrib - if self.operator == 'exists': + if self.operator == "exists": op = attrib else: - op = '%s%s%s' % (attrib, self.operator, self.value.css()) + op = "%s%s%s" % (attrib, self.operator, self.value.css()) - return '%s[%s]' % (self.selector.canonical(), op) + return "%s[%s]" % 
(self.selector.canonical(), op) def specificity(self): a, b, c = self.selector.specificity() @@ -324,17 +338,18 @@ class Element(object): `None` is for the universal selector '*' """ + def __init__(self, namespace=None, element=None): self.namespace = namespace self.element = element def __repr__(self): - return '%s[%s]' % (self.__class__.__name__, self.canonical()) + return "%s[%s]" % (self.__class__.__name__, self.canonical()) def canonical(self): - element = self.element or '*' + element = self.element or "*" if self.namespace: - element = '%s|%s' % (self.namespace, element) + element = "%s|%s" % (self.namespace, element) return element def specificity(self): @@ -348,16 +363,16 @@ class Hash(object): """ Represents selector#id """ + def __init__(self, selector, id): self.selector = selector self.id = id def __repr__(self): - return '%s[%r#%s]' % ( - self.__class__.__name__, self.selector, self.id) + return "%s[%r#%s]" % (self.__class__.__name__, self.selector, self.id) def canonical(self): - return '%s#%s' % (self.selector.canonical(), self.id) + return "%s#%s" % (self.selector.canonical(), self.id) def specificity(self): a, b, c = self.selector.specificity() @@ -373,19 +388,17 @@ def __init__(self, selector, combinator, subselector): self.subselector = subselector def __repr__(self): - if self.combinator == ' ': - comb = '' + if self.combinator == " ": + comb = "" else: comb = self.combinator - return '%s[%r %s %r]' % ( - self.__class__.__name__, self.selector, comb, self.subselector) + return "%s[%r %s %r]" % (self.__class__.__name__, self.selector, comb, self.subselector) def canonical(self): subsel = self.subselector.canonical() if len(subsel) > 1: - subsel = subsel.lstrip('*') - return '%s %s %s' % ( - self.selector.canonical(), self.combinator, subsel) + subsel = subsel.lstrip("*") + return "%s %s %s" % (self.selector.canonical(), self.combinator, subsel) def specificity(self): a1, b1, c1 = self.selector.specificity() @@ -396,14 +409,13 @@ def 
specificity(self): #### Parser # foo -_el_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$') +_el_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$") # foo#bar or #bar -_id_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$') +_id_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$") # foo.bar or .bar -_class_re = re.compile( - r'^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$') +_class_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$") def parse(css): @@ -427,16 +439,16 @@ def parse(css): return [Selector(Element(element=match.group(1)))] match = _id_re.match(css) if match is not None: - return [Selector(Hash(Element(element=match.group(1) or None), - match.group(2)))] + return [Selector(Hash(Element(element=match.group(1) or None), match.group(2)))] match = _class_re.match(css) if match is not None: - return [Selector(Class(Element(element=match.group(1) or None), - match.group(2)))] + return [Selector(Class(Element(element=match.group(1) or None), match.group(2)))] stream = TokenStream(tokenize(css)) stream.source = css return list(parse_selector_group(stream)) + + # except SelectorSyntaxError: # e = sys.exc_info()[1] # message = "%s at %s -> %r" % ( @@ -450,7 +462,7 @@ def parse_selector_group(stream): stream.skip_whitespace() while 1: yield Selector(*parse_selector(stream)) - if stream.peek() == ('DELIM', ','): + if stream.peek() == ("DELIM", ","): stream.next() stream.skip_whitespace() else: @@ -462,20 +474,20 @@ def parse_selector(stream): while 1: stream.skip_whitespace() peek = stream.peek() - if peek in (('EOF', None), ('DELIM', ',')): + if peek in (("EOF", None), ("DELIM", ",")): break if pseudo_element: raise SelectorSyntaxError( - 'Got pseudo-element ::%s not at the end of a selector' - % pseudo_element) - if peek.is_delim('+', '>', '~'): + "Got pseudo-element ::%s not at the end of a selector" % pseudo_element + ) + if peek.is_delim("+", ">", "~"): 
# A combinator combinator = stream.next().value stream.skip_whitespace() else: # By exclusion, the last parse_simple_selector() ended # at peek == ' ' - combinator = ' ' + combinator = " " next_selector, pseudo_element = parse_simple_selector(stream) result = CombinedSelector(result, combinator, next_selector) return result, pseudo_element @@ -485,13 +497,13 @@ def parse_simple_selector(stream, inside_negation=False): stream.skip_whitespace() selector_start = len(stream.used) peek = stream.peek() - if peek.type == 'IDENT' or peek == ('DELIM', '*'): - if peek.type == 'IDENT': + if peek.type == "IDENT" or peek == ("DELIM", "*"): + if peek.type == "IDENT": namespace = stream.next().value else: stream.next() namespace = None - if stream.peek() == ('DELIM', '|'): + if stream.peek() == ("DELIM", "|"): stream.next() element = stream.next_ident_or_star() else: @@ -503,77 +515,82 @@ def parse_simple_selector(stream, inside_negation=False): pseudo_element = None while 1: peek = stream.peek() - if peek.type in ('S', 'EOF') or peek.is_delim(',', '+', '>', '~') or ( - inside_negation and peek == ('DELIM', ')')): + if ( + peek.type in ("S", "EOF") + or peek.is_delim(",", "+", ">", "~") + or (inside_negation and peek == ("DELIM", ")")) + ): break if pseudo_element: raise SelectorSyntaxError( - 'Got pseudo-element ::%s not at the end of a selector' - % pseudo_element) - if peek.type == 'HASH': + "Got pseudo-element ::%s not at the end of a selector" % pseudo_element + ) + if peek.type == "HASH": result = Hash(result, stream.next().value) - elif peek == ('DELIM', '.'): + elif peek == ("DELIM", "."): stream.next() result = Class(result, stream.next_ident()) - elif peek == ('DELIM', '|'): + elif peek == ("DELIM", "|"): stream.next() result = Element(None, stream.next_ident()) - elif peek == ('DELIM', '['): + elif peek == ("DELIM", "["): stream.next() result = parse_attrib(result, stream) - elif peek == ('DELIM', ':'): + elif peek == ("DELIM", ":"): stream.next() - if stream.peek() == 
('DELIM', ':'): + if stream.peek() == ("DELIM", ":"): stream.next() pseudo_element = stream.next_ident() - if stream.peek() == ('DELIM', '('): + if stream.peek() == ("DELIM", "("): stream.next() pseudo_element = FunctionalPseudoElement( - pseudo_element, parse_arguments(stream)) + pseudo_element, parse_arguments(stream) + ) continue ident = stream.next_ident() - if ident.lower() in ('first-line', 'first-letter', - 'before', 'after'): + if ident.lower() in ("first-line", "first-letter", "before", "after"): # Special case: CSS 2.1 pseudo-elements can have a single ':' # Any new pseudo-element must have two. pseudo_element = _unicode(ident) continue - if stream.peek() != ('DELIM', '('): + if stream.peek() != ("DELIM", "("): result = Pseudo(result, ident) - if result.__repr__() == 'Pseudo[Element[*]:scope]': - if not (len(stream.used) == 2 or - (len(stream.used) == 3 - and stream.used[0].type == 'S')): + if result.__repr__() == "Pseudo[Element[*]:scope]": + if not ( + len(stream.used) == 2 + or (len(stream.used) == 3 and stream.used[0].type == "S") + ): raise SelectorSyntaxError( 'Got immediate child pseudo-element ":scope" ' - 'not at the start of a selector') + "not at the start of a selector" + ) continue stream.next() stream.skip_whitespace() - if ident.lower() == 'not': + if ident.lower() == "not": if inside_negation: - raise SelectorSyntaxError('Got nested :not()') + raise SelectorSyntaxError("Got nested :not()") argument, argument_pseudo_element = parse_simple_selector( - stream, inside_negation=True) + stream, inside_negation=True + ) next = stream.next() if argument_pseudo_element: raise SelectorSyntaxError( - 'Got pseudo-element ::%s inside :not() at %s' - % (argument_pseudo_element, next.pos)) - if next != ('DELIM', ')'): + "Got pseudo-element ::%s inside :not() at %s" + % (argument_pseudo_element, next.pos) + ) + if next != ("DELIM", ")"): raise SelectorSyntaxError("Expected ')', got %s" % (next,)) result = Negation(result, argument) - elif ident.lower() in 
('matches', 'is'): + elif ident.lower() in ("matches", "is"): selectors = parse_simple_selector_arguments(stream) result = Matching(result, selectors) else: result = Function(result, ident, parse_arguments(stream)) else: - raise SelectorSyntaxError( - "Expected selector, got %s" % (peek,)) + raise SelectorSyntaxError("Expected selector, got %s" % (peek,)) if len(stream.used) == selector_start: - raise SelectorSyntaxError( - "Expected selector, got %s" % (stream.peek(),)) + raise SelectorSyntaxError("Expected selector, got %s" % (stream.peek(),)) return result, pseudo_element @@ -582,14 +599,12 @@ def parse_arguments(stream): while 1: stream.skip_whitespace() next = stream.next() - if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [ - ('DELIM', '+'), ('DELIM', '-')]: + if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "+"), ("DELIM", "-")]: arguments.append(next) - elif next == ('DELIM', ')'): + elif next == ("DELIM", ")"): return arguments else: - raise SelectorSyntaxError( - "Expected an argument, got %s" % (next,)) + raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) def parse_simple_selector_arguments(stream): @@ -598,35 +613,33 @@ def parse_simple_selector_arguments(stream): result, pseudo_element = parse_simple_selector(stream, True) if pseudo_element: raise SelectorSyntaxError( - 'Got pseudo-element ::%s inside function' - % (pseudo_element, )) + "Got pseudo-element ::%s inside function" % (pseudo_element,) + ) stream.skip_whitespace() next = stream.next() - if next in (('EOF', None), ('DELIM', ',')): + if next in (("EOF", None), ("DELIM", ",")): stream.next() stream.skip_whitespace() arguments.append(result) - elif next == ('DELIM', ')'): + elif next == ("DELIM", ")"): arguments.append(result) break else: - raise SelectorSyntaxError( - "Expected an argument, got %s" % (next,)) + raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) return arguments def parse_attrib(selector, stream): 
stream.skip_whitespace() attrib = stream.next_ident_or_star() - if attrib is None and stream.peek() != ('DELIM', '|'): - raise SelectorSyntaxError( - "Expected '|', got %s" % (stream.peek(),)) - if stream.peek() == ('DELIM', '|'): + if attrib is None and stream.peek() != ("DELIM", "|"): + raise SelectorSyntaxError("Expected '|', got %s" % (stream.peek(),)) + if stream.peek() == ("DELIM", "|"): stream.next() - if stream.peek() == ('DELIM', '='): + if stream.peek() == ("DELIM", "="): namespace = None stream.next() - op = '|=' + op = "|=" else: namespace = attrib attrib = stream.next_ident() @@ -636,27 +649,23 @@ def parse_attrib(selector, stream): if op is None: stream.skip_whitespace() next = stream.next() - if next == ('DELIM', ']'): - return Attrib(selector, namespace, attrib, 'exists', None) - elif next == ('DELIM', '='): - op = '=' - elif next.is_delim('^', '$', '*', '~', '|', '!') and ( - stream.peek() == ('DELIM', '=')): - op = next.value + '=' + if next == ("DELIM", "]"): + return Attrib(selector, namespace, attrib, "exists", None) + elif next == ("DELIM", "="): + op = "=" + elif next.is_delim("^", "$", "*", "~", "|", "!") and (stream.peek() == ("DELIM", "=")): + op = next.value + "=" stream.next() else: - raise SelectorSyntaxError( - "Operator expected, got %s" % (next,)) + raise SelectorSyntaxError("Operator expected, got %s" % (next,)) stream.skip_whitespace() value = stream.next() - if value.type not in ('IDENT', 'STRING'): - raise SelectorSyntaxError( - "Expected string or ident, got %s" % (value,)) + if value.type not in ("IDENT", "STRING"): + raise SelectorSyntaxError("Expected string or ident, got %s" % (value,)) stream.skip_whitespace() next = stream.next() - if next != ('DELIM', ']'): - raise SelectorSyntaxError( - "Expected ']', got %s" % (next,)) + if next != ("DELIM", "]"): + raise SelectorSyntaxError("Expected ']', got %s" % (next,)) return Attrib(selector, namespace, attrib, op, value) @@ -669,23 +678,23 @@ def parse_series(tokens): """ for 
token in tokens: - if token.type == 'STRING': - raise ValueError('String tokens not allowed in series.') - s = ''.join(token.value for token in tokens).strip() - if s == 'odd': + if token.type == "STRING": + raise ValueError("String tokens not allowed in series.") + s = "".join(token.value for token in tokens).strip() + if s == "odd": return 2, 1 - elif s == 'even': + elif s == "even": return 2, 0 - elif s == 'n': + elif s == "n": return 1, 0 - if 'n' not in s: + if "n" not in s: # Just b return 0, int(s) - a, b = s.split('n', 1) + a, b = s.split("n", 1) if not a: a = 1 - elif a == '-' or a == '+': - a = int(a+'1') + elif a == "-" or a == "+": + a = int(a + "1") else: a = int(a) if not b: @@ -697,6 +706,7 @@ def parse_series(tokens): #### Token objects + class Token(tuple): def __new__(cls, type_, value, pos): obj = tuple.__new__(cls, (type_, value)) @@ -707,13 +717,13 @@ def __repr__(self): return "<%s '%s' at %i>" % (self.type, self.value, self.pos) def is_delim(self, *values): - return self.type == 'DELIM' and self.value in values + return self.type == "DELIM" and self.value in values type = property(operator.itemgetter(0)) value = property(operator.itemgetter(1)) def css(self): - if self.type == 'STRING': + if self.type == "STRING": return repr(self.value) else: return self.value @@ -721,41 +731,44 @@ def css(self): class EOFToken(Token): def __new__(cls, pos): - return Token.__new__(cls, 'EOF', None, pos) + return Token.__new__(cls, "EOF", None, pos) def __repr__(self): - return '<%s at %i>' % (self.type, self.pos) + return "<%s at %i>" % (self.type, self.pos) #### Tokenizer class TokenMacros: - unicode_escape = r'\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?' - escape = unicode_escape + r'|\\[^\n\r\f0-9a-f]' - string_escape = r'\\(?:\n|\r\n|\r|\f)|' + escape - nonascii = r'[^\0-\177]' - nmchar = '[_a-z0-9-]|%s|%s' % (escape, nonascii) - nmstart = '[_a-z]|%s|%s' % (escape, nonascii) + unicode_escape = r"\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?" 
+ escape = unicode_escape + r"|\\[^\n\r\f0-9a-f]" + string_escape = r"\\(?:\n|\r\n|\r|\f)|" + escape + nonascii = r"[^\0-\177]" + nmchar = "[_a-z0-9-]|%s|%s" % (escape, nonascii) + nmstart = "[_a-z]|%s|%s" % (escape, nonascii) + def _compile(pattern): return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match -_match_whitespace = _compile(r'[ \t\r\n\f]+') -_match_number = _compile(r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)') -_match_hash = _compile('#(?:%(nmchar)s)+') -_match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*') + +_match_whitespace = _compile(r"[ \t\r\n\f]+") +_match_number = _compile(r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)") +_match_hash = _compile("#(?:%(nmchar)s)+") +_match_ident = _compile("-?(?:%(nmstart)s)(?:%(nmchar)s)*") _match_string_by_quote = { "'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"), '"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'), } -_sub_simple_escape = re.compile(r'\\(.)').sub +_sub_simple_escape = re.compile(r"\\(.)").sub _sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub -_sub_newline_escape =re.compile(r'\\(?:\n|\r\n|\r|\f)').sub +_sub_newline_escape = re.compile(r"\\(?:\n|\r\n|\r|\f)").sub # Same as r'\1', but faster on CPython -_replace_simple = operator.methodcaller('group', 1) +_replace_simple = operator.methodcaller("group", 1) + def _replace_unicode(match): codepoint = int(match.group(1), 16) @@ -776,59 +789,62 @@ def tokenize(s): while pos < len_s: match = _match_whitespace(s, pos=pos) if match: - yield Token('S', ' ', pos) + yield Token("S", " ", pos) pos = match.end() continue match = _match_ident(s, pos=pos) if match: - value = _sub_simple_escape(_replace_simple, - _sub_unicode_escape(_replace_unicode, match.group())) - yield Token('IDENT', value, pos) + value = _sub_simple_escape( + _replace_simple, _sub_unicode_escape(_replace_unicode, match.group()) + ) + yield Token("IDENT", value, pos) pos = match.end() continue match = _match_hash(s, pos=pos) if match: - value = 
_sub_simple_escape(_replace_simple, - _sub_unicode_escape(_replace_unicode, match.group()[1:])) - yield Token('HASH', value, pos) + value = _sub_simple_escape( + _replace_simple, _sub_unicode_escape(_replace_unicode, match.group()[1:]) + ) + yield Token("HASH", value, pos) pos = match.end() continue quote = s[pos] if quote in _match_string_by_quote: match = _match_string_by_quote[quote](s, pos=pos + 1) - assert match, 'Should have found at least an empty match' + assert match, "Should have found at least an empty match" end_pos = match.end() if end_pos == len_s: - raise SelectorSyntaxError('Unclosed string at %s' % pos) + raise SelectorSyntaxError("Unclosed string at %s" % pos) if s[end_pos] != quote: - raise SelectorSyntaxError('Invalid string at %s' % pos) - value = _sub_simple_escape(_replace_simple, - _sub_unicode_escape(_replace_unicode, - _sub_newline_escape('', match.group()))) - yield Token('STRING', value, pos) + raise SelectorSyntaxError("Invalid string at %s" % pos) + value = _sub_simple_escape( + _replace_simple, + _sub_unicode_escape(_replace_unicode, _sub_newline_escape("", match.group())), + ) + yield Token("STRING", value, pos) pos = end_pos + 1 continue match = _match_number(s, pos=pos) if match: value = match.group() - yield Token('NUMBER', value, pos) + yield Token("NUMBER", value, pos) pos = match.end() continue pos2 = pos + 2 - if s[pos:pos2] == '/*': - pos = s.find('*/', pos2) + if s[pos:pos2] == "/*": + pos = s.find("*/", pos2) if pos == -1: pos = len_s else: pos += 2 continue - yield Token('DELIM', s[pos], pos) + yield Token("DELIM", s[pos], pos) pos += 1 assert pos == len_s @@ -866,21 +882,20 @@ def peek(self): def next_ident(self): next = self.next() - if next.type != 'IDENT': - raise SelectorSyntaxError('Expected ident, got %s' % (next,)) + if next.type != "IDENT": + raise SelectorSyntaxError("Expected ident, got %s" % (next,)) return next.value def next_ident_or_star(self): next = self.next() - if next.type == 'IDENT': + if next.type == 
"IDENT": return next.value - elif next == ('DELIM', '*'): + elif next == ("DELIM", "*"): return None else: - raise SelectorSyntaxError( - "Expected ident or '*', got %s" % (next,)) + raise SelectorSyntaxError("Expected ident or '*', got %s" % (next,)) def skip_whitespace(self): peek = self.peek() - if peek.type == 'S': + if peek.type == "S": self.next() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index db44d42..f80e629 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -28,7 +28,7 @@ def _unicode_safe_getattr(obj, name, default=None): # getattr() with a non-ASCII name fails on Python 2.x - name = name.encode('ascii', 'replace').decode('ascii') + name = name.encode("ascii", "replace").decode("ascii") return getattr(obj, name, default) @@ -38,48 +38,47 @@ class ExpressionError(SelectorError, RuntimeError): #### XPath Helpers -class XPathExpr(object): - def __init__(self, path='', element='*', condition='', star_prefix=False): +class XPathExpr(object): + def __init__(self, path="", element="*", condition="", star_prefix=False): self.path = path self.element = element self.condition = condition def __str__(self): - path = _unicode(self.path) + _unicode(self.element) + path = _unicode(self.path) + _unicode(self.element) if self.condition: - path += '[%s]' % self.condition + path += "[%s]" % self.condition return path def __repr__(self): - return '%s[%s]' % (self.__class__.__name__, self) + return "%s[%s]" % (self.__class__.__name__, self) - def add_condition(self, condition, conjuction='and'): + def add_condition(self, condition, conjuction="and"): if self.condition: - self.condition = '(%s) %s (%s)' % (self.condition, conjuction, condition) + self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition) else: self.condition = condition return self def add_name_test(self): - if self.element == '*': + if self.element == "*": # We weren't doing a test anyway return - self.add_condition( - "name() = %s" % 
GenericTranslator.xpath_literal(self.element)) - self.element = '*' + self.add_condition("name() = %s" % GenericTranslator.xpath_literal(self.element)) + self.element = "*" def add_star_prefix(self): """ Append '*/' to the path to keep the context constrained to a single parent. """ - self.path += '*/' + self.path += "*/" def join(self, combiner, other): path = _unicode(self) + combiner # Any "star prefix" is redundant when joining. - if other.path != '*/': + if other.path != "*/": path += other.path self.path = path self.element = other.element @@ -92,14 +91,15 @@ def join(self, combiner, other): # The spec is actually more permissive than that, but don’t bother. # This is just for the fast path. # http://www.w3.org/TR/REC-xml/#NT-NameStartChar -is_safe_name = re.compile('^[a-zA-Z_][a-zA-Z0-9_.-]*$').match +is_safe_name = re.compile("^[a-zA-Z_][a-zA-Z0-9_.-]*$").match # Test that the string is not empty and does not contain whitespace -is_non_whitespace = re.compile(r'^[^ \t\r\n\f]+$').match +is_non_whitespace = re.compile(r"^[^ \t\r\n\f]+$").match #### Translation + class GenericTranslator(object): """ Translator for "generic" XML documents. 
@@ -122,30 +122,30 @@ class GenericTranslator(object): #### combinator_mapping = { - ' ': 'descendant', - '>': 'child', - '+': 'direct_adjacent', - '~': 'indirect_adjacent', + " ": "descendant", + ">": "child", + "+": "direct_adjacent", + "~": "indirect_adjacent", } attribute_operator_mapping = { - 'exists': 'exists', - '=': 'equals', - '~=': 'includes', - '|=': 'dashmatch', - '^=': 'prefixmatch', - '$=': 'suffixmatch', - '*=': 'substringmatch', - '!=': 'different', # XXX Not in Level 3 but meh + "exists": "exists", + "=": "equals", + "~=": "includes", + "|=": "dashmatch", + "^=": "prefixmatch", + "$=": "suffixmatch", + "*=": "substringmatch", + "!=": "different", # XXX Not in Level 3 but meh } #: The attribute used for ID selectors depends on the document language: #: http://www.w3.org/TR/selectors/#id-selectors - id_attribute = 'id' + id_attribute = "id" #: The attribute used for ``:lang()`` depends on the document language: #: http://www.w3.org/TR/selectors/#lang-pseudo - lang_attribute = 'xml:lang' + lang_attribute = "xml:lang" #: The case sensitivity of document language element names, #: attribute names, and attribute values in selectors depends @@ -168,7 +168,7 @@ class GenericTranslator(object): # class used to represent and xpath expression xpathexpr_cls = XPathExpr - def css_to_xpath(self, css, prefix='descendant-or-self::'): + def css_to_xpath(self, css, prefix="descendant-or-self::"): """Translate a *group of selectors* to XPath. Pseudo-elements are not supported here since XPath only knows @@ -187,12 +187,14 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'): The equivalent XPath 1.0 expression as an Unicode string. 
""" - return ' | '.join(self.selector_to_xpath(selector, prefix, - translate_pseudo_elements=True) - for selector in parse(css)) + return " | ".join( + self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True) + for selector in parse(css) + ) - def selector_to_xpath(self, selector, prefix='descendant-or-self::', - translate_pseudo_elements=False): + def selector_to_xpath( + self, selector, prefix="descendant-or-self::", translate_pseudo_elements=False + ): """Translate a parsed selector to XPath. @@ -213,14 +215,14 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::', The equivalent XPath 1.0 expression as an Unicode string. """ - tree = getattr(selector, 'parsed_tree', None) + tree = getattr(selector, "parsed_tree", None) if not tree: - raise TypeError('Expected a parsed selector, got %r' % (selector,)) + raise TypeError("Expected a parsed selector, got %r" % (selector,)) xpath = self.xpath(tree) assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return' if translate_pseudo_elements and selector.pseudo_element: xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) - return (prefix or '') + _unicode(xpath) + return (prefix or "") + _unicode(xpath) def xpath_pseudo_element(self, xpath, pseudo_element): """Translate a pseudo-element. @@ -229,7 +231,7 @@ def xpath_pseudo_element(self, xpath, pseudo_element): but can be overridden by sub-classes. 
""" - raise ExpressionError('Pseudo-elements are not supported.') + raise ExpressionError("Pseudo-elements are not supported.") @staticmethod def xpath_literal(s): @@ -239,38 +241,39 @@ def xpath_literal(s): elif '"' not in s: s = '"%s"' % s else: - s = "concat(%s)" % ','.join([ - (("'" in part) and '"%s"' or "'%s'") % part - for part in split_at_single_quotes(s) if part - ]) + s = "concat(%s)" % ",".join( + [ + (("'" in part) and '"%s"' or "'%s'") % part + for part in split_at_single_quotes(s) + if part + ] + ) return s def xpath(self, parsed_selector): """Translate any parsed selector object.""" type_name = type(parsed_selector).__name__ - method = getattr(self, 'xpath_%s' % type_name.lower(), None) + method = getattr(self, "xpath_%s" % type_name.lower(), None) if method is None: - raise ExpressionError('%s is not supported.' % type_name) + raise ExpressionError("%s is not supported." % type_name) return method(parsed_selector) - # Dispatched by parsed object type def xpath_combinedselector(self, combined): """Translate a combined selector.""" combinator = self.combinator_mapping[combined.combinator] - method = getattr(self, 'xpath_%s_combinator' % combinator) - return method(self.xpath(combined.selector), - self.xpath(combined.subselector)) + method = getattr(self, "xpath_%s_combinator" % combinator) + return method(self.xpath(combined.selector), self.xpath(combined.subselector)) def xpath_negation(self, negation): xpath = self.xpath(negation.selector) sub_xpath = self.xpath(negation.subselector) sub_xpath.add_name_test() if sub_xpath.condition: - return xpath.add_condition('not(%s)' % sub_xpath.condition) + return xpath.add_condition("not(%s)" % sub_xpath.condition) else: - return xpath.add_condition('0') + return xpath.add_condition("0") def xpath_matching(self, matching): xpath = self.xpath(matching.selector) @@ -278,45 +281,42 @@ def xpath_matching(self, matching): for e in exprs: e.add_name_test() if e.condition: - xpath.add_condition(e.condition, 'or') + 
xpath.add_condition(e.condition, "or") return xpath def xpath_function(self, function): """Translate a functional pseudo-class.""" - method = 'xpath_%s_function' % function.name.replace('-', '_') + method = "xpath_%s_function" % function.name.replace("-", "_") method = _unicode_safe_getattr(self, method, None) if not method: - raise ExpressionError( - "The pseudo-class :%s() is unknown" % function.name) + raise ExpressionError("The pseudo-class :%s() is unknown" % function.name) return method(self.xpath(function.selector), function) def xpath_pseudo(self, pseudo): """Translate a pseudo-class.""" - method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_') + method = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_") method = _unicode_safe_getattr(self, method, None) if not method: # TODO: better error message for pseudo-elements? - raise ExpressionError( - "The pseudo-class :%s is unknown" % pseudo.ident) + raise ExpressionError("The pseudo-class :%s is unknown" % pseudo.ident) return method(self.xpath(pseudo.selector)) - def xpath_attrib(self, selector): """Translate an attribute selector.""" operator = self.attribute_operator_mapping[selector.operator] - method = getattr(self, 'xpath_attrib_%s' % operator) + method = getattr(self, "xpath_attrib_%s" % operator) if self.lower_case_attribute_names: name = selector.attrib.lower() else: name = selector.attrib safe = is_safe_name(name) if selector.namespace: - name = '%s:%s' % (selector.namespace, name) + name = "%s:%s" % (selector.namespace, name) safe = safe and is_safe_name(selector.namespace) if safe: - attrib = '@' + name + attrib = "@" + name else: - attrib = 'attribute::*[name() = %s]' % self.xpath_literal(name) + attrib = "attribute::*[name() = %s]" % self.xpath_literal(name) if selector.value is None: value = None elif self.lower_case_attribute_values: @@ -329,19 +329,18 @@ def xpath_class(self, class_selector): """Translate a class selector.""" # .foo is defined as [class~=foo] in the spec. 
xpath = self.xpath(class_selector.selector) - return self.xpath_attrib_includes( - xpath, '@class', class_selector.class_name) + return self.xpath_attrib_includes(xpath, "@class", class_selector.class_name) def xpath_hash(self, id_selector): """Translate an ID selector.""" xpath = self.xpath(id_selector.selector) - return self.xpath_attrib_equals(xpath, '@id', id_selector.id) + return self.xpath_attrib_equals(xpath, "@id", id_selector.id) def xpath_element(self, selector): """Translate a type or universal selector.""" element = selector.element if not element: - element = '*' + element = "*" safe = True else: safe = is_safe_name(element) @@ -350,39 +349,36 @@ def xpath_element(self, selector): if selector.namespace: # Namespace prefixes are case-sensitive. # http://www.w3.org/TR/css3-namespace/#prefixes - element = '%s:%s' % (selector.namespace, element) + element = "%s:%s" % (selector.namespace, element) safe = safe and is_safe_name(selector.namespace) xpath = self.xpathexpr_cls(element=element) if not safe: xpath.add_name_test() return xpath - # CombinedSelector: dispatch by combinator def xpath_descendant_combinator(self, left, right): """right is a child, grand-child or further descendant of left""" - return left.join('/descendant-or-self::*/', right) + return left.join("/descendant-or-self::*/", right) def xpath_child_combinator(self, left, right): """right is an immediate child of left""" - return left.join('/', right) + return left.join("/", right) def xpath_direct_adjacent_combinator(self, left, right): """right is a sibling immediately after left""" - xpath = left.join('/following-sibling::', right) + xpath = left.join("/following-sibling::", right) xpath.add_name_test() - return xpath.add_condition('position() = 1') + return xpath.add_condition("position() = 1") def xpath_indirect_adjacent_combinator(self, left, right): """right is a sibling after left, immediately or not""" - return left.join('/following-sibling::', right) - + return 
left.join("/following-sibling::", right) # Function: dispatch by function/pseudo-class name - def xpath_nth_child_function(self, xpath, function, last=False, - add_name_test=True): + def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=True): try: a, b = parse_series(function.arguments) except ValueError: @@ -436,35 +432,35 @@ def xpath_nth_child_function(self, xpath, function, last=False, # for a == 1, nth-*(an+b) means n+b-1 siblings before/after, # and since n ∈ {0, 1, 2, ...}, if b-1<=0, # there is always an "n" matching any number of siblings (maybe none) - if a == 1 and b_min_1 <=0: + if a == 1 and b_min_1 <= 0: return xpath # early-exit condition 2: # ~~~~~~~~~~~~~~~~~~~~~~~ # an+b-1 siblings with a<0 and (b-1)<0 is not possible if a < 0 and b_min_1 < 0: - return xpath.add_condition('0') + return xpath.add_condition("0") # `add_name_test` boolean is inverted and somewhat counter-intuitive: # # nth_of_type() calls nth_child(add_name_test=False) if add_name_test: - nodetest = '*' + nodetest = "*" else: - nodetest = '%s' % xpath.element + nodetest = "%s" % xpath.element # count siblings before or after the element if not last: - siblings_count = 'count(preceding-sibling::%s)' % nodetest + siblings_count = "count(preceding-sibling::%s)" % nodetest else: - siblings_count = 'count(following-sibling::%s)' % nodetest + siblings_count = "count(following-sibling::%s)" % nodetest # special case of fixed position: nth-*(0n+b) # if a == 0: # ~~~~~~~~~~ # count(***-sibling::***) = b-1 if a == 0: - return xpath.add_condition('%s = %s' % (siblings_count, b_min_1)) + return xpath.add_condition("%s = %s" % (siblings_count, b_min_1)) expressions = [] @@ -473,12 +469,12 @@ def xpath_nth_child_function(self, xpath, function, last=False, # so if a>0, and (b-1)<=0, an "n" exists to satisfy this, # therefore, the predicate is only interesting if (b-1)>0 if b_min_1 > 0: - expressions.append('%s >= %s' % (siblings_count, b_min_1)) + expressions.append("%s >= 
%s" % (siblings_count, b_min_1)) else: # if a<0, and (b-1)<0, no "n" satisfies this, # this is tested above as an early exist condition # otherwise, - expressions.append('%s <= %s' % (siblings_count, b_min_1)) + expressions.append("%s <= %s" % (siblings_count, b_min_1)) # operations modulo 1 or -1 are simpler, one only needs to verify: # @@ -501,56 +497,48 @@ def xpath_nth_child_function(self, xpath, function, last=False, b_neg = (-b_min_1) % abs(a) if b_neg != 0: - b_neg = '+%s' % b_neg - left = '(%s %s)' % (left, b_neg) + b_neg = "+%s" % b_neg + left = "(%s %s)" % (left, b_neg) - expressions.append('%s mod %s = 0' % (left, a)) + expressions.append("%s mod %s = 0" % (left, a)) if len(expressions) > 1: - template = '(%s)' + template = "(%s)" else: - template = '%s' - xpath.add_condition(' and '.join(template % expression - for expression in expressions)) + template = "%s" + xpath.add_condition(" and ".join(template % expression for expression in expressions)) return xpath def xpath_nth_last_child_function(self, xpath, function): return self.xpath_nth_child_function(xpath, function, last=True) def xpath_nth_of_type_function(self, xpath, function): - if xpath.element == '*': - raise ExpressionError( - "*:nth-of-type() is not implemented") - return self.xpath_nth_child_function(xpath, function, - add_name_test=False) + if xpath.element == "*": + raise ExpressionError("*:nth-of-type() is not implemented") + return self.xpath_nth_child_function(xpath, function, add_name_test=False) def xpath_nth_last_of_type_function(self, xpath, function): - if xpath.element == '*': - raise ExpressionError( - "*:nth-of-type() is not implemented") - return self.xpath_nth_child_function(xpath, function, last=True, - add_name_test=False) + if xpath.element == "*": + raise ExpressionError("*:nth-of-type() is not implemented") + return self.xpath_nth_child_function(xpath, function, last=True, add_name_test=False) def xpath_contains_function(self, xpath, function): # Defined there, removed 
in later drafts: # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors - if function.argument_types() not in (['STRING'], ['IDENT']): + if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :contains(), got %r" - % function.arguments) + "Expected a single string or ident for :contains(), got %r" % function.arguments + ) value = function.arguments[0].value - return xpath.add_condition( - 'contains(., %s)' % self.xpath_literal(value)) + return xpath.add_condition("contains(., %s)" % self.xpath_literal(value)) def xpath_lang_function(self, xpath, function): - if function.argument_types() not in (['STRING'], ['IDENT']): + if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :lang(), got %r" - % function.arguments) + "Expected a single string or ident for :lang(), got %r" % function.arguments + ) value = function.arguments[0].value - return xpath.add_condition( - "lang(%s)" % (self.xpath_literal(value))) - + return xpath.add_condition("lang(%s)" % (self.xpath_literal(value))) # Pseudo: dispatch by pseudo-class name @@ -566,31 +554,28 @@ def xpath_scope_pseudo(self, xpath): return xpath.add_condition("1") def xpath_first_child_pseudo(self, xpath): - return xpath.add_condition('count(preceding-sibling::*) = 0') + return xpath.add_condition("count(preceding-sibling::*) = 0") def xpath_last_child_pseudo(self, xpath): - return xpath.add_condition('count(following-sibling::*) = 0') + return xpath.add_condition("count(following-sibling::*) = 0") def xpath_first_of_type_pseudo(self, xpath): - if xpath.element == '*': - raise ExpressionError( - "*:first-of-type is not implemented") - return xpath.add_condition('count(preceding-sibling::%s) = 0' % xpath.element) + if xpath.element == "*": + raise ExpressionError("*:first-of-type is not implemented") + return xpath.add_condition("count(preceding-sibling::%s) = 0" % 
xpath.element) def xpath_last_of_type_pseudo(self, xpath): - if xpath.element == '*': - raise ExpressionError( - "*:last-of-type is not implemented") - return xpath.add_condition('count(following-sibling::%s) = 0' % xpath.element) + if xpath.element == "*": + raise ExpressionError("*:last-of-type is not implemented") + return xpath.add_condition("count(following-sibling::%s) = 0" % xpath.element) def xpath_only_child_pseudo(self, xpath): - return xpath.add_condition('count(parent::*/child::*) = 1') + return xpath.add_condition("count(parent::*/child::*) = 1") def xpath_only_of_type_pseudo(self, xpath): - if xpath.element == '*': - raise ExpressionError( - "*:only-of-type is not implemented") - return xpath.add_condition('count(parent::*/child::%s) = 1' % xpath.element) + if xpath.element == "*": + raise ExpressionError("*:only-of-type is not implemented") + return xpath.add_condition("count(parent::*/child::%s) = 1" % xpath.element) def xpath_empty_pseudo(self, xpath): return xpath.add_condition("not(*) and not(string-length())") @@ -617,61 +602,63 @@ def xpath_attrib_exists(self, xpath, name, value): return xpath def xpath_attrib_equals(self, xpath, name, value): - xpath.add_condition('%s = %s' % (name, self.xpath_literal(value))) + xpath.add_condition("%s = %s" % (name, self.xpath_literal(value))) return xpath def xpath_attrib_different(self, xpath, name, value): # FIXME: this seems like a weird hack... 
if value: - xpath.add_condition('not(%s) or %s != %s' - % (name, name, self.xpath_literal(value))) + xpath.add_condition("not(%s) or %s != %s" % (name, name, self.xpath_literal(value))) else: - xpath.add_condition('%s != %s' - % (name, self.xpath_literal(value))) + xpath.add_condition("%s != %s" % (name, self.xpath_literal(value))) return xpath def xpath_attrib_includes(self, xpath, name, value): if is_non_whitespace(value): xpath.add_condition( "%s and contains(concat(' ', normalize-space(%s), ' '), %s)" - % (name, name, self.xpath_literal(' '+value+' '))) + % (name, name, self.xpath_literal(" " + value + " ")) + ) else: - xpath.add_condition('0') + xpath.add_condition("0") return xpath def xpath_attrib_dashmatch(self, xpath, name, value): # Weird, but true... - xpath.add_condition('%s and (%s = %s or starts-with(%s, %s))' % ( - name, - name, self.xpath_literal(value), - name, self.xpath_literal(value + '-'))) + xpath.add_condition( + "%s and (%s = %s or starts-with(%s, %s))" + % (name, name, self.xpath_literal(value), name, self.xpath_literal(value + "-")) + ) return xpath def xpath_attrib_prefixmatch(self, xpath, name, value): if value: - xpath.add_condition('%s and starts-with(%s, %s)' % ( - name, name, self.xpath_literal(value))) + xpath.add_condition( + "%s and starts-with(%s, %s)" % (name, name, self.xpath_literal(value)) + ) else: - xpath.add_condition('0') + xpath.add_condition("0") return xpath def xpath_attrib_suffixmatch(self, xpath, name, value): if value: # Oddly there is a starts-with in XPath 1.0, but not ends-with xpath.add_condition( - '%s and substring(%s, string-length(%s)-%s) = %s' - % (name, name, name, len(value)-1, self.xpath_literal(value))) + "%s and substring(%s, string-length(%s)-%s) = %s" + % (name, name, name, len(value) - 1, self.xpath_literal(value)) + ) else: - xpath.add_condition('0') + xpath.add_condition("0") return xpath def xpath_attrib_substringmatch(self, xpath, name, value): if value: # Attribute selectors are case sensitive 
- xpath.add_condition('%s and contains(%s, %s)' % ( - name, name, self.xpath_literal(value))) + xpath.add_condition( + "%s and contains(%s, %s)" % (name, name, self.xpath_literal(value)) + ) else: - xpath.add_condition('0') + xpath.add_condition("0") return xpath @@ -692,7 +679,7 @@ class HTMLTranslator(GenericTranslator): """ - lang_attribute = 'lang' + lang_attribute = "lang" def __init__(self, xhtml=False): self.xhtml = xhtml # Might be useful for sub-classes? @@ -706,33 +693,36 @@ def xpath_checked_pseudo(self, xpath): return xpath.add_condition( "(@selected and name(.) = 'option') or " "(@checked " - "and (name(.) = 'input' or name(.) = 'command')" - "and (@type = 'checkbox' or @type = 'radio'))") + "and (name(.) = 'input' or name(.) = 'command')" + "and (@type = 'checkbox' or @type = 'radio'))" + ) def xpath_lang_function(self, xpath, function): - if function.argument_types() not in (['STRING'], ['IDENT']): + if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :lang(), got %r" - % function.arguments) + "Expected a single string or ident for :lang(), got %r" % function.arguments + ) value = function.arguments[0].value return xpath.add_condition( "ancestor-or-self::*[@lang][1][starts-with(concat(" - # XPath 1.0 has no lower-case function... - "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " - "'abcdefghijklmnopqrstuvwxyz'), " - "'-'), %s)]" - % (self.lang_attribute, self.xpath_literal(value.lower() + '-'))) + # XPath 1.0 has no lower-case function... + "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " + "'abcdefghijklmnopqrstuvwxyz'), " + "'-'), %s)]" % (self.lang_attribute, self.xpath_literal(value.lower() + "-")) + ) def xpath_link_pseudo(self, xpath): - return xpath.add_condition("@href and " - "(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')") + return xpath.add_condition( + "@href and " "(name(.) = 'a' or name(.) = 'link' or name(.) 
= 'area')" + ) # Links are never visited, the implementation for :visited is the same # as in GenericTranslator def xpath_disabled_pseudo(self, xpath): # http://www.w3.org/TR/html5/section-index.html#attributes-1 - return xpath.add_condition(''' + return xpath.add_condition( + """ ( @disabled and ( @@ -754,13 +744,15 @@ def xpath_disabled_pseudo(self, xpath): ) and ancestor::fieldset[@disabled] ) - ''') + """ + ) # FIXME: in the second half, add "and is not a descendant of that # fieldset element's first legend element child, if any." def xpath_enabled_pseudo(self, xpath): # http://www.w3.org/TR/html5/section-index.html#attributes-1 - return xpath.add_condition(''' + return xpath.add_condition( + """ ( @href and ( name(.) = 'a' or @@ -788,7 +780,8 @@ def xpath_enabled_pseudo(self, xpath): @disabled or ancestor::optgroup[@disabled] ) ) - ''') + """ + ) # FIXME: ... or "li elements that are children of menu elements, # and that have a child element that defines a command, if the first # such element's Disabled State facet is false (not disabled)". diff --git a/docs/conf.py b/docs/conf.py index 62b5202..9dc2575 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -52,7 +52,7 @@ # The full version, including alpha/beta/rc tags. with open(os.path.join(os.path.dirname(__file__), '..', 'cssselect', '__init__.py')) as init_file: init_py = init_file.read() -release = re.search("VERSION = '([^']+)'", init_py).group(1) +release = re.search('VERSION = "([^"]+)"', init_py).group(1) # The short X.Y version. 
version = release.rstrip('dev') diff --git a/pyproject.toml b/pyproject.toml index b409f47..57a5583 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,2 @@ [tool.black] line-length = 99 -exclude = 'cssselect/|tests/' diff --git a/setup.py b/setup.py index 3a0bea0..f95721d 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ README = readme_file.read() with open(os.path.join(ROOT, "cssselect", "__init__.py")) as init_file: INIT_PY = init_file.read() -VERSION = re.search("VERSION = '([^']+)'", INIT_PY).group(1) +VERSION = re.search('VERSION = "([^"]+)"', INIT_PY).group(1) setup( diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index bd37875..ba46d8a 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -21,17 +21,23 @@ import unittest from lxml import etree, html -from cssselect import (parse, GenericTranslator, HTMLTranslator, - SelectorSyntaxError, ExpressionError) -from cssselect.parser import (tokenize, parse_series, _unicode, - FunctionalPseudoElement) +from cssselect import ( + parse, + GenericTranslator, + HTMLTranslator, + SelectorSyntaxError, + ExpressionError, +) +from cssselect.parser import tokenize, parse_series, _unicode, FunctionalPseudoElement from cssselect.xpath import _unicode_safe_getattr, XPathExpr if sys.version_info[0] < 3: # Python 2 def u(text): - return text.decode('utf8') + return text.decode("utf8") + + else: # Python 3 def u(text): @@ -41,8 +47,8 @@ def u(text): class TestCssselect(unittest.TestCase): def test_tokenizer(self): tokens = [ - _unicode(item) for item in tokenize( - u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)'))] + _unicode(item) for item in tokenize(u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)')) + ] assert tokens == [ u(""), "", @@ -69,8 +75,7 @@ def repr_parse(css): selectors = parse(css) for selector in selectors: assert selector.pseudo_element is None - return [repr(selector.parsed_tree).replace("(u'", "('") - for selector in selectors] + return 
[repr(selector.parsed_tree).replace("(u'", "('") for selector in selectors] def parse_many(first, *others): result = repr_parse(first) @@ -78,92 +83,91 @@ def parse_many(first, *others): assert repr_parse(other) == result return result - assert parse_many('*') == ['Element[*]'] - assert parse_many('*|*') == ['Element[*]'] - assert parse_many('*|foo') == ['Element[foo]'] - assert parse_many('|foo') == ['Element[foo]'] - assert parse_many('foo|*') == ['Element[foo|*]'] - assert parse_many('foo|bar') == ['Element[foo|bar]'] + assert parse_many("*") == ["Element[*]"] + assert parse_many("*|*") == ["Element[*]"] + assert parse_many("*|foo") == ["Element[foo]"] + assert parse_many("|foo") == ["Element[foo]"] + assert parse_many("foo|*") == ["Element[foo|*]"] + assert parse_many("foo|bar") == ["Element[foo|bar]"] # This will never match, but it is valid: - assert parse_many('#foo#bar') == ['Hash[Hash[Element[*]#foo]#bar]'] - assert parse_many( - 'div>.foo', - 'div> .foo', - 'div >.foo', - 'div > .foo', - 'div \n> \t \t .foo', 'div\r>\n\n\n.foo', 'div\f>\f.foo' - ) == ['CombinedSelector[Element[div] > Class[Element[*].foo]]'] - assert parse_many('td.foo,.bar', - 'td.foo, .bar', - 'td.foo\t\r\n\f ,\t\r\n\f .bar' - ) == [ - 'Class[Element[td].foo]', - 'Class[Element[*].bar]' + assert parse_many("#foo#bar") == ["Hash[Hash[Element[*]#foo]#bar]"] + assert ( + parse_many( + "div>.foo", + "div> .foo", + "div >.foo", + "div > .foo", + "div \n> \t \t .foo", + "div\r>\n\n\n.foo", + "div\f>\f.foo", + ) + == ["CombinedSelector[Element[div] > Class[Element[*].foo]]"] + ) + assert parse_many("td.foo,.bar", "td.foo, .bar", "td.foo\t\r\n\f ,\t\r\n\f .bar") == [ + "Class[Element[td].foo]", + "Class[Element[*].bar]", + ] + assert parse_many("div, td.foo, div.bar span") == [ + "Element[div]", + "Class[Element[td].foo]", + "CombinedSelector[Class[Element[div].bar] Element[span]]", + ] + assert parse_many("div > p") == ["CombinedSelector[Element[div] > Element[p]]"] + assert 
parse_many("td:first") == ["Pseudo[Element[td]:first]"] + assert parse_many("td:first") == ["Pseudo[Element[td]:first]"] + assert parse_many("td :first") == [ + "CombinedSelector[Element[td] Pseudo[Element[*]:first]]" + ] + assert parse_many("td :first") == [ + "CombinedSelector[Element[td] Pseudo[Element[*]:first]]" + ] + assert parse_many("a[name]", "a[ name\t]") == ["Attrib[Element[a][name]]"] + assert parse_many("a [name]") == [ + "CombinedSelector[Element[a] Attrib[Element[*][name]]]" + ] + assert parse_many('a[rel="include"]', "a[rel = include]") == [ + "Attrib[Element[a][rel = 'include']]" ] - assert parse_many('div, td.foo, div.bar span') == [ - 'Element[div]', - 'Class[Element[td].foo]', - 'CombinedSelector[Class[Element[div].bar] ' - ' Element[span]]'] - assert parse_many('div > p') == [ - 'CombinedSelector[Element[div] > Element[p]]'] - assert parse_many('td:first') == [ - 'Pseudo[Element[td]:first]'] - assert parse_many('td:first') == [ - 'Pseudo[Element[td]:first]'] - assert parse_many('td :first') == [ - 'CombinedSelector[Element[td] ' - ' Pseudo[Element[*]:first]]'] - assert parse_many('td :first') == [ - 'CombinedSelector[Element[td] ' - ' Pseudo[Element[*]:first]]'] - assert parse_many('a[name]', 'a[ name\t]') == [ - 'Attrib[Element[a][name]]'] - assert parse_many('a [name]') == [ - 'CombinedSelector[Element[a] Attrib[Element[*][name]]]'] - assert parse_many('a[rel="include"]', 'a[rel = include]') == [ - "Attrib[Element[a][rel = 'include']]"] assert parse_many("a[hreflang |= 'en']", "a[hreflang|=en]") == [ - "Attrib[Element[a][hreflang |= 'en']]"] - assert parse_many('div:nth-child(10)') == [ - "Function[Element[div]:nth-child(['10'])]"] - assert parse_many(':nth-child(2n+2)') == [ - "Function[Element[*]:nth-child(['2', 'n', '+2'])]"] - assert parse_many('div:nth-of-type(10)') == [ - "Function[Element[div]:nth-of-type(['10'])]"] - assert parse_many('div div:nth-of-type(10) .aclass') == [ - 'CombinedSelector[CombinedSelector[Element[div] ' - 
"Function[Element[div]:nth-of-type(['10'])]] " - ' Class[Element[*].aclass]]'] - assert parse_many('label:only') == [ - 'Pseudo[Element[label]:only]'] - assert parse_many('a:lang(fr)') == [ - "Function[Element[a]:lang(['fr'])]"] - assert parse_many('div:contains("foo")') == [ - "Function[Element[div]:contains(['foo'])]"] - assert parse_many('div#foobar') == [ - 'Hash[Element[div]#foobar]'] - assert parse_many('div:not(div.foo)') == [ - 'Negation[Element[div]:not(Class[Element[div].foo])]'] - assert parse_many('div:is(.foo, #bar)') == [ - 'Matching[Element[div]:is(Class[Element[*].foo], Hash[Element[*]#bar])]'] - assert parse_many(':is(:hover, :visited)') == [ - 'Matching[Element[*]:is(Pseudo[Element[*]:hover], Pseudo[Element[*]:visited])]'] - assert parse_many('td ~ th') == [ - 'CombinedSelector[Element[td] ~ Element[th]]'] - assert parse_many(':scope > foo') == [ - 'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]' + "Attrib[Element[a][hreflang |= 'en']]" + ] + assert parse_many("div:nth-child(10)") == ["Function[Element[div]:nth-child(['10'])]"] + assert parse_many(":nth-child(2n+2)") == [ + "Function[Element[*]:nth-child(['2', 'n', '+2'])]" + ] + assert parse_many("div:nth-of-type(10)") == ["Function[Element[div]:nth-of-type(['10'])]"] + assert parse_many("div div:nth-of-type(10) .aclass") == [ + "CombinedSelector[CombinedSelector[Element[div] " + "Function[Element[div]:nth-of-type(['10'])]] " + " Class[Element[*].aclass]]" + ] + assert parse_many("label:only") == ["Pseudo[Element[label]:only]"] + assert parse_many("a:lang(fr)") == ["Function[Element[a]:lang(['fr'])]"] + assert parse_many('div:contains("foo")') == ["Function[Element[div]:contains(['foo'])]"] + assert parse_many("div#foobar") == ["Hash[Element[div]#foobar]"] + assert parse_many("div:not(div.foo)") == [ + "Negation[Element[div]:not(Class[Element[div].foo])]" + ] + assert parse_many("div:is(.foo, #bar)") == [ + "Matching[Element[div]:is(Class[Element[*].foo], Hash[Element[*]#bar])]" + ] + 
assert parse_many(":is(:hover, :visited)") == [ + "Matching[Element[*]:is(Pseudo[Element[*]:hover], Pseudo[Element[*]:visited])]" + ] + assert parse_many("td ~ th") == ["CombinedSelector[Element[td] ~ Element[th]]"] + assert parse_many(":scope > foo") == [ + "CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]" ] - assert parse_many(' :scope > foo') == [ - 'CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]' + assert parse_many(" :scope > foo") == [ + "CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]" ] - assert parse_many(':scope > foo bar > div') == [ - 'CombinedSelector[CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > ' - 'Element[foo]] Element[bar]] > Element[div]]' + assert parse_many(":scope > foo bar > div") == [ + "CombinedSelector[CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > " + "Element[foo]] Element[bar]] > Element[div]]" ] - assert parse_many(':scope > #foo #bar') == [ - 'CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > ' - 'Hash[Element[*]#foo]] Hash[Element[*]#bar]]' + assert parse_many(":scope > #foo #bar") == [ + "CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > " + "Hash[Element[*]#foo]] Hash[Element[*]#bar]]" ] def test_pseudo_elements(self): @@ -189,61 +193,66 @@ def test_pseudo_repr(css): selector = result[0] return selector.parsed_tree.__repr__() - assert parse_one('foo') == ('Element[foo]', None) - assert parse_one('*') == ('Element[*]', None) - assert parse_one(':empty') == ('Pseudo[Element[*]:empty]', None) - assert parse_one(':scope') == ('Pseudo[Element[*]:scope]', None) + assert parse_one("foo") == ("Element[foo]", None) + assert parse_one("*") == ("Element[*]", None) + assert parse_one(":empty") == ("Pseudo[Element[*]:empty]", None) + assert parse_one(":scope") == ("Pseudo[Element[*]:scope]", None) # Special cases for CSS 2.1 pseudo-elements - assert parse_one(':BEfore') == ('Element[*]', 'before') - assert parse_one(':aftER') == ('Element[*]', 'after') - assert 
parse_one(':First-Line') == ('Element[*]', 'first-line') - assert parse_one(':First-Letter') == ('Element[*]', 'first-letter') - - assert parse_one('::befoRE') == ('Element[*]', 'before') - assert parse_one('::AFter') == ('Element[*]', 'after') - assert parse_one('::firsT-linE') == ('Element[*]', 'first-line') - assert parse_one('::firsT-letteR') == ('Element[*]', 'first-letter') - - assert parse_one('::text-content') == ('Element[*]', 'text-content') - assert parse_one('::attr(name)') == ( - "Element[*]", "FunctionalPseudoElement[::attr(['name'])]") - - assert parse_one('::Selection') == ('Element[*]', 'selection') - assert parse_one('foo:after') == ('Element[foo]', 'after') - assert parse_one('foo::selection') == ('Element[foo]', 'selection') - assert parse_one('lorem#ipsum ~ a#b.c[href]:empty::selection') == ( - 'CombinedSelector[Hash[Element[lorem]#ipsum] ~ ' - 'Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]', - 'selection') - assert parse_pseudo(':scope > div, foo bar') == [ - ('CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]', None), - ('CombinedSelector[Element[foo] Element[bar]]', None) + assert parse_one(":BEfore") == ("Element[*]", "before") + assert parse_one(":aftER") == ("Element[*]", "after") + assert parse_one(":First-Line") == ("Element[*]", "first-line") + assert parse_one(":First-Letter") == ("Element[*]", "first-letter") + + assert parse_one("::befoRE") == ("Element[*]", "before") + assert parse_one("::AFter") == ("Element[*]", "after") + assert parse_one("::firsT-linE") == ("Element[*]", "first-line") + assert parse_one("::firsT-letteR") == ("Element[*]", "first-letter") + + assert parse_one("::text-content") == ("Element[*]", "text-content") + assert parse_one("::attr(name)") == ( + "Element[*]", + "FunctionalPseudoElement[::attr(['name'])]", + ) + + assert parse_one("::Selection") == ("Element[*]", "selection") + assert parse_one("foo:after") == ("Element[foo]", "after") + assert parse_one("foo::selection") == 
("Element[foo]", "selection") + assert parse_one("lorem#ipsum ~ a#b.c[href]:empty::selection") == ( + "CombinedSelector[Hash[Element[lorem]#ipsum] ~ " + "Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]", + "selection", + ) + assert parse_pseudo(":scope > div, foo bar") == [ + ("CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]", None), + ("CombinedSelector[Element[foo] Element[bar]]", None), ] - assert parse_pseudo('foo:before, bar, baz:after') == [ - ('Element[foo]', 'before'), ('Element[bar]', None), - ('Element[baz]', 'after') + assert parse_pseudo("foo:before, bar, baz:after") == [ + ("Element[foo]", "before"), + ("Element[bar]", None), + ("Element[baz]", "after"), ] # Special cases for CSS 2.1 pseudo-elements are ignored by default - for pseudo in ('after', 'before', 'first-line', 'first-letter'): - selector, = parse('e:%s' % pseudo) + for pseudo in ("after", "before", "first-line", "first-letter"): + (selector,) = parse("e:%s" % pseudo) assert selector.pseudo_element == pseudo - assert GenericTranslator().selector_to_xpath(selector, prefix='') == "e" + assert GenericTranslator().selector_to_xpath(selector, prefix="") == "e" # Pseudo Elements are ignored by default, but if allowed they are not # supported by GenericTranslator tr = GenericTranslator() - selector, = parse('e::foo') - assert selector.pseudo_element == 'foo' - assert tr.selector_to_xpath(selector, prefix='') == "e" - self.assertRaises(ExpressionError, tr.selector_to_xpath, selector, - translate_pseudo_elements=True) + (selector,) = parse("e::foo") + assert selector.pseudo_element == "foo" + assert tr.selector_to_xpath(selector, prefix="") == "e" + self.assertRaises( + ExpressionError, tr.selector_to_xpath, selector, translate_pseudo_elements=True + ) # Special test for the unicode symbols and ':scope' element if check # Errors if use repr() instead of __repr__() - assert test_pseudo_repr(u':fİrst-child') == u'Pseudo[Element[*]:fİrst-child]' - assert test_pseudo_repr(':scope') == 
'Pseudo[Element[*]:scope]' + assert test_pseudo_repr(u":fİrst-child") == u"Pseudo[Element[*]:fİrst-child]" + assert test_pseudo_repr(":scope") == "Pseudo[Element[*]:scope]" def test_specificity(self): def specificity(css): @@ -251,35 +260,34 @@ def specificity(css): assert len(selectors) == 1 return selectors[0].specificity() - assert specificity('*') == (0, 0, 0) - assert specificity(' foo') == (0, 0, 1) - assert specificity(':empty ') == (0, 1, 0) - assert specificity(':before') == (0, 0, 1) - assert specificity('*:before') == (0, 0, 1) - assert specificity(':nth-child(2)') == (0, 1, 0) - assert specificity('.bar') == (0, 1, 0) - assert specificity('[baz]') == (0, 1, 0) + assert specificity("*") == (0, 0, 0) + assert specificity(" foo") == (0, 0, 1) + assert specificity(":empty ") == (0, 1, 0) + assert specificity(":before") == (0, 0, 1) + assert specificity("*:before") == (0, 0, 1) + assert specificity(":nth-child(2)") == (0, 1, 0) + assert specificity(".bar") == (0, 1, 0) + assert specificity("[baz]") == (0, 1, 0) assert specificity('[baz="4"]') == (0, 1, 0) assert specificity('[baz^="4"]') == (0, 1, 0) - assert specificity('#lipsum') == (1, 0, 0) + assert specificity("#lipsum") == (1, 0, 0) - assert specificity(':not(*)') == (0, 0, 0) - assert specificity(':not(foo)') == (0, 0, 1) - assert specificity(':not(.foo)') == (0, 1, 0) - assert specificity(':not([foo])') == (0, 1, 0) - assert specificity(':not(:empty)') == (0, 1, 0) - assert specificity(':not(#foo)') == (1, 0, 0) + assert specificity(":not(*)") == (0, 0, 0) + assert specificity(":not(foo)") == (0, 0, 1) + assert specificity(":not(.foo)") == (0, 1, 0) + assert specificity(":not([foo])") == (0, 1, 0) + assert specificity(":not(:empty)") == (0, 1, 0) + assert specificity(":not(#foo)") == (1, 0, 0) - assert specificity(':is(.foo, #bar)') == (1, 0, 0) - assert specificity(':is(:hover, :visited)') == (0, 1, 0) + assert specificity(":is(.foo, #bar)") == (1, 0, 0) + assert specificity(":is(:hover, :visited)") 
== (0, 1, 0) - assert specificity('foo:empty') == (0, 1, 1) - assert specificity('foo:before') == (0, 0, 2) - assert specificity('foo::before') == (0, 0, 2) - assert specificity('foo:empty::before') == (0, 1, 2) + assert specificity("foo:empty") == (0, 1, 1) + assert specificity("foo:before") == (0, 0, 2) + assert specificity("foo::before") == (0, 0, 2) + assert specificity("foo:empty::before") == (0, 1, 2) - assert specificity('#lorem + foo#ipsum:first-child > bar:first-line' - ) == (2, 1, 3) + assert specificity("#lorem + foo#ipsum:first-child > bar:first-line") == (2, 1, 3) def test_css_export(self): def css2css(css, res=None): @@ -287,34 +295,34 @@ def css2css(css, res=None): assert len(selectors) == 1 assert selectors[0].canonical() == (res or css) - css2css('*') - css2css(' foo', 'foo') - css2css('Foo', 'Foo') - css2css(':empty ', ':empty') - css2css(':before', '::before') - css2css(':beFOre', '::before') - css2css('*:before', '::before') - css2css(':nth-child(2)') - css2css('.bar') - css2css('[baz]') + css2css("*") + css2css(" foo", "foo") + css2css("Foo", "Foo") + css2css(":empty ", ":empty") + css2css(":before", "::before") + css2css(":beFOre", "::before") + css2css("*:before", "::before") + css2css(":nth-child(2)") + css2css(".bar") + css2css("[baz]") css2css('[baz="4"]', "[baz='4']") css2css('[baz^="4"]', "[baz^='4']") css2css("[ns|attr='4']") - css2css('#lipsum') - css2css(':not(*)') - css2css(':not(foo)') - css2css(':not(*.foo)', ':not(.foo)') - css2css(':not(*[foo])', ':not([foo])') - css2css(':not(:empty)') - css2css(':not(#foo)') - css2css(':is(#bar, .foo)') - css2css(':is(:focused, :visited)') - css2css('foo:empty') - css2css('foo::before') - css2css('foo:empty::before') + css2css("#lipsum") + css2css(":not(*)") + css2css(":not(foo)") + css2css(":not(*.foo)", ":not(.foo)") + css2css(":not(*[foo])", ":not([foo])") + css2css(":not(:empty)") + css2css(":not(#foo)") + css2css(":is(#bar, .foo)") + css2css(":is(:focused, :visited)") + 
css2css("foo:empty") + css2css("foo::before") + css2css("foo:empty::before") css2css('::name(arg + "val" - 3)', "::name(arg+'val'-3)") - css2css('#lorem + foo#ipsum:first-child > bar::first-line') - css2css('foo > *') + css2css("#lorem + foo#ipsum:first-child > bar::first-line") + css2css("foo > *") def test_parse_errors(self): def get_error(css): @@ -324,283 +332,224 @@ def get_error(css): # Py2, Py3, ... return str(sys.exc_info()[1]).replace("(u'", "('") - assert get_error('attributes(href)/html/body/a') == ( - "Expected selector, got ") - assert get_error('attributes(href)') == ( - "Expected selector, got ") - assert get_error('html/body/a') == ( - "Expected selector, got ") - assert get_error(' ') == ( - "Expected selector, got ") - assert get_error('div, ') == ( - "Expected selector, got ") - assert get_error(' , div') == ( - "Expected selector, got ") - assert get_error('p, , div') == ( - "Expected selector, got ") - assert get_error('div > ') == ( - "Expected selector, got ") - assert get_error(' > div') == ( - "Expected selector, got ' at 2>") - assert get_error('foo|#bar') == ( - "Expected ident or '*', got ") - assert get_error('#.foo') == ( - "Expected selector, got ") - assert get_error('.#foo') == ( - "Expected ident, got ") - assert get_error(':#foo') == ( - "Expected ident, got ") - assert get_error('[*]') == ( - "Expected '|', got ") - assert get_error('[foo|]') == ( - "Expected ident, got ") - assert get_error('[#]') == ( - "Expected ident or '*', got ") - assert get_error('[foo=#]') == ( - "Expected string or ident, got ") - assert get_error('[href]a') == ( - "Expected selector, got ") - assert get_error('[rel=stylesheet]') is None - assert get_error('[rel:stylesheet]') == ( - "Operator expected, got ") - assert get_error('[rel=stylesheet') == ( - "Expected ']', got ") - assert get_error(':lang(fr)') is None - assert get_error(':lang(fr') == ( - "Expected an argument, got ") - assert get_error(':contains("foo') == ( - "Unclosed string at 10") - 
assert get_error('foo!') == ( - "Expected selector, got ") + assert get_error("attributes(href)/html/body/a") == ( + "Expected selector, got " + ) + assert get_error("attributes(href)") == ("Expected selector, got ") + assert get_error("html/body/a") == ("Expected selector, got ") + assert get_error(" ") == ("Expected selector, got ") + assert get_error("div, ") == ("Expected selector, got ") + assert get_error(" , div") == ("Expected selector, got ") + assert get_error("p, , div") == ("Expected selector, got ") + assert get_error("div > ") == ("Expected selector, got ") + assert get_error(" > div") == ("Expected selector, got ' at 2>") + assert get_error("foo|#bar") == ("Expected ident or '*', got ") + assert get_error("#.foo") == ("Expected selector, got ") + assert get_error(".#foo") == ("Expected ident, got ") + assert get_error(":#foo") == ("Expected ident, got ") + assert get_error("[*]") == ("Expected '|', got ") + assert get_error("[foo|]") == ("Expected ident, got ") + assert get_error("[#]") == ("Expected ident or '*', got ") + assert get_error("[foo=#]") == ("Expected string or ident, got ") + assert get_error("[href]a") == ("Expected selector, got ") + assert get_error("[rel=stylesheet]") is None + assert get_error("[rel:stylesheet]") == ("Operator expected, got ") + assert get_error("[rel=stylesheet") == ("Expected ']', got ") + assert get_error(":lang(fr)") is None + assert get_error(":lang(fr") == ("Expected an argument, got ") + assert get_error(':contains("foo') == ("Unclosed string at 10") + assert get_error("foo!") == ("Expected selector, got ") # Mis-placed pseudo-elements - assert get_error('a:before:empty') == ( - "Got pseudo-element ::before not at the end of a selector") - assert get_error('li:before a') == ( - "Got pseudo-element ::before not at the end of a selector") - assert get_error(':not(:before)') == ( - "Got pseudo-element ::before inside :not() at 12") - assert get_error(':not(:not(a))') == ( - "Got nested :not()") - assert 
get_error(':is(:before)') == ( - "Got pseudo-element ::before inside function") - assert get_error(':is(a b)') == ( - "Expected an argument, got ") - assert get_error(':scope > div :scope header') == ( + assert get_error("a:before:empty") == ( + "Got pseudo-element ::before not at the end of a selector" + ) + assert get_error("li:before a") == ( + "Got pseudo-element ::before not at the end of a selector" + ) + assert get_error(":not(:before)") == ("Got pseudo-element ::before inside :not() at 12") + assert get_error(":not(:not(a))") == ("Got nested :not()") + assert get_error(":is(:before)") == ("Got pseudo-element ::before inside function") + assert get_error(":is(a b)") == ("Expected an argument, got ") + assert get_error(":scope > div :scope header") == ( 'Got immediate child pseudo-element ":scope" not at the start of a selector' ) - assert get_error('div :scope header') == ( + assert get_error("div :scope header") == ( 'Got immediate child pseudo-element ":scope" not at the start of a selector' ) - assert get_error('> div p') == ("Expected selector, got ' at 0>") + assert get_error("> div p") == ("Expected selector, got ' at 0>") def test_translation(self): def xpath(css): - return _unicode(GenericTranslator().css_to_xpath(css, prefix='')) - - assert xpath('*') == "*" - assert xpath('e') == "e" - assert xpath('*|e') == "e" - assert xpath('e|f') == "e:f" - assert xpath('e[foo]') == "e[@foo]" - assert xpath('e[foo|bar]') == "e[@foo:bar]" + return _unicode(GenericTranslator().css_to_xpath(css, prefix="")) + + assert xpath("*") == "*" + assert xpath("e") == "e" + assert xpath("*|e") == "e" + assert xpath("e|f") == "e:f" + assert xpath("e[foo]") == "e[@foo]" + assert xpath("e[foo|bar]") == "e[@foo:bar]" assert xpath('e[foo="bar"]') == "e[@foo = 'bar']" assert xpath('e[foo~="bar"]') == ( - "e[@foo and contains(" - "concat(' ', normalize-space(@foo), ' '), ' bar ')]") - assert xpath('e[foo^="bar"]') == ( - "e[@foo and starts-with(@foo, 'bar')]") + "e[@foo and 
contains(" "concat(' ', normalize-space(@foo), ' '), ' bar ')]" + ) + assert xpath('e[foo^="bar"]') == ("e[@foo and starts-with(@foo, 'bar')]") assert xpath('e[foo$="bar"]') == ( - "e[@foo and substring(@foo, string-length(@foo)-2) = 'bar']") - assert xpath('e[foo*="bar"]') == ( - "e[@foo and contains(@foo, 'bar')]") + "e[@foo and substring(@foo, string-length(@foo)-2) = 'bar']" + ) + assert xpath('e[foo*="bar"]') == ("e[@foo and contains(@foo, 'bar')]") assert xpath('e[hreflang|="en"]') == ( - "e[@hreflang and (" - "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]") + "e[@hreflang and (" "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]" + ) # --- nth-* and nth-last-* ------------------------------------- - assert xpath('e:nth-child(1)') == ( - "e[count(preceding-sibling::*) = 0]") + assert xpath("e:nth-child(1)") == ("e[count(preceding-sibling::*) = 0]") # always true - assert xpath('e:nth-child(n)') == ( - "e") - assert xpath('e:nth-child(n+1)') == ( - "e") + assert xpath("e:nth-child(n)") == ("e") + assert xpath("e:nth-child(n+1)") == ("e") # always true too - assert xpath('e:nth-child(n-10)') == ( - "e") + assert xpath("e:nth-child(n-10)") == ("e") # b=2 is the limit... 
- assert xpath('e:nth-child(n+2)') == ( - "e[count(preceding-sibling::*) >= 1]") + assert xpath("e:nth-child(n+2)") == ("e[count(preceding-sibling::*) >= 1]") # always false - assert xpath('e:nth-child(-n)') == ( - "e[0]") + assert xpath("e:nth-child(-n)") == ("e[0]") # equivalent to first child - assert xpath('e:nth-child(-n+1)') == ( - "e[count(preceding-sibling::*) <= 0]") + assert xpath("e:nth-child(-n+1)") == ("e[count(preceding-sibling::*) <= 0]") - assert xpath('e:nth-child(3n+2)') == ( + assert xpath("e:nth-child(3n+2)") == ( "e[(count(preceding-sibling::*) >= 1) and " - "((count(preceding-sibling::*) +2) mod 3 = 0)]") - assert xpath('e:nth-child(3n-2)') == ( - "e[count(preceding-sibling::*) mod 3 = 0]") - assert xpath('e:nth-child(-n+6)') == ( - "e[count(preceding-sibling::*) <= 5]") - - assert xpath('e:nth-last-child(1)') == ( - "e[count(following-sibling::*) = 0]") - assert xpath('e:nth-last-child(2n)') == ( - "e[(count(following-sibling::*) +1) mod 2 = 0]") - assert xpath('e:nth-last-child(2n+1)') == ( - "e[count(following-sibling::*) mod 2 = 0]") - assert xpath('e:nth-last-child(2n+2)') == ( + "((count(preceding-sibling::*) +2) mod 3 = 0)]" + ) + assert xpath("e:nth-child(3n-2)") == ("e[count(preceding-sibling::*) mod 3 = 0]") + assert xpath("e:nth-child(-n+6)") == ("e[count(preceding-sibling::*) <= 5]") + + assert xpath("e:nth-last-child(1)") == ("e[count(following-sibling::*) = 0]") + assert xpath("e:nth-last-child(2n)") == ("e[(count(following-sibling::*) +1) mod 2 = 0]") + assert xpath("e:nth-last-child(2n+1)") == ("e[count(following-sibling::*) mod 2 = 0]") + assert xpath("e:nth-last-child(2n+2)") == ( "e[(count(following-sibling::*) >= 1) and " - "((count(following-sibling::*) +1) mod 2 = 0)]") - assert xpath('e:nth-last-child(3n+1)') == ( - "e[count(following-sibling::*) mod 3 = 0]") + "((count(following-sibling::*) +1) mod 2 = 0)]" + ) + assert xpath("e:nth-last-child(3n+1)") == ("e[count(following-sibling::*) mod 3 = 0]") # represents the two 
last e elements - assert xpath('e:nth-last-child(-n+2)') == ( - "e[count(following-sibling::*) <= 1]") - - assert xpath('e:nth-of-type(1)') == ( - "e[count(preceding-sibling::e) = 0]") - assert xpath('e:nth-last-of-type(1)') == ( - "e[count(following-sibling::e) = 0]") - assert xpath('div e:nth-last-of-type(1) .aclass') == ( + assert xpath("e:nth-last-child(-n+2)") == ("e[count(following-sibling::*) <= 1]") + + assert xpath("e:nth-of-type(1)") == ("e[count(preceding-sibling::e) = 0]") + assert xpath("e:nth-last-of-type(1)") == ("e[count(following-sibling::e) = 0]") + assert xpath("div e:nth-last-of-type(1) .aclass") == ( "div/descendant-or-self::*/e[count(following-sibling::e) = 0]" - "/descendant-or-self::*/*[@class and contains(" - "concat(' ', normalize-space(@class), ' '), ' aclass ')]") - - assert xpath('e:first-child') == ( - "e[count(preceding-sibling::*) = 0]") - assert xpath('e:last-child') == ( - "e[count(following-sibling::*) = 0]") - assert xpath('e:first-of-type') == ( - "e[count(preceding-sibling::e) = 0]") - assert xpath('e:last-of-type') == ( - "e[count(following-sibling::e) = 0]") - assert xpath('e:only-child') == ( - "e[count(parent::*/child::*) = 1]") - assert xpath('e:only-of-type') == ( - "e[count(parent::*/child::e) = 1]") - assert xpath('e:empty') == ( - "e[not(*) and not(string-length())]") - assert xpath('e:EmPTY') == ( - "e[not(*) and not(string-length())]") - assert xpath('e:root') == ( - "e[not(parent::*)]") - assert xpath('e:hover') == ( - "e[0]") # never matches - assert xpath('e:contains("foo")') == ( - "e[contains(., 'foo')]") - assert xpath('e:ConTains(foo)') == ( - "e[contains(., 'foo')]") - assert xpath('e.warning') == ( - "e[@class and contains(" - "concat(' ', normalize-space(@class), ' '), ' warning ')]") - assert xpath('e#myid') == ( - "e[@id = 'myid']") - assert xpath('e:not(:nth-child(odd))') == ( - "e[not(count(preceding-sibling::*) mod 2 = 0)]") - assert xpath('e:nOT(*)') == ( - "e[0]") # never matches - assert xpath('e 
f') == ( - "e/descendant-or-self::*/f") - assert xpath('e > f') == ( - "e/f") - assert xpath('e + f') == ( - "e/following-sibling::*[(name() = 'f') and (position() = 1)]") - assert xpath('e ~ f') == ( - "e/following-sibling::f") - assert xpath('e ~ f:nth-child(3)') == ( - "e/following-sibling::f[count(preceding-sibling::*) = 2]") - assert xpath('div#container p') == ( - "div[@id = 'container']/descendant-or-self::*/p") + "/descendant-or-self::*/*[@class and contains(" + "concat(' ', normalize-space(@class), ' '), ' aclass ')]" + ) + + assert xpath("e:first-child") == ("e[count(preceding-sibling::*) = 0]") + assert xpath("e:last-child") == ("e[count(following-sibling::*) = 0]") + assert xpath("e:first-of-type") == ("e[count(preceding-sibling::e) = 0]") + assert xpath("e:last-of-type") == ("e[count(following-sibling::e) = 0]") + assert xpath("e:only-child") == ("e[count(parent::*/child::*) = 1]") + assert xpath("e:only-of-type") == ("e[count(parent::*/child::e) = 1]") + assert xpath("e:empty") == ("e[not(*) and not(string-length())]") + assert xpath("e:EmPTY") == ("e[not(*) and not(string-length())]") + assert xpath("e:root") == ("e[not(parent::*)]") + assert xpath("e:hover") == ("e[0]") # never matches + assert xpath('e:contains("foo")') == ("e[contains(., 'foo')]") + assert xpath("e:ConTains(foo)") == ("e[contains(., 'foo')]") + assert xpath("e.warning") == ( + "e[@class and contains(" "concat(' ', normalize-space(@class), ' '), ' warning ')]" + ) + assert xpath("e#myid") == ("e[@id = 'myid']") + assert xpath("e:not(:nth-child(odd))") == ("e[not(count(preceding-sibling::*) mod 2 = 0)]") + assert xpath("e:nOT(*)") == ("e[0]") # never matches + assert xpath("e f") == ("e/descendant-or-self::*/f") + assert xpath("e > f") == ("e/f") + assert xpath("e + f") == ("e/following-sibling::*[(name() = 'f') and (position() = 1)]") + assert xpath("e ~ f") == ("e/following-sibling::f") + assert xpath("e ~ f:nth-child(3)") == ( + "e/following-sibling::f[count(preceding-sibling::*) 
= 2]" + ) + assert xpath("div#container p") == ("div[@id = 'container']/descendant-or-self::*/p") # Invalid characters in XPath element names - assert xpath(r'di\a0 v') == ( - u("*[name() = 'di v']")) # di\xa0v - assert xpath(r'di\[v') == ( - "*[name() = 'di[v']") - assert xpath(r'[h\a0 ref]') == ( - u("*[attribute::*[name() = 'h ref']]")) # h\xa0ref - assert xpath(r'[h\]ref]') == ( - "*[attribute::*[name() = 'h]ref']]") - - self.assertRaises(ExpressionError, xpath, u(':fİrst-child')) - self.assertRaises(ExpressionError, xpath, ':first-of-type') - self.assertRaises(ExpressionError, xpath, ':only-of-type') - self.assertRaises(ExpressionError, xpath, ':last-of-type') - self.assertRaises(ExpressionError, xpath, ':nth-of-type(1)') - self.assertRaises(ExpressionError, xpath, ':nth-last-of-type(1)') - self.assertRaises(ExpressionError, xpath, ':nth-child(n-)') - self.assertRaises(ExpressionError, xpath, ':after') - self.assertRaises(ExpressionError, xpath, ':lorem-ipsum') - self.assertRaises(ExpressionError, xpath, ':lorem(ipsum)') - self.assertRaises(ExpressionError, xpath, '::lorem-ipsum') + assert xpath(r"di\a0 v") == (u("*[name() = 'di v']")) # di\xa0v + assert xpath(r"di\[v") == ("*[name() = 'di[v']") + assert xpath(r"[h\a0 ref]") == (u("*[attribute::*[name() = 'h ref']]")) # h\xa0ref + assert xpath(r"[h\]ref]") == ("*[attribute::*[name() = 'h]ref']]") + + self.assertRaises(ExpressionError, xpath, u(":fİrst-child")) + self.assertRaises(ExpressionError, xpath, ":first-of-type") + self.assertRaises(ExpressionError, xpath, ":only-of-type") + self.assertRaises(ExpressionError, xpath, ":last-of-type") + self.assertRaises(ExpressionError, xpath, ":nth-of-type(1)") + self.assertRaises(ExpressionError, xpath, ":nth-last-of-type(1)") + self.assertRaises(ExpressionError, xpath, ":nth-child(n-)") + self.assertRaises(ExpressionError, xpath, ":after") + self.assertRaises(ExpressionError, xpath, ":lorem-ipsum") + self.assertRaises(ExpressionError, xpath, ":lorem(ipsum)") + 
self.assertRaises(ExpressionError, xpath, "::lorem-ipsum") self.assertRaises(TypeError, GenericTranslator().css_to_xpath, 4) - self.assertRaises(TypeError, GenericTranslator().selector_to_xpath, - 'foo') + self.assertRaises(TypeError, GenericTranslator().selector_to_xpath, "foo") def test_unicode(self): if sys.version_info[0] < 3: - css = '.a\xc1b'.decode('ISO-8859-1') + css = ".a\xc1b".decode("ISO-8859-1") else: - css = '.a\xc1b' + css = ".a\xc1b" xpath = GenericTranslator().css_to_xpath(css) assert css[1:] in xpath - xpath = xpath.encode('ascii', 'xmlcharrefreplace').decode('ASCII') + xpath = xpath.encode("ascii", "xmlcharrefreplace").decode("ASCII") assert xpath == ( "descendant-or-self::*[@class and contains(" - "concat(' ', normalize-space(@class), ' '), ' aÁb ')]") + "concat(' ', normalize-space(@class), ' '), ' aÁb ')]" + ) def test_quoting(self): css_to_xpath = GenericTranslator().css_to_xpath - assert css_to_xpath('*[aval="\'"]') == ( - '''descendant-or-self::*[@aval = "'"]''') - assert css_to_xpath('*[aval="\'\'\'"]') == ( - """descendant-or-self::*[@aval = "'''"]""") - assert css_to_xpath('*[aval=\'"\']') == ( - '''descendant-or-self::*[@aval = '"']''') - assert css_to_xpath('*[aval=\'"""\']') == ( - '''descendant-or-self::*[@aval = '"""']''') + assert css_to_xpath('*[aval="\'"]') == ("""descendant-or-self::*[@aval = "'"]""") + assert css_to_xpath("*[aval=\"'''\"]") == ("""descendant-or-self::*[@aval = "'''"]""") + assert css_to_xpath("*[aval='\"']") == ("""descendant-or-self::*[@aval = '"']""") + assert css_to_xpath('*[aval=\'"""\']') == ('''descendant-or-self::*[@aval = '"""']''') assert css_to_xpath(':scope > div[dataimg=""]') == ( - "descendant-or-self::*[1]/div[@dataimg = '']") + "descendant-or-self::*[1]/div[@dataimg = '']" + ) def test_unicode_escapes(self): # \22 == '"' \20 == ' ' css_to_xpath = GenericTranslator().css_to_xpath assert css_to_xpath(r'*[aval="\'\22\'"]') == ( - '''descendant-or-self::*[@aval = concat("'",'"',"'")]''') + 
"""descendant-or-self::*[@aval = concat("'",'"',"'")]""" + ) assert css_to_xpath(r'*[aval="\'\22 2\'"]') == ( - '''descendant-or-self::*[@aval = concat("'",'"2',"'")]''') + """descendant-or-self::*[@aval = concat("'",'"2',"'")]""" + ) assert css_to_xpath(r'*[aval="\'\20 \'"]') == ( - '''descendant-or-self::*[@aval = "' '"]''') - assert css_to_xpath('*[aval="\'\\20\r\n \'"]') == ( - '''descendant-or-self::*[@aval = "' '"]''') + """descendant-or-self::*[@aval = "' '"]""" + ) + assert css_to_xpath("*[aval=\"'\\20\r\n '\"]") == ( + """descendant-or-self::*[@aval = "' '"]""" + ) def test_xpath_pseudo_elements(self): class CustomTranslator(GenericTranslator): def xpath_pseudo_element(self, xpath, pseudo_element): if isinstance(pseudo_element, FunctionalPseudoElement): - method = 'xpath_%s_functional_pseudo_element' % ( - pseudo_element.name.replace('-', '_')) + method = "xpath_%s_functional_pseudo_element" % ( + pseudo_element.name.replace("-", "_") + ) method = _unicode_safe_getattr(self, method, None) if not method: raise ExpressionError( - "The functional pseudo-element ::%s() is unknown" - % pseudo_element.name) + "The functional pseudo-element ::%s() is unknown" % pseudo_element.name + ) xpath = method(xpath, pseudo_element.arguments) else: - method = 'xpath_%s_simple_pseudo_element' % ( - pseudo_element.replace('-', '_')) + method = "xpath_%s_simple_pseudo_element" % (pseudo_element.replace("-", "_")) method = _unicode_safe_getattr(self, method, None) if not method: raise ExpressionError( - "The pseudo-element ::%s is unknown" - % pseudo_element) + "The pseudo-element ::%s is unknown" % pseudo_element + ) xpath = method(xpath) return xpath @@ -608,8 +557,7 @@ def xpath_pseudo_element(self, xpath, pseudo_element): # elements that have a certain number of attributes def xpath_nb_attr_function(self, xpath, function): nb_attributes = int(function.arguments[0].value) - return xpath.add_condition( - "count(@*)=%d" % nb_attributes) + return 
xpath.add_condition("count(@*)=%d" % nb_attributes) # pseudo-class: # elements that have 5 attributes @@ -620,20 +568,29 @@ def xpath_five_attributes_pseudo(self, xpath): # element's attribute by name def xpath_attr_functional_pseudo_element(self, xpath, arguments): attribute_name = arguments[0].value - other = XPathExpr('@%s' % attribute_name, '', ) - return xpath.join('/', other) + other = XPathExpr( + "@%s" % attribute_name, + "", + ) + return xpath.join("/", other) # pseudo-element: # element's text() nodes def xpath_text_node_simple_pseudo_element(self, xpath): - other = XPathExpr('text()', '', ) - return xpath.join('/', other) + other = XPathExpr( + "text()", + "", + ) + return xpath.join("/", other) # pseudo-element: # element's href attribute def xpath_attr_href_simple_pseudo_element(self, xpath): - other = XPathExpr('@href', '', ) - return xpath.join('/', other) + other = XPathExpr( + "@href", + "", + ) + return xpath.join("/", other) # pseudo-element: # used to demonstrate operator precedence @@ -643,91 +600,86 @@ def xpath_first_or_second_pseudo(self, xpath): def xpath(css): return _unicode(CustomTranslator().css_to_xpath(css)) - assert xpath(':five-attributes') == "descendant-or-self::*[count(@*)=5]" - assert xpath(':nb-attr(3)') == "descendant-or-self::*[count(@*)=3]" - assert xpath('::attr(href)') == "descendant-or-self::*/@href" - assert xpath('::text-node') == "descendant-or-self::*/text()" - assert xpath('::attr-href') == "descendant-or-self::*/@href" - assert xpath('p img::attr(src)') == ( - "descendant-or-self::p/descendant-or-self::*/img/@src") - assert xpath(':scope') == "descendant-or-self::*[1]" - assert xpath(':first-or-second[href]') == ( - "descendant-or-self::*[(@id = 'first' or @id = 'second') " - "and (@href)]") + assert xpath(":five-attributes") == "descendant-or-self::*[count(@*)=5]" + assert xpath(":nb-attr(3)") == "descendant-or-self::*[count(@*)=3]" + assert xpath("::attr(href)") == "descendant-or-self::*/@href" + assert 
xpath("::text-node") == "descendant-or-self::*/text()" + assert xpath("::attr-href") == "descendant-or-self::*/@href" + assert xpath("p img::attr(src)") == ( + "descendant-or-self::p/descendant-or-self::*/img/@src" + ) + assert xpath(":scope") == "descendant-or-self::*[1]" + assert xpath(":first-or-second[href]") == ( + "descendant-or-self::*[(@id = 'first' or @id = 'second') " "and (@href)]" + ) - assert str(XPathExpr('', '', condition='@href')) == "[@href]" + assert str(XPathExpr("", "", condition="@href")) == "[@href]" document = etree.fromstring(OPERATOR_PRECEDENCE_IDS) - sort_key = dict( - (el, count) for count, el in enumerate(document.getiterator()) - ).__getitem__ + sort_key = dict((el, count) for count, el in enumerate(document.getiterator())).__getitem__ + def operator_id(selector): xpath = CustomTranslator().css_to_xpath(selector) items = document.xpath(xpath) items.sort(key=sort_key) - return [element.get('id', 'nil') for element in items] + return [element.get("id", "nil") for element in items] - assert operator_id(':first-or-second') == ['first', 'second'] - assert operator_id(':first-or-second[href]') == ['second'] - assert operator_id('[href]:first-or-second') == ['second'] + assert operator_id(":first-or-second") == ["first", "second"] + assert operator_id(":first-or-second[href]") == ["second"] + assert operator_id("[href]:first-or-second") == ["second"] def test_series(self): def series(css): - selector, = parse(':nth-child(%s)' % css) + (selector,) = parse(":nth-child(%s)" % css) args = selector.parsed_tree.arguments try: return parse_series(args) except ValueError: return None - assert series('1n+3') == (1, 3) - assert series('1n +3') == (1, 3) - assert series('1n + 3') == (1, 3) - assert series('1n+ 3') == (1, 3) - assert series('1n-3') == (1, -3) - assert series('1n -3') == (1, -3) - assert series('1n - 3') == (1, -3) - assert series('1n- 3') == (1, -3) - assert series('n-5') == (1, -5) - assert series('odd') == (2, 1) - assert series('even') 
== (2, 0) - assert series('3n') == (3, 0) - assert series('n') == (1, 0) - assert series('+n') == (1, 0) - assert series('-n') == (-1, 0) - assert series('5') == (0, 5) - assert series('foo') is None - assert series('n+') is None + assert series("1n+3") == (1, 3) + assert series("1n +3") == (1, 3) + assert series("1n + 3") == (1, 3) + assert series("1n+ 3") == (1, 3) + assert series("1n-3") == (1, -3) + assert series("1n -3") == (1, -3) + assert series("1n - 3") == (1, -3) + assert series("1n- 3") == (1, -3) + assert series("n-5") == (1, -5) + assert series("odd") == (2, 1) + assert series("even") == (2, 0) + assert series("3n") == (3, 0) + assert series("n") == (1, 0) + assert series("+n") == (1, 0) + assert series("-n") == (-1, 0) + assert series("5") == (0, 5) + assert series("foo") is None + assert series("n+") is None def test_lang(self): document = etree.fromstring(XMLLANG_IDS) - sort_key = dict( - (el, count) for count, el in enumerate(document.getiterator()) - ).__getitem__ + sort_key = dict((el, count) for count, el in enumerate(document.getiterator())).__getitem__ css_to_xpath = GenericTranslator().css_to_xpath def langid(selector): xpath = css_to_xpath(selector) items = document.xpath(xpath) items.sort(key=sort_key) - return [element.get('id', 'nil') for element in items] - - assert langid(':lang("EN")') == ['first', 'second', 'third', 'fourth'] - assert langid(':lang("en-us")') == ['second', 'fourth'] - assert langid(':lang(en-nz)') == ['third'] - assert langid(':lang(fr)') == ['fifth'] - assert langid(':lang(ru)') == ['sixth'] - assert langid(":lang('ZH')") == ['eighth'] - assert langid(':lang(de) :lang(zh)') == ['eighth'] - assert langid(':lang(en), :lang(zh)') == [ - 'first', 'second', 'third', 'fourth', 'eighth'] - assert langid(':lang(es)') == [] + return [element.get("id", "nil") for element in items] + + assert langid(':lang("EN")') == ["first", "second", "third", "fourth"] + assert langid(':lang("en-us")') == ["second", "fourth"] + assert 
langid(":lang(en-nz)") == ["third"] + assert langid(":lang(fr)") == ["fifth"] + assert langid(":lang(ru)") == ["sixth"] + assert langid(":lang('ZH')") == ["eighth"] + assert langid(":lang(de) :lang(zh)") == ["eighth"] + assert langid(":lang(en), :lang(zh)") == ["first", "second", "third", "fourth", "eighth"] + assert langid(":lang(es)") == [] def test_select(self): document = etree.fromstring(HTML_IDS) - sort_key = dict( - (el, count) for count, el in enumerate(document.getiterator()) - ).__getitem__ + sort_key = dict((el, count) for count, el in enumerate(document.getiterator())).__getitem__ css_to_xpath = GenericTranslator().css_to_xpath html_css_to_xpath = HTMLTranslator().css_to_xpath @@ -739,172 +691,218 @@ def select_ids(selector, html_only): xpath = html_css_to_xpath(selector) items = document.xpath(xpath) items.sort(key=sort_key) - return [element.get('id', 'nil') for element in items] + return [element.get("id", "nil") for element in items] def pcss(main, *selectors, **kwargs): - html_only = kwargs.pop('html_only', False) + html_only = kwargs.pop("html_only", False) result = select_ids(main, html_only) for selector in selectors: assert select_ids(selector, html_only) == result return result - all_ids = pcss('*') - assert all_ids[:6] == [ - 'html', 'nil', 'link-href', 'link-nohref', 'nil', 'outer-div'] - assert all_ids[-1:] == ['foobar-span'] - assert pcss('div') == ['outer-div', 'li-div', 'foobar-div'] - assert pcss('DIV', html_only=True) == [ - 'outer-div', 'li-div', 'foobar-div'] # case-insensitive in HTML - assert pcss('div div') == ['li-div'] - assert pcss('div, div div') == ['outer-div', 'li-div', 'foobar-div'] - assert pcss('a[name]') == ['name-anchor'] - assert pcss('a[NAme]', html_only=True) == [ - 'name-anchor'] # case-insensitive in HTML: - assert pcss('a[rel]') == ['tag-anchor', 'nofollow-anchor'] - assert pcss('a[rel="tag"]') == ['tag-anchor'] - assert pcss('a[href*="localhost"]') == ['tag-anchor'] + all_ids = pcss("*") + assert all_ids[:6] == 
["html", "nil", "link-href", "link-nohref", "nil", "outer-div"] + assert all_ids[-1:] == ["foobar-span"] + assert pcss("div") == ["outer-div", "li-div", "foobar-div"] + assert pcss("DIV", html_only=True) == [ + "outer-div", + "li-div", + "foobar-div", + ] # case-insensitive in HTML + assert pcss("div div") == ["li-div"] + assert pcss("div, div div") == ["outer-div", "li-div", "foobar-div"] + assert pcss("a[name]") == ["name-anchor"] + assert pcss("a[NAme]", html_only=True) == ["name-anchor"] # case-insensitive in HTML: + assert pcss("a[rel]") == ["tag-anchor", "nofollow-anchor"] + assert pcss('a[rel="tag"]') == ["tag-anchor"] + assert pcss('a[href*="localhost"]') == ["tag-anchor"] assert pcss('a[href*=""]') == [] - assert pcss('a[href^="http"]') == ['tag-anchor', 'nofollow-anchor'] - assert pcss('a[href^="http:"]') == ['tag-anchor'] + assert pcss('a[href^="http"]') == ["tag-anchor", "nofollow-anchor"] + assert pcss('a[href^="http:"]') == ["tag-anchor"] assert pcss('a[href^=""]') == [] - assert pcss('a[href$="org"]') == ['nofollow-anchor'] + assert pcss('a[href$="org"]') == ["nofollow-anchor"] assert pcss('a[href$=""]') == [] - assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == [ - 'foobar-div'] - assert pcss('[foobar~="ab bc"]', - '[foobar~=""]', '[foobar~=" \t"]') == [] + assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == ["foobar-div"] + assert pcss('[foobar~="ab bc"]', '[foobar~=""]', '[foobar~=" \t"]') == [] assert pcss('div[foobar~="cd"]') == [] - assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ['second-li'] + assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ["second-li"] # Attribute values are case sensitive assert pcss('*[lang|="en"]', '[lang|="en-US"]') == [] assert pcss('*[lang|="e"]') == [] # ... :lang() is not. 
- assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [ - 'second-li', 'li-div'] + assert pcss(':lang("EN")', "*:lang(en-US)", html_only=True) == ["second-li", "li-div"] assert pcss(':lang("e")', html_only=True) == [] - assert pcss(':scope > div') == [] - assert pcss(':scope body') == ['nil'] - assert pcss(':scope body > div') == ['outer-div', 'foobar-div'] - assert pcss(':scope head') == ['nil'] - assert pcss(':scope html') == [] + assert pcss(":scope > div") == [] + assert pcss(":scope body") == ["nil"] + assert pcss(":scope body > div") == ["outer-div", "foobar-div"] + assert pcss(":scope head") == ["nil"] + assert pcss(":scope html") == [] # --- nth-* and nth-last-* ------------------------------------- # select nothing - assert pcss('li:nth-child(-n)') == [] + assert pcss("li:nth-child(-n)") == [] # select all children - assert pcss('li:nth-child(n)') == [ - 'first-li', 'second-li', 'third-li', 'fourth-li', - 'fifth-li', 'sixth-li', 'seventh-li'] - - assert pcss('li:nth-child(3)', - '#first-li ~ :nth-child(3)') == ['third-li'] - assert pcss('li:nth-child(10)') == [] - assert pcss('li:nth-child(2n)', 'li:nth-child(even)', - 'li:nth-child(2n+0)') == [ - 'second-li', 'fourth-li', 'sixth-li'] - assert pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)') == [ - 'first-li', 'third-li', 'fifth-li', 'seventh-li'] - assert pcss('li:nth-child(2n+4)') == ['fourth-li', 'sixth-li'] - assert pcss('li:nth-child(3n+1)') == [ - 'first-li', 'fourth-li', 'seventh-li'] - assert pcss('li:nth-child(-n+3)') == [ - 'first-li', 'second-li', 'third-li'] - assert pcss('li:nth-child(-2n+4)') == ['second-li', 'fourth-li'] - assert pcss('li:nth-last-child(0)') == [] - assert pcss('li:nth-last-child(1)') == ['seventh-li'] - assert pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)') == [ - 'second-li', 'fourth-li', 'sixth-li'] - assert pcss('li:nth-last-child(2n+1)') == [ - 'first-li', 'third-li', 'fifth-li', 'seventh-li'] - assert pcss('li:nth-last-child(2n+2)') == [ - 
'second-li', 'fourth-li', 'sixth-li'] - assert pcss('li:nth-last-child(3n+1)') == [ - 'first-li', 'fourth-li', 'seventh-li'] - assert pcss('ol:first-of-type') == ['first-ol'] - assert pcss('ol:nth-child(1)') == [] - assert pcss('ol:nth-of-type(2)') == ['second-ol'] - assert pcss('ol:nth-last-of-type(1)') == ['second-ol'] + assert pcss("li:nth-child(n)") == [ + "first-li", + "second-li", + "third-li", + "fourth-li", + "fifth-li", + "sixth-li", + "seventh-li", + ] + + assert pcss("li:nth-child(3)", "#first-li ~ :nth-child(3)") == ["third-li"] + assert pcss("li:nth-child(10)") == [] + assert pcss("li:nth-child(2n)", "li:nth-child(even)", "li:nth-child(2n+0)") == [ + "second-li", + "fourth-li", + "sixth-li", + ] + assert pcss("li:nth-child(+2n+1)", "li:nth-child(odd)") == [ + "first-li", + "third-li", + "fifth-li", + "seventh-li", + ] + assert pcss("li:nth-child(2n+4)") == ["fourth-li", "sixth-li"] + assert pcss("li:nth-child(3n+1)") == ["first-li", "fourth-li", "seventh-li"] + assert pcss("li:nth-child(-n+3)") == ["first-li", "second-li", "third-li"] + assert pcss("li:nth-child(-2n+4)") == ["second-li", "fourth-li"] + assert pcss("li:nth-last-child(0)") == [] + assert pcss("li:nth-last-child(1)") == ["seventh-li"] + assert pcss("li:nth-last-child(2n)", "li:nth-last-child(even)") == [ + "second-li", + "fourth-li", + "sixth-li", + ] + assert pcss("li:nth-last-child(2n+1)") == [ + "first-li", + "third-li", + "fifth-li", + "seventh-li", + ] + assert pcss("li:nth-last-child(2n+2)") == ["second-li", "fourth-li", "sixth-li"] + assert pcss("li:nth-last-child(3n+1)") == ["first-li", "fourth-li", "seventh-li"] + assert pcss("ol:first-of-type") == ["first-ol"] + assert pcss("ol:nth-child(1)") == [] + assert pcss("ol:nth-of-type(2)") == ["second-ol"] + assert pcss("ol:nth-last-of-type(1)") == ["second-ol"] # "+" and "~" tests - assert pcss('ol#first-ol li + li:nth-child(4)') == ['fourth-li'] - assert pcss('li + li:nth-child(1)') == [] - assert pcss('li ~ li:nth-child(2n+1)') == [ 
- 'third-li', 'fifth-li', 'seventh-li' - ] # all but the first - assert pcss('li ~ li:nth-last-child(2n+1)') == [ - 'third-li', 'fifth-li', 'seventh-li' - ] # all but the first - - assert pcss('span:only-child') == ['foobar-span'] - assert pcss('li div:only-child') == ['li-div'] - assert pcss('div *:only-child') == ['li-div', 'foobar-span'] - self.assertRaises(ExpressionError, pcss, 'p *:only-of-type') - assert pcss('p:only-of-type') == ['paragraph'] - assert pcss('a:empty', 'a:EMpty') == ['name-anchor'] - assert pcss('li:empty') == [ - 'third-li', 'fourth-li', 'fifth-li', 'sixth-li'] - assert pcss(':root', 'html:root') == ['html'] - assert pcss('li:root', '* :root') == [] + assert pcss("ol#first-ol li + li:nth-child(4)") == ["fourth-li"] + assert pcss("li + li:nth-child(1)") == [] + assert pcss("li ~ li:nth-child(2n+1)") == [ + "third-li", + "fifth-li", + "seventh-li", + ] # all but the first + assert pcss("li ~ li:nth-last-child(2n+1)") == [ + "third-li", + "fifth-li", + "seventh-li", + ] # all but the first + + assert pcss("span:only-child") == ["foobar-span"] + assert pcss("li div:only-child") == ["li-div"] + assert pcss("div *:only-child") == ["li-div", "foobar-span"] + self.assertRaises(ExpressionError, pcss, "p *:only-of-type") + assert pcss("p:only-of-type") == ["paragraph"] + assert pcss("a:empty", "a:EMpty") == ["name-anchor"] + assert pcss("li:empty") == ["third-li", "fourth-li", "fifth-li", "sixth-li"] + assert pcss(":root", "html:root") == ["html"] + assert pcss("li:root", "* :root") == [] assert pcss('*:contains("link")', ':CONtains("link")') == [ - 'html', 'nil', 'outer-div', 'tag-anchor', 'nofollow-anchor'] + "html", + "nil", + "outer-div", + "tag-anchor", + "nofollow-anchor", + ] assert pcss('*:contains("LInk")') == [] # case sensitive assert pcss('*:contains("e")') == [ - 'html', 'nil', 'outer-div', 'first-ol', 'first-li', - 'paragraph', 'p-em'] + "html", + "nil", + "outer-div", + "first-ol", + "first-li", + "paragraph", + "p-em", + ] assert 
pcss('*:contains("E")') == [] # case-sensitive - assert pcss('.a', '.b', '*.a', 'ol.a') == ['first-ol'] - assert pcss('.c', '*.c') == ['first-ol', 'third-li', 'fourth-li'] - assert pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c') == [ - 'third-li', 'fourth-li'] - assert pcss('#first-li', 'li#first-li', '*#first-li') == ['first-li'] - assert pcss('li div', 'li > div', 'div div') == ['li-div'] - assert pcss('div > div') == [] - assert pcss('div>.c', 'div > .c') == ['first-ol'] - assert pcss('div + div') == ['foobar-div'] - assert pcss('a ~ a') == ['tag-anchor', 'nofollow-anchor'] - assert pcss('a[rel="tag"] ~ a') == ['nofollow-anchor'] - assert pcss('ol#first-ol li:last-child') == ['seventh-li'] - assert pcss('ol#first-ol *:last-child') == ['li-div', 'seventh-li'] - assert pcss('#outer-div:first-child') == ['outer-div'] - assert pcss('#outer-div :first-child') == [ - 'name-anchor', 'first-li', 'li-div', 'p-b', - 'checkbox-fieldset-disabled', 'area-href'] - assert pcss('a[href]') == ['tag-anchor', 'nofollow-anchor'] - assert pcss(':not(*)') == [] - assert pcss('a:not([href])') == ['name-anchor'] - assert pcss('ol :Not(li[class])') == [ - 'first-li', 'second-li', 'li-div', - 'fifth-li', 'sixth-li', 'seventh-li'] - assert pcss(':is(#first-li, #second-li)') == [ - 'first-li', 'second-li'] - assert pcss('a:is(#name-anchor, #tag-anchor)') == [ - 'name-anchor', 'tag-anchor'] - assert pcss(':is(.c)') == [ - 'first-ol', 'third-li', 'fourth-li'] - assert pcss('ol.a.b.c > li.c:nth-child(3)') == ['third-li'] + assert pcss(".a", ".b", "*.a", "ol.a") == ["first-ol"] + assert pcss(".c", "*.c") == ["first-ol", "third-li", "fourth-li"] + assert pcss("ol *.c", "ol li.c", "li ~ li.c", "ol > li.c") == ["third-li", "fourth-li"] + assert pcss("#first-li", "li#first-li", "*#first-li") == ["first-li"] + assert pcss("li div", "li > div", "div div") == ["li-div"] + assert pcss("div > div") == [] + assert pcss("div>.c", "div > .c") == ["first-ol"] + assert pcss("div + div") == 
["foobar-div"] + assert pcss("a ~ a") == ["tag-anchor", "nofollow-anchor"] + assert pcss('a[rel="tag"] ~ a') == ["nofollow-anchor"] + assert pcss("ol#first-ol li:last-child") == ["seventh-li"] + assert pcss("ol#first-ol *:last-child") == ["li-div", "seventh-li"] + assert pcss("#outer-div:first-child") == ["outer-div"] + assert pcss("#outer-div :first-child") == [ + "name-anchor", + "first-li", + "li-div", + "p-b", + "checkbox-fieldset-disabled", + "area-href", + ] + assert pcss("a[href]") == ["tag-anchor", "nofollow-anchor"] + assert pcss(":not(*)") == [] + assert pcss("a:not([href])") == ["name-anchor"] + assert pcss("ol :Not(li[class])") == [ + "first-li", + "second-li", + "li-div", + "fifth-li", + "sixth-li", + "seventh-li", + ] + assert pcss(":is(#first-li, #second-li)") == ["first-li", "second-li"] + assert pcss("a:is(#name-anchor, #tag-anchor)") == ["name-anchor", "tag-anchor"] + assert pcss(":is(.c)") == ["first-ol", "third-li", "fourth-li"] + assert pcss("ol.a.b.c > li.c:nth-child(3)") == ["third-li"] # Invalid characters in XPath element names, should not crash - assert pcss(r'di\a0 v', r'div\[') == [] - assert pcss(r'[h\a0 ref]', r'[h\]ref]') == [] + assert pcss(r"di\a0 v", r"div\[") == [] + assert pcss(r"[h\a0 ref]", r"[h\]ref]") == [] # HTML-specific - assert pcss(':link', html_only=True) == [ - 'link-href', 'tag-anchor', 'nofollow-anchor', 'area-href'] - assert pcss(':visited', html_only=True) == [] - assert pcss(':enabled', html_only=True) == [ - 'link-href', 'tag-anchor', 'nofollow-anchor', - 'checkbox-unchecked', 'text-checked', 'checkbox-checked', - 'area-href'] - assert pcss(':disabled', html_only=True) == [ - 'checkbox-disabled', 'checkbox-disabled-checked', 'fieldset', - 'checkbox-fieldset-disabled'] - assert pcss(':checked', html_only=True) == [ - 'checkbox-checked', 'checkbox-disabled-checked'] + assert pcss(":link", html_only=True) == [ + "link-href", + "tag-anchor", + "nofollow-anchor", + "area-href", + ] + assert pcss(":visited", 
html_only=True) == [] + assert pcss(":enabled", html_only=True) == [ + "link-href", + "tag-anchor", + "nofollow-anchor", + "checkbox-unchecked", + "text-checked", + "checkbox-checked", + "area-href", + ] + assert pcss(":disabled", html_only=True) == [ + "checkbox-disabled", + "checkbox-disabled-checked", + "fieldset", + "checkbox-fieldset-disabled", + ] + assert pcss(":checked", html_only=True) == [ + "checkbox-checked", + "checkbox-disabled-checked", + ] def test_select_shakespeare(self): document = html.document_fromstring(HTML_SHAKESPEARE) - body = document.xpath('//body')[0] + body = document.xpath("//body")[0] css_to_xpath = GenericTranslator().css_to_xpath try: @@ -927,66 +925,67 @@ def count(selector): ## Changed from original; probably because I'm only ## searching the body. - #assert count('*') == 252 - assert count('*') == 246 - assert count('div:contains(CELIA)') == 26 - assert count('div:only-child') == 22 # ? - assert count('div:nth-child(even)') == 106 - assert count('div:nth-child(2n)') == 106 - assert count('div:nth-child(odd)') == 137 - assert count('div:nth-child(2n+1)') == 137 - assert count('div:nth-child(n)') == 243 - assert count('div:last-child') == 53 - assert count('div:first-child') == 51 - assert count('div > div') == 242 - assert count('div + div') == 190 - assert count('div ~ div') == 190 - assert count('body') == 1 - assert count('body div') == 243 - assert count('div') == 243 - assert count('div div') == 242 - assert count('div div div') == 241 - assert count('div, div, div') == 243 - assert count('div, a, span') == 243 - assert count('.dialog') == 51 - assert count('div.dialog') == 51 - assert count('div .dialog') == 51 - assert count('div.character, div.dialog') == 99 - assert count('div.direction.dialog') == 0 - assert count('div.dialog.direction') == 0 - assert count('div.dialog.scene') == 1 - assert count('div.scene.scene') == 1 - assert count('div.scene .scene') == 0 - assert count('div.direction .dialog ') == 0 - assert 
count('div .dialog .direction') == 4 - assert count('div.dialog .dialog .direction') == 4 - assert count('#speech5') == 1 - assert count('div#speech5') == 1 - assert count('div #speech5') == 1 - assert count('div.scene div.dialog') == 49 - assert count('div#scene1 div.dialog div') == 142 - assert count('#scene1 #speech1') == 1 - assert count('div[class]') == 103 - assert count('div[class=dialog]') == 50 - assert count('div[class^=dia]') == 51 - assert count('div[class$=log]') == 50 - assert count('div[class*=sce]') == 1 - assert count('div[class|=dialog]') == 50 # ? Seems right - assert count('div[class!=madeup]') == 243 # ? Seems right - assert count('div[class~=dialog]') == 51 # ? Seems right - assert count(':scope > div') == 1 - assert count(':scope > div > div[class=dialog]') == 1 - assert count(':scope > div div') == 242 - -OPERATOR_PRECEDENCE_IDS = ''' + # assert count('*') == 252 + assert count("*") == 246 + assert count("div:contains(CELIA)") == 26 + assert count("div:only-child") == 22 # ? 
+ assert count("div:nth-child(even)") == 106 + assert count("div:nth-child(2n)") == 106 + assert count("div:nth-child(odd)") == 137 + assert count("div:nth-child(2n+1)") == 137 + assert count("div:nth-child(n)") == 243 + assert count("div:last-child") == 53 + assert count("div:first-child") == 51 + assert count("div > div") == 242 + assert count("div + div") == 190 + assert count("div ~ div") == 190 + assert count("body") == 1 + assert count("body div") == 243 + assert count("div") == 243 + assert count("div div") == 242 + assert count("div div div") == 241 + assert count("div, div, div") == 243 + assert count("div, a, span") == 243 + assert count(".dialog") == 51 + assert count("div.dialog") == 51 + assert count("div .dialog") == 51 + assert count("div.character, div.dialog") == 99 + assert count("div.direction.dialog") == 0 + assert count("div.dialog.direction") == 0 + assert count("div.dialog.scene") == 1 + assert count("div.scene.scene") == 1 + assert count("div.scene .scene") == 0 + assert count("div.direction .dialog ") == 0 + assert count("div .dialog .direction") == 4 + assert count("div.dialog .dialog .direction") == 4 + assert count("#speech5") == 1 + assert count("div#speech5") == 1 + assert count("div #speech5") == 1 + assert count("div.scene div.dialog") == 49 + assert count("div#scene1 div.dialog div") == 142 + assert count("#scene1 #speech1") == 1 + assert count("div[class]") == 103 + assert count("div[class=dialog]") == 50 + assert count("div[class^=dia]") == 51 + assert count("div[class$=log]") == 50 + assert count("div[class*=sce]") == 1 + assert count("div[class|=dialog]") == 50 # ? Seems right + assert count("div[class!=madeup]") == 243 # ? Seems right + assert count("div[class~=dialog]") == 51 # ? 
Seems right + assert count(":scope > div") == 1 + assert count(":scope > div > div[class=dialog]") == 1 + assert count(":scope > div div") == 242 + + +OPERATOR_PRECEDENCE_IDS = """ -''' +""" -XMLLANG_IDS = ''' +XMLLANG_IDS = """ a b @@ -998,9 +997,9 @@ def count(selector): -''' +""" -HTML_IDS = ''' +HTML_IDS = """ @@ -1049,10 +1048,10 @@ def count(selector):
-''' +""" -HTML_SHAKESPEARE = ''' +HTML_SHAKESPEARE = """ @@ -1361,8 +1360,8 @@ def count(selector): -''' +""" -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() From 89248d7537435eb3d10ebb3666fe423b1d11577d Mon Sep 17 00:00:00 2001 From: annbgn Date: Sun, 8 Aug 2021 13:41:53 +0300 Subject: [PATCH 154/208] support :where() pseudo class --- cssselect/parser.py | 34 ++++++++++++++++++++++++++++++++++ cssselect/xpath.py | 9 +++++++++ tests/test_cssselect.py | 10 ++++++++++ 3 files changed, 53 insertions(+) diff --git a/cssselect/parser.py b/cssselect/parser.py index a27ece5..15f7139 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -284,6 +284,37 @@ def specificity(self): return max([x.specificity() for x in self.selector_list]) +class SpecificityAdjustment(object): + """ + Represents selector:where(selector_list) + Same as selector:is(selector_list), but its specificity is always 0 + """ + + def __init__(self, selector, selector_list): + self.selector = selector + self.selector_list = selector_list + + def __repr__(self): + return "%s[%r:where(%s)]" % ( + self.__class__.__name__, + self.selector, + ", ".join(map(repr, self.selector_list)), + ) + + def canonical(self): + selector_arguments = [] + for s in self.selector_list: + selarg = s.canonical() + selector_arguments.append(selarg.lstrip("*")) + return "%s:where(%s)" % ( + self.selector.canonical(), + ", ".join(map(str, selector_arguments)), + ) + + def specificity(self): + return 0, 0, 0 + + class Attrib(object): """ Represents selector[namespace|attrib operator value] @@ -585,6 +616,9 @@ def parse_simple_selector(stream, inside_negation=False): elif ident.lower() in ("matches", "is"): selectors = parse_simple_selector_arguments(stream) result = Matching(result, selectors) + elif ident.lower() == "where": + selectors = parse_simple_selector_arguments(stream) + result = SpecificityAdjustment(result, selectors) else: result = Function(result, ident, parse_arguments(stream)) else: 
diff --git a/cssselect/xpath.py b/cssselect/xpath.py index f80e629..41aa7f9 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -284,6 +284,15 @@ def xpath_matching(self, matching): xpath.add_condition(e.condition, "or") return xpath + def xpath_specificityadjustment(self, matching): + xpath = self.xpath(matching.selector) + exprs = [self.xpath(selector) for selector in matching.selector_list] + for e in exprs: + e.add_name_test() + if e.condition: + xpath.add_condition(e.condition, "or") + return xpath + def xpath_function(self, function): """Translate a functional pseudo-class.""" method = "xpath_%s_function" % function.name.replace("-", "_") diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index ba46d8a..e4e89bb 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -154,6 +154,10 @@ def parse_many(first, *others): assert parse_many(":is(:hover, :visited)") == [ "Matching[Element[*]:is(Pseudo[Element[*]:hover], Pseudo[Element[*]:visited])]" ] + assert parse_many(":where(:hover, :visited)") == [ + "SpecificityAdjustment[Element[*]:where(Pseudo[Element[*]:hover]," + " Pseudo[Element[*]:visited])]" + ] assert parse_many("td ~ th") == ["CombinedSelector[Element[td] ~ Element[th]]"] assert parse_many(":scope > foo") == [ "CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]" @@ -281,6 +285,7 @@ def specificity(css): assert specificity(":is(.foo, #bar)") == (1, 0, 0) assert specificity(":is(:hover, :visited)") == (0, 1, 0) + assert specificity(":where(:hover, :visited)") == (0, 0, 0) assert specificity("foo:empty") == (0, 1, 1) assert specificity("foo:before") == (0, 0, 2) @@ -317,6 +322,7 @@ def css2css(css, res=None): css2css(":not(#foo)") css2css(":is(#bar, .foo)") css2css(":is(:focused, :visited)") + css2css(":where(:focused, :visited)") css2css("foo:empty") css2css("foo::before") css2css("foo:empty::before") @@ -371,6 +377,8 @@ def get_error(css): assert get_error(":not(:not(a))") == ("Got nested :not()") assert 
get_error(":is(:before)") == ("Got pseudo-element ::before inside function") assert get_error(":is(a b)") == ("Expected an argument, got ") + assert get_error(":where(:before)") == ("Got pseudo-element ::before inside function") + assert get_error(":where(a b)") == ("Expected an argument, got ") assert get_error(":scope > div :scope header") == ( 'Got immediate child pseudo-element ":scope" not at the start of a selector' ) @@ -469,6 +477,8 @@ def xpath(css): "e/following-sibling::f[count(preceding-sibling::*) = 2]" ) assert xpath("div#container p") == ("div[@id = 'container']/descendant-or-self::*/p") + assert xpath("e:where(foo)") == "e[name() = 'foo']" + assert xpath("e:where(foo, bar)") == "e[(name() = 'foo') or (name() = 'bar')]" # Invalid characters in XPath element names assert xpath(r"di\a0 v") == (u("*[name() = 'di v']")) # di\xa0v From f564dfd93358ea55bbd55b75beeb71872a06ed12 Mon Sep 17 00:00:00 2001 From: annbgn <47499658+annbgn@users.noreply.github.com> Date: Wed, 18 Aug 2021 21:45:05 +0300 Subject: [PATCH 155/208] add support for :has() (#115) --- cssselect/parser.py | 62 +++++++++++++++++++++++++++++++++++++++++ cssselect/xpath.py | 44 +++++++++++++++++++++++++++-- tests/test_cssselect.py | 27 ++++++++++++++++++ 3 files changed, 130 insertions(+), 3 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 15f7139..9c733f9 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -257,6 +257,41 @@ def specificity(self): return a1 + a2, b1 + b2, c1 + c2 +class Relation(object): + """ + Represents selector:has(subselector) + """ + + def __init__(self, selector, combinator, subselector): + self.selector = selector + self.combinator = combinator + self.subselector = subselector + + def __repr__(self): + return "%s[%r:has(%r)]" % ( + self.__class__.__name__, + self.selector, + self.subselector, + ) + + def canonical(self): + try: + subsel = self.subselector[0].canonical() + except TypeError: + subsel = self.subselector.canonical() + if 
len(subsel) > 1: + subsel = subsel.lstrip("*") + return "%s:has(%s)" % (self.selector.canonical(), subsel) + + def specificity(self): + a1, b1, c1 = self.selector.specificity() + try: + a2, b2, c2 = self.subselector[-1].specificity() + except TypeError: + a2, b2, c2 = self.subselector.specificity() + return a1 + a2, b1 + b2, c1 + c2 + + class Matching(object): """ Represents selector:is(selector_list) @@ -613,6 +648,10 @@ def parse_simple_selector(stream, inside_negation=False): if next != ("DELIM", ")"): raise SelectorSyntaxError("Expected ')', got %s" % (next,)) result = Negation(result, argument) + elif ident.lower() == "has": + combinator, arguments = parse_relative_selector(stream) + result = Relation(result, combinator, arguments) + elif ident.lower() in ("matches", "is"): selectors = parse_simple_selector_arguments(stream) result = Matching(result, selectors) @@ -641,6 +680,29 @@ def parse_arguments(stream): raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) +def parse_relative_selector(stream): + stream.skip_whitespace() + subselector = "" + next = stream.next() + + if next in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]: + combinator = next + stream.skip_whitespace() + next = stream.next() + else: + combinator = Token("DELIM", " ", pos=0) + + while 1: + if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "."), ("DELIM", "*")]: + subselector += next.value + elif next == ("DELIM", ")"): + result = parse(subselector) + return combinator, result[0] + else: + raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) + next = stream.next() + + def parse_simple_selector_arguments(stream): arguments = [] while 1: diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 41aa7f9..3b68dd8 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -14,6 +14,7 @@ import sys import re +import copy from cssselect.parser import parse, parse_series, SelectorError @@ -75,14 +76,21 @@ def 
add_star_prefix(self): """ self.path += "*/" - def join(self, combiner, other): + def join(self, combiner, other, closing_combiner=None, has_inner_condition=False): path = _unicode(self) + combiner # Any "star prefix" is redundant when joining. if other.path != "*/": path += other.path self.path = path - self.element = other.element - self.condition = other.condition + if not has_inner_condition: + self.element = other.element + closing_combiner if closing_combiner else other.element + self.condition = other.condition + else: + self.element = other.element + if other.condition: + self.element += "[" + other.condition + "]" + if closing_combiner: + self.element += closing_combiner return self @@ -275,6 +283,17 @@ def xpath_negation(self, negation): else: return xpath.add_condition("0") + def xpath_relation(self, relation): + xpath = self.xpath(relation.selector) + combinator = relation.combinator + subselector = relation.subselector + right = self.xpath(subselector.parsed_tree) + method = getattr( + self, + "xpath_relation_%s_combinator" % self.combinator_mapping[combinator.value], + ) + return method(xpath, right) + def xpath_matching(self, matching): xpath = self.xpath(matching.selector) exprs = [self.xpath(selector) for selector in matching.selector_list] @@ -385,6 +404,25 @@ def xpath_indirect_adjacent_combinator(self, left, right): """right is a sibling after left, immediately or not""" return left.join("/following-sibling::", right) + def xpath_relation_descendant_combinator(self, left, right): + """right is a child, grand-child or further descendant of left; select left""" + return left.join("[descendant::", right, closing_combiner="]", has_inner_condition=True) + + def xpath_relation_child_combinator(self, left, right): + """right is an immediate child of left; select left""" + return left.join("[./", right, closing_combiner="]") + + def xpath_relation_direct_adjacent_combinator(self, left, right): + """right is a sibling immediately after left; select 
left""" + xpath = left.add_condition( + "following-sibling::*[(name() = '{}') and (position() = 1)]".format(right.element) + ) + return xpath + + def xpath_relation_indirect_adjacent_combinator(self, left, right): + """right is a sibling after left, immediately or not; select left""" + return left.join("[following-sibling::", right, closing_combiner="]") + # Function: dispatch by function/pseudo-class name def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=True): diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index e4e89bb..cdb2446 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -148,6 +148,9 @@ def parse_many(first, *others): assert parse_many("div:not(div.foo)") == [ "Negation[Element[div]:not(Class[Element[div].foo])]" ] + assert parse_many("div:has(div.foo)") == [ + "Relation[Element[div]:has(Selector[Class[Element[div].foo]])]" + ] assert parse_many("div:is(.foo, #bar)") == [ "Matching[Element[div]:is(Class[Element[*].foo], Hash[Element[*]#bar])]" ] @@ -283,6 +286,11 @@ def specificity(css): assert specificity(":not(:empty)") == (0, 1, 0) assert specificity(":not(#foo)") == (1, 0, 0) + assert specificity(":has(*)") == (0, 0, 0) + assert specificity(":has(foo)") == (0, 0, 1) + assert specificity(":has(.foo)") == (0, 1, 0) + assert specificity(":has(> foo)") == (0, 0, 1) + assert specificity(":is(.foo, #bar)") == (1, 0, 0) assert specificity(":is(:hover, :visited)") == (0, 1, 0) assert specificity(":where(:hover, :visited)") == (0, 0, 0) @@ -320,6 +328,9 @@ def css2css(css, res=None): css2css(":not(*[foo])", ":not([foo])") css2css(":not(:empty)") css2css(":not(#foo)") + css2css(":has(*)") + css2css(":has(foo)") + css2css(":has(*.foo)", ":has(.foo)") css2css(":is(#bar, .foo)") css2css(":is(:focused, :visited)") css2css(":where(:focused, :visited)") @@ -387,6 +398,10 @@ def get_error(css): ) assert get_error("> div p") == ("Expected selector, got ' at 0>") + # Unsupported :has() with several 
arguments + assert get_error(":has(a, b)") == ("Expected an argument, got ") + assert get_error(":has()") == ("Expected selector, got ") + def test_translation(self): def xpath(css): return _unicode(GenericTranslator().css_to_xpath(css, prefix="")) @@ -461,6 +476,16 @@ def xpath(css): assert xpath("e:EmPTY") == ("e[not(*) and not(string-length())]") assert xpath("e:root") == ("e[not(parent::*)]") assert xpath("e:hover") == ("e[0]") # never matches + assert ( + xpath("div:has(bar.foo)") == "div[descendant::bar" + "[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]]" + ) + assert xpath("e:has(> f)") == "e[./f]" + assert xpath("e:has(f)") == "e[descendant::f]" + assert xpath("e:has(~ f)") == "e[following-sibling::f]" + assert ( + xpath("e:has(+ f)") == "e[following-sibling::*[(name() = 'f') and (position() = 1)]]" + ) assert xpath('e:contains("foo")') == ("e[contains(., 'foo')]") assert xpath("e:ConTains(foo)") == ("e[contains(., 'foo')]") assert xpath("e.warning") == ( @@ -873,6 +898,8 @@ def pcss(main, *selectors, **kwargs): "sixth-li", "seventh-li", ] + assert pcss("link:has(*)") == [] + assert pcss("ol:has(div)") == ["first-ol"] assert pcss(":is(#first-li, #second-li)") == ["first-li", "second-li"] assert pcss("a:is(#name-anchor, #tag-anchor)") == ["name-anchor", "tag-anchor"] assert pcss(":is(.c)") == ["first-ol", "third-li", "fourth-li"] From 96e53a58bb1398a69fe2140855477d585a93803c Mon Sep 17 00:00:00 2001 From: Andrey Rahmatullin Date: Fri, 21 Oct 2022 15:34:51 +0500 Subject: [PATCH 156/208] Add support for Python 3.10 and 3.11 (#126) --- .github/workflows/tests.yml | 2 +- setup.py | 2 ++ tox.ini | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 799f52f..1288ee5 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9] + python-version: 
[3.6, 3.7, 3.8, 3.9, "3.10", "3.11.0-rc.2"] steps: - uses: actions/checkout@v2 diff --git a/setup.py b/setup.py index f95721d..4db698b 100644 --- a/setup.py +++ b/setup.py @@ -43,6 +43,8 @@ "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", ], **extra_kwargs, ) diff --git a/tox.ini b/tox.ini index 372ecb9..c741f6f 100644 --- a/tox.ini +++ b/tox.ini @@ -14,6 +14,8 @@ commands = [testenv:black] deps = black==21.6b0 + # 8.1.0 breaks black < 22.3.0 + click==8.0.2 commands = black --check {posargs: cssselect setup.py tests} From 48bbfb1fb7c108bdbd501a3f81f8d37acc0426e0 Mon Sep 17 00:00:00 2001 From: Andrey Rahmatullin Date: Fri, 21 Oct 2022 18:37:38 +0500 Subject: [PATCH 157/208] Update tool versions (#127) --- cssselect/parser.py | 32 ++++++++++++++++---------------- cssselect/xpath.py | 6 +++--- pylintrc | 6 ++---- setup.py | 11 ++--------- tests/test_cssselect.py | 26 +++++++++++--------------- tox.ini | 8 +++----- 6 files changed, 37 insertions(+), 52 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 9c733f9..97e146b 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -47,7 +47,7 @@ class SelectorSyntaxError(SelectorError, SyntaxError): #### Parsed objects -class Selector(object): +class Selector: """ Represents a parsed selector. 
@@ -118,7 +118,7 @@ def specificity(self): return a, b, c -class Class(object): +class Class: """ Represents selector.class_name """ @@ -139,7 +139,7 @@ def specificity(self): return a, b, c -class FunctionalPseudoElement(object): +class FunctionalPseudoElement: """ Represents selector::name(arguments) @@ -181,7 +181,7 @@ def specificity(self): return a, b, c -class Function(object): +class Function: """ Represents selector:name(expr) """ @@ -212,7 +212,7 @@ def specificity(self): return a, b, c -class Pseudo(object): +class Pseudo: """ Represents selector:ident """ @@ -233,7 +233,7 @@ def specificity(self): return a, b, c -class Negation(object): +class Negation: """ Represents selector:not(subselector) """ @@ -257,7 +257,7 @@ def specificity(self): return a1 + a2, b1 + b2, c1 + c2 -class Relation(object): +class Relation: """ Represents selector:has(subselector) """ @@ -292,7 +292,7 @@ def specificity(self): return a1 + a2, b1 + b2, c1 + c2 -class Matching(object): +class Matching: """ Represents selector:is(selector_list) """ @@ -316,10 +316,10 @@ def canonical(self): return "%s:is(%s)" % (self.selector.canonical(), ", ".join(map(str, selector_arguments))) def specificity(self): - return max([x.specificity() for x in self.selector_list]) + return max(x.specificity() for x in self.selector_list) -class SpecificityAdjustment(object): +class SpecificityAdjustment: """ Represents selector:where(selector_list) Same as selector:is(selector_list), but its specificity is always 0 @@ -350,7 +350,7 @@ def specificity(self): return 0, 0, 0 -class Attrib(object): +class Attrib: """ Represents selector[namespace|attrib operator value] """ @@ -397,7 +397,7 @@ def specificity(self): return a, b, c -class Element(object): +class Element: """ Represents namespace|element @@ -425,7 +425,7 @@ def specificity(self): return 0, 0, 0 -class Hash(object): +class Hash: """ Represents selector#id """ @@ -446,7 +446,7 @@ def specificity(self): return a, b, c -class 
CombinedSelector(object): +class CombinedSelector: def __init__(self, selector, combinator, subselector): assert selector is not None self.selector = selector @@ -621,7 +621,7 @@ def parse_simple_selector(stream, inside_negation=False): continue if stream.peek() != ("DELIM", "("): result = Pseudo(result, ident) - if result.__repr__() == "Pseudo[Element[*]:scope]": + if repr(result) == "Pseudo[Element[*]:scope]": if not ( len(stream.used) == 2 or (len(stream.used) == 3 and stream.used[0].type == "S") @@ -947,7 +947,7 @@ def tokenize(s): yield EOFToken(pos) -class TokenStream(object): +class TokenStream: def __init__(self, tokens, source=None): self.used = [] self.tokens = iter(tokens) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 3b68dd8..e644375 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -40,7 +40,7 @@ class ExpressionError(SelectorError, RuntimeError): #### XPath Helpers -class XPathExpr(object): +class XPathExpr: def __init__(self, path="", element="*", condition="", star_prefix=False): self.path = path self.element = element @@ -108,7 +108,7 @@ def join(self, combiner, other, closing_combiner=None, has_inner_condition=False #### Translation -class GenericTranslator(object): +class GenericTranslator: """ Translator for "generic" XML documents. @@ -760,7 +760,7 @@ def xpath_lang_function(self, xpath, function): def xpath_link_pseudo(self, xpath): return xpath.add_condition( - "@href and " "(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')" + "@href and (name(.) = 'a' or name(.) = 'link' or name(.) 
= 'area')" ) # Links are never visited, the implementation for :visited is the same diff --git a/pylintrc b/pylintrc index 7da580b..e35425e 100644 --- a/pylintrc +++ b/pylintrc @@ -3,9 +3,8 @@ persistent=no [MESSAGES CONTROL] disable=assignment-from-no-return, - bad-continuation, - bad-whitespace, c-extension-no-member, + consider-using-f-string, consider-using-in, fixme, inconsistent-return-statements, @@ -16,7 +15,6 @@ disable=assignment-from-no-return, multiple-imports, no-else-return, no-member, - no-self-use, raise-missing-from, redefined-builtin, redefined-outer-name, @@ -29,6 +27,6 @@ disable=assignment-from-no-return, too-many-statements, undefined-variable, unidiomatic-typecheck, + unspecified-encoding, unused-argument, unused-import, - useless-object-inheritance # Required for Python 2 support diff --git a/setup.py b/setup.py index 4db698b..cebc4c7 100644 --- a/setup.py +++ b/setup.py @@ -3,14 +3,7 @@ import re import os.path -try: - from setuptools import setup - - extra_kwargs = {"test_suite": "cssselect.tests"} -except ImportError: - from distutils.core import setup - - extra_kwargs = {} +from setuptools import setup ROOT = os.path.dirname(__file__) @@ -33,6 +26,7 @@ url="https://github.com/scrapy/cssselect", license="BSD", packages=["cssselect"], + test_suite="cssselect.tests", python_requires=">=3.6", classifiers=[ "Development Status :: 4 - Beta", @@ -46,5 +40,4 @@ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ], - **extra_kwargs, ) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index cdb2446..9dd1cf7 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -37,7 +37,6 @@ def u(text): return text.decode("utf8") - else: # Python 3 def u(text): @@ -91,18 +90,15 @@ def parse_many(first, *others): assert parse_many("foo|bar") == ["Element[foo|bar]"] # This will never match, but it is valid: assert parse_many("#foo#bar") == ["Hash[Hash[Element[*]#foo]#bar]"] - assert ( - parse_many( - 
"div>.foo", - "div> .foo", - "div >.foo", - "div > .foo", - "div \n> \t \t .foo", - "div\r>\n\n\n.foo", - "div\f>\f.foo", - ) - == ["CombinedSelector[Element[div] > Class[Element[*].foo]]"] - ) + assert parse_many( + "div>.foo", + "div> .foo", + "div >.foo", + "div > .foo", + "div \n> \t \t .foo", + "div\r>\n\n\n.foo", + "div\f>\f.foo", + ) == ["CombinedSelector[Element[div] > Class[Element[*].foo]]"] assert parse_many("td.foo,.bar", "td.foo, .bar", "td.foo\t\r\n\f ,\t\r\n\f .bar") == [ "Class[Element[td].foo]", "Class[Element[*].bar]", @@ -198,7 +194,7 @@ def test_pseudo_repr(css): result = parse(css) assert len(result) == 1 selector = result[0] - return selector.parsed_tree.__repr__() + return repr(selector.parsed_tree) assert parse_one("foo") == ("Element[foo]", None) assert parse_one("*") == ("Element[*]", None) @@ -258,7 +254,7 @@ def test_pseudo_repr(css): # Special test for the unicode symbols and ':scope' element if check # Errors if use repr() instead of __repr__() - assert test_pseudo_repr(u":fİrst-child") == u"Pseudo[Element[*]:fİrst-child]" + assert test_pseudo_repr(":fİrst-child") == "Pseudo[Element[*]:fİrst-child]" assert test_pseudo_repr(":scope") == "Pseudo[Element[*]:scope]" def test_specificity(self): diff --git a/tox.ini b/tox.ini index c741f6f..00898e2 100644 --- a/tox.ini +++ b/tox.ini @@ -13,22 +13,20 @@ commands = [testenv:black] deps = - black==21.6b0 - # 8.1.0 breaks black < 22.3.0 - click==8.0.2 + black==22.10.0 commands = black --check {posargs: cssselect setup.py tests} [testenv:flake8] deps = - flake8==3.9.2 + flake8==5.0.4 commands = flake8 {posargs: cssselect setup.py tests docs/conf.py} [testenv:pylint] deps = {[testenv]deps} - pylint==2.9.5 + pylint==2.15.3 commands = pylint {posargs: cssselect setup.py tests docs} From 9ba91e9f3c4a762167f1ad33e89b3ca85af8a501 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 24 Oct 2022 16:12:02 +0600 Subject: [PATCH 158/208] Fix pylint and black issues. 
--- tests/test_cssselect.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 58be84e..589a081 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -710,14 +710,12 @@ def langid(selector): assert langid(":lang(es)") == [] def test_argument_types(self): - class CustomTranslator(GenericTranslator): - def __init__(self): self.argument_types = [] - def xpath_pseudo_element(self, xpath, function): - self.argument_types += function.argument_types() + def xpath_pseudo_element(self, xpath, pseudo_element): + self.argument_types += pseudo_element.argument_types() def argument_types(css): translator = CustomTranslator() @@ -725,13 +723,13 @@ def argument_types(css): return translator.argument_types mappings = ( - ('', []), - ('ident', ['IDENT']), - ('"string"', ['STRING']), - ('1', ['NUMBER']), + ("", []), + ("ident", ["IDENT"]), + ('"string"', ["STRING"]), + ("1", ["NUMBER"]), ) for argument_string, argument_list in mappings: - css = '::pseudo_element({})'.format(argument_string) + css = "::pseudo_element({})".format(argument_string) assert argument_types(css) == argument_list def test_select(self): From c9683b4f3f8453c8f8404e0e219ca1b23470d991 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 24 Oct 2022 16:21:42 +0600 Subject: [PATCH 159/208] Add a CI action for docs. --- .github/workflows/checks.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index db380bb..6b4cc82 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -19,6 +19,9 @@ jobs: - python-version: 3 env: TOXENV: security + - python-version: 3 + env: + TOXENV: docs steps: - uses: actions/checkout@v2 From 65a1fa40a71fa25be7e31e124ebcf4c1e0b8fd07 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 24 Oct 2022 16:27:56 +0600 Subject: [PATCH 160/208] Add support for recent sybil. 
--- docs/conftest.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/conftest.py b/docs/conftest.py index a98f9e5..9d16bb7 100644 --- a/docs/conftest.py +++ b/docs/conftest.py @@ -1,15 +1,19 @@ from doctest import ELLIPSIS, NORMALIZE_WHITESPACE from sybil import Sybil -from sybil.parsers.codeblock import CodeBlockParser from sybil.parsers.doctest import DocTestParser from sybil.parsers.skip import skip +try: + # sybil 3.0.0+ + from sybil.parsers.codeblock import PythonCodeBlockParser +except ImportError: + from sybil.parsers.codeblock import CodeBlockParser as PythonCodeBlockParser pytest_collect_file = Sybil( parsers=[ DocTestParser(optionflags=ELLIPSIS | NORMALIZE_WHITESPACE), - CodeBlockParser(future_imports=['print_function']), + PythonCodeBlockParser(future_imports=['print_function']), skip, ], pattern='*.rst', From 72a922bd867dbd50efbc61d973f9df202091b367 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 24 Oct 2022 17:20:11 +0600 Subject: [PATCH 161/208] Drop Python 3.6 support. 
--- .github/workflows/tests.yml | 2 +- README.rst | 1 + setup.py | 3 +-- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1288ee5..ca965bd 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9, "3.10", "3.11.0-rc.2"] + python-version: [3.7, 3.8, 3.9, "3.10", "3.11.0-rc.2"] steps: - uses: actions/checkout@v2 diff --git a/README.rst b/README.rst index 9708616..d62b320 100644 --- a/README.rst +++ b/README.rst @@ -1,3 +1,4 @@ + =================================== cssselect: CSS Selectors for Python =================================== diff --git a/setup.py b/setup.py index cebc4c7..f34a1a7 100644 --- a/setup.py +++ b/setup.py @@ -27,13 +27,12 @@ license="BSD", packages=["cssselect"], test_suite="cssselect.tests", - python_requires=">=3.6", + python_requires=">=3.7", classifiers=[ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", From 3da92a4dd6a5a2712c72579b92ffb14176c4279d Mon Sep 17 00:00:00 2001 From: Andrey Rahmatullin Date: Tue, 25 Oct 2022 02:41:33 +0500 Subject: [PATCH 162/208] Enable flake8 for most files, remove py2 remnants (#128) --- .flake8 | 11 +++++----- cssselect/parser.py | 12 ++--------- cssselect/xpath.py | 18 ++++------------ tests/test_cssselect.py | 48 ++++++++++++----------------------------- 4 files changed, 25 insertions(+), 64 deletions(-) diff --git a/.flake8 b/.flake8 index 89e6e07..4315a12 100644 --- a/.flake8 +++ b/.flake8 @@ -1,15 +1,14 @@ [flake8] max-line-length = 99 -ignore = W503 +ignore = + W503 + E266 # too many leading '#' for block comment exclude = .git .tox venv* # pending 
revision - cssselect/__init__.py - cssselect/parser.py - cssselect/xpath.py docs/conf.py - setup.py - tests/test_cssselect.py +per-file-ignores = + cssselect/__init__.py:F401 diff --git a/cssselect/parser.py b/cssselect/parser.py index 584dfea..e166aa2 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -17,14 +17,6 @@ import operator -if sys.version_info[0] < 3: - _unicode = unicode - _unichr = unichr -else: - _unicode = str - _unichr = chr - - def ascii_lower(string): """Lower-case, but only in the ASCII range.""" return string.encode("utf8").lower().decode("utf8") @@ -617,7 +609,7 @@ def parse_simple_selector(stream, inside_negation=False): if ident.lower() in ("first-line", "first-letter", "before", "after"): # Special case: CSS 2.1 pseudo-elements can have a single ':' # Any new pseudo-element must have two. - pseudo_element = _unicode(ident) + pseudo_element = str(ident) continue if stream.peek() != ("DELIM", "("): result = Pseudo(result, ident) @@ -876,7 +868,7 @@ def _replace_unicode(match): codepoint = int(match.group(1), 16) if codepoint > sys.maxunicode: codepoint = 0xFFFD - return _unichr(codepoint) + return chr(codepoint) def unescape_ident(value): diff --git a/cssselect/xpath.py b/cssselect/xpath.py index e644375..623b24c 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -12,21 +12,11 @@ """ -import sys import re -import copy from cssselect.parser import parse, parse_series, SelectorError -if sys.version_info[0] < 3: - _basestring = basestring - _unicode = unicode -else: - _basestring = str - _unicode = str - - def _unicode_safe_getattr(obj, name, default=None): # getattr() with a non-ASCII name fails on Python 2.x name = name.encode("ascii", "replace").decode("ascii") @@ -47,7 +37,7 @@ def __init__(self, path="", element="*", condition="", star_prefix=False): self.condition = condition def __str__(self): - path = _unicode(self.path) + _unicode(self.element) + path = str(self.path) + str(self.element) if self.condition: path += 
"[%s]" % self.condition return path @@ -77,7 +67,7 @@ def add_star_prefix(self): self.path += "*/" def join(self, combiner, other, closing_combiner=None, has_inner_condition=False): - path = _unicode(self) + combiner + path = str(self) + combiner # Any "star prefix" is redundant when joining. if other.path != "*/": path += other.path @@ -230,7 +220,7 @@ def selector_to_xpath( assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return' if translate_pseudo_elements and selector.pseudo_element: xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) - return (prefix or "") + _unicode(xpath) + return (prefix or "") + str(xpath) def xpath_pseudo_element(self, xpath, pseudo_element): """Translate a pseudo-element. @@ -243,7 +233,7 @@ def xpath_pseudo_element(self, xpath, pseudo_element): @staticmethod def xpath_literal(s): - s = _unicode(s) + s = str(s) if "'" not in s: s = "'%s'" % s elif '"' not in s: diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 693684b..d8521d6 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -28,33 +28,20 @@ SelectorSyntaxError, ExpressionError, ) -from cssselect.parser import tokenize, parse_series, _unicode, FunctionalPseudoElement +from cssselect.parser import tokenize, parse_series, FunctionalPseudoElement from cssselect.xpath import _unicode_safe_getattr, XPathExpr -if sys.version_info[0] < 3: - # Python 2 - def u(text): - return text.decode("utf8") - -else: - # Python 3 - def u(text): - return text - - class TestCssselect(unittest.TestCase): def test_tokenizer(self): - tokens = [ - _unicode(item) for item in tokenize(u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)')) - ] + tokens = [str(item) for item in tokenize(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)')] assert tokens == [ - u(""), + "", "", "' at 5>", "", # the no-break space is not whitespace in CSS - u(""), # f\xa0 + "", # f\xa0 "", "", "", @@ -178,9 +165,9 @@ def parse_pseudo(css): result = [] for selector in 
parse(css): pseudo = selector.pseudo_element - pseudo = _unicode(pseudo) if pseudo else pseudo + pseudo = str(pseudo) if pseudo else pseudo # No Symbol here - assert pseudo is None or type(pseudo) is _unicode + assert pseudo is None or type(pseudo) is str selector = repr(selector.parsed_tree).replace("(u'", "('") result.append((selector, pseudo)) return result @@ -409,7 +396,7 @@ def get_error(css): def test_translation(self): def xpath(css): - return _unicode(GenericTranslator().css_to_xpath(css, prefix="")) + return str(GenericTranslator().css_to_xpath(css, prefix="")) assert xpath("*") == "*" assert xpath("e") == "e" @@ -511,12 +498,12 @@ def xpath(css): assert xpath("e:where(foo, bar)") == "e[(name() = 'foo') or (name() = 'bar')]" # Invalid characters in XPath element names - assert xpath(r"di\a0 v") == (u("*[name() = 'di v']")) # di\xa0v + assert xpath(r"di\a0 v") == ("*[name() = 'di v']") # di\xa0v assert xpath(r"di\[v") == ("*[name() = 'di[v']") - assert xpath(r"[h\a0 ref]") == (u("*[attribute::*[name() = 'h ref']]")) # h\xa0ref + assert xpath(r"[h\a0 ref]") == ("*[attribute::*[name() = 'h ref']]") # h\xa0ref assert xpath(r"[h\]ref]") == ("*[attribute::*[name() = 'h]ref']]") - self.assertRaises(ExpressionError, xpath, u(":fİrst-child")) + self.assertRaises(ExpressionError, xpath, ":fİrst-child") self.assertRaises(ExpressionError, xpath, ":first-of-type") self.assertRaises(ExpressionError, xpath, ":only-of-type") self.assertRaises(ExpressionError, xpath, ":last-of-type") @@ -531,11 +518,7 @@ def xpath(css): self.assertRaises(TypeError, GenericTranslator().selector_to_xpath, "foo") def test_unicode(self): - if sys.version_info[0] < 3: - css = ".a\xc1b".decode("ISO-8859-1") - else: - css = ".a\xc1b" - + css = ".a\xc1b" xpath = GenericTranslator().css_to_xpath(css) assert css[1:] in xpath xpath = xpath.encode("ascii", "xmlcharrefreplace").decode("ASCII") @@ -638,7 +621,7 @@ def xpath_first_or_second_pseudo(self, xpath): return xpath.add_condition("@id = 'first' 
or @id = 'second'") def xpath(css): - return _unicode(CustomTranslator().css_to_xpath(css)) + return str(CustomTranslator().css_to_xpath(css)) assert xpath(":five-attributes") == "descendant-or-self::*[count(@*)=5]" assert xpath(":nb-attr(3)") == "descendant-or-self::*[count(@*)=3]" @@ -970,10 +953,7 @@ def test_select_shakespeare(self): body = document.xpath("//body")[0] css_to_xpath = GenericTranslator().css_to_xpath - try: - basestring_ = basestring - except NameError: - basestring_ = (str, bytes) + basestring_ = (str, bytes) def count(selector): xpath = css_to_xpath(selector) @@ -1425,7 +1405,7 @@ def count(selector): -""" +""" # noqa: W191,E101 if __name__ == "__main__": From 02eff5060f39b520e3820842883020c7b698e107 Mon Sep 17 00:00:00 2001 From: Pascal Corpet Date: Tue, 25 Oct 2022 11:50:32 +0200 Subject: [PATCH 163/208] Start typing: add mypy to tox and type parser module (#121) Co-authored-by: Andrey Rahmatullin --- .github/workflows/checks.yml | 3 + CHANGES | 7 + MANIFEST.in | 2 +- cssselect/__init__.py | 10 ++ cssselect/parser.py | 307 +++++++++++++++++++++-------------- cssselect/xpath.py | 228 ++++++++++++++++---------- docs/conf.py | 4 + py.typed | 0 tests/test_cssselect.py | 156 ++++++++++-------- tox.ini | 8 + 10 files changed, 448 insertions(+), 277 deletions(-) create mode 100644 py.typed diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 6b4cc82..85b51ce 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -22,6 +22,9 @@ jobs: - python-version: 3 env: TOXENV: docs + - python-version: 3 + env: + TOXENV: typing steps: - uses: actions/checkout@v2 diff --git a/CHANGES b/CHANGES index 4e7185f..12413df 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,13 @@ Changelog ========= +Version 1.x.x +------------- + +Unreleased. + +* Add type annotations (PEP 484 and PEP 561). 
+ Version 1.1.0 ------------- diff --git a/MANIFEST.in b/MANIFEST.in index a367dc0..7fc2933 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ -include AUTHORS CHANGES LICENSE README.rst tox.ini .coveragerc +include AUTHORS CHANGES LICENSE README.rst tox.ini .coveragerc py.typed recursive-include docs * recursive-include tests * prune docs/_build diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 2e4f824..f9e200d 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -22,6 +22,16 @@ ) from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError +__all__ = ( + "ExpressionError", + "FunctionalPseudoElement", + "GenericTranslator", + "HTMLTranslator", + "parse", + "Selector", + "SelectorError", + "SelectorSyntaxError", +) VERSION = "1.1.0" __version__ = VERSION diff --git a/cssselect/parser.py b/cssselect/parser.py index e166aa2..3a5ec15 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -15,9 +15,11 @@ import sys import re import operator +import typing +from typing import Iterable, Iterator, List, Optional, Sequence, Tuple, Union -def ascii_lower(string): +def ascii_lower(string: str) -> str: """Lower-case, but only in the ASCII range.""" return string.encode("utf8").lower().decode("utf8") @@ -38,6 +40,21 @@ class SelectorSyntaxError(SelectorError, SyntaxError): #### Parsed objects +Tree = Union[ + "Element", + "Hash", + "Class", + "Function", + "Pseudo", + "Attrib", + "Negation", + "Relation", + "Matching", + "SpecificityAdjustment", + "CombinedSelector", +] +PseudoElement = Union["FunctionalPseudoElement", str] + class Selector: """ @@ -50,7 +67,7 @@ class Selector: """ - def __init__(self, tree, pseudo_element=None): + def __init__(self, tree: Tree, pseudo_element: Optional[PseudoElement] = None) -> None: self.parsed_tree = tree if pseudo_element is not None and not isinstance(pseudo_element, FunctionalPseudoElement): pseudo_element = ascii_lower(pseudo_element) @@ -76,7 +93,7 @@ def __init__(self, 
tree, pseudo_element=None): #: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement self.pseudo_element = pseudo_element - def __repr__(self): + def __repr__(self) -> str: if isinstance(self.pseudo_element, FunctionalPseudoElement): pseudo_element = repr(self.pseudo_element) elif self.pseudo_element: @@ -85,7 +102,7 @@ def __repr__(self): pseudo_element = "" return "%s[%r%s]" % (self.__class__.__name__, self.parsed_tree, pseudo_element) - def canonical(self): + def canonical(self) -> str: """Return a CSS representation for this selector (a string)""" if isinstance(self.pseudo_element, FunctionalPseudoElement): pseudo_element = "::%s" % self.pseudo_element.canonical() @@ -98,7 +115,7 @@ def canonical(self): res = res.lstrip("*") return res - def specificity(self): + def specificity(self) -> Tuple[int, int, int]: """Return the specificity_ of this selector as a tuple of 3 integers. .. _specificity: http://www.w3.org/TR/selectors/#specificity @@ -115,17 +132,17 @@ class Class: Represents selector.class_name """ - def __init__(self, selector, class_name): + def __init__(self, selector: Tree, class_name: str) -> None: self.selector = selector self.class_name = class_name - def __repr__(self): + def __repr__(self) -> str: return "%s[%r.%s]" % (self.__class__.__name__, self.selector, self.class_name) - def canonical(self): + def canonical(self) -> str: return "%s.%s" % (self.selector.canonical(), self.class_name) - def specificity(self): + def specificity(self) -> Tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -149,41 +166,36 @@ class FunctionalPseudoElement: """ - def __init__(self, name, arguments): + def __init__(self, name: str, arguments: Sequence["Token"]): self.name = ascii_lower(name) self.arguments = arguments - def __repr__(self): + def __repr__(self) -> str: return "%s[::%s(%r)]" % ( self.__class__.__name__, self.name, [token.value for token in self.arguments], ) - def argument_types(self): + 
def argument_types(self) -> List[str]: return [token.type for token in self.arguments] - def canonical(self): + def canonical(self) -> str: args = "".join(token.css() for token in self.arguments) return "%s(%s)" % (self.name, args) - def specificity(self): - a, b, c = self.selector.specificity() - b += 1 - return a, b, c - class Function: """ Represents selector:name(expr) """ - def __init__(self, selector, name, arguments): + def __init__(self, selector: Tree, name: str, arguments: Sequence["Token"]) -> None: self.selector = selector self.name = ascii_lower(name) self.arguments = arguments - def __repr__(self): + def __repr__(self) -> str: return "%s[%r:%s(%r)]" % ( self.__class__.__name__, self.selector, @@ -191,14 +203,14 @@ def __repr__(self): [token.value for token in self.arguments], ) - def argument_types(self): + def argument_types(self) -> List[str]: return [token.type for token in self.arguments] - def canonical(self): + def canonical(self) -> str: args = "".join(token.css() for token in self.arguments) return "%s:%s(%s)" % (self.selector.canonical(), self.name, args) - def specificity(self): + def specificity(self) -> Tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -209,17 +221,17 @@ class Pseudo: Represents selector:ident """ - def __init__(self, selector, ident): + def __init__(self, selector: Tree, ident: str) -> None: self.selector = selector self.ident = ascii_lower(ident) - def __repr__(self): + def __repr__(self) -> str: return "%s[%r:%s]" % (self.__class__.__name__, self.selector, self.ident) - def canonical(self): + def canonical(self) -> str: return "%s:%s" % (self.selector.canonical(), self.ident) - def specificity(self): + def specificity(self) -> Tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -230,20 +242,20 @@ class Negation: Represents selector:not(subselector) """ - def __init__(self, selector, subselector): + def __init__(self, selector: Tree, subselector: Tree) 
-> None: self.selector = selector self.subselector = subselector - def __repr__(self): + def __repr__(self) -> str: return "%s[%r:not(%r)]" % (self.__class__.__name__, self.selector, self.subselector) - def canonical(self): + def canonical(self) -> str: subsel = self.subselector.canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") return "%s:not(%s)" % (self.selector.canonical(), subsel) - def specificity(self): + def specificity(self) -> Tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -254,31 +266,31 @@ class Relation: Represents selector:has(subselector) """ - def __init__(self, selector, combinator, subselector): + def __init__(self, selector: Tree, combinator: "Token", subselector: Selector): self.selector = selector self.combinator = combinator self.subselector = subselector - def __repr__(self): + def __repr__(self) -> str: return "%s[%r:has(%r)]" % ( self.__class__.__name__, self.selector, self.subselector, ) - def canonical(self): + def canonical(self) -> str: try: - subsel = self.subselector[0].canonical() + subsel = self.subselector[0].canonical() # type: ignore except TypeError: subsel = self.subselector.canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") return "%s:has(%s)" % (self.selector.canonical(), subsel) - def specificity(self): + def specificity(self) -> Tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() try: - a2, b2, c2 = self.subselector[-1].specificity() + a2, b2, c2 = self.subselector[-1].specificity() # type: ignore except TypeError: a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -289,25 +301,25 @@ class Matching: Represents selector:is(selector_list) """ - def __init__(self, selector, selector_list): + def __init__(self, selector: Tree, selector_list: Iterable[Tree]): self.selector = selector self.selector_list = selector_list - def __repr__(self): + def __repr__(self) -> str: return 
"%s[%r:is(%s)]" % ( self.__class__.__name__, self.selector, ", ".join(map(repr, self.selector_list)), ) - def canonical(self): + def canonical(self) -> str: selector_arguments = [] for s in self.selector_list: selarg = s.canonical() selector_arguments.append(selarg.lstrip("*")) return "%s:is(%s)" % (self.selector.canonical(), ", ".join(map(str, selector_arguments))) - def specificity(self): + def specificity(self) -> Tuple[int, int, int]: return max(x.specificity() for x in self.selector_list) @@ -317,18 +329,18 @@ class SpecificityAdjustment: Same as selector:is(selector_list), but its specificity is always 0 """ - def __init__(self, selector, selector_list): + def __init__(self, selector: Tree, selector_list: List[Tree]): self.selector = selector self.selector_list = selector_list - def __repr__(self): + def __repr__(self) -> str: return "%s[%r:where(%s)]" % ( self.__class__.__name__, self.selector, ", ".join(map(repr, self.selector_list)), ) - def canonical(self): + def canonical(self) -> str: selector_arguments = [] for s in self.selector_list: selarg = s.canonical() @@ -338,7 +350,7 @@ def canonical(self): ", ".join(map(str, selector_arguments)), ) - def specificity(self): + def specificity(self) -> Tuple[int, int, int]: return 0, 0, 0 @@ -347,14 +359,38 @@ class Attrib: Represents selector[namespace|attrib operator value] """ - def __init__(self, selector, namespace, attrib, operator, value): + @typing.overload + def __init__( + self, + selector: Tree, + namespace: Optional[str], + attrib: str, + operator: 'typing.Literal["exists"]', + value: None, + ) -> None: + ... + + @typing.overload + def __init__( + self, selector: Tree, namespace: Optional[str], attrib: str, operator: str, value: "Token" + ) -> None: + ... 
+ + def __init__( + self, + selector: Tree, + namespace: Optional[str], + attrib: str, + operator: str, + value: Optional["Token"], + ) -> None: self.selector = selector self.namespace = namespace self.attrib = attrib self.operator = operator self.value = value - def __repr__(self): + def __repr__(self) -> str: if self.namespace: attrib = "%s|%s" % (self.namespace, self.attrib) else: @@ -367,10 +403,10 @@ def __repr__(self): self.selector, attrib, self.operator, - self.value.value, + typing.cast("Token", self.value).value, ) - def canonical(self): + def canonical(self) -> str: if self.namespace: attrib = "%s|%s" % (self.namespace, self.attrib) else: @@ -379,11 +415,11 @@ def canonical(self): if self.operator == "exists": op = attrib else: - op = "%s%s%s" % (attrib, self.operator, self.value.css()) + op = "%s%s%s" % (attrib, self.operator, typing.cast("Token", self.value).css()) return "%s[%s]" % (self.selector.canonical(), op) - def specificity(self): + def specificity(self) -> Tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -397,20 +433,20 @@ class Element: """ - def __init__(self, namespace=None, element=None): + def __init__(self, namespace: Optional[str] = None, element: Optional[str] = None) -> None: self.namespace = namespace self.element = element - def __repr__(self): + def __repr__(self) -> str: return "%s[%s]" % (self.__class__.__name__, self.canonical()) - def canonical(self): + def canonical(self) -> str: element = self.element or "*" if self.namespace: element = "%s|%s" % (self.namespace, element) return element - def specificity(self): + def specificity(self) -> Tuple[int, int, int]: if self.element: return 0, 0, 1 else: @@ -422,43 +458,43 @@ class Hash: Represents selector#id """ - def __init__(self, selector, id): + def __init__(self, selector: Tree, id: str) -> None: self.selector = selector self.id = id - def __repr__(self): + def __repr__(self) -> str: return "%s[%r#%s]" % (self.__class__.__name__, 
self.selector, self.id) - def canonical(self): + def canonical(self) -> str: return "%s#%s" % (self.selector.canonical(), self.id) - def specificity(self): + def specificity(self) -> Tuple[int, int, int]: a, b, c = self.selector.specificity() a += 1 return a, b, c class CombinedSelector: - def __init__(self, selector, combinator, subselector): + def __init__(self, selector: Tree, combinator: str, subselector: Tree) -> None: assert selector is not None self.selector = selector self.combinator = combinator self.subselector = subselector - def __repr__(self): + def __repr__(self) -> str: if self.combinator == " ": comb = "" else: comb = self.combinator return "%s[%r %s %r]" % (self.__class__.__name__, self.selector, comb, self.subselector) - def canonical(self): + def canonical(self) -> str: subsel = self.subselector.canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") return "%s %s %s" % (self.selector.canonical(), self.combinator, subsel) - def specificity(self): + def specificity(self) -> Tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -476,7 +512,7 @@ def specificity(self): _class_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$") -def parse(css): +def parse(css: str) -> List[Selector]: """Parse a CSS *group of selectors*. 
If you don't care about pseudo-elements or selector specificity, @@ -516,7 +552,7 @@ def parse(css): # raise -def parse_selector_group(stream): +def parse_selector_group(stream: "TokenStream") -> Iterator[Selector]: stream.skip_whitespace() while 1: yield Selector(*parse_selector(stream)) @@ -527,7 +563,7 @@ def parse_selector_group(stream): break -def parse_selector(stream): +def parse_selector(stream: "TokenStream") -> Tuple[Tree, Optional[PseudoElement]]: result, pseudo_element = parse_simple_selector(stream) while 1: stream.skip_whitespace() @@ -540,7 +576,7 @@ def parse_selector(stream): ) if peek.is_delim("+", ">", "~"): # A combinator - combinator = stream.next().value + combinator = typing.cast(str, stream.next().value) stream.skip_whitespace() else: # By exclusion, the last parse_simple_selector() ended @@ -551,7 +587,9 @@ def parse_selector(stream): return result, pseudo_element -def parse_simple_selector(stream, inside_negation=False): +def parse_simple_selector( + stream: "TokenStream", inside_negation: bool = False +) -> Tuple[Tree, Optional[PseudoElement]]: stream.skip_whitespace() selector_start = len(stream.used) peek = stream.peek() @@ -569,8 +607,8 @@ def parse_simple_selector(stream, inside_negation=False): namespace = None else: element = namespace = None - result = Element(namespace, element) - pseudo_element = None + result: Tree = Element(namespace, element) + pseudo_element: Optional[PseudoElement] = None while 1: peek = stream.peek() if ( @@ -584,7 +622,7 @@ def parse_simple_selector(stream, inside_negation=False): "Got pseudo-element ::%s not at the end of a selector" % pseudo_element ) if peek.type == "HASH": - result = Hash(result, stream.next().value) + result = Hash(result, typing.cast(str, stream.next().value)) elif peek == ("DELIM", "."): stream.next() result = Class(result, stream.next_ident()) @@ -665,8 +703,8 @@ def parse_simple_selector(stream, inside_negation=False): return result, pseudo_element -def parse_arguments(stream): - 
arguments = [] +def parse_arguments(stream: "TokenStream") -> List["Token"]: + arguments: List["Token"] = [] while 1: stream.skip_whitespace() next = stream.next() @@ -678,7 +716,7 @@ def parse_arguments(stream): raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) -def parse_relative_selector(stream): +def parse_relative_selector(stream: "TokenStream") -> Tuple["Token", Selector]: stream.skip_whitespace() subselector = "" next = stream.next() @@ -692,7 +730,7 @@ def parse_relative_selector(stream): while 1: if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "."), ("DELIM", "*")]: - subselector += next.value + subselector += typing.cast(str, next.value) elif next == ("DELIM", ")"): result = parse(subselector) return combinator, result[0] @@ -701,7 +739,7 @@ def parse_relative_selector(stream): next = stream.next() -def parse_simple_selector_arguments(stream): +def parse_simple_selector_arguments(stream: "TokenStream") -> List[Tree]: arguments = [] while 1: result, pseudo_element = parse_simple_selector(stream, True) @@ -723,11 +761,13 @@ def parse_simple_selector_arguments(stream): return arguments -def parse_attrib(selector, stream): +def parse_attrib(selector: Tree, stream: "TokenStream") -> Attrib: stream.skip_whitespace() attrib = stream.next_ident_or_star() if attrib is None and stream.peek() != ("DELIM", "|"): raise SelectorSyntaxError("Expected '|', got %s" % (stream.peek(),)) + namespace: Optional[str] + op: Optional[str] if stream.peek() == ("DELIM", "|"): stream.next() if stream.peek() == ("DELIM", "="): @@ -744,11 +784,11 @@ def parse_attrib(selector, stream): stream.skip_whitespace() next = stream.next() if next == ("DELIM", "]"): - return Attrib(selector, namespace, attrib, "exists", None) + return Attrib(selector, namespace, typing.cast(str, attrib), "exists", None) elif next == ("DELIM", "="): op = "=" elif next.is_delim("^", "$", "*", "~", "|", "!") and (stream.peek() == ("DELIM", "=")): - op = next.value + "=" + op = 
typing.cast(str, next.value) + "=" stream.next() else: raise SelectorSyntaxError("Operator expected, got %s" % (next,)) @@ -760,10 +800,10 @@ def parse_attrib(selector, stream): next = stream.next() if next != ("DELIM", "]"): raise SelectorSyntaxError("Expected ']', got %s" % (next,)) - return Attrib(selector, namespace, attrib, op, value) + return Attrib(selector, namespace, typing.cast(str, attrib), op, value) -def parse_series(tokens): +def parse_series(tokens: Iterable["Token"]) -> Tuple[int, int]: """ Parses the arguments for :nth-child() and friends. @@ -774,7 +814,7 @@ def parse_series(tokens): for token in tokens: if token.type == "STRING": raise ValueError("String tokens not allowed in series.") - s = "".join(token.value for token in tokens).strip() + s = "".join(typing.cast(str, token.value) for token in tokens).strip() if s == "odd": return 2, 1 elif s == "even": @@ -785,49 +825,71 @@ def parse_series(tokens): # Just b return 0, int(s) a, b = s.split("n", 1) + a_as_int: int if not a: - a = 1 + a_as_int = 1 elif a == "-" or a == "+": - a = int(a + "1") + a_as_int = int(a + "1") else: - a = int(a) + a_as_int = int(a) + b_as_int: int if not b: - b = 0 + b_as_int = 0 else: - b = int(b) - return a, b + b_as_int = int(b) + return a_as_int, b_as_int #### Token objects -class Token(tuple): - def __new__(cls, type_, value, pos): +class Token(Tuple[str, Optional[str]]): + @typing.overload + def __new__( + cls, + type_: 'typing.Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"]', + value: str, + pos: int, + ) -> "Token": + ... + + @typing.overload + def __new__(cls, type_: 'typing.Literal["EOF"]', value: None, pos: int) -> "Token": + ... 
+ + def __new__(cls, type_: str, value: Optional[str], pos: int) -> "Token": obj = tuple.__new__(cls, (type_, value)) obj.pos = pos return obj - def __repr__(self): + def __repr__(self) -> str: return "<%s '%s' at %i>" % (self.type, self.value, self.pos) - def is_delim(self, *values): + def is_delim(self, *values: str) -> bool: return self.type == "DELIM" and self.value in values - type = property(operator.itemgetter(0)) - value = property(operator.itemgetter(1)) + pos: int + + @property + def type(self) -> str: + return self[0] - def css(self): + @property + def value(self) -> Optional[str]: + return self[1] + + def css(self) -> str: if self.type == "STRING": return repr(self.value) else: - return self.value + return typing.cast(str, self.value) class EOFToken(Token): - def __new__(cls, pos): - return Token.__new__(cls, "EOF", None, pos) + def __new__(cls, pos: int) -> "EOFToken": + return typing.cast("EOFToken", Token.__new__(cls, "EOF", None, pos)) - def __repr__(self): + def __repr__(self) -> str: return "<%s at %i>" % (self.type, self.pos) @@ -843,7 +905,16 @@ class TokenMacros: nmstart = "[_a-z]|%s|%s" % (escape, nonascii) -def _compile(pattern): +if typing.TYPE_CHECKING: + + class MatchFunc(typing.Protocol): + def __call__( + self, string: str, pos: int = ..., endpos: int = ... + ) -> Optional["re.Match[str]"]: + ... 
+ + +def _compile(pattern: str) -> "MatchFunc": return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match @@ -864,20 +935,20 @@ def _compile(pattern): _replace_simple = operator.methodcaller("group", 1) -def _replace_unicode(match): +def _replace_unicode(match: "re.Match[str]") -> str: codepoint = int(match.group(1), 16) if codepoint > sys.maxunicode: codepoint = 0xFFFD return chr(codepoint) -def unescape_ident(value): +def unescape_ident(value: str) -> str: value = _sub_unicode_escape(_replace_unicode, value) value = _sub_simple_escape(_replace_simple, value) return value -def tokenize(s): +def tokenize(s: str) -> Iterator[Token]: pos = 0 len_s = len(s) while pos < len_s: @@ -946,41 +1017,37 @@ def tokenize(s): class TokenStream: - def __init__(self, tokens, source=None): - self.used = [] + def __init__(self, tokens: Iterable[Token], source: Optional[str] = None) -> None: + self.used: List[Token] = [] self.tokens = iter(tokens) self.source = source - self.peeked = None + self.peeked: Optional[Token] = None self._peeking = False - try: - self.next_token = self.tokens.next - except AttributeError: - # Python 3 - self.next_token = self.tokens.__next__ + self.next_token = self.tokens.__next__ - def next(self): + def next(self) -> Token: if self._peeking: self._peeking = False - self.used.append(self.peeked) - return self.peeked + self.used.append(typing.cast(Token, self.peeked)) + return typing.cast(Token, self.peeked) else: next = self.next_token() self.used.append(next) return next - def peek(self): + def peek(self) -> Token: if not self._peeking: self.peeked = self.next_token() self._peeking = True - return self.peeked + return typing.cast(Token, self.peeked) - def next_ident(self): + def next_ident(self) -> str: next = self.next() if next.type != "IDENT": raise SelectorSyntaxError("Expected ident, got %s" % (next,)) - return next.value + return typing.cast(str, next.value) - def next_ident_or_star(self): + def next_ident_or_star(self) -> Optional[str]: 
next = self.next() if next.type == "IDENT": return next.value @@ -989,7 +1056,7 @@ def next_ident_or_star(self): else: raise SelectorSyntaxError("Expected ident or '*', got %s" % (next,)) - def skip_whitespace(self): + def skip_whitespace(self) -> None: peek = self.peek() if peek.type == "S": self.next() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 623b24c..2d1ce37 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -13,14 +13,28 @@ """ import re - -from cssselect.parser import parse, parse_series, SelectorError - - -def _unicode_safe_getattr(obj, name, default=None): - # getattr() with a non-ASCII name fails on Python 2.x - name = name.encode("ascii", "replace").decode("ascii") - return getattr(obj, name, default) +import typing +from typing import Optional + +from cssselect.parser import ( + parse, + parse_series, + PseudoElement, + Selector, + SelectorError, + Tree, + Element, + Hash, + Class, + Function, + Pseudo, + Attrib, + Negation, + Relation, + Matching, + SpecificityAdjustment, + CombinedSelector, +) class ExpressionError(SelectorError, RuntimeError): @@ -31,42 +45,50 @@ class ExpressionError(SelectorError, RuntimeError): class XPathExpr: - def __init__(self, path="", element="*", condition="", star_prefix=False): + def __init__( + self, path: str = "", element: str = "*", condition: str = "", star_prefix: bool = False + ) -> None: self.path = path self.element = element self.condition = condition - def __str__(self): + def __str__(self) -> str: path = str(self.path) + str(self.element) if self.condition: path += "[%s]" % self.condition return path - def __repr__(self): + def __repr__(self) -> str: return "%s[%s]" % (self.__class__.__name__, self) - def add_condition(self, condition, conjuction="and"): + def add_condition(self, condition: str, conjuction: str = "and") -> "XPathExpr": if self.condition: self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition) else: self.condition = condition return self - def 
add_name_test(self): + def add_name_test(self) -> None: if self.element == "*": # We weren't doing a test anyway return self.add_condition("name() = %s" % GenericTranslator.xpath_literal(self.element)) self.element = "*" - def add_star_prefix(self): + def add_star_prefix(self) -> None: """ Append '*/' to the path to keep the context constrained to a single parent. """ self.path += "*/" - def join(self, combiner, other, closing_combiner=None, has_inner_condition=False): + def join( + self, + combiner: str, + other: "XPathExpr", + closing_combiner: Optional[str] = None, + has_inner_condition: bool = False, + ) -> "XPathExpr": path = str(self) + combiner # Any "star prefix" is redundant when joining. if other.path != "*/": @@ -166,7 +188,7 @@ class GenericTranslator: # class used to represent and xpath expression xpathexpr_cls = XPathExpr - def css_to_xpath(self, css, prefix="descendant-or-self::"): + def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str: """Translate a *group of selectors* to XPath. Pseudo-elements are not supported here since XPath only knows @@ -191,8 +213,11 @@ def css_to_xpath(self, css, prefix="descendant-or-self::"): ) def selector_to_xpath( - self, selector, prefix="descendant-or-self::", translate_pseudo_elements=False - ): + self, + selector: Selector, + prefix: str = "descendant-or-self::", + translate_pseudo_elements: bool = False, + ) -> str: """Translate a parsed selector to XPath. @@ -222,7 +247,7 @@ def selector_to_xpath( xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) return (prefix or "") + str(xpath) - def xpath_pseudo_element(self, xpath, pseudo_element): + def xpath_pseudo_element(self, xpath: XPathExpr, pseudo_element: PseudoElement) -> XPathExpr: """Translate a pseudo-element. 
Defaults to not supporting pseudo-elements at all, @@ -232,7 +257,7 @@ def xpath_pseudo_element(self, xpath, pseudo_element): raise ExpressionError("Pseudo-elements are not supported.") @staticmethod - def xpath_literal(s): + def xpath_literal(s: str) -> str: s = str(s) if "'" not in s: s = "'%s'" % s @@ -248,23 +273,25 @@ def xpath_literal(s): ) return s - def xpath(self, parsed_selector): + def xpath(self, parsed_selector: Tree) -> XPathExpr: """Translate any parsed selector object.""" type_name = type(parsed_selector).__name__ method = getattr(self, "xpath_%s" % type_name.lower(), None) if method is None: raise ExpressionError("%s is not supported." % type_name) - return method(parsed_selector) + return typing.cast(XPathExpr, method(parsed_selector)) # Dispatched by parsed object type - def xpath_combinedselector(self, combined): + def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr: """Translate a combined selector.""" combinator = self.combinator_mapping[combined.combinator] method = getattr(self, "xpath_%s_combinator" % combinator) - return method(self.xpath(combined.selector), self.xpath(combined.subselector)) + return typing.cast( + XPathExpr, method(self.xpath(combined.selector), self.xpath(combined.subselector)) + ) - def xpath_negation(self, negation): + def xpath_negation(self, negation: Negation) -> XPathExpr: xpath = self.xpath(negation.selector) sub_xpath = self.xpath(negation.subselector) sub_xpath.add_name_test() @@ -273,18 +300,19 @@ def xpath_negation(self, negation): else: return xpath.add_condition("0") - def xpath_relation(self, relation): + def xpath_relation(self, relation: Relation) -> XPathExpr: xpath = self.xpath(relation.selector) combinator = relation.combinator subselector = relation.subselector right = self.xpath(subselector.parsed_tree) method = getattr( self, - "xpath_relation_%s_combinator" % self.combinator_mapping[combinator.value], + "xpath_relation_%s_combinator" + % 
self.combinator_mapping[typing.cast(str, combinator.value)], ) - return method(xpath, right) + return typing.cast(XPathExpr, method(xpath, right)) - def xpath_matching(self, matching): + def xpath_matching(self, matching: Matching) -> XPathExpr: xpath = self.xpath(matching.selector) exprs = [self.xpath(selector) for selector in matching.selector_list] for e in exprs: @@ -293,7 +321,7 @@ def xpath_matching(self, matching): xpath.add_condition(e.condition, "or") return xpath - def xpath_specificityadjustment(self, matching): + def xpath_specificityadjustment(self, matching: SpecificityAdjustment) -> XPathExpr: xpath = self.xpath(matching.selector) exprs = [self.xpath(selector) for selector in matching.selector_list] for e in exprs: @@ -302,24 +330,24 @@ def xpath_specificityadjustment(self, matching): xpath.add_condition(e.condition, "or") return xpath - def xpath_function(self, function): + def xpath_function(self, function: Function) -> XPathExpr: """Translate a functional pseudo-class.""" - method = "xpath_%s_function" % function.name.replace("-", "_") - method = _unicode_safe_getattr(self, method, None) + method_name = "xpath_%s_function" % function.name.replace("-", "_") + method = getattr(self, method_name, None) if not method: raise ExpressionError("The pseudo-class :%s() is unknown" % function.name) - return method(self.xpath(function.selector), function) + return typing.cast(XPathExpr, method(self.xpath(function.selector), function)) - def xpath_pseudo(self, pseudo): + def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr: """Translate a pseudo-class.""" - method = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_") - method = _unicode_safe_getattr(self, method, None) + method_name = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_") + method = getattr(self, method_name, None) if not method: # TODO: better error message for pseudo-elements? 
raise ExpressionError("The pseudo-class :%s is unknown" % pseudo.ident) - return method(self.xpath(pseudo.selector)) + return typing.cast(XPathExpr, method(self.xpath(pseudo.selector))) - def xpath_attrib(self, selector): + def xpath_attrib(self, selector: Attrib) -> XPathExpr: """Translate an attribute selector.""" operator = self.attribute_operator_mapping[selector.operator] method = getattr(self, "xpath_attrib_%s" % operator) @@ -338,37 +366,37 @@ def xpath_attrib(self, selector): if selector.value is None: value = None elif self.lower_case_attribute_values: - value = selector.value.value.lower() + value = typing.cast(str, selector.value.value).lower() else: value = selector.value.value - return method(self.xpath(selector.selector), attrib, value) + return typing.cast(XPathExpr, method(self.xpath(selector.selector), attrib, value)) - def xpath_class(self, class_selector): + def xpath_class(self, class_selector: Class) -> XPathExpr: """Translate a class selector.""" # .foo is defined as [class~=foo] in the spec. xpath = self.xpath(class_selector.selector) return self.xpath_attrib_includes(xpath, "@class", class_selector.class_name) - def xpath_hash(self, id_selector): + def xpath_hash(self, id_selector: Hash) -> XPathExpr: """Translate an ID selector.""" xpath = self.xpath(id_selector.selector) return self.xpath_attrib_equals(xpath, "@id", id_selector.id) - def xpath_element(self, selector): + def xpath_element(self, selector: Element) -> XPathExpr: """Translate a type or universal selector.""" element = selector.element if not element: element = "*" safe = True else: - safe = is_safe_name(element) + safe = bool(is_safe_name(element)) if self.lower_case_element_names: element = element.lower() if selector.namespace: # Namespace prefixes are case-sensitive. 
# http://www.w3.org/TR/css3-namespace/#prefixes element = "%s:%s" % (selector.namespace, element) - safe = safe and is_safe_name(selector.namespace) + safe = safe and bool(is_safe_name(selector.namespace)) xpath = self.xpathexpr_cls(element=element) if not safe: xpath.add_name_test() @@ -376,46 +404,52 @@ def xpath_element(self, selector): # CombinedSelector: dispatch by combinator - def xpath_descendant_combinator(self, left, right): + def xpath_descendant_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: """right is a child, grand-child or further descendant of left""" return left.join("/descendant-or-self::*/", right) - def xpath_child_combinator(self, left, right): + def xpath_child_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: """right is an immediate child of left""" return left.join("/", right) - def xpath_direct_adjacent_combinator(self, left, right): + def xpath_direct_adjacent_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: """right is a sibling immediately after left""" xpath = left.join("/following-sibling::", right) xpath.add_name_test() return xpath.add_condition("position() = 1") - def xpath_indirect_adjacent_combinator(self, left, right): + def xpath_indirect_adjacent_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: """right is a sibling after left, immediately or not""" return left.join("/following-sibling::", right) - def xpath_relation_descendant_combinator(self, left, right): + def xpath_relation_descendant_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: """right is a child, grand-child or further descendant of left; select left""" return left.join("[descendant::", right, closing_combiner="]", has_inner_condition=True) - def xpath_relation_child_combinator(self, left, right): + def xpath_relation_child_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: """right is an immediate child of left; select left""" return left.join("[./", right, 
closing_combiner="]") - def xpath_relation_direct_adjacent_combinator(self, left, right): + def xpath_relation_direct_adjacent_combinator( + self, left: XPathExpr, right: XPathExpr + ) -> XPathExpr: """right is a sibling immediately after left; select left""" xpath = left.add_condition( "following-sibling::*[(name() = '{}') and (position() = 1)]".format(right.element) ) return xpath - def xpath_relation_indirect_adjacent_combinator(self, left, right): + def xpath_relation_indirect_adjacent_combinator( + self, left: XPathExpr, right: XPathExpr + ) -> XPathExpr: """right is a sibling after left, immediately or not; select left""" return left.join("[following-sibling::", right, closing_combiner="]") # Function: dispatch by function/pseudo-class name - def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=True): + def xpath_nth_child_function( + self, xpath: XPathExpr, function: Function, last: bool = False, add_name_test: bool = True + ) -> XPathExpr: try: a, b = parse_series(function.arguments) except ValueError: @@ -534,8 +568,8 @@ def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=Tr b_neg = (-b_min_1) % abs(a) if b_neg != 0: - b_neg = "+%s" % b_neg - left = "(%s %s)" % (left, b_neg) + b_neg_as_str = "+%s" % b_neg + left = "(%s %s)" % (left, b_neg_as_str) expressions.append("%s mod %s = 0" % (left, a)) @@ -546,40 +580,40 @@ def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=Tr xpath.add_condition(" and ".join(template % expression for expression in expressions)) return xpath - def xpath_nth_last_child_function(self, xpath, function): + def xpath_nth_last_child_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: return self.xpath_nth_child_function(xpath, function, last=True) - def xpath_nth_of_type_function(self, xpath, function): + def xpath_nth_of_type_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: if xpath.element == "*": raise 
ExpressionError("*:nth-of-type() is not implemented") return self.xpath_nth_child_function(xpath, function, add_name_test=False) - def xpath_nth_last_of_type_function(self, xpath, function): + def xpath_nth_last_of_type_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:nth-of-type() is not implemented") return self.xpath_nth_child_function(xpath, function, last=True, add_name_test=False) - def xpath_contains_function(self, xpath, function): + def xpath_contains_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: # Defined there, removed in later drafts: # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( "Expected a single string or ident for :contains(), got %r" % function.arguments ) - value = function.arguments[0].value + value = typing.cast(str, function.arguments[0].value) return xpath.add_condition("contains(., %s)" % self.xpath_literal(value)) - def xpath_lang_function(self, xpath, function): + def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( "Expected a single string or ident for :lang(), got %r" % function.arguments ) - value = function.arguments[0].value + value = typing.cast(str, function.arguments[0].value) return xpath.add_condition("lang(%s)" % (self.xpath_literal(value))) # Pseudo: dispatch by pseudo-class name - def xpath_root_pseudo(self, xpath): + def xpath_root_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition("not(parent::*)") # CSS immediate children (CSS ":scope > div" to XPath "child::div" or "./div") @@ -587,37 +621,37 @@ def xpath_root_pseudo(self, xpath): # Needed to get immediate children of a processed selector in Scrapy # for product in response.css('.product'): # description = product.css(':scope > 
div::text').get() - def xpath_scope_pseudo(self, xpath): + def xpath_scope_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition("1") - def xpath_first_child_pseudo(self, xpath): + def xpath_first_child_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition("count(preceding-sibling::*) = 0") - def xpath_last_child_pseudo(self, xpath): + def xpath_last_child_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition("count(following-sibling::*) = 0") - def xpath_first_of_type_pseudo(self, xpath): + def xpath_first_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:first-of-type is not implemented") return xpath.add_condition("count(preceding-sibling::%s) = 0" % xpath.element) - def xpath_last_of_type_pseudo(self, xpath): + def xpath_last_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:last-of-type is not implemented") return xpath.add_condition("count(following-sibling::%s) = 0" % xpath.element) - def xpath_only_child_pseudo(self, xpath): + def xpath_only_child_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition("count(parent::*/child::*) = 1") - def xpath_only_of_type_pseudo(self, xpath): + def xpath_only_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:only-of-type is not implemented") return xpath.add_condition("count(parent::*/child::%s) = 1" % xpath.element) - def xpath_empty_pseudo(self, xpath): + def xpath_empty_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition("not(*) and not(string-length())") - def pseudo_never_matches(self, xpath): + def pseudo_never_matches(self, xpath: XPathExpr) -> XPathExpr: """Common implementation for pseudo-classes that never match.""" return xpath.add_condition("0") @@ -633,16 +667,20 @@ def pseudo_never_matches(self, xpath): # Attrib: dispatch by attribute operator - def 
xpath_attrib_exists(self, xpath, name, value): + def xpath_attrib_exists(self, xpath: XPathExpr, name: str, value: Optional[str]) -> XPathExpr: assert not value xpath.add_condition(name) return xpath - def xpath_attrib_equals(self, xpath, name, value): + def xpath_attrib_equals(self, xpath: XPathExpr, name: str, value: Optional[str]) -> XPathExpr: + assert value xpath.add_condition("%s = %s" % (name, self.xpath_literal(value))) return xpath - def xpath_attrib_different(self, xpath, name, value): + def xpath_attrib_different( + self, xpath: XPathExpr, name: str, value: Optional[str] + ) -> XPathExpr: + assert value # FIXME: this seems like a weird hack... if value: xpath.add_condition("not(%s) or %s != %s" % (name, name, self.xpath_literal(value))) @@ -650,8 +688,10 @@ def xpath_attrib_different(self, xpath, name, value): xpath.add_condition("%s != %s" % (name, self.xpath_literal(value))) return xpath - def xpath_attrib_includes(self, xpath, name, value): - if is_non_whitespace(value): + def xpath_attrib_includes( + self, xpath: XPathExpr, name: str, value: Optional[str] + ) -> XPathExpr: + if value and is_non_whitespace(value): xpath.add_condition( "%s and contains(concat(' ', normalize-space(%s), ' '), %s)" % (name, name, self.xpath_literal(" " + value + " ")) @@ -660,7 +700,10 @@ def xpath_attrib_includes(self, xpath, name, value): xpath.add_condition("0") return xpath - def xpath_attrib_dashmatch(self, xpath, name, value): + def xpath_attrib_dashmatch( + self, xpath: XPathExpr, name: str, value: Optional[str] + ) -> XPathExpr: + assert value # Weird, but true... 
xpath.add_condition( "%s and (%s = %s or starts-with(%s, %s))" @@ -668,7 +711,9 @@ def xpath_attrib_dashmatch(self, xpath, name, value): ) return xpath - def xpath_attrib_prefixmatch(self, xpath, name, value): + def xpath_attrib_prefixmatch( + self, xpath: XPathExpr, name: str, value: Optional[str] + ) -> XPathExpr: if value: xpath.add_condition( "%s and starts-with(%s, %s)" % (name, name, self.xpath_literal(value)) @@ -677,7 +722,9 @@ def xpath_attrib_prefixmatch(self, xpath, name, value): xpath.add_condition("0") return xpath - def xpath_attrib_suffixmatch(self, xpath, name, value): + def xpath_attrib_suffixmatch( + self, xpath: XPathExpr, name: str, value: Optional[str] + ) -> XPathExpr: if value: # Oddly there is a starts-with in XPath 1.0, but not ends-with xpath.add_condition( @@ -688,7 +735,9 @@ def xpath_attrib_suffixmatch(self, xpath, name, value): xpath.add_condition("0") return xpath - def xpath_attrib_substringmatch(self, xpath, name, value): + def xpath_attrib_substringmatch( + self, xpath: XPathExpr, name: str, value: Optional[str] + ) -> XPathExpr: if value: # Attribute selectors are case sensitive xpath.add_condition( @@ -718,14 +767,14 @@ class HTMLTranslator(GenericTranslator): lang_attribute = "lang" - def __init__(self, xhtml=False): + def __init__(self, xhtml: bool = False) -> None: self.xhtml = xhtml # Might be useful for sub-classes? if not xhtml: # See their definition in GenericTranslator. self.lower_case_element_names = True self.lower_case_attribute_names = True - def xpath_checked_pseudo(self, xpath): + def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore # FIXME: is this really all the elements? return xpath.add_condition( "(@selected and name(.) 
= 'option') or " @@ -734,12 +783,13 @@ def xpath_checked_pseudo(self, xpath): "and (@type = 'checkbox' or @type = 'radio'))" ) - def xpath_lang_function(self, xpath, function): + def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( "Expected a single string or ident for :lang(), got %r" % function.arguments ) value = function.arguments[0].value + assert value return xpath.add_condition( "ancestor-or-self::*[@lang][1][starts-with(concat(" # XPath 1.0 has no lower-case function... @@ -748,7 +798,7 @@ def xpath_lang_function(self, xpath, function): "'-'), %s)]" % (self.lang_attribute, self.xpath_literal(value.lower() + "-")) ) - def xpath_link_pseudo(self, xpath): + def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore return xpath.add_condition( "@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area')" ) @@ -756,7 +806,7 @@ def xpath_link_pseudo(self, xpath): # Links are never visited, the implementation for :visited is the same # as in GenericTranslator - def xpath_disabled_pseudo(self, xpath): + def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore # http://www.w3.org/TR/html5/section-index.html#attributes-1 return xpath.add_condition( """ @@ -786,7 +836,7 @@ def xpath_disabled_pseudo(self, xpath): # FIXME: in the second half, add "and is not a descendant of that # fieldset element's first legend element child, if any." 
- def xpath_enabled_pseudo(self, xpath): + def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore # http://www.w3.org/TR/html5/section-index.html#attributes-1 return xpath.add_condition( """ diff --git a/docs/conf.py b/docs/conf.py index 19730c1..d63672f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -253,3 +253,7 @@ # --- Nitpicking options ------------------------------------------------------ nitpicky = True +nitpick_ignore = [ + # explicitly not a part of the public API + ('py:class', 'cssselect.parser.Token'), +] diff --git a/py.typed b/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index d8521d6..2c9e94c 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -18,7 +18,9 @@ """ import sys +import typing import unittest +from typing import List, Optional, Sequence, Tuple from lxml import etree, html from cssselect import ( @@ -28,12 +30,19 @@ SelectorSyntaxError, ExpressionError, ) -from cssselect.parser import tokenize, parse_series, FunctionalPseudoElement -from cssselect.xpath import _unicode_safe_getattr, XPathExpr +from cssselect.parser import ( + tokenize, + parse_series, + PseudoElement, + FunctionalPseudoElement, + Function, + Token, +) +from cssselect.xpath import XPathExpr class TestCssselect(unittest.TestCase): - def test_tokenizer(self): + def test_tokenizer(self) -> None: tokens = [str(item) for item in tokenize(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)')] assert tokens == [ "", @@ -56,14 +65,14 @@ def test_tokenizer(self): "", ] - def test_parser(self): - def repr_parse(css): + def test_parser(self) -> None: + def repr_parse(css: str) -> List[str]: selectors = parse(css) for selector in selectors: assert selector.pseudo_element is None return [repr(selector.parsed_tree).replace("(u'", "('") for selector in selectors] - def parse_many(first, *others): + def parse_many(first: str, *others: str) -> List[str]: result = repr_parse(first) for 
other in others: assert repr_parse(other) == result @@ -160,24 +169,24 @@ def parse_many(first, *others): "Hash[Element[*]#foo]] Hash[Element[*]#bar]]" ] - def test_pseudo_elements(self): - def parse_pseudo(css): - result = [] + def test_pseudo_elements(self) -> None: + def parse_pseudo(css: str) -> List[Tuple[str, Optional[str]]]: + result: List[Tuple[str, Optional[str]]] = [] for selector in parse(css): pseudo = selector.pseudo_element pseudo = str(pseudo) if pseudo else pseudo # No Symbol here assert pseudo is None or type(pseudo) is str - selector = repr(selector.parsed_tree).replace("(u'", "('") - result.append((selector, pseudo)) + selector_as_str = repr(selector.parsed_tree).replace("(u'", "('") + result.append((selector_as_str, pseudo)) return result - def parse_one(css): + def parse_one(css: str) -> Tuple[str, Optional[str]]: result = parse_pseudo(css) assert len(result) == 1 return result[0] - def test_pseudo_repr(css): + def test_pseudo_repr(css: str) -> str: result = parse(css) assert len(result) == 1 selector = result[0] @@ -252,8 +261,8 @@ def test_pseudo_repr(css): assert test_pseudo_repr(":fİrst-child") == "Pseudo[Element[*]:fİrst-child]" assert test_pseudo_repr(":scope") == "Pseudo[Element[*]:scope]" - def test_specificity(self): - def specificity(css): + def test_specificity(self) -> None: + def specificity(css: str) -> Tuple[int, int, int]: selectors = parse(css) assert len(selectors) == 1 return selectors[0].specificity() @@ -294,8 +303,8 @@ def specificity(css): assert specificity("#lorem + foo#ipsum:first-child > bar:first-line") == (2, 1, 3) - def test_css_export(self): - def css2css(css, res=None): + def test_css_export(self) -> None: + def css2css(css: str, res: Optional[str] = None) -> None: selectors = parse(css) assert len(selectors) == 1 assert selectors[0].canonical() == (res or css) @@ -333,13 +342,14 @@ def css2css(css, res=None): css2css("#lorem + foo#ipsum:first-child > bar::first-line") css2css("foo > *") - def 
test_parse_errors(self): - def get_error(css): + def test_parse_errors(self) -> None: + def get_error(css: str) -> Optional[str]: try: parse(css) except SelectorSyntaxError: # Py2, Py3, ... return str(sys.exc_info()[1]).replace("(u'", "('") + return None assert get_error("attributes(href)/html/body/a") == ( "Expected selector, got " @@ -394,8 +404,8 @@ def get_error(css): assert get_error(":has(a, b)") == ("Expected an argument, got ") assert get_error(":has()") == ("Expected selector, got ") - def test_translation(self): - def xpath(css): + def test_translation(self) -> None: + def xpath(css: str) -> str: return str(GenericTranslator().css_to_xpath(css, prefix="")) assert xpath("*") == "*" @@ -517,7 +527,7 @@ def xpath(css): self.assertRaises(TypeError, GenericTranslator().css_to_xpath, 4) self.assertRaises(TypeError, GenericTranslator().selector_to_xpath, "foo") - def test_unicode(self): + def test_unicode(self) -> None: css = ".a\xc1b" xpath = GenericTranslator().css_to_xpath(css) assert css[1:] in xpath @@ -527,7 +537,7 @@ def test_unicode(self): "concat(' ', normalize-space(@class), ' '), ' aÁb ')]" ) - def test_quoting(self): + def test_quoting(self) -> None: css_to_xpath = GenericTranslator().css_to_xpath assert css_to_xpath('*[aval="\'"]') == ("""descendant-or-self::*[@aval = "'"]""") assert css_to_xpath("*[aval=\"'''\"]") == ("""descendant-or-self::*[@aval = "'''"]""") @@ -537,7 +547,7 @@ def test_quoting(self): "descendant-or-self::*[1]/div[@dataimg = '']" ) - def test_unicode_escapes(self): + def test_unicode_escapes(self) -> None: # \22 == '"' \20 == ' ' css_to_xpath = GenericTranslator().css_to_xpath assert css_to_xpath(r'*[aval="\'\22\'"]') == ( @@ -553,22 +563,26 @@ def test_unicode_escapes(self): """descendant-or-self::*[@aval = "' '"]""" ) - def test_xpath_pseudo_elements(self): + def test_xpath_pseudo_elements(self) -> None: class CustomTranslator(GenericTranslator): - def xpath_pseudo_element(self, xpath, pseudo_element): + def 
xpath_pseudo_element( + self, xpath: XPathExpr, pseudo_element: PseudoElement + ) -> XPathExpr: if isinstance(pseudo_element, FunctionalPseudoElement): - method = "xpath_%s_functional_pseudo_element" % ( + method_name = "xpath_%s_functional_pseudo_element" % ( pseudo_element.name.replace("-", "_") ) - method = _unicode_safe_getattr(self, method, None) + method = getattr(self, method_name, None) if not method: raise ExpressionError( "The functional pseudo-element ::%s() is unknown" % pseudo_element.name ) xpath = method(xpath, pseudo_element.arguments) else: - method = "xpath_%s_simple_pseudo_element" % (pseudo_element.replace("-", "_")) - method = _unicode_safe_getattr(self, method, None) + method_name = "xpath_%s_simple_pseudo_element" % ( + pseudo_element.replace("-", "_") + ) + method = getattr(self, method_name, None) if not method: raise ExpressionError( "The pseudo-element ::%s is unknown" % pseudo_element @@ -578,18 +592,21 @@ def xpath_pseudo_element(self, xpath, pseudo_element): # functional pseudo-class: # elements that have a certain number of attributes - def xpath_nb_attr_function(self, xpath, function): + def xpath_nb_attr_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: + assert function.arguments[0].value nb_attributes = int(function.arguments[0].value) return xpath.add_condition("count(@*)=%d" % nb_attributes) # pseudo-class: # elements that have 5 attributes - def xpath_five_attributes_pseudo(self, xpath): + def xpath_five_attributes_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition("count(@*)=5") # functional pseudo-element: # element's attribute by name - def xpath_attr_functional_pseudo_element(self, xpath, arguments): + def xpath_attr_functional_pseudo_element( + self, xpath: XPathExpr, arguments: Sequence[Token] + ) -> XPathExpr: attribute_name = arguments[0].value other = XPathExpr( "@%s" % attribute_name, @@ -599,7 +616,7 @@ def xpath_attr_functional_pseudo_element(self, xpath, arguments): # 
pseudo-element: # element's text() nodes - def xpath_text_node_simple_pseudo_element(self, xpath): + def xpath_text_node_simple_pseudo_element(self, xpath: XPathExpr) -> XPathExpr: other = XPathExpr( "text()", "", @@ -608,7 +625,7 @@ def xpath_text_node_simple_pseudo_element(self, xpath): # pseudo-element: # element's href attribute - def xpath_attr_href_simple_pseudo_element(self, xpath): + def xpath_attr_href_simple_pseudo_element(self, xpath: XPathExpr) -> XPathExpr: other = XPathExpr( "@href", "", @@ -617,10 +634,10 @@ def xpath_attr_href_simple_pseudo_element(self, xpath): # pseudo-element: # used to demonstrate operator precedence - def xpath_first_or_second_pseudo(self, xpath): + def xpath_first_or_second_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition("@id = 'first' or @id = 'second'") - def xpath(css): + def xpath(css: str) -> str: return str(CustomTranslator().css_to_xpath(css)) assert xpath(":five-attributes") == "descendant-or-self::*[count(@*)=5]" @@ -639,11 +656,11 @@ def xpath(css): assert str(XPathExpr("", "", condition="@href")) == "[@href]" document = etree.fromstring(OPERATOR_PRECEDENCE_IDS) - sort_key = dict((el, count) for count, el in enumerate(document.getiterator())).__getitem__ + sort_key = dict((el, count) for count, el in enumerate(document.iter())).__getitem__ - def operator_id(selector): + def operator_id(selector: str) -> List[str]: xpath = CustomTranslator().css_to_xpath(selector) - items = document.xpath(xpath) + items = typing.cast(List["etree._Element"], document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] @@ -651,10 +668,10 @@ def operator_id(selector): assert operator_id(":first-or-second[href]") == ["second"] assert operator_id("[href]:first-or-second") == ["second"] - def test_series(self): - def series(css): + def test_series(self) -> None: + def series(css: str) -> Optional[Tuple[int, int]]: (selector,) = parse(":nth-child(%s)" % css) - args = 
selector.parsed_tree.arguments + args = typing.cast(FunctionalPseudoElement, selector.parsed_tree).arguments try: return parse_series(args) except ValueError: @@ -679,14 +696,14 @@ def series(css): assert series("foo") is None assert series("n+") is None - def test_lang(self): + def test_lang(self) -> None: document = etree.fromstring(XMLLANG_IDS) - sort_key = dict((el, count) for count, el in enumerate(document.getiterator())).__getitem__ + sort_key = dict((el, count) for count, el in enumerate(document.iter())).__getitem__ css_to_xpath = GenericTranslator().css_to_xpath - def langid(selector): + def langid(selector: str) -> List[str]: xpath = css_to_xpath(selector) - items = document.xpath(xpath) + items = typing.cast(List["etree._Element"], document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] @@ -700,46 +717,51 @@ def langid(selector): assert langid(":lang(en), :lang(zh)") == ["first", "second", "third", "fourth", "eighth"] assert langid(":lang(es)") == [] - def test_argument_types(self): + def test_argument_types(self) -> None: class CustomTranslator(GenericTranslator): - def __init__(self): - self.argument_types = [] - - def xpath_pseudo_element(self, xpath, pseudo_element): - self.argument_types += pseudo_element.argument_types() + def __init__(self) -> None: + self.argument_types: List[str] = [] + + def xpath_pseudo_element( + self, xpath: XPathExpr, pseudo_element: PseudoElement + ) -> XPathExpr: + self.argument_types += typing.cast( + FunctionalPseudoElement, pseudo_element + ).argument_types() + return xpath - def argument_types(css): + def argument_types(css: str) -> List[str]: translator = CustomTranslator() translator.css_to_xpath(css) return translator.argument_types - mappings = ( + mappings: List[Tuple[str, List[str]]] = [ ("", []), ("ident", ["IDENT"]), ('"string"', ["STRING"]), ("1", ["NUMBER"]), - ) + ] for argument_string, argument_list in mappings: - css = 
"::pseudo_element({})".format(argument_string) + css = f"::pseudo_element({argument_string})" assert argument_types(css) == argument_list - def test_select(self): + def test_select(self) -> None: document = etree.fromstring(HTML_IDS) - sort_key = dict((el, count) for count, el in enumerate(document.getiterator())).__getitem__ + sort_key = dict((el, count) for count, el in enumerate(document.iter())).__getitem__ css_to_xpath = GenericTranslator().css_to_xpath html_css_to_xpath = HTMLTranslator().css_to_xpath - def select_ids(selector, html_only): + def select_ids(selector: str, html_only: bool) -> List[str]: xpath = css_to_xpath(selector) - items = document.xpath(xpath) + items = typing.cast(List["etree._Element"], document.xpath(xpath)) if html_only: assert items == [] xpath = html_css_to_xpath(selector) - items = document.xpath(xpath) + items = typing.cast(List["etree._Element"], document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] - def pcss(main, *selectors, **kwargs): + def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: html_only = kwargs.pop("html_only", False) result = select_ids(main, html_only) for selector in selectors: @@ -948,16 +970,16 @@ def pcss(main, *selectors, **kwargs): "checkbox-disabled-checked", ] - def test_select_shakespeare(self): + def test_select_shakespeare(self) -> None: document = html.document_fromstring(HTML_SHAKESPEARE) - body = document.xpath("//body")[0] + body = typing.cast(List["etree._Element"], document.xpath("//body"))[0] css_to_xpath = GenericTranslator().css_to_xpath basestring_ = (str, bytes) - def count(selector): + def count(selector: str) -> int: xpath = css_to_xpath(selector) - results = body.xpath(xpath) + results = typing.cast(List["etree._Element"], body.xpath(xpath)) assert not isinstance(results, basestring_) found = set() for item in results: diff --git a/tox.ini b/tox.ini index c62a09c..a400382 100644 --- a/tox.ini +++ b/tox.ini @@ -44,3 +44,11 @@ 
deps = sphinx_rtd_theme commands = sphinx-build -W -b html . {envtmpdir}/html + +[testenv:typing] +deps = + {[testenv]deps} + lxml-stubs==0.4.0 + mypy==0.982 +commands = + mypy --strict {posargs: cssselect tests} From 0d5e3809b146b5cf624604595f5b0ebec5dfb469 Mon Sep 17 00:00:00 2001 From: Andrey Rahmatullin Date: Tue, 25 Oct 2022 15:30:25 +0500 Subject: [PATCH 164/208] Allow empty strings in asserts (#130) --- cssselect/xpath.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 2d1ce37..f51cfb4 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -673,14 +673,14 @@ def xpath_attrib_exists(self, xpath: XPathExpr, name: str, value: Optional[str]) return xpath def xpath_attrib_equals(self, xpath: XPathExpr, name: str, value: Optional[str]) -> XPathExpr: - assert value + assert value is not None xpath.add_condition("%s = %s" % (name, self.xpath_literal(value))) return xpath def xpath_attrib_different( self, xpath: XPathExpr, name: str, value: Optional[str] ) -> XPathExpr: - assert value + assert value is not None # FIXME: this seems like a weird hack... if value: xpath.add_condition("not(%s) or %s != %s" % (name, name, self.xpath_literal(value))) @@ -703,7 +703,7 @@ def xpath_attrib_includes( def xpath_attrib_dashmatch( self, xpath: XPathExpr, name: str, value: Optional[str] ) -> XPathExpr: - assert value + assert value is not None # Weird, but true... xpath.add_condition( "%s and (%s = %s or starts-with(%s, %s))" From 4e80ef0d64fbc1509fbfb68bf15fdf2ae770dce6 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 25 Oct 2022 17:05:37 +0600 Subject: [PATCH 165/208] Reformat a long line. 
--- docs/index.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index c7f0c1a..52fd598 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -108,7 +108,10 @@ in the Level 3 specification: * ``:not()`` accepts a *sequence of simple selectors*, not just single *simple selector*. For example, ``:not(a.important[rel])`` is allowed, even though the negation contains 3 *simple selectors*. -* ``:scope`` allows to access immediate children of a selector: ``product.css(':scope > div::text')``, simillar to XPath ``child::div``. Must be used at the start of a selector. Simplified version of `level 4 reference`_. +* ``:scope`` allows to access immediate children of a selector: + ``product.css(':scope > div::text')``, simillar to XPath ``child::div``. Must + be used at the start of a selector. Simplified version of + `level 4 reference`_. .. _an early draft: http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors .. _level 4 reference: https://developer.mozilla.org/en-US/docs/Web/CSS/:scope From b7954f98bf1203bca7a70003d0d861c6f4cd771c Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 25 Oct 2022 17:14:09 +0600 Subject: [PATCH 166/208] Release notes for 1.2.0. --- CHANGES | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 12413df..e4d7238 100644 --- a/CHANGES +++ b/CHANGES @@ -1,13 +1,34 @@ Changelog ========= -Version 1.x.x +Version 1.2.0 ------------- Unreleased. +* Drop support for Python 2.7, 3.4-3.6, add support for Python 3.7-3.11. + * Add type annotations (PEP 484 and PEP 561). +* Many CI additions and improvements. + +* Include tests in the PyPI tarball. + +* Improve the test coverage. + +* More features from the CSS Selectors Level 4: + + * The ``:is()`` pseudo-class. + + * The ``:where()`` pseudo-class. + + * The ``:has()`` pseudo-class, with some limitations. + +* Fix parsing ``:scope`` after a comma. 
+ +* Add parentheses to fix condition precedence in some cases. + + Version 1.1.0 ------------- From baf3f902edca3f18b346deb1d5a1cf64e265a4f7 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 26 Oct 2022 20:01:24 +0600 Subject: [PATCH 167/208] Describe Level 4 support. --- docs/index.rst | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 52fd598..a024f20 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -99,22 +99,29 @@ These applicable pseudo-classes are not yet implemented: you specify an element type, but not with ``*`` On the other hand, *cssselect* supports some selectors that are not -in the Level 3 specification: +in the Level 3 specification. + +These parts of the Level 4 specification are supported (note that a large part +of the Level 4 additions is not applicable to cssselect similarly to ``:hover`` +or not representable in XPath 1.0 so the complete specification is unlikely to +be implemented): + +* The ``:scope`` pseudo-class. Limitation: it can only be used at a start of a + selector. +* The ``:is()``, ``:where()`` and ``:has()`` pseudo-classes. Limitation: + ``:has()`` cannot contain nested ``:has()`` or ``:not()``. + +These are non-standard extensions: * The ``:contains(text)`` pseudo-class that existed in `an early draft`_ but was then removed. * The ``!=`` attribute operator. ``[foo!=bar]`` is the same as - ``:not([foo=bar])`` + ``:not([foo=bar])``. * ``:not()`` accepts a *sequence of simple selectors*, not just single *simple selector*. For example, ``:not(a.important[rel])`` is allowed, even though the negation contains 3 *simple selectors*. -* ``:scope`` allows to access immediate children of a selector: - ``product.css(':scope > div::text')``, simillar to XPath ``child::div``. Must - be used at the start of a selector. Simplified version of - `level 4 reference`_. .. 
_an early draft: http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors -.. _level 4 reference: https://developer.mozilla.org/en-US/docs/Web/CSS/:scope .. The following claim was copied from lxml: From e26aa4d87ed6416eca2514838647549a2e4c759b Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 26 Oct 2022 20:03:14 +0600 Subject: [PATCH 168/208] Replace "Unicode string" with just "string". --- cssselect/parser.py | 2 +- cssselect/xpath.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 3a5ec15..25a650c 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -519,7 +519,7 @@ def parse(css: str) -> List[Selector]: you can skip this and use :meth:`~GenericTranslator.css_to_xpath`. :param css: - A *group of selectors* as an Unicode string. + A *group of selectors* as a string. :raises: :class:`SelectorSyntaxError` on invalid selectors. :returns: diff --git a/cssselect/xpath.py b/cssselect/xpath.py index f51cfb4..61e0f7f 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -195,7 +195,7 @@ def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str: about "real" elements. :param css: - A *group of selectors* as an Unicode string. + A *group of selectors* as a string. :param prefix: This string is prepended to the XPath expression for each selector. The default makes selectors scoped to the context node’s subtree. @@ -204,7 +204,7 @@ def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str: :class:`ExpressionError` on unknown/unsupported selectors, including pseudo-elements. :returns: - The equivalent XPath 1.0 expression as an Unicode string. + The equivalent XPath 1.0 expression as a string. """ return " | ".join( @@ -235,7 +235,7 @@ def selector_to_xpath( :raises: :class:`ExpressionError` on unknown/unsupported selectors. :returns: - The equivalent XPath 1.0 expression as an Unicode string. 
+ The equivalent XPath 1.0 expression as a string. """ tree = getattr(selector, "parsed_tree", None) From d21b85d9b239dcbf64d8badc7b0f9f1051682800 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 26 Oct 2022 23:11:03 +0600 Subject: [PATCH 169/208] Fix installing py.typed. --- py.typed => cssselect/py.typed | 0 setup.py | 4 ++++ 2 files changed, 4 insertions(+) rename py.typed => cssselect/py.typed (100%) diff --git a/py.typed b/cssselect/py.typed similarity index 100% rename from py.typed rename to cssselect/py.typed diff --git a/setup.py b/setup.py index f34a1a7..48f3d40 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,10 @@ license="BSD", packages=["cssselect"], test_suite="cssselect.tests", + package_data={ + "cssselect": ["py.typed"], + }, + include_package_data=True, python_requires=">=3.7", classifiers=[ "Development Status :: 4 - Beta", From faa595c1948ddb2379f868894b530e61536cfac6 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Thu, 27 Oct 2022 15:00:43 +0600 Subject: [PATCH 170/208] Add a changelog entry about private API changes. --- CHANGES | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/CHANGES b/CHANGES index e4d7238..1fb0220 100644 --- a/CHANGES +++ b/CHANGES @@ -10,12 +10,6 @@ Unreleased. * Add type annotations (PEP 484 and PEP 561). -* Many CI additions and improvements. - -* Include tests in the PyPI tarball. - -* Improve the test coverage. - * More features from the CSS Selectors Level 4: * The ``:is()`` pseudo-class. @@ -28,6 +22,21 @@ Unreleased. * Add parentheses to fix condition precedence in some cases. +* Private API changes related to the removal of the Python 2 support: + + * Remove ``_unicode`` and ``_unichr`` aliases from ``csselect.parser``. + + * Remove ``_basestring`` and ``_unicode`` aliases from ``csselect.xpath``. + + * Deprecate ``csselect.xpath._unicode_safe_getattr()`` and change it to just + call ``getattr()``. + +* Include tests in the PyPI tarball. 
+ +* Many CI additions and improvements. + +* Improve the test coverage. + Version 1.1.0 ------------- From 2c7c1ea8e7be1309e6828bec36047db77af062b1 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Thu, 27 Oct 2022 15:03:12 +0600 Subject: [PATCH 171/208] Switch to the released 3.11. --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ca965bd..28a230f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9, "3.10", "3.11.0-rc.2"] + python-version: [3.7, 3.8, 3.9, "3.10", "3.11"] steps: - uses: actions/checkout@v2 From 60c61469b8cdb5010ad0f81ff24a1329653dde69 Mon Sep 17 00:00:00 2001 From: Andrey Rahmatullin Date: Thu, 27 Oct 2022 16:13:55 +0500 Subject: [PATCH 172/208] Restore and deprecate _unicode_safe_getattr (#133) --- cssselect/xpath.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index f51cfb4..2f546e6 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -14,6 +14,7 @@ import re import typing +import warnings from typing import Optional from cssselect.parser import ( @@ -37,6 +38,17 @@ ) +@typing.no_type_check +def _unicode_safe_getattr(obj, name, default=None): + warnings.warn( + "_unicode_safe_getattr is deprecated and will be removed in the" + " next release, use getattr() instead", + DeprecationWarning, + stacklevel=2, + ) + return getattr(obj, name, default) + + class ExpressionError(SelectorError, RuntimeError): """Unknown or unsupported selector (eg. 
pseudo-class).""" From 97cc51789b75f65492ec9e3c208d802ed07974f7 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Thu, 27 Oct 2022 18:32:50 +0600 Subject: [PATCH 173/208] =?UTF-8?q?Bump=20version:=201.1.0=20=E2=86=92=201?= =?UTF-8?q?.2.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 3 +-- CHANGES | 2 +- cssselect/__init__.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 122d3d4..56cfabc 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,7 +1,6 @@ [bumpversion] -current_version = 1.1.0 +current_version = 1.2.0 commit = True tag = True [bumpversion:file:cssselect/__init__.py] - diff --git a/CHANGES b/CHANGES index 1fb0220..dc38826 100644 --- a/CHANGES +++ b/CHANGES @@ -4,7 +4,7 @@ Changelog Version 1.2.0 ------------- -Unreleased. +Released on 2022-10-27. * Drop support for Python 2.7, 3.4-3.6, add support for Python 3.7-3.11. diff --git a/cssselect/__init__.py b/cssselect/__init__.py index f9e200d..77f028b 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -33,5 +33,5 @@ "SelectorSyntaxError", ) -VERSION = "1.1.0" +VERSION = "1.2.0" __version__ = VERSION From e4493e9a75ec4f74a0b408beb989f75f5b037eb9 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Thu, 27 Oct 2022 19:20:55 +0600 Subject: [PATCH 174/208] Fix the tag format in the publish action. 
--- .github/workflows/publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 7c0f8d0..977a42d 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -17,7 +17,7 @@ jobs: - name: Check Tag id: check-release-tag run: | - if [[ ${{ github.event.ref }} =~ ^refs/tags/[0-9]+[.][0-9]+[.][0-9]+(rc[0-9]+|[.]dev[0-9]+)?$ ]]; then + if [[ ${{ github.event.ref }} =~ ^refs/tags/v[0-9]+[.][0-9]+[.][0-9]+(rc[0-9]+|[.]dev[0-9]+)?$ ]]; then echo ::set-output name=release_tag::true fi From 43ef5331ccaede4df13d4242c7aaf01c2940869f Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Thu, 6 Jul 2023 09:54:38 +0400 Subject: [PATCH 175/208] Drop Python 3.7 support (#141) --- .github/workflows/tests.yml | 2 +- docs/conf.py | 2 +- setup.py | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 28a230f..369fd68 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9, "3.10", "3.11"] + python-version: [3.8, 3.9, "3.10", "3.11"] steps: - uses: actions/checkout@v2 diff --git a/docs/conf.py b/docs/conf.py index d63672f..5524479 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -247,7 +247,7 @@ # Example configuration for intersphinx: refer to the Python standard library. 
-intersphinx_mapping = {'http://docs.python.org/': None} +intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} # --- Nitpicking options ------------------------------------------------------ diff --git a/setup.py b/setup.py index 48f3d40..88dc31a 100644 --- a/setup.py +++ b/setup.py @@ -31,13 +31,12 @@ "cssselect": ["py.typed"], }, include_package_data=True, - python_requires=">=3.7", + python_requires=">=3.8", classifiers=[ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", From 8e7ad5083300e487c1fa43f4c4291ff3368d835c Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Fri, 6 Oct 2023 13:10:10 +0400 Subject: [PATCH 176/208] Add Python 3.12 to supported versions. (#142) * Add Python 3.12 to supported versions. * Add setuptools to tox.ini deps. * Actually run tests on 3.12. 
--- .flake8 | 3 ++- .github/workflows/checks.yml | 16 ++++++++-------- .github/workflows/publish.yml | 8 ++++---- .github/workflows/tests.yml | 6 +++--- setup.py | 1 + tox.ini | 5 +++-- 6 files changed, 21 insertions(+), 18 deletions(-) diff --git a/.flake8 b/.flake8 index 4315a12..8b0608f 100644 --- a/.flake8 +++ b/.flake8 @@ -2,7 +2,8 @@ max-line-length = 99 ignore = W503 - E266 # too many leading '#' for block comment + # too many leading '#' for block comment + E266 exclude = .git .tox diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 85b51ce..847d788 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -7,30 +7,30 @@ jobs: strategy: matrix: include: - - python-version: 3 + - python-version: 3.12 env: TOXENV: black - - python-version: 3 + - python-version: 3.12 env: TOXENV: flake8 - - python-version: 3 + - python-version: 3.12 env: TOXENV: pylint - - python-version: 3 + - python-version: 3.12 env: TOXENV: security - - python-version: 3 + - python-version: 3.12 env: TOXENV: docs - - python-version: 3 + - python-version: 3.12 env: TOXENV: typing steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 977a42d..67d9c5a 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -7,12 +7,12 @@ jobs: if: startsWith(github.event.ref, 'refs/tags/') steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - name: Set up Python 3.8 - uses: actions/setup-python@v2 + - name: Set up Python 3.12 + uses: actions/setup-python@v4 with: - python-version: 3 + python-version: 3.12 - name: Check Tag id: check-release-tag diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 369fd68..0de2aa2 100644 --- 
a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,13 +6,13 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8, 3.9, "3.10", "3.11"] + python-version: [3.8, 3.9, "3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} diff --git a/setup.py b/setup.py index 88dc31a..f7b51eb 100644 --- a/setup.py +++ b/setup.py @@ -41,5 +41,6 @@ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ], ) diff --git a/tox.ini b/tox.ini index a400382..c618dfb 100644 --- a/tox.ini +++ b/tox.ini @@ -6,6 +6,7 @@ deps = lxml>=4.4 pytest-cov>=2.8 pytest>=5.4 + setuptools sybil commands = pytest --cov=cssselect \ @@ -20,14 +21,14 @@ commands = [testenv:flake8] deps = - flake8==5.0.4 + flake8==6.1.0 commands = flake8 {posargs: cssselect setup.py tests docs/conf.py} [testenv:pylint] deps = {[testenv]deps} - pylint==2.15.3 + pylint==3.0.0 commands = pylint {posargs: cssselect setup.py tests docs} From ec4d1ea6a1c71a2c21d83ce77f9ac08eccd73f25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 19 Dec 2023 12:54:06 +0100 Subject: [PATCH 177/208] Add .readthedocs.yml (#143) --- .readthedocs.yml | 15 +++++++++++++++ docs/conf.py | 2 +- docs/requirements.txt | 2 ++ tox.ini | 3 +-- 4 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 .readthedocs.yml create mode 100644 docs/requirements.txt diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..7d13c50 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,15 @@ +version: 2 +formats: all +sphinx: + configuration: docs/conf.py + fail_on_warning: true +build: + os: ubuntu-22.04 + tools: + # For available versions, see: + # 
https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python + python: "3.12" # Keep in sync with .github/workflows/checks.yml +python: + install: + - requirements: docs/requirements.txt + - path: . diff --git a/docs/conf.py b/docs/conf.py index 5524479..811de25 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -95,7 +95,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'classic' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..d5476d8 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +sphinx==7.2.6 +sphinx-rtd-theme==2.0.0 diff --git a/tox.ini b/tox.ini index c618dfb..24dec48 100644 --- a/tox.ini +++ b/tox.ini @@ -41,8 +41,7 @@ commands = [testenv:docs] changedir = docs deps = - sphinx - sphinx_rtd_theme + -r docs/requirements.txt commands = sphinx-build -W -b html . 
{envtmpdir}/html From e91101b37f82558db84a6b8ee9a6dba1fd2ae0bb Mon Sep 17 00:00:00 2001 From: Emmanuel Rondan Date: Thu, 11 Apr 2024 12:58:57 -0300 Subject: [PATCH 178/208] applying pre-commit hooks and adding config files --- .bandit.yml | 4 + .flake8 | 1 + .git-blame-ignore-revs | 1 + .github/workflows/checks.yml | 12 +-- .isort.cfg | 2 + .pre-commit-config.yaml | 18 ++++ cssselect/__init__.py | 6 +- cssselect/parser.py | 93 ++++++++++++----- cssselect/xpath.py | 139 ++++++++++++++++++------- docs/conf.py | 147 +++++++++++++------------- docs/conftest.py | 5 +- pyproject.toml | 11 +- setup.py | 3 +- tests/test_cssselect.py | 196 ++++++++++++++++++++++++++--------- tox.ini | 25 ++--- 15 files changed, 443 insertions(+), 220 deletions(-) create mode 100644 .git-blame-ignore-revs create mode 100644 .isort.cfg create mode 100644 .pre-commit-config.yaml diff --git a/.bandit.yml b/.bandit.yml index 7fcde04..4f60a02 100644 --- a/.bandit.yml +++ b/.bandit.yml @@ -1,2 +1,6 @@ skips: - B101 +- B311 +- B320 +- B410 +exclude_dirs: ['tests'] diff --git a/.flake8 b/.flake8 index 8b0608f..2417f2e 100644 --- a/.flake8 +++ b/.flake8 @@ -4,6 +4,7 @@ ignore = W503 # too many leading '#' for block comment E266 + E704 exclude = .git .tox diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..e746ff9 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1 @@ +# applying pre-commit hooks to the project \ No newline at end of file diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 847d788..1e9a243 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -7,12 +7,6 @@ jobs: strategy: matrix: include: - - python-version: 3.12 - env: - TOXENV: black - - python-version: 3.12 - env: - TOXENV: flake8 - python-version: 3.12 env: TOXENV: pylint @@ -40,3 +34,9 @@ jobs: pip install -U pip pip install -U tox tox + + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: 
pre-commit/action@v3.0.0 diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000..6860bdb --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,2 @@ +[settings] +profile = black \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..a27d3db --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,18 @@ +repos: +- repo: https://github.com/PyCQA/bandit + rev: 1.7.8 + hooks: + - id: bandit + args: [-r, -c, .bandit.yml] +- repo: https://github.com/PyCQA/flake8 + rev: 7.0.0 + hooks: + - id: flake8 +- repo: https://github.com/psf/black.git + rev: 24.3.0 + hooks: + - id: black +- repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort \ No newline at end of file diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 77f028b..a59995c 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -14,13 +14,13 @@ """ from cssselect.parser import ( - parse, - Selector, FunctionalPseudoElement, + Selector, SelectorError, SelectorSyntaxError, + parse, ) -from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError +from cssselect.xpath import ExpressionError, GenericTranslator, HTMLTranslator __all__ = ( "ExpressionError", diff --git a/cssselect/parser.py b/cssselect/parser.py index 25a650c..354713d 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -12,9 +12,9 @@ """ -import sys -import re import operator +import re +import sys import typing from typing import Iterable, Iterator, List, Optional, Sequence, Tuple, Union @@ -67,9 +67,13 @@ class Selector: """ - def __init__(self, tree: Tree, pseudo_element: Optional[PseudoElement] = None) -> None: + def __init__( + self, tree: Tree, pseudo_element: Optional[PseudoElement] = None + ) -> None: self.parsed_tree = tree - if pseudo_element is not None and not isinstance(pseudo_element, FunctionalPseudoElement): + if pseudo_element is not None and not isinstance( + pseudo_element, 
FunctionalPseudoElement + ): pseudo_element = ascii_lower(pseudo_element) #: A :class:`FunctionalPseudoElement`, #: or the identifier for the pseudo-element as a string, @@ -247,7 +251,11 @@ def __init__(self, selector: Tree, subselector: Tree) -> None: self.subselector = subselector def __repr__(self) -> str: - return "%s[%r:not(%r)]" % (self.__class__.__name__, self.selector, self.subselector) + return "%s[%r:not(%r)]" % ( + self.__class__.__name__, + self.selector, + self.subselector, + ) def canonical(self) -> str: subsel = self.subselector.canonical() @@ -317,7 +325,10 @@ def canonical(self) -> str: for s in self.selector_list: selarg = s.canonical() selector_arguments.append(selarg.lstrip("*")) - return "%s:is(%s)" % (self.selector.canonical(), ", ".join(map(str, selector_arguments))) + return "%s:is(%s)" % ( + self.selector.canonical(), + ", ".join(map(str, selector_arguments)), + ) def specificity(self) -> Tuple[int, int, int]: return max(x.specificity() for x in self.selector_list) @@ -367,14 +378,17 @@ def __init__( attrib: str, operator: 'typing.Literal["exists"]', value: None, - ) -> None: - ... + ) -> None: ... @typing.overload def __init__( - self, selector: Tree, namespace: Optional[str], attrib: str, operator: str, value: "Token" - ) -> None: - ... + self, + selector: Tree, + namespace: Optional[str], + attrib: str, + operator: str, + value: "Token", + ) -> None: ... 
def __init__( self, @@ -415,7 +429,11 @@ def canonical(self) -> str: if self.operator == "exists": op = attrib else: - op = "%s%s%s" % (attrib, self.operator, typing.cast("Token", self.value).css()) + op = "%s%s%s" % ( + attrib, + self.operator, + typing.cast("Token", self.value).css(), + ) return "%s[%s]" % (self.selector.canonical(), op) @@ -433,7 +451,9 @@ class Element: """ - def __init__(self, namespace: Optional[str] = None, element: Optional[str] = None) -> None: + def __init__( + self, namespace: Optional[str] = None, element: Optional[str] = None + ) -> None: self.namespace = namespace self.element = element @@ -486,7 +506,12 @@ def __repr__(self) -> str: comb = "" else: comb = self.combinator - return "%s[%r %s %r]" % (self.__class__.__name__, self.selector, comb, self.subselector) + return "%s[%r %s %r]" % ( + self.__class__.__name__, + self.selector, + comb, + self.subselector, + ) def canonical(self) -> str: subsel = self.subselector.canonical() @@ -509,7 +534,9 @@ def specificity(self) -> Tuple[int, int, int]: _id_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$") # foo.bar or .bar -_class_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$") +_class_re = re.compile( + r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$" +) def parse(css: str) -> List[Selector]: @@ -536,7 +563,9 @@ def parse(css: str) -> List[Selector]: return [Selector(Hash(Element(element=match.group(1) or None), match.group(2)))] match = _class_re.match(css) if match is not None: - return [Selector(Class(Element(element=match.group(1) or None), match.group(2)))] + return [ + Selector(Class(Element(element=match.group(1) or None), match.group(2))) + ] stream = TokenStream(tokenize(css)) stream.source = css @@ -708,7 +737,10 @@ def parse_arguments(stream: "TokenStream") -> List["Token"]: while 1: stream.skip_whitespace() next = stream.next() - if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "+"), 
("DELIM", "-")]: + if next.type in ("IDENT", "STRING", "NUMBER") or next in [ + ("DELIM", "+"), + ("DELIM", "-"), + ]: arguments.append(next) elif next == ("DELIM", ")"): return arguments @@ -729,7 +761,10 @@ def parse_relative_selector(stream: "TokenStream") -> Tuple["Token", Selector]: combinator = Token("DELIM", " ", pos=0) while 1: - if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "."), ("DELIM", "*")]: + if next.type in ("IDENT", "STRING", "NUMBER") or next in [ + ("DELIM", "."), + ("DELIM", "*"), + ]: subselector += typing.cast(str, next.value) elif next == ("DELIM", ")"): result = parse(subselector) @@ -787,7 +822,9 @@ def parse_attrib(selector: Tree, stream: "TokenStream") -> Attrib: return Attrib(selector, namespace, typing.cast(str, attrib), "exists", None) elif next == ("DELIM", "="): op = "=" - elif next.is_delim("^", "$", "*", "~", "|", "!") and (stream.peek() == ("DELIM", "=")): + elif next.is_delim("^", "$", "*", "~", "|", "!") and ( + stream.peek() == ("DELIM", "=") + ): op = typing.cast(str, next.value) + "=" stream.next() else: @@ -850,12 +887,12 @@ def __new__( type_: 'typing.Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"]', value: str, pos: int, - ) -> "Token": - ... + ) -> "Token": ... @typing.overload - def __new__(cls, type_: 'typing.Literal["EOF"]', value: None, pos: int) -> "Token": - ... + def __new__( + cls, type_: 'typing.Literal["EOF"]', value: None, pos: int + ) -> "Token": ... def __new__(cls, type_: str, value: Optional[str], pos: int) -> "Token": obj = tuple.__new__(cls, (type_, value)) @@ -910,8 +947,7 @@ class TokenMacros: class MatchFunc(typing.Protocol): def __call__( self, string: str, pos: int = ..., endpos: int = ... - ) -> Optional["re.Match[str]"]: - ... + ) -> Optional["re.Match[str]"]: ... 
def _compile(pattern: str) -> "MatchFunc": @@ -970,7 +1006,8 @@ def tokenize(s: str) -> Iterator[Token]: match = _match_hash(s, pos=pos) if match: value = _sub_simple_escape( - _replace_simple, _sub_unicode_escape(_replace_unicode, match.group()[1:]) + _replace_simple, + _sub_unicode_escape(_replace_unicode, match.group()[1:]), ) yield Token("HASH", value, pos) pos = match.end() @@ -987,7 +1024,9 @@ def tokenize(s: str) -> Iterator[Token]: raise SelectorSyntaxError("Invalid string at %s" % pos) value = _sub_simple_escape( _replace_simple, - _sub_unicode_escape(_replace_unicode, _sub_newline_escape("", match.group())), + _sub_unicode_escape( + _replace_unicode, _sub_newline_escape("", match.group()) + ), ) yield Token("STRING", value, pos) pos = end_pos + 1 diff --git a/cssselect/xpath.py b/cssselect/xpath.py index fd28c47..4255f66 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -18,23 +18,23 @@ from typing import Optional from cssselect.parser import ( - parse, - parse_series, - PseudoElement, - Selector, - SelectorError, - Tree, - Element, - Hash, + Attrib, Class, + CombinedSelector, + Element, Function, - Pseudo, - Attrib, + Hash, + Matching, Negation, + Pseudo, + PseudoElement, Relation, - Matching, + Selector, + SelectorError, SpecificityAdjustment, - CombinedSelector, + Tree, + parse, + parse_series, ) @@ -58,7 +58,11 @@ class ExpressionError(SelectorError, RuntimeError): class XPathExpr: def __init__( - self, path: str = "", element: str = "*", condition: str = "", star_prefix: bool = False + self, + path: str = "", + element: str = "*", + condition: str = "", + star_prefix: bool = False, ) -> None: self.path = path self.element = element @@ -84,7 +88,9 @@ def add_name_test(self) -> None: if self.element == "*": # We weren't doing a test anyway return - self.add_condition("name() = %s" % GenericTranslator.xpath_literal(self.element)) + self.add_condition( + "name() = %s" % GenericTranslator.xpath_literal(self.element) + ) self.element = "*" def 
add_star_prefix(self) -> None: @@ -107,7 +113,9 @@ def join( path += other.path self.path = path if not has_inner_condition: - self.element = other.element + closing_combiner if closing_combiner else other.element + self.element = ( + other.element + closing_combiner if closing_combiner else other.element + ) self.condition = other.condition else: self.element = other.element @@ -259,7 +267,9 @@ def selector_to_xpath( xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) return (prefix or "") + str(xpath) - def xpath_pseudo_element(self, xpath: XPathExpr, pseudo_element: PseudoElement) -> XPathExpr: + def xpath_pseudo_element( + self, xpath: XPathExpr, pseudo_element: PseudoElement + ) -> XPathExpr: """Translate a pseudo-element. Defaults to not supporting pseudo-elements at all, @@ -300,7 +310,8 @@ def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr: combinator = self.combinator_mapping[combined.combinator] method = getattr(self, "xpath_%s_combinator" % combinator) return typing.cast( - XPathExpr, method(self.xpath(combined.selector), self.xpath(combined.subselector)) + XPathExpr, + method(self.xpath(combined.selector), self.xpath(combined.subselector)), ) def xpath_negation(self, negation: Negation) -> XPathExpr: @@ -381,7 +392,9 @@ def xpath_attrib(self, selector: Attrib) -> XPathExpr: value = typing.cast(str, selector.value.value).lower() else: value = selector.value.value - return typing.cast(XPathExpr, method(self.xpath(selector.selector), attrib, value)) + return typing.cast( + XPathExpr, method(self.xpath(selector.selector), attrib, value) + ) def xpath_class(self, class_selector: Class) -> XPathExpr: """Translate a class selector.""" @@ -416,7 +429,9 @@ def xpath_element(self, selector: Element) -> XPathExpr: # CombinedSelector: dispatch by combinator - def xpath_descendant_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: + def xpath_descendant_combinator( + self, left: XPathExpr, right: XPathExpr + ) -> 
XPathExpr: """right is a child, grand-child or further descendant of left""" return left.join("/descendant-or-self::*/", right) @@ -424,21 +439,31 @@ def xpath_child_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr """right is an immediate child of left""" return left.join("/", right) - def xpath_direct_adjacent_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: + def xpath_direct_adjacent_combinator( + self, left: XPathExpr, right: XPathExpr + ) -> XPathExpr: """right is a sibling immediately after left""" xpath = left.join("/following-sibling::", right) xpath.add_name_test() return xpath.add_condition("position() = 1") - def xpath_indirect_adjacent_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: + def xpath_indirect_adjacent_combinator( + self, left: XPathExpr, right: XPathExpr + ) -> XPathExpr: """right is a sibling after left, immediately or not""" return left.join("/following-sibling::", right) - def xpath_relation_descendant_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: + def xpath_relation_descendant_combinator( + self, left: XPathExpr, right: XPathExpr + ) -> XPathExpr: """right is a child, grand-child or further descendant of left; select left""" - return left.join("[descendant::", right, closing_combiner="]", has_inner_condition=True) + return left.join( + "[descendant::", right, closing_combiner="]", has_inner_condition=True + ) - def xpath_relation_child_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: + def xpath_relation_child_combinator( + self, left: XPathExpr, right: XPathExpr + ) -> XPathExpr: """right is an immediate child of left; select left""" return left.join("[./", right, closing_combiner="]") @@ -447,7 +472,9 @@ def xpath_relation_direct_adjacent_combinator( ) -> XPathExpr: """right is a sibling immediately after left; select left""" xpath = left.add_condition( - "following-sibling::*[(name() = '{}') and (position() = 1)]".format(right.element) + 
"following-sibling::*[(name() = '{}') and (position() = 1)]".format( + right.element + ) ) return xpath @@ -460,7 +487,11 @@ def xpath_relation_indirect_adjacent_combinator( # Function: dispatch by function/pseudo-class name def xpath_nth_child_function( - self, xpath: XPathExpr, function: Function, last: bool = False, add_name_test: bool = True + self, + xpath: XPathExpr, + function: Function, + last: bool = False, + add_name_test: bool = True, ) -> XPathExpr: try: a, b = parse_series(function.arguments) @@ -589,28 +620,41 @@ def xpath_nth_child_function( template = "(%s)" else: template = "%s" - xpath.add_condition(" and ".join(template % expression for expression in expressions)) + xpath.add_condition( + " and ".join(template % expression for expression in expressions) + ) return xpath - def xpath_nth_last_child_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: + def xpath_nth_last_child_function( + self, xpath: XPathExpr, function: Function + ) -> XPathExpr: return self.xpath_nth_child_function(xpath, function, last=True) - def xpath_nth_of_type_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: + def xpath_nth_of_type_function( + self, xpath: XPathExpr, function: Function + ) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:nth-of-type() is not implemented") return self.xpath_nth_child_function(xpath, function, add_name_test=False) - def xpath_nth_last_of_type_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: + def xpath_nth_last_of_type_function( + self, xpath: XPathExpr, function: Function + ) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:nth-of-type() is not implemented") - return self.xpath_nth_child_function(xpath, function, last=True, add_name_test=False) + return self.xpath_nth_child_function( + xpath, function, last=True, add_name_test=False + ) - def xpath_contains_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: + def xpath_contains_function( + self, 
xpath: XPathExpr, function: Function + ) -> XPathExpr: # Defined there, removed in later drafts: # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :contains(), got %r" % function.arguments + "Expected a single string or ident for :contains(), got %r" + % function.arguments ) value = typing.cast(str, function.arguments[0].value) return xpath.add_condition("contains(., %s)" % self.xpath_literal(value)) @@ -618,7 +662,8 @@ def xpath_contains_function(self, xpath: XPathExpr, function: Function) -> XPath def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :lang(), got %r" % function.arguments + "Expected a single string or ident for :lang(), got %r" + % function.arguments ) value = typing.cast(str, function.arguments[0].value) return xpath.add_condition("lang(%s)" % (self.xpath_literal(value))) @@ -679,12 +724,16 @@ def pseudo_never_matches(self, xpath: XPathExpr) -> XPathExpr: # Attrib: dispatch by attribute operator - def xpath_attrib_exists(self, xpath: XPathExpr, name: str, value: Optional[str]) -> XPathExpr: + def xpath_attrib_exists( + self, xpath: XPathExpr, name: str, value: Optional[str] + ) -> XPathExpr: assert not value xpath.add_condition(name) return xpath - def xpath_attrib_equals(self, xpath: XPathExpr, name: str, value: Optional[str]) -> XPathExpr: + def xpath_attrib_equals( + self, xpath: XPathExpr, name: str, value: Optional[str] + ) -> XPathExpr: assert value is not None xpath.add_condition("%s = %s" % (name, self.xpath_literal(value))) return xpath @@ -695,7 +744,9 @@ def xpath_attrib_different( assert value is not None # FIXME: this seems like a weird hack... 
if value: - xpath.add_condition("not(%s) or %s != %s" % (name, name, self.xpath_literal(value))) + xpath.add_condition( + "not(%s) or %s != %s" % (name, name, self.xpath_literal(value)) + ) else: xpath.add_condition("%s != %s" % (name, self.xpath_literal(value))) return xpath @@ -719,7 +770,13 @@ def xpath_attrib_dashmatch( # Weird, but true... xpath.add_condition( "%s and (%s = %s or starts-with(%s, %s))" - % (name, name, self.xpath_literal(value), name, self.xpath_literal(value + "-")) + % ( + name, + name, + self.xpath_literal(value), + name, + self.xpath_literal(value + "-"), + ) ) return xpath @@ -798,7 +855,8 @@ def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :lang(), got %r" % function.arguments + "Expected a single string or ident for :lang(), got %r" + % function.arguments ) value = function.arguments[0].value assert value @@ -807,7 +865,8 @@ def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr # XPath 1.0 has no lower-case function... "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " "'abcdefghijklmnopqrstuvwxyz'), " - "'-'), %s)]" % (self.lang_attribute, self.xpath_literal(value.lower() + "-")) + "'-'), %s)]" + % (self.lang_attribute, self.xpath_literal(value.lower() + "-")) ) def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore diff --git a/docs/conf.py b/docs/conf.py index 811de25..aa5ae22 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,83 +12,86 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os, re +import os +import re +import sys # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. 
If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +# sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', - 'sphinx.ext.doctest'] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.intersphinx", "sphinx.ext.doctest"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'cssselect' -copyright = '2012-2017, Simon Sapin, Scrapy developers' +project = "cssselect" +copyright = "2012-2017, Simon Sapin, Scrapy developers" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The full version, including alpha/beta/rc tags. -with open(os.path.join(os.path.dirname(__file__), '..', 'cssselect', '__init__.py')) as init_file: +with open( + os.path.join(os.path.dirname(__file__), "..", "cssselect", "__init__.py") +) as init_file: init_py = init_file.read() release = re.search('VERSION = "([^"]+)"', init_py).group(1) # The short X.Y version. -version = release.rstrip('dev') +version = release.rstrip("dev") # The language for content autogenerated by Sphinx. 
Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- @@ -100,129 +103,123 @@ # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. 
Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -#html_static_path = ['_static'] +# html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
-#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'cssselectdoc' +htmlhelp_basename = "cssselectdoc" # -- Options for LaTeX output -------------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'cssselect.tex', 'cssselect Documentation', - 'Simon Sapin', 'manual'), + ("index", "cssselect.tex", "cssselect Documentation", "Simon Sapin", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. 
-#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'cssselect', 'cssselect Documentation', - ['Simon Sapin'], 1) -] +man_pages = [("index", "cssselect", "cssselect Documentation", ["Simon Sapin"], 1)] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ @@ -231,23 +228,29 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'cssselect', 'cssselect Documentation', - 'Simon Sapin', 'cssselect', 'One line description of project.', - 'Miscellaneous'), + ( + "index", + "cssselect", + "cssselect Documentation", + "Simon Sapin", + "cssselect", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # Example configuration for intersphinx: refer to the Python standard library. 
-intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} +intersphinx_mapping = {"python": ("https://docs.python.org/3", None)} # --- Nitpicking options ------------------------------------------------------ @@ -255,5 +258,5 @@ nitpicky = True nitpick_ignore = [ # explicitly not a part of the public API - ('py:class', 'cssselect.parser.Token'), + ("py:class", "cssselect.parser.Token"), ] diff --git a/docs/conftest.py b/docs/conftest.py index 9d16bb7..a71d108 100644 --- a/docs/conftest.py +++ b/docs/conftest.py @@ -3,6 +3,7 @@ from sybil import Sybil from sybil.parsers.doctest import DocTestParser from sybil.parsers.skip import skip + try: # sybil 3.0.0+ from sybil.parsers.codeblock import PythonCodeBlockParser @@ -13,8 +14,8 @@ pytest_collect_file = Sybil( parsers=[ DocTestParser(optionflags=ELLIPSIS | NORMALIZE_WHITESPACE), - PythonCodeBlockParser(future_imports=['print_function']), + PythonCodeBlockParser(future_imports=["print_function"]), skip, ], - pattern='*.rst', + pattern="*.rst", ).pytest() diff --git a/pyproject.toml b/pyproject.toml index 57a5583..261fe3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,11 @@ +[tool.isort] +profile = "black" +multi_line_output = 3 + +[tool.mypy] +check_untyped_defs = true +ignore_missing_imports = true +no_warn_no_return = true + [tool.black] -line-length = 99 +target-version = ["py38", "py39", "py310", "py311", "py312"] \ No newline at end of file diff --git a/setup.py b/setup.py index f7b51eb..43eecc0 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,10 @@ # -*- coding: utf-8 -*- -import re import os.path +import re from setuptools import setup - ROOT = os.path.dirname(__file__) with open(os.path.join(ROOT, "README.rst")) as readme_file: README = readme_file.read() diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 2c9e94c..32c1683 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -23,27 +23,31 @@ from typing import List, Optional, Sequence, Tuple from lxml 
import etree, html + from cssselect import ( - parse, + ExpressionError, GenericTranslator, HTMLTranslator, SelectorSyntaxError, - ExpressionError, + parse, ) from cssselect.parser import ( - tokenize, - parse_series, - PseudoElement, - FunctionalPseudoElement, Function, + FunctionalPseudoElement, + PseudoElement, Token, + parse_series, + tokenize, ) from cssselect.xpath import XPathExpr class TestCssselect(unittest.TestCase): def test_tokenizer(self) -> None: - tokens = [str(item) for item in tokenize(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)')] + tokens = [ + str(item) + for item in tokenize(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)') + ] assert tokens == [ "", "", @@ -70,7 +74,10 @@ def repr_parse(css: str) -> List[str]: selectors = parse(css) for selector in selectors: assert selector.pseudo_element is None - return [repr(selector.parsed_tree).replace("(u'", "('") for selector in selectors] + return [ + repr(selector.parsed_tree).replace("(u'", "('") + for selector in selectors + ] def parse_many(first: str, *others: str) -> List[str]: result = repr_parse(first) @@ -95,7 +102,9 @@ def parse_many(first: str, *others: str) -> List[str]: "div\r>\n\n\n.foo", "div\f>\f.foo", ) == ["CombinedSelector[Element[div] > Class[Element[*].foo]]"] - assert parse_many("td.foo,.bar", "td.foo, .bar", "td.foo\t\r\n\f ,\t\r\n\f .bar") == [ + assert parse_many( + "td.foo,.bar", "td.foo, .bar", "td.foo\t\r\n\f ,\t\r\n\f .bar" + ) == [ "Class[Element[td].foo]", "Class[Element[*].bar]", ] @@ -123,11 +132,15 @@ def parse_many(first: str, *others: str) -> List[str]: assert parse_many("a[hreflang |= 'en']", "a[hreflang|=en]") == [ "Attrib[Element[a][hreflang |= 'en']]" ] - assert parse_many("div:nth-child(10)") == ["Function[Element[div]:nth-child(['10'])]"] + assert parse_many("div:nth-child(10)") == [ + "Function[Element[div]:nth-child(['10'])]" + ] assert parse_many(":nth-child(2n+2)") == [ "Function[Element[*]:nth-child(['2', 'n', '+2'])]" ] - assert 
parse_many("div:nth-of-type(10)") == ["Function[Element[div]:nth-of-type(['10'])]"] + assert parse_many("div:nth-of-type(10)") == [ + "Function[Element[div]:nth-of-type(['10'])]" + ] assert parse_many("div div:nth-of-type(10) .aclass") == [ "CombinedSelector[CombinedSelector[Element[div] " "Function[Element[div]:nth-of-type(['10'])]] " @@ -135,7 +148,9 @@ def parse_many(first: str, *others: str) -> List[str]: ] assert parse_many("label:only") == ["Pseudo[Element[label]:only]"] assert parse_many("a:lang(fr)") == ["Function[Element[a]:lang(['fr'])]"] - assert parse_many('div:contains("foo")') == ["Function[Element[div]:contains(['foo'])]"] + assert parse_many('div:contains("foo")') == [ + "Function[Element[div]:contains(['foo'])]" + ] assert parse_many("div#foobar") == ["Hash[Element[div]#foobar]"] assert parse_many("div:not(div.foo)") == [ "Negation[Element[div]:not(Class[Element[div].foo])]" @@ -253,7 +268,10 @@ def test_pseudo_repr(css: str) -> str: assert selector.pseudo_element == "foo" assert tr.selector_to_xpath(selector, prefix="") == "e" self.assertRaises( - ExpressionError, tr.selector_to_xpath, selector, translate_pseudo_elements=True + ExpressionError, + tr.selector_to_xpath, + selector, + translate_pseudo_elements=True, ) # Special test for the unicode symbols and ':scope' element if check @@ -301,7 +319,11 @@ def specificity(css: str) -> Tuple[int, int, int]: assert specificity("foo::before") == (0, 0, 2) assert specificity("foo:empty::before") == (0, 1, 2) - assert specificity("#lorem + foo#ipsum:first-child > bar:first-line") == (2, 1, 3) + assert specificity("#lorem + foo#ipsum:first-child > bar:first-line") == ( + 2, + 1, + 3, + ) def test_css_export(self) -> None: def css2css(css: str, res: Optional[str] = None) -> None: @@ -354,7 +376,9 @@ def get_error(css: str) -> Optional[str]: assert get_error("attributes(href)/html/body/a") == ( "Expected selector, got " ) - assert get_error("attributes(href)") == ("Expected selector, got ") + assert 
get_error("attributes(href)") == ( + "Expected selector, got " + ) assert get_error("html/body/a") == ("Expected selector, got ") assert get_error(" ") == ("Expected selector, got ") assert get_error("div, ") == ("Expected selector, got ") @@ -369,10 +393,14 @@ def get_error(css: str) -> Optional[str]: assert get_error("[*]") == ("Expected '|', got ") assert get_error("[foo|]") == ("Expected ident, got ") assert get_error("[#]") == ("Expected ident or '*', got ") - assert get_error("[foo=#]") == ("Expected string or ident, got ") + assert get_error("[foo=#]") == ( + "Expected string or ident, got " + ) assert get_error("[href]a") == ("Expected selector, got ") assert get_error("[rel=stylesheet]") is None - assert get_error("[rel:stylesheet]") == ("Operator expected, got ") + assert get_error("[rel:stylesheet]") == ( + "Operator expected, got " + ) assert get_error("[rel=stylesheet") == ("Expected ']', got ") assert get_error(":lang(fr)") is None assert get_error(":lang(fr") == ("Expected an argument, got ") @@ -386,12 +414,20 @@ def get_error(css: str) -> Optional[str]: assert get_error("li:before a") == ( "Got pseudo-element ::before not at the end of a selector" ) - assert get_error(":not(:before)") == ("Got pseudo-element ::before inside :not() at 12") + assert get_error(":not(:before)") == ( + "Got pseudo-element ::before inside :not() at 12" + ) assert get_error(":not(:not(a))") == ("Got nested :not()") - assert get_error(":is(:before)") == ("Got pseudo-element ::before inside function") + assert get_error(":is(:before)") == ( + "Got pseudo-element ::before inside function" + ) assert get_error(":is(a b)") == ("Expected an argument, got ") - assert get_error(":where(:before)") == ("Got pseudo-element ::before inside function") - assert get_error(":where(a b)") == ("Expected an argument, got ") + assert get_error(":where(:before)") == ( + "Got pseudo-element ::before inside function" + ) + assert get_error(":where(a b)") == ( + "Expected an argument, got " + ) 
assert get_error(":scope > div :scope header") == ( 'Got immediate child pseudo-element ":scope" not at the start of a selector' ) @@ -446,19 +482,29 @@ def xpath(css: str) -> str: "e[(count(preceding-sibling::*) >= 1) and " "((count(preceding-sibling::*) +2) mod 3 = 0)]" ) - assert xpath("e:nth-child(3n-2)") == ("e[count(preceding-sibling::*) mod 3 = 0]") + assert xpath("e:nth-child(3n-2)") == ( + "e[count(preceding-sibling::*) mod 3 = 0]" + ) assert xpath("e:nth-child(-n+6)") == ("e[count(preceding-sibling::*) <= 5]") assert xpath("e:nth-last-child(1)") == ("e[count(following-sibling::*) = 0]") - assert xpath("e:nth-last-child(2n)") == ("e[(count(following-sibling::*) +1) mod 2 = 0]") - assert xpath("e:nth-last-child(2n+1)") == ("e[count(following-sibling::*) mod 2 = 0]") + assert xpath("e:nth-last-child(2n)") == ( + "e[(count(following-sibling::*) +1) mod 2 = 0]" + ) + assert xpath("e:nth-last-child(2n+1)") == ( + "e[count(following-sibling::*) mod 2 = 0]" + ) assert xpath("e:nth-last-child(2n+2)") == ( "e[(count(following-sibling::*) >= 1) and " "((count(following-sibling::*) +1) mod 2 = 0)]" ) - assert xpath("e:nth-last-child(3n+1)") == ("e[count(following-sibling::*) mod 3 = 0]") + assert xpath("e:nth-last-child(3n+1)") == ( + "e[count(following-sibling::*) mod 3 = 0]" + ) # represents the two last e elements - assert xpath("e:nth-last-child(-n+2)") == ("e[count(following-sibling::*) <= 1]") + assert xpath("e:nth-last-child(-n+2)") == ( + "e[count(following-sibling::*) <= 1]" + ) assert xpath("e:nth-of-type(1)") == ("e[count(preceding-sibling::e) = 0]") assert xpath("e:nth-last-of-type(1)") == ("e[count(following-sibling::e) = 0]") @@ -486,24 +532,32 @@ def xpath(css: str) -> str: assert xpath("e:has(f)") == "e[descendant::f]" assert xpath("e:has(~ f)") == "e[following-sibling::f]" assert ( - xpath("e:has(+ f)") == "e[following-sibling::*[(name() = 'f') and (position() = 1)]]" + xpath("e:has(+ f)") + == "e[following-sibling::*[(name() = 'f') and (position() = 
1)]]" ) assert xpath('e:contains("foo")') == ("e[contains(., 'foo')]") assert xpath("e:ConTains(foo)") == ("e[contains(., 'foo')]") assert xpath("e.warning") == ( - "e[@class and contains(" "concat(' ', normalize-space(@class), ' '), ' warning ')]" + "e[@class and contains(" + "concat(' ', normalize-space(@class), ' '), ' warning ')]" ) assert xpath("e#myid") == ("e[@id = 'myid']") - assert xpath("e:not(:nth-child(odd))") == ("e[not(count(preceding-sibling::*) mod 2 = 0)]") + assert xpath("e:not(:nth-child(odd))") == ( + "e[not(count(preceding-sibling::*) mod 2 = 0)]" + ) assert xpath("e:nOT(*)") == ("e[0]") # never matches assert xpath("e f") == ("e/descendant-or-self::*/f") assert xpath("e > f") == ("e/f") - assert xpath("e + f") == ("e/following-sibling::*[(name() = 'f') and (position() = 1)]") + assert xpath("e + f") == ( + "e/following-sibling::*[(name() = 'f') and (position() = 1)]" + ) assert xpath("e ~ f") == ("e/following-sibling::f") assert xpath("e ~ f:nth-child(3)") == ( "e/following-sibling::f[count(preceding-sibling::*) = 2]" ) - assert xpath("div#container p") == ("div[@id = 'container']/descendant-or-self::*/p") + assert xpath("div#container p") == ( + "div[@id = 'container']/descendant-or-self::*/p" + ) assert xpath("e:where(foo)") == "e[name() = 'foo']" assert xpath("e:where(foo, bar)") == "e[(name() = 'foo') or (name() = 'bar')]" @@ -539,10 +593,18 @@ def test_unicode(self) -> None: def test_quoting(self) -> None: css_to_xpath = GenericTranslator().css_to_xpath - assert css_to_xpath('*[aval="\'"]') == ("""descendant-or-self::*[@aval = "'"]""") - assert css_to_xpath("*[aval=\"'''\"]") == ("""descendant-or-self::*[@aval = "'''"]""") - assert css_to_xpath("*[aval='\"']") == ("""descendant-or-self::*[@aval = '"']""") - assert css_to_xpath('*[aval=\'"""\']') == ('''descendant-or-self::*[@aval = '"""']''') + assert css_to_xpath('*[aval="\'"]') == ( + """descendant-or-self::*[@aval = "'"]""" + ) + assert css_to_xpath("*[aval=\"'''\"]") == ( + 
"""descendant-or-self::*[@aval = "'''"]""" + ) + assert css_to_xpath("*[aval='\"']") == ( + """descendant-or-self::*[@aval = '"']""" + ) + assert css_to_xpath('*[aval=\'"""\']') == ( + '''descendant-or-self::*[@aval = '"""']''' + ) assert css_to_xpath(':scope > div[dataimg=""]') == ( "descendant-or-self::*[1]/div[@dataimg = '']" ) @@ -575,7 +637,8 @@ def xpath_pseudo_element( method = getattr(self, method_name, None) if not method: raise ExpressionError( - "The functional pseudo-element ::%s() is unknown" % pseudo_element.name + "The functional pseudo-element ::%s() is unknown" + % pseudo_element.name ) xpath = method(xpath, pseudo_element.arguments) else: @@ -592,7 +655,9 @@ def xpath_pseudo_element( # functional pseudo-class: # elements that have a certain number of attributes - def xpath_nb_attr_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: + def xpath_nb_attr_function( + self, xpath: XPathExpr, function: Function + ) -> XPathExpr: assert function.arguments[0].value nb_attributes = int(function.arguments[0].value) return xpath.add_condition("count(@*)=%d" % nb_attributes) @@ -616,7 +681,9 @@ def xpath_attr_functional_pseudo_element( # pseudo-element: # element's text() nodes - def xpath_text_node_simple_pseudo_element(self, xpath: XPathExpr) -> XPathExpr: + def xpath_text_node_simple_pseudo_element( + self, xpath: XPathExpr + ) -> XPathExpr: other = XPathExpr( "text()", "", @@ -625,7 +692,9 @@ def xpath_text_node_simple_pseudo_element(self, xpath: XPathExpr) -> XPathExpr: # pseudo-element: # element's href attribute - def xpath_attr_href_simple_pseudo_element(self, xpath: XPathExpr) -> XPathExpr: + def xpath_attr_href_simple_pseudo_element( + self, xpath: XPathExpr + ) -> XPathExpr: other = XPathExpr( "@href", "", @@ -656,7 +725,9 @@ def xpath(css: str) -> str: assert str(XPathExpr("", "", condition="@href")) == "[@href]" document = etree.fromstring(OPERATOR_PRECEDENCE_IDS) - sort_key = dict((el, count) for count, el in 
enumerate(document.iter())).__getitem__ + sort_key = dict( + (el, count) for count, el in enumerate(document.iter()) + ).__getitem__ def operator_id(selector: str) -> List[str]: xpath = CustomTranslator().css_to_xpath(selector) @@ -698,7 +769,9 @@ def series(css: str) -> Optional[Tuple[int, int]]: def test_lang(self) -> None: document = etree.fromstring(XMLLANG_IDS) - sort_key = dict((el, count) for count, el in enumerate(document.iter())).__getitem__ + sort_key = dict( + (el, count) for count, el in enumerate(document.iter()) + ).__getitem__ css_to_xpath = GenericTranslator().css_to_xpath def langid(selector: str) -> List[str]: @@ -714,7 +787,13 @@ def langid(selector: str) -> List[str]: assert langid(":lang(ru)") == ["sixth"] assert langid(":lang('ZH')") == ["eighth"] assert langid(":lang(de) :lang(zh)") == ["eighth"] - assert langid(":lang(en), :lang(zh)") == ["first", "second", "third", "fourth", "eighth"] + assert langid(":lang(en), :lang(zh)") == [ + "first", + "second", + "third", + "fourth", + "eighth", + ] assert langid(":lang(es)") == [] def test_argument_types(self) -> None: @@ -747,7 +826,9 @@ def argument_types(css: str) -> List[str]: def test_select(self) -> None: document = etree.fromstring(HTML_IDS) - sort_key = dict((el, count) for count, el in enumerate(document.iter())).__getitem__ + sort_key = dict( + (el, count) for count, el in enumerate(document.iter()) + ).__getitem__ css_to_xpath = GenericTranslator().css_to_xpath html_css_to_xpath = HTMLTranslator().css_to_xpath @@ -769,7 +850,14 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: return result all_ids = pcss("*") - assert all_ids[:6] == ["html", "nil", "link-href", "link-nohref", "nil", "outer-div"] + assert all_ids[:6] == [ + "html", + "nil", + "link-href", + "link-nohref", + "nil", + "outer-div", + ] assert all_ids[-1:] == ["foobar-span"] assert pcss("div") == ["outer-div", "li-div", "foobar-div"] assert pcss("DIV", html_only=True) == [ @@ -780,7 +868,9 @@ def 
pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: assert pcss("div div") == ["li-div"] assert pcss("div, div div") == ["outer-div", "li-div", "foobar-div"] assert pcss("a[name]") == ["name-anchor"] - assert pcss("a[NAme]", html_only=True) == ["name-anchor"] # case-insensitive in HTML: + assert pcss("a[NAme]", html_only=True) == [ + "name-anchor" + ] # case-insensitive in HTML: assert pcss("a[rel]") == ["tag-anchor", "nofollow-anchor"] assert pcss('a[rel="tag"]') == ["tag-anchor"] assert pcss('a[href*="localhost"]') == ["tag-anchor"] @@ -798,7 +888,10 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: assert pcss('*[lang|="en"]', '[lang|="en-US"]') == [] assert pcss('*[lang|="e"]') == [] # ... :lang() is not. - assert pcss(':lang("EN")', "*:lang(en-US)", html_only=True) == ["second-li", "li-div"] + assert pcss(':lang("EN")', "*:lang(en-US)", html_only=True) == [ + "second-li", + "li-div", + ] assert pcss(':lang("e")', html_only=True) == [] assert pcss(":scope > div") == [] assert pcss(":scope body") == ["nil"] @@ -852,7 +945,11 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: "seventh-li", ] assert pcss("li:nth-last-child(2n+2)") == ["second-li", "fourth-li", "sixth-li"] - assert pcss("li:nth-last-child(3n+1)") == ["first-li", "fourth-li", "seventh-li"] + assert pcss("li:nth-last-child(3n+1)") == [ + "first-li", + "fourth-li", + "seventh-li", + ] assert pcss("ol:first-of-type") == ["first-ol"] assert pcss("ol:nth-child(1)") == [] assert pcss("ol:nth-of-type(2)") == ["second-ol"] @@ -901,7 +998,10 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: assert pcss('*:contains("E")') == [] # case-sensitive assert pcss(".a", ".b", "*.a", "ol.a") == ["first-ol"] assert pcss(".c", "*.c") == ["first-ol", "third-li", "fourth-li"] - assert pcss("ol *.c", "ol li.c", "li ~ li.c", "ol > li.c") == ["third-li", "fourth-li"] + assert pcss("ol *.c", "ol li.c", "li ~ li.c", "ol > li.c") == [ + "third-li", + 
"fourth-li", + ] assert pcss("#first-li", "li#first-li", "*#first-li") == ["first-li"] assert pcss("li div", "li > div", "div div") == ["li-div"] assert pcss("div > div") == [] diff --git a/tox.ini b/tox.ini index 24dec48..6831d3f 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = black,flake8,pylint,security,py,docs +envlist = pre-commit,pylint,py,docs,typing [testenv] deps = @@ -13,18 +13,6 @@ commands = --cov-report=term-missing --cov-report=html --cov-report=xml \ --verbose {posargs: cssselect tests docs} -[testenv:black] -deps = - black==22.10.0 -commands = - black --check {posargs: cssselect setup.py tests} - -[testenv:flake8] -deps = - flake8==6.1.0 -commands = - flake8 {posargs: cssselect setup.py tests docs/conf.py} - [testenv:pylint] deps = {[testenv]deps} @@ -32,12 +20,6 @@ deps = commands = pylint {posargs: cssselect setup.py tests docs} -[testenv:security] -deps = - bandit -commands = - bandit -r -c .bandit.yml {posargs: cssselect} - [testenv:docs] changedir = docs deps = @@ -52,3 +34,8 @@ deps = mypy==0.982 commands = mypy --strict {posargs: cssselect tests} + +[testenv:pre-commit] +deps = pre-commit +commands = pre-commit run --all-files --show-diff-on-failure +skip_install = true \ No newline at end of file From ae04981df42b59c9cdaecfcb1a02a00534052360 Mon Sep 17 00:00:00 2001 From: Emmanuel Rondan Date: Thu, 11 Apr 2024 12:59:32 -0300 Subject: [PATCH 179/208] ignoring pre-commit commit from blame --- .git-blame-ignore-revs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index e746ff9..9d2c8f6 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1 +1,2 @@ -# applying pre-commit hooks to the project \ No newline at end of file +# applying pre-commit hooks to the project +e91101b37f82558db84a6b8ee9a6dba1fd2ae0bb \ No newline at end of file From 24ae330f70acd56bd54a0f98c261efe2201f6e14 Mon Sep 17 00:00:00 2001 From: Emmanuel Rondan Date: Thu, 11 Apr 
2024 14:22:18 -0300 Subject: [PATCH 180/208] removing security from github CI --- .github/workflows/checks.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 1e9a243..5b6cfbf 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -10,9 +10,6 @@ jobs: - python-version: 3.12 env: TOXENV: pylint - - python-version: 3.12 - env: - TOXENV: security - python-version: 3.12 env: TOXENV: docs From 15df23ef2176521c45fde954e5476ee2a4696b03 Mon Sep 17 00:00:00 2001 From: Laerte Pereira <5853172+Laerte@users.noreply.github.com> Date: Mon, 22 Apr 2024 04:57:29 -0300 Subject: [PATCH 181/208] Add official PyPy support (#147) --- .github/workflows/tests.yml | 2 +- setup.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0de2aa2..a1a0524 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8, 3.9, "3.10", "3.11", "3.12"] + python-version: [3.8, 3.9, "3.10", "3.11", "3.12", "pypy3.9", "pypy3.10"] steps: - uses: actions/checkout@v4 diff --git a/setup.py b/setup.py index 43eecc0..f01a174 100644 --- a/setup.py +++ b/setup.py @@ -41,5 +41,7 @@ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", ], ) From 9e2ff26036d7e026b76bf48b328a1de281f7c66e Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 16 Oct 2024 13:34:27 +0500 Subject: [PATCH 182/208] Add Python 3.13, drop Python 3.8, update tool versions, add twinecheck (#148) --- .github/workflows/checks.yml | 14 +++++++++----- .github/workflows/publish.yml | 6 +++--- .github/workflows/tests.yml | 5 +++-- .pre-commit-config.yaml | 8 ++++---- MANIFEST.in 
| 2 +- pylintrc | 1 + setup.py | 6 +++--- tox.ini | 17 +++++++++++++---- 8 files changed, 37 insertions(+), 22 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 5b6cfbf..cf0e689 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -5,23 +5,27 @@ jobs: checks: runs-on: ubuntu-latest strategy: + fail-fast: false matrix: include: - - python-version: 3.12 + - python-version: 3.13 env: TOXENV: pylint - - python-version: 3.12 + - python-version: 3.12 # Keep in sync with .readthedocs.yml env: TOXENV: docs - - python-version: 3.12 + - python-version: 3.13 env: TOXENV: typing + - python-version: 3.13 + env: + TOXENV: twinecheck steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -36,4 +40,4 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: pre-commit/action@v3.0.0 + - uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 67d9c5a..36f80b5 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -9,10 +9,10 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python 3.12 - uses: actions/setup-python@v4 + - name: Set up Python 3.13 + uses: actions/setup-python@v5 with: - python-version: 3.12 + python-version: 3.13 - name: Check Tag id: check-release-tag diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a1a0524..70b6c77 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -5,14 +5,15 @@ jobs: tests: runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - python-version: [3.8, 3.9, "3.10", "3.11", "3.12", "pypy3.9", "pypy3.10"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.10"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: 
actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a27d3db..ab99544 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,18 +1,18 @@ repos: - repo: https://github.com/PyCQA/bandit - rev: 1.7.8 + rev: 1.7.10 hooks: - id: bandit args: [-r, -c, .bandit.yml] - repo: https://github.com/PyCQA/flake8 - rev: 7.0.0 + rev: 7.1.1 hooks: - id: flake8 - repo: https://github.com/psf/black.git - rev: 24.3.0 + rev: 24.10.0 hooks: - id: black - repo: https://github.com/pycqa/isort rev: 5.13.2 hooks: - - id: isort \ No newline at end of file + - id: isort diff --git a/MANIFEST.in b/MANIFEST.in index 7fc2933..5561683 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ -include AUTHORS CHANGES LICENSE README.rst tox.ini .coveragerc py.typed +include AUTHORS CHANGES LICENSE README.rst tox.ini .coveragerc cssselect/py.typed recursive-include docs * recursive-include tests * prune docs/_build diff --git a/pylintrc b/pylintrc index e35425e..5a4647b 100644 --- a/pylintrc +++ b/pylintrc @@ -23,6 +23,7 @@ disable=assignment-from-no-return, too-many-branches, too-many-function-args, too-many-lines, + too-many-positional-arguments, too-many-public-methods, too-many-statements, undefined-variable, diff --git a/setup.py b/setup.py index f01a174..4c5d49d 100644 --- a/setup.py +++ b/setup.py @@ -22,25 +22,25 @@ maintainer_email="paul.tremberth@gmail.com", description="cssselect parses CSS3 Selectors and translates them to XPath 1.0", long_description=README, + long_description_content_type="text/x-rst", url="https://github.com/scrapy/cssselect", license="BSD", packages=["cssselect"], - test_suite="cssselect.tests", package_data={ "cssselect": ["py.typed"], }, include_package_data=True, - python_requires=">=3.8", + python_requires=">=3.9", classifiers=[ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: 
BSD License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ], diff --git a/tox.ini b/tox.ini index 6831d3f..616d223 100644 --- a/tox.ini +++ b/tox.ini @@ -16,7 +16,7 @@ commands = [testenv:pylint] deps = {[testenv]deps} - pylint==3.0.0 + pylint==3.3.1 commands = pylint {posargs: cssselect setup.py tests docs} @@ -30,12 +30,21 @@ commands = [testenv:typing] deps = {[testenv]deps} - lxml-stubs==0.4.0 - mypy==0.982 + mypy==1.11.2 + types-lxml==2024.9.16 commands = mypy --strict {posargs: cssselect tests} [testenv:pre-commit] deps = pre-commit commands = pre-commit run --all-files --show-diff-on-failure -skip_install = true \ No newline at end of file +skip_install = true + +[testenv:twinecheck] +basepython = python3 +deps = + twine==5.1.1 + build==1.2.2 +commands = + python -m build --sdist + twine check dist/* From 0b3b9f278a1e50fc3236483d1b3d25b78db20021 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 27 Jan 2025 14:16:28 +0400 Subject: [PATCH 183/208] Remove a deprecated function. 
(#149) --- cssselect/xpath.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 4255f66..ee59f89 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -14,7 +14,6 @@ import re import typing -import warnings from typing import Optional from cssselect.parser import ( @@ -38,17 +37,6 @@ ) -@typing.no_type_check -def _unicode_safe_getattr(obj, name, default=None): - warnings.warn( - "_unicode_safe_getattr is deprecated and will be removed in the" - " next release, use getattr() instead", - DeprecationWarning, - stacklevel=2, - ) - return getattr(obj, name, default) - - class ExpressionError(SelectorError, RuntimeError): """Unknown or unsupported selector (eg. pseudo-class).""" From 58b436f4adf9eaf9844ada7d1003d6dfd672c2a8 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 3 Feb 2025 20:36:00 +0400 Subject: [PATCH 184/208] Migrate to ruff (#150) * Add pyupgrade. * Bump tool versions. * Migrate to ruff. * Simplify typing casts. * Cleanup pylintrc. * More cleanup. * Move tool configs to pyproject.toml. * Update the nitpicky config. * Exclude TYPE_CHECKING blocks from coverage. * Remove an extra newline. 
--- .bandit.yml | 6 - .bumpversion.cfg | 6 - .coveragerc | 10 -- .flake8 | 16 --- .github/workflows/tests.yml | 2 +- .isort.cfg | 2 - .pre-commit-config.yaml | 21 +-- cssselect/__init__.py | 17 ++- cssselect/parser.py | 276 +++++++++++++++++------------------- cssselect/xpath.py | 169 +++++++++++----------- docs/conf.py | 11 +- pylintrc | 33 ----- pyproject.toml | 178 ++++++++++++++++++++++- setup.cfg | 13 -- setup.py | 12 +- tests/test_cssselect.py | 102 +++++++------ tox.ini | 6 +- 17 files changed, 463 insertions(+), 417 deletions(-) delete mode 100644 .bandit.yml delete mode 100644 .bumpversion.cfg delete mode 100644 .coveragerc delete mode 100644 .flake8 delete mode 100644 .isort.cfg delete mode 100644 pylintrc delete mode 100644 setup.cfg diff --git a/.bandit.yml b/.bandit.yml deleted file mode 100644 index 4f60a02..0000000 --- a/.bandit.yml +++ /dev/null @@ -1,6 +0,0 @@ -skips: -- B101 -- B311 -- B320 -- B410 -exclude_dirs: ['tests'] diff --git a/.bumpversion.cfg b/.bumpversion.cfg deleted file mode 100644 index 56cfabc..0000000 --- a/.bumpversion.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[bumpversion] -current_version = 1.2.0 -commit = True -tag = True - -[bumpversion:file:cssselect/__init__.py] diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index ed1fac6..0000000 --- a/.coveragerc +++ /dev/null @@ -1,10 +0,0 @@ -[run] -branch = True -source = cssselect - -[report] -exclude_lines = - pragma: no cover - def __repr__ - if sys.version_info - if __name__ == '__main__': diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 2417f2e..0000000 --- a/.flake8 +++ /dev/null @@ -1,16 +0,0 @@ -[flake8] -max-line-length = 99 -ignore = - W503 - # too many leading '#' for block comment - E266 - E704 -exclude = - .git - .tox - venv* - - # pending revision - docs/conf.py -per-file-ignores = - cssselect/__init__.py:F401 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 70b6c77..427c4ad 100644 --- a/.github/workflows/tests.yml +++ 
b/.github/workflows/tests.yml @@ -24,4 +24,4 @@ jobs: tox -e py - name: Upload coverage report - run: bash <(curl -s https://codecov.io/bash) + uses: codecov/codecov-action@v5 diff --git a/.isort.cfg b/.isort.cfg deleted file mode 100644 index 6860bdb..0000000 --- a/.isort.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[settings] -profile = black \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ab99544..b1829a6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,18 +1,7 @@ repos: -- repo: https://github.com/PyCQA/bandit - rev: 1.7.10 +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.9.4 hooks: - - id: bandit - args: [-r, -c, .bandit.yml] -- repo: https://github.com/PyCQA/flake8 - rev: 7.1.1 - hooks: - - id: flake8 -- repo: https://github.com/psf/black.git - rev: 24.10.0 - hooks: - - id: black -- repo: https://github.com/pycqa/isort - rev: 5.13.2 - hooks: - - id: isort + - id: ruff + args: [ --fix ] + - id: ruff-format diff --git a/cssselect/__init__.py b/cssselect/__init__.py index a59995c..c53b539 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -1,15 +1,14 @@ -# -*- coding: utf-8 -*- """ - CSS Selectors based on XPath - ============================ +CSS Selectors based on XPath +============================ - This module supports selecting XML/HTML elements based on CSS selectors. - See the `CSSSelector` class for details. +This module supports selecting XML/HTML elements based on CSS selectors. +See the `CSSSelector` class for details. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. 
""" @@ -27,10 +26,10 @@ "FunctionalPseudoElement", "GenericTranslator", "HTMLTranslator", - "parse", "Selector", "SelectorError", "SelectorSyntaxError", + "parse", ) VERSION = "1.2.0" diff --git a/cssselect/parser.py b/cssselect/parser.py index 354713d..d16751f 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -1,22 +1,28 @@ -# -*- coding: utf-8 -*- """ - cssselect.parser - ================ +cssselect.parser +================ - Tokenizer, parser and parsed objects for CSS selectors. +Tokenizer, parser and parsed objects for CSS selectors. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. """ +from __future__ import annotations + import operator import re import sys -import typing -from typing import Iterable, Iterator, List, Optional, Sequence, Tuple, Union +from typing import TYPE_CHECKING, Literal, Optional, Protocol, Union, cast, overload + +if TYPE_CHECKING: + from collections.abc import Iterable, Iterator, Sequence + + # typing.Self requires Python 3.11 + from typing_extensions import Self def ascii_lower(string: str) -> str: @@ -67,9 +73,7 @@ class Selector: """ - def __init__( - self, tree: Tree, pseudo_element: Optional[PseudoElement] = None - ) -> None: + def __init__(self, tree: Tree, pseudo_element: PseudoElement | None = None) -> None: self.parsed_tree = tree if pseudo_element is not None and not isinstance( pseudo_element, FunctionalPseudoElement @@ -119,7 +123,7 @@ def canonical(self) -> str: res = res.lstrip("*") return res - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: """Return the specificity_ of this selector as a tuple of 3 integers. .. 
_specificity: http://www.w3.org/TR/selectors/#specificity @@ -146,7 +150,7 @@ def __repr__(self) -> str: def canonical(self) -> str: return "%s.%s" % (self.selector.canonical(), self.class_name) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -170,7 +174,7 @@ class FunctionalPseudoElement: """ - def __init__(self, name: str, arguments: Sequence["Token"]): + def __init__(self, name: str, arguments: Sequence[Token]): self.name = ascii_lower(name) self.arguments = arguments @@ -181,7 +185,7 @@ def __repr__(self) -> str: [token.value for token in self.arguments], ) - def argument_types(self) -> List[str]: + def argument_types(self) -> list[str]: return [token.type for token in self.arguments] def canonical(self) -> str: @@ -194,7 +198,7 @@ class Function: Represents selector:name(expr) """ - def __init__(self, selector: Tree, name: str, arguments: Sequence["Token"]) -> None: + def __init__(self, selector: Tree, name: str, arguments: Sequence[Token]) -> None: self.selector = selector self.name = ascii_lower(name) self.arguments = arguments @@ -207,14 +211,14 @@ def __repr__(self) -> str: [token.value for token in self.arguments], ) - def argument_types(self) -> List[str]: + def argument_types(self) -> list[str]: return [token.type for token in self.arguments] def canonical(self) -> str: args = "".join(token.css() for token in self.arguments) return "%s:%s(%s)" % (self.selector.canonical(), self.name, args) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -235,7 +239,7 @@ def __repr__(self) -> str: def canonical(self) -> str: return "%s:%s" % (self.selector.canonical(), self.ident) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -263,7 
+267,7 @@ def canonical(self) -> str: subsel = subsel.lstrip("*") return "%s:not(%s)" % (self.selector.canonical(), subsel) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -274,7 +278,7 @@ class Relation: Represents selector:has(subselector) """ - def __init__(self, selector: Tree, combinator: "Token", subselector: Selector): + def __init__(self, selector: Tree, combinator: Token, subselector: Selector): self.selector = selector self.combinator = combinator self.subselector = subselector @@ -288,17 +292,17 @@ def __repr__(self) -> str: def canonical(self) -> str: try: - subsel = self.subselector[0].canonical() # type: ignore + subsel = self.subselector[0].canonical() # type: ignore[index] except TypeError: subsel = self.subselector.canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") return "%s:has(%s)" % (self.selector.canonical(), subsel) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() try: - a2, b2, c2 = self.subselector[-1].specificity() # type: ignore + a2, b2, c2 = self.subselector[-1].specificity() # type: ignore[index] except TypeError: a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -330,7 +334,7 @@ def canonical(self) -> str: ", ".join(map(str, selector_arguments)), ) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: return max(x.specificity() for x in self.selector_list) @@ -340,7 +344,7 @@ class SpecificityAdjustment: Same as selector:is(selector_list), but its specificity is always 0 """ - def __init__(self, selector: Tree, selector_list: List[Tree]): + def __init__(self, selector: Tree, selector_list: list[Tree]): self.selector = selector self.selector_list = selector_list @@ -361,7 +365,7 @@ def 
canonical(self) -> str: ", ".join(map(str, selector_arguments)), ) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: return 0, 0, 0 @@ -370,33 +374,33 @@ class Attrib: Represents selector[namespace|attrib operator value] """ - @typing.overload + @overload def __init__( self, selector: Tree, - namespace: Optional[str], + namespace: str | None, attrib: str, - operator: 'typing.Literal["exists"]', + operator: Literal["exists"], value: None, ) -> None: ... - @typing.overload + @overload def __init__( self, selector: Tree, - namespace: Optional[str], + namespace: str | None, attrib: str, operator: str, - value: "Token", + value: Token, ) -> None: ... def __init__( self, selector: Tree, - namespace: Optional[str], + namespace: str | None, attrib: str, operator: str, - value: Optional["Token"], + value: Token | None, ) -> None: self.selector = selector self.namespace = namespace @@ -411,14 +415,14 @@ def __repr__(self) -> str: attrib = self.attrib if self.operator == "exists": return "%s[%r[%s]]" % (self.__class__.__name__, self.selector, attrib) - else: - return "%s[%r[%s %s %r]]" % ( - self.__class__.__name__, - self.selector, - attrib, - self.operator, - typing.cast("Token", self.value).value, - ) + assert self.value is not None + return "%s[%r[%s %s %r]]" % ( + self.__class__.__name__, + self.selector, + attrib, + self.operator, + self.value.value, + ) def canonical(self) -> str: if self.namespace: @@ -429,15 +433,16 @@ def canonical(self) -> str: if self.operator == "exists": op = attrib else: + assert self.value is not None op = "%s%s%s" % ( attrib, self.operator, - typing.cast("Token", self.value).css(), + self.value.css(), ) return "%s[%s]" % (self.selector.canonical(), op) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -452,7 +457,7 @@ class Element: """ def __init__( - self, namespace: Optional[str] = 
None, element: Optional[str] = None + self, namespace: str | None = None, element: str | None = None ) -> None: self.namespace = namespace self.element = element @@ -466,11 +471,10 @@ def canonical(self) -> str: element = "%s|%s" % (self.namespace, element) return element - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: if self.element: return 0, 0, 1 - else: - return 0, 0, 0 + return 0, 0, 0 class Hash: @@ -488,7 +492,7 @@ def __repr__(self) -> str: def canonical(self) -> str: return "%s#%s" % (self.selector.canonical(), self.id) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() a += 1 return a, b, c @@ -502,10 +506,7 @@ def __init__(self, selector: Tree, combinator: str, subselector: Tree) -> None: self.subselector = subselector def __repr__(self) -> str: - if self.combinator == " ": - comb = "" - else: - comb = self.combinator + comb = "" if self.combinator == " " else self.combinator return "%s[%r %s %r]" % ( self.__class__.__name__, self.selector, @@ -519,7 +520,7 @@ def canonical(self) -> str: subsel = subsel.lstrip("*") return "%s %s %s" % (self.selector.canonical(), self.combinator, subsel) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -539,7 +540,7 @@ def specificity(self) -> Tuple[int, int, int]: ) -def parse(css: str) -> List[Selector]: +def parse(css: str) -> list[Selector]: """Parse a CSS *group of selectors*. 
If you don't care about pseudo-elements or selector specificity, @@ -581,7 +582,7 @@ def parse(css: str) -> List[Selector]: # raise -def parse_selector_group(stream: "TokenStream") -> Iterator[Selector]: +def parse_selector_group(stream: TokenStream) -> Iterator[Selector]: stream.skip_whitespace() while 1: yield Selector(*parse_selector(stream)) @@ -592,7 +593,7 @@ def parse_selector_group(stream: "TokenStream") -> Iterator[Selector]: break -def parse_selector(stream: "TokenStream") -> Tuple[Tree, Optional[PseudoElement]]: +def parse_selector(stream: TokenStream) -> tuple[Tree, PseudoElement | None]: result, pseudo_element = parse_simple_selector(stream) while 1: stream.skip_whitespace() @@ -605,7 +606,7 @@ def parse_selector(stream: "TokenStream") -> Tuple[Tree, Optional[PseudoElement] ) if peek.is_delim("+", ">", "~"): # A combinator - combinator = typing.cast(str, stream.next().value) + combinator = cast(str, stream.next().value) stream.skip_whitespace() else: # By exclusion, the last parse_simple_selector() ended @@ -617,8 +618,8 @@ def parse_selector(stream: "TokenStream") -> Tuple[Tree, Optional[PseudoElement] def parse_simple_selector( - stream: "TokenStream", inside_negation: bool = False -) -> Tuple[Tree, Optional[PseudoElement]]: + stream: TokenStream, inside_negation: bool = False +) -> tuple[Tree, PseudoElement | None]: stream.skip_whitespace() selector_start = len(stream.used) peek = stream.peek() @@ -637,7 +638,7 @@ def parse_simple_selector( else: element = namespace = None result: Tree = Element(namespace, element) - pseudo_element: Optional[PseudoElement] = None + pseudo_element: PseudoElement | None = None while 1: peek = stream.peek() if ( @@ -651,7 +652,7 @@ def parse_simple_selector( "Got pseudo-element ::%s not at the end of a selector" % pseudo_element ) if peek.type == "HASH": - result = Hash(result, typing.cast(str, stream.next().value)) + result = Hash(result, cast(str, stream.next().value)) elif peek == ("DELIM", "."): stream.next() 
result = Class(result, stream.next_ident()) @@ -680,21 +681,20 @@ def parse_simple_selector( continue if stream.peek() != ("DELIM", "("): result = Pseudo(result, ident) - if repr(result) == "Pseudo[Element[*]:scope]": - if not ( - len(stream.used) == 2 - or (len(stream.used) == 3 and stream.used[0].type == "S") - or (len(stream.used) >= 3 and stream.used[-3].is_delim(",")) - or ( - len(stream.used) >= 4 - and stream.used[-3].type == "S" - and stream.used[-4].is_delim(",") - ) - ): - raise SelectorSyntaxError( - 'Got immediate child pseudo-element ":scope" ' - "not at the start of a selector" - ) + if repr(result) == "Pseudo[Element[*]:scope]" and not ( + len(stream.used) == 2 + or (len(stream.used) == 3 and stream.used[0].type == "S") + or (len(stream.used) >= 3 and stream.used[-3].is_delim(",")) + or ( + len(stream.used) >= 4 + and stream.used[-3].type == "S" + and stream.used[-4].is_delim(",") + ) + ): + raise SelectorSyntaxError( + 'Got immediate child pseudo-element ":scope" ' + "not at the start of a selector" + ) continue stream.next() stream.skip_whitespace() @@ -732,9 +732,9 @@ def parse_simple_selector( return result, pseudo_element -def parse_arguments(stream: "TokenStream") -> List["Token"]: - arguments: List["Token"] = [] - while 1: +def parse_arguments(stream: TokenStream) -> list[Token]: + arguments: list[Token] = [] + while 1: # noqa: RET503 stream.skip_whitespace() next = stream.next() if next.type in ("IDENT", "STRING", "NUMBER") or next in [ @@ -748,7 +748,7 @@ def parse_arguments(stream: "TokenStream") -> List["Token"]: raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) -def parse_relative_selector(stream: "TokenStream") -> Tuple["Token", Selector]: +def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]: stream.skip_whitespace() subselector = "" next = stream.next() @@ -760,12 +760,12 @@ def parse_relative_selector(stream: "TokenStream") -> Tuple["Token", Selector]: else: combinator = Token("DELIM", " ", 
pos=0) - while 1: + while 1: # noqa: RET503 if next.type in ("IDENT", "STRING", "NUMBER") or next in [ ("DELIM", "."), ("DELIM", "*"), ]: - subselector += typing.cast(str, next.value) + subselector += cast(str, next.value) elif next == ("DELIM", ")"): result = parse(subselector) return combinator, result[0] @@ -774,7 +774,7 @@ def parse_relative_selector(stream: "TokenStream") -> Tuple["Token", Selector]: next = stream.next() -def parse_simple_selector_arguments(stream: "TokenStream") -> List[Tree]: +def parse_simple_selector_arguments(stream: TokenStream) -> list[Tree]: arguments = [] while 1: result, pseudo_element = parse_simple_selector(stream, True) @@ -796,13 +796,13 @@ def parse_simple_selector_arguments(stream: "TokenStream") -> List[Tree]: return arguments -def parse_attrib(selector: Tree, stream: "TokenStream") -> Attrib: +def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib: stream.skip_whitespace() attrib = stream.next_ident_or_star() if attrib is None and stream.peek() != ("DELIM", "|"): raise SelectorSyntaxError("Expected '|', got %s" % (stream.peek(),)) - namespace: Optional[str] - op: Optional[str] + namespace: str | None + op: str | None if stream.peek() == ("DELIM", "|"): stream.next() if stream.peek() == ("DELIM", "="): @@ -819,13 +819,13 @@ def parse_attrib(selector: Tree, stream: "TokenStream") -> Attrib: stream.skip_whitespace() next = stream.next() if next == ("DELIM", "]"): - return Attrib(selector, namespace, typing.cast(str, attrib), "exists", None) - elif next == ("DELIM", "="): + return Attrib(selector, namespace, cast(str, attrib), "exists", None) + if next == ("DELIM", "="): op = "=" elif next.is_delim("^", "$", "*", "~", "|", "!") and ( stream.peek() == ("DELIM", "=") ): - op = typing.cast(str, next.value) + "=" + op = cast(str, next.value) + "=" stream.next() else: raise SelectorSyntaxError("Operator expected, got %s" % (next,)) @@ -837,10 +837,10 @@ def parse_attrib(selector: Tree, stream: "TokenStream") -> Attrib: next = 
stream.next() if next != ("DELIM", "]"): raise SelectorSyntaxError("Expected ']', got %s" % (next,)) - return Attrib(selector, namespace, typing.cast(str, attrib), op, value) + return Attrib(selector, namespace, cast(str, attrib), op, value) -def parse_series(tokens: Iterable["Token"]) -> Tuple[int, int]: +def parse_series(tokens: Iterable[Token]) -> tuple[int, int]: """ Parses the arguments for :nth-child() and friends. @@ -851,12 +851,12 @@ def parse_series(tokens: Iterable["Token"]) -> Tuple[int, int]: for token in tokens: if token.type == "STRING": raise ValueError("String tokens not allowed in series.") - s = "".join(typing.cast(str, token.value) for token in tokens).strip() + s = "".join(cast(str, token.value) for token in tokens).strip() if s == "odd": return 2, 1 - elif s == "even": + if s == "even": return 2, 0 - elif s == "n": + if s == "n": return 1, 0 if "n" not in s: # Just b @@ -865,36 +865,30 @@ def parse_series(tokens: Iterable["Token"]) -> Tuple[int, int]: a_as_int: int if not a: a_as_int = 1 - elif a == "-" or a == "+": + elif a in {"-", "+"}: a_as_int = int(a + "1") else: a_as_int = int(a) - b_as_int: int - if not b: - b_as_int = 0 - else: - b_as_int = int(b) + b_as_int = int(b) if b else 0 return a_as_int, b_as_int #### Token objects -class Token(Tuple[str, Optional[str]]): - @typing.overload +class Token(tuple[str, Optional[str]]): # noqa: SLOT001 + @overload def __new__( cls, - type_: 'typing.Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"]', + type_: Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"], value: str, pos: int, - ) -> "Token": ... + ) -> Self: ... - @typing.overload - def __new__( - cls, type_: 'typing.Literal["EOF"]', value: None, pos: int - ) -> "Token": ... + @overload + def __new__(cls, type_: Literal["EOF"], value: None, pos: int) -> Self: ... 
- def __new__(cls, type_: str, value: Optional[str], pos: int) -> "Token": + def __new__(cls, type_: str, value: str | None, pos: int) -> Self: obj = tuple.__new__(cls, (type_, value)) obj.pos = pos return obj @@ -912,19 +906,18 @@ def type(self) -> str: return self[0] @property - def value(self) -> Optional[str]: + def value(self) -> str | None: return self[1] def css(self) -> str: if self.type == "STRING": return repr(self.value) - else: - return typing.cast(str, self.value) + return cast(str, self.value) class EOFToken(Token): - def __new__(cls, pos: int) -> "EOFToken": - return typing.cast("EOFToken", Token.__new__(cls, "EOF", None, pos)) + def __new__(cls, pos: int) -> Self: + return Token.__new__(cls, "EOF", None, pos) def __repr__(self) -> str: return "<%s at %i>" % (self.type, self.pos) @@ -942,15 +935,13 @@ class TokenMacros: nmstart = "[_a-z]|%s|%s" % (escape, nonascii) -if typing.TYPE_CHECKING: +class MatchFunc(Protocol): + def __call__( + self, string: str, pos: int = ..., endpos: int = ... + ) -> re.Match[str] | None: ... - class MatchFunc(typing.Protocol): - def __call__( - self, string: str, pos: int = ..., endpos: int = ... - ) -> Optional["re.Match[str]"]: ... 
- -def _compile(pattern: str) -> "MatchFunc": +def _compile(pattern: str) -> MatchFunc: return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match @@ -964,14 +955,14 @@ def _compile(pattern: str) -> "MatchFunc": } _sub_simple_escape = re.compile(r"\\(.)").sub -_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub +_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.IGNORECASE).sub _sub_newline_escape = re.compile(r"\\(?:\n|\r\n|\r|\f)").sub # Same as r'\1', but faster on CPython _replace_simple = operator.methodcaller("group", 1) -def _replace_unicode(match: "re.Match[str]") -> str: +def _replace_unicode(match: re.Match[str]) -> str: codepoint = int(match.group(1), 16) if codepoint > sys.maxunicode: codepoint = 0xFFFD @@ -980,8 +971,7 @@ def _replace_unicode(match: "re.Match[str]") -> str: def unescape_ident(value: str) -> str: value = _sub_unicode_escape(_replace_unicode, value) - value = _sub_simple_escape(_replace_simple, value) - return value + return _sub_simple_escape(_replace_simple, value) def tokenize(s: str) -> Iterator[Token]: @@ -1056,44 +1046,44 @@ def tokenize(s: str) -> Iterator[Token]: class TokenStream: - def __init__(self, tokens: Iterable[Token], source: Optional[str] = None) -> None: - self.used: List[Token] = [] + def __init__(self, tokens: Iterable[Token], source: str | None = None) -> None: + self.used: list[Token] = [] self.tokens = iter(tokens) self.source = source - self.peeked: Optional[Token] = None + self.peeked: Token | None = None self._peeking = False self.next_token = self.tokens.__next__ def next(self) -> Token: if self._peeking: self._peeking = False - self.used.append(typing.cast(Token, self.peeked)) - return typing.cast(Token, self.peeked) - else: - next = self.next_token() - self.used.append(next) - return next + assert self.peeked is not None + self.used.append(self.peeked) + return self.peeked + next = self.next_token() + self.used.append(next) + return next def peek(self) -> Token: if 
not self._peeking: self.peeked = self.next_token() self._peeking = True - return typing.cast(Token, self.peeked) + assert self.peeked is not None + return self.peeked def next_ident(self) -> str: next = self.next() if next.type != "IDENT": raise SelectorSyntaxError("Expected ident, got %s" % (next,)) - return typing.cast(str, next.value) + return cast(str, next.value) - def next_ident_or_star(self) -> Optional[str]: + def next_ident_or_star(self) -> str | None: next = self.next() if next.type == "IDENT": return next.value - elif next == ("DELIM", "*"): + if next == ("DELIM", "*"): return None - else: - raise SelectorSyntaxError("Expected ident or '*', got %s" % (next,)) + raise SelectorSyntaxError("Expected ident or '*', got %s" % (next,)) def skip_whitespace(self) -> None: peek = self.peek() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index ee59f89..e9d1065 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -1,20 +1,21 @@ -# -*- coding: utf-8 -*- """ - cssselect.xpath - =============== +cssselect.xpath +=============== - Translation of parsed CSS selectors to XPath expressions. +Translation of parsed CSS selectors to XPath expressions. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. """ +from __future__ import annotations + import re -import typing -from typing import Optional +from collections.abc import Callable +from typing import TYPE_CHECKING, Optional, cast from cssselect.parser import ( Attrib, @@ -36,6 +37,10 @@ parse_series, ) +if TYPE_CHECKING: + # typing.Self requires Python 3.11 + from typing_extensions import Self + class ExpressionError(SelectorError, RuntimeError): """Unknown or unsupported selector (eg. 
pseudo-class).""" @@ -65,7 +70,7 @@ def __str__(self) -> str: def __repr__(self) -> str: return "%s[%s]" % (self.__class__.__name__, self) - def add_condition(self, condition: str, conjuction: str = "and") -> "XPathExpr": + def add_condition(self, condition: str, conjuction: str = "and") -> Self: if self.condition: self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition) else: @@ -91,10 +96,10 @@ def add_star_prefix(self) -> None: def join( self, combiner: str, - other: "XPathExpr", - closing_combiner: Optional[str] = None, + other: XPathExpr, + closing_combiner: str | None = None, has_inner_condition: bool = False, - ) -> "XPathExpr": + ) -> Self: path = str(self) + combiner # Any "star prefix" is redundant when joining. if other.path != "*/": @@ -274,33 +279,35 @@ def xpath_literal(s: str) -> str: elif '"' not in s: s = '"%s"' % s else: - s = "concat(%s)" % ",".join( - [ - (("'" in part) and '"%s"' or "'%s'") % part - for part in split_at_single_quotes(s) - if part - ] - ) + parts_quoted = [ + f'"{part}"' if "'" in part else f"'{part}'" + for part in split_at_single_quotes(s) + if part + ] + s = "concat({})".format(",".join(parts_quoted)) return s def xpath(self, parsed_selector: Tree) -> XPathExpr: """Translate any parsed selector object.""" type_name = type(parsed_selector).__name__ - method = getattr(self, "xpath_%s" % type_name.lower(), None) + method = cast( + Optional[Callable[[Tree], XPathExpr]], + getattr(self, "xpath_%s" % type_name.lower(), None), + ) if method is None: raise ExpressionError("%s is not supported." 
% type_name) - return typing.cast(XPathExpr, method(parsed_selector)) + return method(parsed_selector) # Dispatched by parsed object type def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr: """Translate a combined selector.""" combinator = self.combinator_mapping[combined.combinator] - method = getattr(self, "xpath_%s_combinator" % combinator) - return typing.cast( - XPathExpr, - method(self.xpath(combined.selector), self.xpath(combined.subselector)), + method = cast( + Callable[[XPathExpr, XPathExpr], XPathExpr], + getattr(self, "xpath_%s_combinator" % combinator), ) + return method(self.xpath(combined.selector), self.xpath(combined.subselector)) def xpath_negation(self, negation: Negation) -> XPathExpr: xpath = self.xpath(negation.selector) @@ -308,20 +315,22 @@ def xpath_negation(self, negation: Negation) -> XPathExpr: sub_xpath.add_name_test() if sub_xpath.condition: return xpath.add_condition("not(%s)" % sub_xpath.condition) - else: - return xpath.add_condition("0") + return xpath.add_condition("0") def xpath_relation(self, relation: Relation) -> XPathExpr: xpath = self.xpath(relation.selector) combinator = relation.combinator subselector = relation.subselector right = self.xpath(subselector.parsed_tree) - method = getattr( - self, - "xpath_relation_%s_combinator" - % self.combinator_mapping[typing.cast(str, combinator.value)], + method = cast( + Callable[[XPathExpr, XPathExpr], XPathExpr], + getattr( + self, + "xpath_relation_%s_combinator" + % self.combinator_mapping[cast(str, combinator.value)], + ), ) - return typing.cast(XPathExpr, method(xpath, right)) + return method(xpath, right) def xpath_matching(self, matching: Matching) -> XPathExpr: xpath = self.xpath(matching.selector) @@ -344,24 +353,32 @@ def xpath_specificityadjustment(self, matching: SpecificityAdjustment) -> XPathE def xpath_function(self, function: Function) -> XPathExpr: """Translate a functional pseudo-class.""" method_name = "xpath_%s_function" % 
function.name.replace("-", "_") - method = getattr(self, method_name, None) + method = cast( + Optional[Callable[[XPathExpr, Function], XPathExpr]], + getattr(self, method_name, None), + ) if not method: raise ExpressionError("The pseudo-class :%s() is unknown" % function.name) - return typing.cast(XPathExpr, method(self.xpath(function.selector), function)) + return method(self.xpath(function.selector), function) def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr: """Translate a pseudo-class.""" method_name = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_") - method = getattr(self, method_name, None) + method = cast( + Optional[Callable[[XPathExpr], XPathExpr]], getattr(self, method_name, None) + ) if not method: # TODO: better error message for pseudo-elements? raise ExpressionError("The pseudo-class :%s is unknown" % pseudo.ident) - return typing.cast(XPathExpr, method(self.xpath(pseudo.selector))) + return method(self.xpath(pseudo.selector)) def xpath_attrib(self, selector: Attrib) -> XPathExpr: """Translate an attribute selector.""" operator = self.attribute_operator_mapping[selector.operator] - method = getattr(self, "xpath_attrib_%s" % operator) + method = cast( + Callable[[XPathExpr, str, Optional[str]], XPathExpr], + getattr(self, "xpath_attrib_%s" % operator), + ) if self.lower_case_attribute_names: name = selector.attrib.lower() else: @@ -377,12 +394,10 @@ def xpath_attrib(self, selector: Attrib) -> XPathExpr: if selector.value is None: value = None elif self.lower_case_attribute_values: - value = typing.cast(str, selector.value.value).lower() + value = cast(str, selector.value.value).lower() else: value = selector.value.value - return typing.cast( - XPathExpr, method(self.xpath(selector.selector), attrib, value) - ) + return method(self.xpath(selector.selector), attrib, value) def xpath_class(self, class_selector: Class) -> XPathExpr: """Translate a class selector.""" @@ -459,12 +474,9 @@ def xpath_relation_direct_adjacent_combinator( self, left: 
XPathExpr, right: XPathExpr ) -> XPathExpr: """right is a sibling immediately after left; select left""" - xpath = left.add_condition( - "following-sibling::*[(name() = '{}') and (position() = 1)]".format( - right.element - ) + return left.add_condition( + f"following-sibling::*[(name() = '{right.element}') and (position() = 1)]" ) - return xpath def xpath_relation_indirect_adjacent_combinator( self, left: XPathExpr, right: XPathExpr @@ -483,8 +495,8 @@ def xpath_nth_child_function( ) -> XPathExpr: try: a, b = parse_series(function.arguments) - except ValueError: - raise ExpressionError("Invalid series: '%r'" % function.arguments) + except ValueError as ex: + raise ExpressionError("Invalid series: '%r'" % function.arguments) from ex # From https://www.w3.org/TR/css3-selectors/#structural-pseudos: # @@ -546,10 +558,7 @@ def xpath_nth_child_function( # `add_name_test` boolean is inverted and somewhat counter-intuitive: # # nth_of_type() calls nth_child(add_name_test=False) - if add_name_test: - nodetest = "*" - else: - nodetest = "%s" % xpath.element + nodetest = "*" if add_name_test else "%s" % xpath.element # count siblings before or after the element if not last: @@ -604,10 +613,7 @@ def xpath_nth_child_function( expressions.append("%s mod %s = 0" % (left, a)) - if len(expressions) > 1: - template = "(%s)" - else: - template = "%s" + template = "(%s)" if len(expressions) > 1 else "%s" xpath.add_condition( " and ".join(template % expression for expression in expressions) ) @@ -644,7 +650,7 @@ def xpath_contains_function( "Expected a single string or ident for :contains(), got %r" % function.arguments ) - value = typing.cast(str, function.arguments[0].value) + value = cast(str, function.arguments[0].value) return xpath.add_condition("contains(., %s)" % self.xpath_literal(value)) def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: @@ -653,7 +659,7 @@ def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr 
"Expected a single string or ident for :lang(), got %r" % function.arguments ) - value = typing.cast(str, function.arguments[0].value) + value = cast(str, function.arguments[0].value) return xpath.add_condition("lang(%s)" % (self.xpath_literal(value))) # Pseudo: dispatch by pseudo-class name @@ -713,21 +719,21 @@ def pseudo_never_matches(self, xpath: XPathExpr) -> XPathExpr: # Attrib: dispatch by attribute operator def xpath_attrib_exists( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert not value xpath.add_condition(name) return xpath def xpath_attrib_equals( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert value is not None xpath.add_condition("%s = %s" % (name, self.xpath_literal(value))) return xpath def xpath_attrib_different( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert value is not None # FIXME: this seems like a weird hack... @@ -740,36 +746,31 @@ def xpath_attrib_different( return xpath def xpath_attrib_includes( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value and is_non_whitespace(value): + arg = self.xpath_literal(" " + value + " ") xpath.add_condition( - "%s and contains(concat(' ', normalize-space(%s), ' '), %s)" - % (name, name, self.xpath_literal(" " + value + " ")) + f"{name} and contains(concat(' ', normalize-space({name}), ' '), {arg})" ) else: xpath.add_condition("0") return xpath def xpath_attrib_dashmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert value is not None + arg = self.xpath_literal(value) + arg_dash = self.xpath_literal(value + "-") # Weird, but true... 
xpath.add_condition( - "%s and (%s = %s or starts-with(%s, %s))" - % ( - name, - name, - self.xpath_literal(value), - name, - self.xpath_literal(value + "-"), - ) + f"{name} and ({name} = {arg} or starts-with({name}, {arg_dash}))" ) return xpath def xpath_attrib_prefixmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value: xpath.add_condition( @@ -780,7 +781,7 @@ def xpath_attrib_prefixmatch( return xpath def xpath_attrib_suffixmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value: # Oddly there is a starts-with in XPath 1.0, but not ends-with @@ -793,7 +794,7 @@ def xpath_attrib_suffixmatch( return xpath def xpath_attrib_substringmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value: # Attribute selectors are case sensitive @@ -831,7 +832,7 @@ def __init__(self, xhtml: bool = False) -> None: self.lower_case_element_names = True self.lower_case_attribute_names = True - def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] # FIXME: is this really all the elements? return xpath.add_condition( "(@selected and name(.) = 'option') or " @@ -848,16 +849,16 @@ def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr ) value = function.arguments[0].value assert value + arg = self.xpath_literal(value.lower() + "-") return xpath.add_condition( "ancestor-or-self::*[@lang][1][starts-with(concat(" # XPath 1.0 has no lower-case function... 
- "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " + f"translate(@{self.lang_attribute}, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " "'abcdefghijklmnopqrstuvwxyz'), " - "'-'), %s)]" - % (self.lang_attribute, self.xpath_literal(value.lower() + "-")) + f"'-'), {arg})]" ) - def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] return xpath.add_condition( "@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area')" ) @@ -865,7 +866,7 @@ def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore # Links are never visited, the implementation for :visited is the same # as in GenericTranslator - def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] # http://www.w3.org/TR/html5/section-index.html#attributes-1 return xpath.add_condition( """ @@ -895,7 +896,7 @@ def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore # FIXME: in the second half, add "and is not a descendant of that # fieldset element's first legend element child, if any." - def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] # http://www.w3.org/TR/html5/section-index.html#attributes-1 return xpath.add_condition( """ diff --git a/docs/conf.py b/docs/conf.py index aa5ae22..ceeb2d2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # cssselect documentation build configuration file, created by # sphinx-quickstart on Tue Mar 27 14:20:34 2012. @@ -12,9 +11,8 @@ # All configuration values have a default; values that are commented out # serve to show the default. 
-import os import re -import sys +from pathlib import Path # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -51,10 +49,7 @@ # built documents. # # The full version, including alpha/beta/rc tags. -with open( - os.path.join(os.path.dirname(__file__), "..", "cssselect", "__init__.py") -) as init_file: - init_py = init_file.read() +init_py = (Path(__file__).parent.parent / "cssselect" / "__init__.py").read_text() release = re.search('VERSION = "([^"]+)"', init_py).group(1) # The short X.Y version. version = release.rstrip("dev") @@ -258,5 +253,5 @@ nitpicky = True nitpick_ignore = [ # explicitly not a part of the public API - ("py:class", "cssselect.parser.Token"), + ("py:class", "Token"), ] diff --git a/pylintrc b/pylintrc deleted file mode 100644 index 5a4647b..0000000 --- a/pylintrc +++ /dev/null @@ -1,33 +0,0 @@ -[MASTER] -persistent=no - -[MESSAGES CONTROL] -disable=assignment-from-no-return, - c-extension-no-member, - consider-using-f-string, - consider-using-in, - fixme, - inconsistent-return-statements, - invalid-name, - missing-class-docstring, - missing-function-docstring, - missing-module-docstring, - multiple-imports, - no-else-return, - no-member, - raise-missing-from, - redefined-builtin, - redefined-outer-name, - too-few-public-methods, - too-many-arguments, - too-many-branches, - too-many-function-args, - too-many-lines, - too-many-positional-arguments, - too-many-public-methods, - too-many-statements, - undefined-variable, - unidiomatic-typecheck, - unspecified-encoding, - unused-argument, - unused-import, diff --git a/pyproject.toml b/pyproject.toml index 261fe3e..5ddbeb6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,179 @@ -[tool.isort] -profile = "black" -multi_line_output = 3 +[tool.bumpversion] +current_version = "1.2.0" +commit = true +tag = true + +[[tool.bumpversion.files]] +filename = "cssselect/__init__.py" + 
+[tool.coverage.run] +branch = true +source = ["cssselect"] + +[tool.coverage.report] +exclude_also = [ + "def __repr__", + "if sys.version_info", + "if __name__ == '__main__':", + "if TYPE_CHECKING:", +] [tool.mypy] check_untyped_defs = true ignore_missing_imports = true no_warn_no_return = true -[tool.black] -target-version = ["py38", "py39", "py310", "py311", "py312"] \ No newline at end of file +[tool.pylint.MASTER] +persistent = "no" +extension-pkg-allow-list = ["lxml"] + +[tool.pylint."MESSAGES CONTROL"] +enable = [ + "useless-suppression", +] +disable = [ + "consider-using-f-string", + "fixme", + "invalid-name", + "line-too-long", + "missing-class-docstring", + "missing-function-docstring", + "missing-module-docstring", + "no-member", + "not-callable", + "redefined-builtin", + "redefined-outer-name", + "too-few-public-methods", + "too-many-arguments", + "too-many-branches", + "too-many-function-args", + "too-many-lines", + "too-many-locals", + "too-many-positional-arguments", + "too-many-public-methods", + "too-many-statements", + "unused-argument", +] + +[tool.pytest.ini_options] +testpaths = ["tests"] + +[tool.ruff.lint] +extend-select = [ + # flake8-bugbear + "B", + # flake8-comprehensions + "C4", + # pydocstyle + "D", + # flake8-future-annotations + "FA", + # flynt + "FLY", + # refurb + "FURB", + # isort + "I", + # flake8-implicit-str-concat + "ISC", + # flake8-logging + "LOG", + # Perflint + "PERF", + # pygrep-hooks + "PGH", + # flake8-pie + "PIE", + # pylint + "PL", + # flake8-use-pathlib + "PTH", + # flake8-pyi + "PYI", + # flake8-quotes + "Q", + # flake8-return + "RET", + # flake8-raise + "RSE", + # Ruff-specific rules + "RUF", + # flake8-bandit + "S", + # flake8-simplify + "SIM", + # flake8-slots + "SLOT", + # flake8-debugger + "T10", + # flake8-type-checking + "TC", + # pyupgrade + "UP", + # pycodestyle warnings + "W", + # flake8-2020 + "YTT", +] +ignore = [ + # Missing docstring in public module + "D100", + # Missing docstring in public class + 
"D101", + # Missing docstring in public method + "D102", + # Missing docstring in public function + "D103", + # Missing docstring in public package + "D104", + # Missing docstring in magic method + "D105", + # Missing docstring in public nested class + "D106", + # Missing docstring in __init__ + "D107", + # One-line docstring should fit on one line with quotes + "D200", + # No blank lines allowed after function docstring + "D202", + # 1 blank line required between summary line and description + "D205", + # Multi-line docstring closing quotes should be on a separate line + "D209", + # First line should end with a period + "D400", + # First line should be in imperative mood; try rephrasing + "D401", + # First line should not be the function's "signature" + "D402", + # First word of the first line should be properly capitalized + "D403", + # Too many return statements + "PLR0911", + # Too many branches + "PLR0912", + # Too many arguments in function definition + "PLR0913", + # Too many statements + "PLR0915", + # Magic value used in comparison + "PLR2004", + # String contains ambiguous {}. + "RUF001", + # Docstring contains ambiguous {}. + "RUF002", + # Comment contains ambiguous {}. 
+ "RUF003", + # Mutable class attributes should be annotated with `typing.ClassVar` + "RUF012", + # Use of `assert` detected + "S101", + # Using lxml to parse untrusted data is known to be vulnerable to XML attacks + "S320", + + # TODO: Use format specifiers instead of percent format + "UP031", +] + +[tool.ruff.lint.pydocstyle] +convention = "pep257" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index b8c93b1..0000000 --- a/setup.cfg +++ /dev/null @@ -1,13 +0,0 @@ -[build_sphinx] -source-dir = docs -build-dir = docs/_build -#all_files = 1 - -[upload_sphinx] # Sphinx-PyPI-upload -upload-dir = docs/_build/html - -[tool:pytest] -testpaths = tests - -[bdist_wheel] -universal = 1 diff --git a/setup.py b/setup.py index 4c5d49d..cb870dd 100644 --- a/setup.py +++ b/setup.py @@ -1,15 +1,11 @@ -# -*- coding: utf-8 -*- - -import os.path import re +from pathlib import Path from setuptools import setup -ROOT = os.path.dirname(__file__) -with open(os.path.join(ROOT, "README.rst")) as readme_file: - README = readme_file.read() -with open(os.path.join(ROOT, "cssselect", "__init__.py")) as init_file: - INIT_PY = init_file.read() +ROOT = Path(__file__).parent +README = (ROOT / "README.rst").read_text(encoding="utf-8") +INIT_PY = (ROOT / "cssselect" / "__init__.py").read_text(encoding="utf-8") VERSION = re.search('VERSION = "([^"]+)"', INIT_PY).group(1) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 32c1683..0a95f92 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -1,26 +1,27 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """ - Tests for cssselect - =================== +Tests for cssselect +=================== - These tests can be run either by py.test or by the standard library's - unittest. They use plain ``assert`` statements and do little reporting - themselves in case of failure. +These tests can be run either by py.test or by the standard library's +unittest. 
They use plain ``assert`` statements and do little reporting +themselves in case of failure. - Use py.test to get fancy error reporting and assert introspection. +Use py.test to get fancy error reporting and assert introspection. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. """ +from __future__ import annotations + import sys import typing import unittest -from typing import List, Optional, Sequence, Tuple +from typing import TYPE_CHECKING from lxml import etree, html @@ -41,6 +42,9 @@ ) from cssselect.xpath import XPathExpr +if TYPE_CHECKING: + from collections.abc import Sequence + class TestCssselect(unittest.TestCase): def test_tokenizer(self) -> None: @@ -70,16 +74,13 @@ def test_tokenizer(self) -> None: ] def test_parser(self) -> None: - def repr_parse(css: str) -> List[str]: + def repr_parse(css: str) -> list[str]: selectors = parse(css) for selector in selectors: assert selector.pseudo_element is None - return [ - repr(selector.parsed_tree).replace("(u'", "('") - for selector in selectors - ] + return [repr(selector.parsed_tree) for selector in selectors] - def parse_many(first: str, *others: str) -> List[str]: + def parse_many(first: str, *others: str) -> list[str]: result = repr_parse(first) for other in others: assert repr_parse(other) == result @@ -185,18 +186,18 @@ def parse_many(first: str, *others: str) -> List[str]: ] def test_pseudo_elements(self) -> None: - def parse_pseudo(css: str) -> List[Tuple[str, Optional[str]]]: - result: List[Tuple[str, Optional[str]]] = [] + def parse_pseudo(css: str) -> list[tuple[str, str | None]]: + result: list[tuple[str, str | None]] = [] for selector in parse(css): pseudo = selector.pseudo_element pseudo = str(pseudo) if pseudo else pseudo # No Symbol here - assert pseudo is None or 
type(pseudo) is str - selector_as_str = repr(selector.parsed_tree).replace("(u'", "('") + assert pseudo is None or isinstance(pseudo, str) + selector_as_str = repr(selector.parsed_tree) result.append((selector_as_str, pseudo)) return result - def parse_one(css: str) -> Tuple[str, Optional[str]]: + def parse_one(css: str) -> tuple[str, str | None]: result = parse_pseudo(css) assert len(result) == 1 return result[0] @@ -280,7 +281,7 @@ def test_pseudo_repr(css: str) -> str: assert test_pseudo_repr(":scope") == "Pseudo[Element[*]:scope]" def test_specificity(self) -> None: - def specificity(css: str) -> Tuple[int, int, int]: + def specificity(css: str) -> tuple[int, int, int]: selectors = parse(css) assert len(selectors) == 1 return selectors[0].specificity() @@ -326,7 +327,7 @@ def specificity(css: str) -> Tuple[int, int, int]: ) def test_css_export(self) -> None: - def css2css(css: str, res: Optional[str] = None) -> None: + def css2css(css: str, res: str | None = None) -> None: selectors = parse(css) assert len(selectors) == 1 assert selectors[0].canonical() == (res or css) @@ -365,12 +366,11 @@ def css2css(css: str, res: Optional[str] = None) -> None: css2css("foo > *") def test_parse_errors(self) -> None: - def get_error(css: str) -> Optional[str]: + def get_error(css: str) -> str | None: try: parse(css) except SelectorSyntaxError: - # Py2, Py3, ... 
- return str(sys.exc_info()[1]).replace("(u'", "('") + return str(sys.exc_info()[1]) return None assert get_error("attributes(href)/html/body/a") == ( @@ -452,7 +452,7 @@ def xpath(css: str) -> str: assert xpath("e[foo|bar]") == "e[@foo:bar]" assert xpath('e[foo="bar"]') == "e[@foo = 'bar']" assert xpath('e[foo~="bar"]') == ( - "e[@foo and contains(" "concat(' ', normalize-space(@foo), ' '), ' bar ')]" + "e[@foo and contains(concat(' ', normalize-space(@foo), ' '), ' bar ')]" ) assert xpath('e[foo^="bar"]') == ("e[@foo and starts-with(@foo, 'bar')]") assert xpath('e[foo$="bar"]') == ( @@ -460,7 +460,7 @@ def xpath(css: str) -> str: ) assert xpath('e[foo*="bar"]') == ("e[@foo and contains(@foo, 'bar')]") assert xpath('e[hreflang|="en"]') == ( - "e[@hreflang and (" "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]" + "e[@hreflang and (@hreflang = 'en' or starts-with(@hreflang, 'en-'))]" ) # --- nth-* and nth-last-* ------------------------------------- @@ -719,19 +719,17 @@ def xpath(css: str) -> str: ) assert xpath(":scope") == "descendant-or-self::*[1]" assert xpath(":first-or-second[href]") == ( - "descendant-or-self::*[(@id = 'first' or @id = 'second') " "and (@href)]" + "descendant-or-self::*[(@id = 'first' or @id = 'second') and (@href)]" ) assert str(XPathExpr("", "", condition="@href")) == "[@href]" document = etree.fromstring(OPERATOR_PRECEDENCE_IDS) - sort_key = dict( - (el, count) for count, el in enumerate(document.iter()) - ).__getitem__ + sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__ - def operator_id(selector: str) -> List[str]: + def operator_id(selector: str) -> list[str]: xpath = CustomTranslator().css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast(list["etree._Element"], document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] @@ -740,7 +738,7 @@ def operator_id(selector: str) -> List[str]: assert 
operator_id("[href]:first-or-second") == ["second"] def test_series(self) -> None: - def series(css: str) -> Optional[Tuple[int, int]]: + def series(css: str) -> tuple[int, int] | None: (selector,) = parse(":nth-child(%s)" % css) args = typing.cast(FunctionalPseudoElement, selector.parsed_tree).arguments try: @@ -769,14 +767,12 @@ def series(css: str) -> Optional[Tuple[int, int]]: def test_lang(self) -> None: document = etree.fromstring(XMLLANG_IDS) - sort_key = dict( - (el, count) for count, el in enumerate(document.iter()) - ).__getitem__ + sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__ css_to_xpath = GenericTranslator().css_to_xpath - def langid(selector: str) -> List[str]: + def langid(selector: str) -> list[str]: xpath = css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast(list["etree._Element"], document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] @@ -799,7 +795,7 @@ def langid(selector: str) -> List[str]: def test_argument_types(self) -> None: class CustomTranslator(GenericTranslator): def __init__(self) -> None: - self.argument_types: List[str] = [] + self.argument_types: list[str] = [] def xpath_pseudo_element( self, xpath: XPathExpr, pseudo_element: PseudoElement @@ -809,12 +805,12 @@ def xpath_pseudo_element( ).argument_types() return xpath - def argument_types(css: str) -> List[str]: + def argument_types(css: str) -> list[str]: translator = CustomTranslator() translator.css_to_xpath(css) return translator.argument_types - mappings: List[Tuple[str, List[str]]] = [ + mappings: list[tuple[str, list[str]]] = [ ("", []), ("ident", ["IDENT"]), ('"string"', ["STRING"]), @@ -826,23 +822,21 @@ def argument_types(css: str) -> List[str]: def test_select(self) -> None: document = etree.fromstring(HTML_IDS) - sort_key = dict( - (el, count) for count, el in enumerate(document.iter()) - ).__getitem__ + sort_key = {el: count for 
count, el in enumerate(document.iter())}.__getitem__ css_to_xpath = GenericTranslator().css_to_xpath html_css_to_xpath = HTMLTranslator().css_to_xpath - def select_ids(selector: str, html_only: bool) -> List[str]: + def select_ids(selector: str, html_only: bool) -> list[str]: xpath = css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast(list["etree._Element"], document.xpath(xpath)) if html_only: assert items == [] xpath = html_css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast(list["etree._Element"], document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] - def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: + def pcss(main: str, *selectors: str, **kwargs: bool) -> list[str]: html_only = kwargs.pop("html_only", False) result = select_ids(main, html_only) for selector in selectors: @@ -1072,14 +1066,14 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: def test_select_shakespeare(self) -> None: document = html.document_fromstring(HTML_SHAKESPEARE) - body = typing.cast(List["etree._Element"], document.xpath("//body"))[0] + body = typing.cast(list["etree._Element"], document.xpath("//body"))[0] css_to_xpath = GenericTranslator().css_to_xpath basestring_ = (str, bytes) def count(selector: str) -> int: xpath = css_to_xpath(selector) - results = typing.cast(List["etree._Element"], body.xpath(xpath)) + results = typing.cast(list["etree._Element"], body.xpath(xpath)) assert not isinstance(results, basestring_) found = set() for item in results: @@ -1527,7 +1521,7 @@ def count(selector: str) -> int: -""" # noqa: W191,E101 +""" if __name__ == "__main__": diff --git a/tox.ini b/tox.ini index 616d223..3585406 100644 --- a/tox.ini +++ b/tox.ini @@ -16,7 +16,7 @@ commands = [testenv:pylint] deps = {[testenv]deps} - pylint==3.3.1 + pylint==3.3.4 commands = pylint {posargs: 
cssselect setup.py tests docs} @@ -30,8 +30,8 @@ commands = [testenv:typing] deps = {[testenv]deps} - mypy==1.11.2 - types-lxml==2024.9.16 + mypy==1.14.1 + types-lxml==2024.12.13 commands = mypy --strict {posargs: cssselect tests} From c8f18fa7ba39bb9d751ba42dae33d50c3fb2f7aa Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 4 Feb 2025 13:08:27 +0400 Subject: [PATCH 185/208] Upgrade string formatting. (#151) --- cssselect/parser.py | 169 +++++++++++++++------------------------- cssselect/xpath.py | 96 ++++++++++------------- pyproject.toml | 3 - tests/test_cssselect.py | 17 ++-- 4 files changed, 113 insertions(+), 172 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index d16751f..13ae959 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -105,20 +105,20 @@ def __repr__(self) -> str: if isinstance(self.pseudo_element, FunctionalPseudoElement): pseudo_element = repr(self.pseudo_element) elif self.pseudo_element: - pseudo_element = "::%s" % self.pseudo_element + pseudo_element = f"::{self.pseudo_element}" else: pseudo_element = "" - return "%s[%r%s]" % (self.__class__.__name__, self.parsed_tree, pseudo_element) + return f"{self.__class__.__name__}[{self.parsed_tree!r}{pseudo_element}]" def canonical(self) -> str: """Return a CSS representation for this selector (a string)""" if isinstance(self.pseudo_element, FunctionalPseudoElement): - pseudo_element = "::%s" % self.pseudo_element.canonical() + pseudo_element = f"::{self.pseudo_element.canonical()}" elif self.pseudo_element: - pseudo_element = "::%s" % self.pseudo_element + pseudo_element = f"::{self.pseudo_element}" else: pseudo_element = "" - res = "%s%s" % (self.parsed_tree.canonical(), pseudo_element) + res = f"{self.parsed_tree.canonical()}{pseudo_element}" if len(res) > 1: res = res.lstrip("*") return res @@ -145,10 +145,10 @@ def __init__(self, selector: Tree, class_name: str) -> None: self.class_name = class_name def __repr__(self) -> str: - return "%s[%r.%s]" % 
(self.__class__.__name__, self.selector, self.class_name) + return f"{self.__class__.__name__}[{self.selector!r}.{self.class_name}]" def canonical(self) -> str: - return "%s.%s" % (self.selector.canonical(), self.class_name) + return f"{self.selector.canonical()}.{self.class_name}" def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() @@ -179,18 +179,15 @@ def __init__(self, name: str, arguments: Sequence[Token]): self.arguments = arguments def __repr__(self) -> str: - return "%s[::%s(%r)]" % ( - self.__class__.__name__, - self.name, - [token.value for token in self.arguments], - ) + token_values = [token.value for token in self.arguments] + return f"{self.__class__.__name__}[::{self.name}({token_values!r})]" def argument_types(self) -> list[str]: return [token.type for token in self.arguments] def canonical(self) -> str: args = "".join(token.css() for token in self.arguments) - return "%s(%s)" % (self.name, args) + return f"{self.name}({args})" class Function: @@ -204,19 +201,15 @@ def __init__(self, selector: Tree, name: str, arguments: Sequence[Token]) -> Non self.arguments = arguments def __repr__(self) -> str: - return "%s[%r:%s(%r)]" % ( - self.__class__.__name__, - self.selector, - self.name, - [token.value for token in self.arguments], - ) + token_values = [token.value for token in self.arguments] + return f"{self.__class__.__name__}[{self.selector!r}:{self.name}({token_values!r})]" def argument_types(self) -> list[str]: return [token.type for token in self.arguments] def canonical(self) -> str: args = "".join(token.css() for token in self.arguments) - return "%s:%s(%s)" % (self.selector.canonical(), self.name, args) + return f"{self.selector.canonical()}:{self.name}({args})" def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() @@ -234,10 +227,10 @@ def __init__(self, selector: Tree, ident: str) -> None: self.ident = ascii_lower(ident) def __repr__(self) -> str: - return "%s[%r:%s]" % 
(self.__class__.__name__, self.selector, self.ident) + return f"{self.__class__.__name__}[{self.selector!r}:{self.ident}]" def canonical(self) -> str: - return "%s:%s" % (self.selector.canonical(), self.ident) + return f"{self.selector.canonical()}:{self.ident}" def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() @@ -255,17 +248,13 @@ def __init__(self, selector: Tree, subselector: Tree) -> None: self.subselector = subselector def __repr__(self) -> str: - return "%s[%r:not(%r)]" % ( - self.__class__.__name__, - self.selector, - self.subselector, - ) + return f"{self.__class__.__name__}[{self.selector!r}:not({self.subselector!r})]" def canonical(self) -> str: subsel = self.subselector.canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") - return "%s:not(%s)" % (self.selector.canonical(), subsel) + return f"{self.selector.canonical()}:not({subsel})" def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() @@ -284,11 +273,7 @@ def __init__(self, selector: Tree, combinator: Token, subselector: Selector): self.subselector = subselector def __repr__(self) -> str: - return "%s[%r:has(%r)]" % ( - self.__class__.__name__, - self.selector, - self.subselector, - ) + return f"{self.__class__.__name__}[{self.selector!r}:has({self.subselector!r})]" def canonical(self) -> str: try: @@ -297,7 +282,7 @@ def canonical(self) -> str: subsel = self.subselector.canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") - return "%s:has(%s)" % (self.selector.canonical(), subsel) + return f"{self.selector.canonical()}:has({subsel})" def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() @@ -318,21 +303,16 @@ def __init__(self, selector: Tree, selector_list: Iterable[Tree]): self.selector_list = selector_list def __repr__(self) -> str: - return "%s[%r:is(%s)]" % ( - self.__class__.__name__, - self.selector, - ", ".join(map(repr, self.selector_list)), - ) + args_str = ", ".join(repr(s) 
for s in self.selector_list) + return f"{self.__class__.__name__}[{self.selector!r}:is({args_str})]" def canonical(self) -> str: selector_arguments = [] for s in self.selector_list: selarg = s.canonical() selector_arguments.append(selarg.lstrip("*")) - return "%s:is(%s)" % ( - self.selector.canonical(), - ", ".join(map(str, selector_arguments)), - ) + args_str = ", ".join(str(s) for s in selector_arguments) + return f"{self.selector.canonical()}:is({args_str})" def specificity(self) -> tuple[int, int, int]: return max(x.specificity() for x in self.selector_list) @@ -349,21 +329,16 @@ def __init__(self, selector: Tree, selector_list: list[Tree]): self.selector_list = selector_list def __repr__(self) -> str: - return "%s[%r:where(%s)]" % ( - self.__class__.__name__, - self.selector, - ", ".join(map(repr, self.selector_list)), - ) + args_str = ", ".join(repr(s) for s in self.selector_list) + return f"{self.__class__.__name__}[{self.selector!r}:where({args_str})]" def canonical(self) -> str: selector_arguments = [] for s in self.selector_list: selarg = s.canonical() selector_arguments.append(selarg.lstrip("*")) - return "%s:where(%s)" % ( - self.selector.canonical(), - ", ".join(map(str, selector_arguments)), - ) + args_str = ", ".join(str(s) for s in selector_arguments) + return f"{self.selector.canonical()}:where({args_str})" def specificity(self) -> tuple[int, int, int]: return 0, 0, 0 @@ -409,38 +384,22 @@ def __init__( self.value = value def __repr__(self) -> str: - if self.namespace: - attrib = "%s|%s" % (self.namespace, self.attrib) - else: - attrib = self.attrib + attrib = f"{self.namespace}|{self.attrib}" if self.namespace else self.attrib if self.operator == "exists": - return "%s[%r[%s]]" % (self.__class__.__name__, self.selector, attrib) + return f"{self.__class__.__name__}[{self.selector!r}[{attrib}]]" assert self.value is not None - return "%s[%r[%s %s %r]]" % ( - self.__class__.__name__, - self.selector, - attrib, - self.operator, - self.value.value, - ) 
+ return f"{self.__class__.__name__}[{self.selector!r}[{attrib} {self.operator} {self.value.value!r}]]" def canonical(self) -> str: - if self.namespace: - attrib = "%s|%s" % (self.namespace, self.attrib) - else: - attrib = self.attrib + attrib = f"{self.namespace}|{self.attrib}" if self.namespace else self.attrib if self.operator == "exists": op = attrib else: assert self.value is not None - op = "%s%s%s" % ( - attrib, - self.operator, - self.value.css(), - ) + op = f"{attrib}{self.operator}{self.value.css()}" - return "%s[%s]" % (self.selector.canonical(), op) + return f"{self.selector.canonical()}[{op}]" def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() @@ -463,12 +422,12 @@ def __init__( self.element = element def __repr__(self) -> str: - return "%s[%s]" % (self.__class__.__name__, self.canonical()) + return f"{self.__class__.__name__}[{self.canonical()}]" def canonical(self) -> str: element = self.element or "*" if self.namespace: - element = "%s|%s" % (self.namespace, element) + element = f"{self.namespace}|{element}" return element def specificity(self) -> tuple[int, int, int]: @@ -487,10 +446,10 @@ def __init__(self, selector: Tree, id: str) -> None: self.id = id def __repr__(self) -> str: - return "%s[%r#%s]" % (self.__class__.__name__, self.selector, self.id) + return f"{self.__class__.__name__}[{self.selector!r}#{self.id}]" def canonical(self) -> str: - return "%s#%s" % (self.selector.canonical(), self.id) + return f"{self.selector.canonical()}#{self.id}" def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() @@ -507,18 +466,15 @@ def __init__(self, selector: Tree, combinator: str, subselector: Tree) -> None: def __repr__(self) -> str: comb = "" if self.combinator == " " else self.combinator - return "%s[%r %s %r]" % ( - self.__class__.__name__, - self.selector, - comb, - self.subselector, + return ( + f"{self.__class__.__name__}[{self.selector!r} {comb} {self.subselector!r}]" ) def 
canonical(self) -> str: subsel = self.subselector.canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") - return "%s %s %s" % (self.selector.canonical(), self.combinator, subsel) + return f"{self.selector.canonical()} {self.combinator} {subsel}" def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() @@ -602,7 +558,7 @@ def parse_selector(stream: TokenStream) -> tuple[Tree, PseudoElement | None]: break if pseudo_element: raise SelectorSyntaxError( - "Got pseudo-element ::%s not at the end of a selector" % pseudo_element + f"Got pseudo-element ::{pseudo_element} not at the end of a selector" ) if peek.is_delim("+", ">", "~"): # A combinator @@ -649,7 +605,7 @@ def parse_simple_selector( break if pseudo_element: raise SelectorSyntaxError( - "Got pseudo-element ::%s not at the end of a selector" % pseudo_element + f"Got pseudo-element ::{pseudo_element} not at the end of a selector" ) if peek.type == "HASH": result = Hash(result, cast(str, stream.next().value)) @@ -707,11 +663,10 @@ def parse_simple_selector( next = stream.next() if argument_pseudo_element: raise SelectorSyntaxError( - "Got pseudo-element ::%s inside :not() at %s" - % (argument_pseudo_element, next.pos) + f"Got pseudo-element ::{argument_pseudo_element} inside :not() at {next.pos}" ) if next != ("DELIM", ")"): - raise SelectorSyntaxError("Expected ')', got %s" % (next,)) + raise SelectorSyntaxError(f"Expected ')', got {next}") result = Negation(result, argument) elif ident.lower() == "has": combinator, arguments = parse_relative_selector(stream) @@ -726,9 +681,9 @@ def parse_simple_selector( else: result = Function(result, ident, parse_arguments(stream)) else: - raise SelectorSyntaxError("Expected selector, got %s" % (peek,)) + raise SelectorSyntaxError(f"Expected selector, got {peek}") if len(stream.used) == selector_start: - raise SelectorSyntaxError("Expected selector, got %s" % (stream.peek(),)) + raise SelectorSyntaxError(f"Expected selector, got 
{stream.peek()}") return result, pseudo_element @@ -745,7 +700,7 @@ def parse_arguments(stream: TokenStream) -> list[Token]: elif next == ("DELIM", ")"): return arguments else: - raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) + raise SelectorSyntaxError(f"Expected an argument, got {next}") def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]: @@ -770,7 +725,7 @@ def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]: result = parse(subselector) return combinator, result[0] else: - raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) + raise SelectorSyntaxError(f"Expected an argument, got {next}") next = stream.next() @@ -780,7 +735,7 @@ def parse_simple_selector_arguments(stream: TokenStream) -> list[Tree]: result, pseudo_element = parse_simple_selector(stream, True) if pseudo_element: raise SelectorSyntaxError( - "Got pseudo-element ::%s inside function" % (pseudo_element,) + f"Got pseudo-element ::{pseudo_element} inside function" ) stream.skip_whitespace() next = stream.next() @@ -792,7 +747,7 @@ def parse_simple_selector_arguments(stream: TokenStream) -> list[Tree]: arguments.append(result) break else: - raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) + raise SelectorSyntaxError(f"Expected an argument, got {next}") return arguments @@ -800,7 +755,7 @@ def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib: stream.skip_whitespace() attrib = stream.next_ident_or_star() if attrib is None and stream.peek() != ("DELIM", "|"): - raise SelectorSyntaxError("Expected '|', got %s" % (stream.peek(),)) + raise SelectorSyntaxError(f"Expected '|', got {stream.peek()}") namespace: str | None op: str | None if stream.peek() == ("DELIM", "|"): @@ -828,15 +783,15 @@ def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib: op = cast(str, next.value) + "=" stream.next() else: - raise SelectorSyntaxError("Operator expected, got %s" % (next,)) + raise 
SelectorSyntaxError(f"Operator expected, got {next}") stream.skip_whitespace() value = stream.next() if value.type not in ("IDENT", "STRING"): - raise SelectorSyntaxError("Expected string or ident, got %s" % (value,)) + raise SelectorSyntaxError(f"Expected string or ident, got {value}") stream.skip_whitespace() next = stream.next() if next != ("DELIM", "]"): - raise SelectorSyntaxError("Expected ']', got %s" % (next,)) + raise SelectorSyntaxError(f"Expected ']', got {next}") return Attrib(selector, namespace, cast(str, attrib), op, value) @@ -894,7 +849,7 @@ def __new__(cls, type_: str, value: str | None, pos: int) -> Self: return obj def __repr__(self) -> str: - return "<%s '%s' at %i>" % (self.type, self.value, self.pos) + return f"<{self.type} '{self.value}' at {self.pos}>" def is_delim(self, *values: str) -> bool: return self.type == "DELIM" and self.value in values @@ -920,7 +875,7 @@ def __new__(cls, pos: int) -> Self: return Token.__new__(cls, "EOF", None, pos) def __repr__(self) -> str: - return "<%s at %i>" % (self.type, self.pos) + return f"<{self.type} at {self.pos}>" #### Tokenizer @@ -931,8 +886,8 @@ class TokenMacros: escape = unicode_escape + r"|\\[^\n\r\f0-9a-f]" string_escape = r"\\(?:\n|\r\n|\r|\f)|" + escape nonascii = r"[^\0-\177]" - nmchar = "[_a-z0-9-]|%s|%s" % (escape, nonascii) - nmstart = "[_a-z]|%s|%s" % (escape, nonascii) + nmchar = f"[_a-z0-9-]|{escape}|{nonascii}" + nmstart = f"[_a-z]|{escape}|{nonascii}" class MatchFunc(Protocol): @@ -1009,9 +964,9 @@ def tokenize(s: str) -> Iterator[Token]: assert match, "Should have found at least an empty match" end_pos = match.end() if end_pos == len_s: - raise SelectorSyntaxError("Unclosed string at %s" % pos) + raise SelectorSyntaxError(f"Unclosed string at {pos}") if s[end_pos] != quote: - raise SelectorSyntaxError("Invalid string at %s" % pos) + raise SelectorSyntaxError(f"Invalid string at {pos}") value = _sub_simple_escape( _replace_simple, _sub_unicode_escape( @@ -1074,7 +1029,7 @@ def 
peek(self) -> Token: def next_ident(self) -> str: next = self.next() if next.type != "IDENT": - raise SelectorSyntaxError("Expected ident, got %s" % (next,)) + raise SelectorSyntaxError(f"Expected ident, got {next}") return cast(str, next.value) def next_ident_or_star(self) -> str | None: @@ -1083,7 +1038,7 @@ def next_ident_or_star(self) -> str | None: return next.value if next == ("DELIM", "*"): return None - raise SelectorSyntaxError("Expected ident or '*', got %s" % (next,)) + raise SelectorSyntaxError(f"Expected ident or '*', got {next}") def skip_whitespace(self) -> None: peek = self.peek() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index e9d1065..4018bcf 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -64,15 +64,15 @@ def __init__( def __str__(self) -> str: path = str(self.path) + str(self.element) if self.condition: - path += "[%s]" % self.condition + path += f"[{self.condition}]" return path def __repr__(self) -> str: - return "%s[%s]" % (self.__class__.__name__, self) + return f"{self.__class__.__name__}[{self}]" def add_condition(self, condition: str, conjuction: str = "and") -> Self: if self.condition: - self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition) + self.condition = f"({self.condition}) {conjuction} ({condition})" else: self.condition = condition return self @@ -81,9 +81,7 @@ def add_name_test(self) -> None: if self.element == "*": # We weren't doing a test anyway return - self.add_condition( - "name() = %s" % GenericTranslator.xpath_literal(self.element) - ) + self.add_condition(f"name() = {GenericTranslator.xpath_literal(self.element)}") self.element = "*" def add_star_prefix(self) -> None: @@ -253,7 +251,7 @@ def selector_to_xpath( """ tree = getattr(selector, "parsed_tree", None) if not tree: - raise TypeError("Expected a parsed selector, got %r" % (selector,)) + raise TypeError(f"Expected a parsed selector, got {selector!r}") xpath = self.xpath(tree) assert isinstance(xpath, self.xpathexpr_cls) # 
help debug a missing 'return' if translate_pseudo_elements and selector.pseudo_element: @@ -275,9 +273,9 @@ def xpath_pseudo_element( def xpath_literal(s: str) -> str: s = str(s) if "'" not in s: - s = "'%s'" % s + s = f"'{s}'" elif '"' not in s: - s = '"%s"' % s + s = f'"{s}"' else: parts_quoted = [ f'"{part}"' if "'" in part else f"'{part}'" @@ -292,10 +290,10 @@ def xpath(self, parsed_selector: Tree) -> XPathExpr: type_name = type(parsed_selector).__name__ method = cast( Optional[Callable[[Tree], XPathExpr]], - getattr(self, "xpath_%s" % type_name.lower(), None), + getattr(self, f"xpath_{type_name.lower()}", None), ) if method is None: - raise ExpressionError("%s is not supported." % type_name) + raise ExpressionError(f"{type_name} is not supported.") return method(parsed_selector) # Dispatched by parsed object type @@ -305,7 +303,7 @@ def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr: combinator = self.combinator_mapping[combined.combinator] method = cast( Callable[[XPathExpr, XPathExpr], XPathExpr], - getattr(self, "xpath_%s_combinator" % combinator), + getattr(self, f"xpath_{combinator}_combinator"), ) return method(self.xpath(combined.selector), self.xpath(combined.subselector)) @@ -314,7 +312,7 @@ def xpath_negation(self, negation: Negation) -> XPathExpr: sub_xpath = self.xpath(negation.subselector) sub_xpath.add_name_test() if sub_xpath.condition: - return xpath.add_condition("not(%s)" % sub_xpath.condition) + return xpath.add_condition(f"not({sub_xpath.condition})") return xpath.add_condition("0") def xpath_relation(self, relation: Relation) -> XPathExpr: @@ -326,8 +324,7 @@ def xpath_relation(self, relation: Relation) -> XPathExpr: Callable[[XPathExpr, XPathExpr], XPathExpr], getattr( self, - "xpath_relation_%s_combinator" - % self.combinator_mapping[cast(str, combinator.value)], + f"xpath_relation_{self.combinator_mapping[cast(str, combinator.value)]}_combinator", ), ) return method(xpath, right) @@ -352,24 +349,24 @@ def 
xpath_specificityadjustment(self, matching: SpecificityAdjustment) -> XPathE def xpath_function(self, function: Function) -> XPathExpr: """Translate a functional pseudo-class.""" - method_name = "xpath_%s_function" % function.name.replace("-", "_") + method_name = "xpath_{}_function".format(function.name.replace("-", "_")) method = cast( Optional[Callable[[XPathExpr, Function], XPathExpr]], getattr(self, method_name, None), ) if not method: - raise ExpressionError("The pseudo-class :%s() is unknown" % function.name) + raise ExpressionError(f"The pseudo-class :{function.name}() is unknown") return method(self.xpath(function.selector), function) def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr: """Translate a pseudo-class.""" - method_name = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_") + method_name = "xpath_{}_pseudo".format(pseudo.ident.replace("-", "_")) method = cast( Optional[Callable[[XPathExpr], XPathExpr]], getattr(self, method_name, None) ) if not method: # TODO: better error message for pseudo-elements? 
- raise ExpressionError("The pseudo-class :%s is unknown" % pseudo.ident) + raise ExpressionError(f"The pseudo-class :{pseudo.ident} is unknown") return method(self.xpath(pseudo.selector)) def xpath_attrib(self, selector: Attrib) -> XPathExpr: @@ -377,7 +374,7 @@ def xpath_attrib(self, selector: Attrib) -> XPathExpr: operator = self.attribute_operator_mapping[selector.operator] method = cast( Callable[[XPathExpr, str, Optional[str]], XPathExpr], - getattr(self, "xpath_attrib_%s" % operator), + getattr(self, f"xpath_attrib_{operator}"), ) if self.lower_case_attribute_names: name = selector.attrib.lower() @@ -385,12 +382,12 @@ def xpath_attrib(self, selector: Attrib) -> XPathExpr: name = selector.attrib safe = is_safe_name(name) if selector.namespace: - name = "%s:%s" % (selector.namespace, name) + name = f"{selector.namespace}:{name}" safe = safe and is_safe_name(selector.namespace) if safe: attrib = "@" + name else: - attrib = "attribute::*[name() = %s]" % self.xpath_literal(name) + attrib = f"attribute::*[name() = {self.xpath_literal(name)}]" if selector.value is None: value = None elif self.lower_case_attribute_values: @@ -423,7 +420,7 @@ def xpath_element(self, selector: Element) -> XPathExpr: if selector.namespace: # Namespace prefixes are case-sensitive. 
# http://www.w3.org/TR/css3-namespace/#prefixes - element = "%s:%s" % (selector.namespace, element) + element = f"{selector.namespace}:{element}" safe = safe and bool(is_safe_name(selector.namespace)) xpath = self.xpathexpr_cls(element=element) if not safe: @@ -496,7 +493,7 @@ def xpath_nth_child_function( try: a, b = parse_series(function.arguments) except ValueError as ex: - raise ExpressionError("Invalid series: '%r'" % function.arguments) from ex + raise ExpressionError(f"Invalid series: '{function.arguments!r}'") from ex # From https://www.w3.org/TR/css3-selectors/#structural-pseudos: # @@ -558,20 +555,20 @@ def xpath_nth_child_function( # `add_name_test` boolean is inverted and somewhat counter-intuitive: # # nth_of_type() calls nth_child(add_name_test=False) - nodetest = "*" if add_name_test else "%s" % xpath.element + nodetest = "*" if add_name_test else f"{xpath.element}" # count siblings before or after the element if not last: - siblings_count = "count(preceding-sibling::%s)" % nodetest + siblings_count = f"count(preceding-sibling::{nodetest})" else: - siblings_count = "count(following-sibling::%s)" % nodetest + siblings_count = f"count(following-sibling::{nodetest})" # special case of fixed position: nth-*(0n+b) # if a == 0: # ~~~~~~~~~~ # count(***-sibling::***) = b-1 if a == 0: - return xpath.add_condition("%s = %s" % (siblings_count, b_min_1)) + return xpath.add_condition(f"{siblings_count} = {b_min_1}") expressions = [] @@ -580,12 +577,12 @@ def xpath_nth_child_function( # so if a>0, and (b-1)<=0, an "n" exists to satisfy this, # therefore, the predicate is only interesting if (b-1)>0 if b_min_1 > 0: - expressions.append("%s >= %s" % (siblings_count, b_min_1)) + expressions.append(f"{siblings_count} >= {b_min_1}") else: # if a<0, and (b-1)<0, no "n" satisfies this, # this is tested above as an early exist condition # otherwise, - expressions.append("%s <= %s" % (siblings_count, b_min_1)) + expressions.append(f"{siblings_count} <= {b_min_1}") # 
operations modulo 1 or -1 are simpler, one only needs to verify: # @@ -608,10 +605,9 @@ def xpath_nth_child_function( b_neg = (-b_min_1) % abs(a) if b_neg != 0: - b_neg_as_str = "+%s" % b_neg - left = "(%s %s)" % (left, b_neg_as_str) + left = f"({left} +{b_neg})" - expressions.append("%s mod %s = 0" % (left, a)) + expressions.append(f"{left} mod {a} = 0") template = "(%s)" if len(expressions) > 1 else "%s" xpath.add_condition( @@ -647,20 +643,18 @@ def xpath_contains_function( # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :contains(), got %r" - % function.arguments + f"Expected a single string or ident for :contains(), got {function.arguments!r}" ) value = cast(str, function.arguments[0].value) - return xpath.add_condition("contains(., %s)" % self.xpath_literal(value)) + return xpath.add_condition(f"contains(., {self.xpath_literal(value)})") def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :lang(), got %r" - % function.arguments + f"Expected a single string or ident for :lang(), got {function.arguments!r}" ) value = cast(str, function.arguments[0].value) - return xpath.add_condition("lang(%s)" % (self.xpath_literal(value))) + return xpath.add_condition(f"lang({self.xpath_literal(value)})") # Pseudo: dispatch by pseudo-class name @@ -684,12 +678,12 @@ def xpath_last_child_pseudo(self, xpath: XPathExpr) -> XPathExpr: def xpath_first_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:first-of-type is not implemented") - return xpath.add_condition("count(preceding-sibling::%s) = 0" % xpath.element) + return xpath.add_condition(f"count(preceding-sibling::{xpath.element}) = 0") def xpath_last_of_type_pseudo(self, 
xpath: XPathExpr) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:last-of-type is not implemented") - return xpath.add_condition("count(following-sibling::%s) = 0" % xpath.element) + return xpath.add_condition(f"count(following-sibling::{xpath.element}) = 0") def xpath_only_child_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition("count(parent::*/child::*) = 1") @@ -697,7 +691,7 @@ def xpath_only_child_pseudo(self, xpath: XPathExpr) -> XPathExpr: def xpath_only_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:only-of-type is not implemented") - return xpath.add_condition("count(parent::*/child::%s) = 1" % xpath.element) + return xpath.add_condition(f"count(parent::*/child::{xpath.element}) = 1") def xpath_empty_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition("not(*) and not(string-length())") @@ -729,7 +723,7 @@ def xpath_attrib_equals( self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert value is not None - xpath.add_condition("%s = %s" % (name, self.xpath_literal(value))) + xpath.add_condition(f"{name} = {self.xpath_literal(value)}") return xpath def xpath_attrib_different( @@ -738,11 +732,9 @@ def xpath_attrib_different( assert value is not None # FIXME: this seems like a weird hack... 
if value: - xpath.add_condition( - "not(%s) or %s != %s" % (name, name, self.xpath_literal(value)) - ) + xpath.add_condition(f"not({name}) or {name} != {self.xpath_literal(value)}") else: - xpath.add_condition("%s != %s" % (name, self.xpath_literal(value))) + xpath.add_condition(f"{name} != {self.xpath_literal(value)}") return xpath def xpath_attrib_includes( @@ -774,7 +766,7 @@ def xpath_attrib_prefixmatch( ) -> XPathExpr: if value: xpath.add_condition( - "%s and starts-with(%s, %s)" % (name, name, self.xpath_literal(value)) + f"{name} and starts-with({name}, {self.xpath_literal(value)})" ) else: xpath.add_condition("0") @@ -786,8 +778,7 @@ def xpath_attrib_suffixmatch( if value: # Oddly there is a starts-with in XPath 1.0, but not ends-with xpath.add_condition( - "%s and substring(%s, string-length(%s)-%s) = %s" - % (name, name, name, len(value) - 1, self.xpath_literal(value)) + f"{name} and substring({name}, string-length({name})-{len(value) - 1}) = {self.xpath_literal(value)}" ) else: xpath.add_condition("0") @@ -799,7 +790,7 @@ def xpath_attrib_substringmatch( if value: # Attribute selectors are case sensitive xpath.add_condition( - "%s and contains(%s, %s)" % (name, name, self.xpath_literal(value)) + f"{name} and contains({name}, {self.xpath_literal(value)})" ) else: xpath.add_condition("0") @@ -844,8 +835,7 @@ def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[o def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :lang(), got %r" - % function.arguments + f"Expected a single string or ident for :lang(), got {function.arguments!r}" ) value = function.arguments[0].value assert value diff --git a/pyproject.toml b/pyproject.toml index 5ddbeb6..7e43445 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -170,9 +170,6 @@ ignore = [ "S101", # Using lxml to parse untrusted data is known 
to be vulnerable to XML attacks "S320", - - # TODO: Use format specifiers instead of percent format - "UP031", ] [tool.ruff.lint.pydocstyle] diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 0a95f92..e2e3ba5 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -258,7 +258,7 @@ def test_pseudo_repr(css: str) -> str: # Special cases for CSS 2.1 pseudo-elements are ignored by default for pseudo in ("after", "before", "first-line", "first-letter"): - (selector,) = parse("e:%s" % pseudo) + (selector,) = parse(f"e:{pseudo}") assert selector.pseudo_element == pseudo assert GenericTranslator().selector_to_xpath(selector, prefix="") == "e" @@ -631,24 +631,23 @@ def xpath_pseudo_element( self, xpath: XPathExpr, pseudo_element: PseudoElement ) -> XPathExpr: if isinstance(pseudo_element, FunctionalPseudoElement): - method_name = "xpath_%s_functional_pseudo_element" % ( + method_name = "xpath_{}_functional_pseudo_element".format( pseudo_element.name.replace("-", "_") ) method = getattr(self, method_name, None) if not method: raise ExpressionError( - "The functional pseudo-element ::%s() is unknown" - % pseudo_element.name + f"The functional pseudo-element ::{pseudo_element.name}() is unknown" ) xpath = method(xpath, pseudo_element.arguments) else: - method_name = "xpath_%s_simple_pseudo_element" % ( + method_name = "xpath_{}_simple_pseudo_element".format( pseudo_element.replace("-", "_") ) method = getattr(self, method_name, None) if not method: raise ExpressionError( - "The pseudo-element ::%s is unknown" % pseudo_element + f"The pseudo-element ::{pseudo_element} is unknown" ) xpath = method(xpath) return xpath @@ -660,7 +659,7 @@ def xpath_nb_attr_function( ) -> XPathExpr: assert function.arguments[0].value nb_attributes = int(function.arguments[0].value) - return xpath.add_condition("count(@*)=%d" % nb_attributes) + return xpath.add_condition(f"count(@*)={nb_attributes}") # pseudo-class: # elements that have 5 attributes @@ -674,7 +673,7 
@@ def xpath_attr_functional_pseudo_element( ) -> XPathExpr: attribute_name = arguments[0].value other = XPathExpr( - "@%s" % attribute_name, + f"@{attribute_name}", "", ) return xpath.join("/", other) @@ -739,7 +738,7 @@ def operator_id(selector: str) -> list[str]: def test_series(self) -> None: def series(css: str) -> tuple[int, int] | None: - (selector,) = parse(":nth-child(%s)" % css) + (selector,) = parse(f":nth-child({css})") args = typing.cast(FunctionalPseudoElement, selector.parsed_tree).arguments try: return parse_series(args) From f6ef188e19387a1df53f9870b46ae0743c40d178 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 10 Mar 2025 11:06:39 +0400 Subject: [PATCH 186/208] Update tool versions, setup trusted publishing. (#152) --- .github/workflows/checks.yml | 2 +- .github/workflows/publish.yml | 31 ++++++++++++++++--------------- .pre-commit-config.yaml | 2 +- .readthedocs.yml | 4 ++-- MANIFEST.in | 2 +- docs/requirements.txt | 4 ++-- pyproject.toml | 5 ----- tox.ini | 10 +++++----- 8 files changed, 28 insertions(+), 32 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index cf0e689..666aaba 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -11,7 +11,7 @@ jobs: - python-version: 3.13 env: TOXENV: pylint - - python-version: 3.12 # Keep in sync with .readthedocs.yml + - python-version: 3.13 # Keep in sync with .readthedocs.yml env: TOXENV: docs - python-version: 3.13 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 36f80b5..ad470a8 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,31 +1,32 @@ name: Publish -on: [push] +on: + push: + tags: + - 'v[0-9]+.[0-9]+.[0-9]+' jobs: publish: runs-on: ubuntu-latest - if: startsWith(github.event.ref, 'refs/tags/') + + environment: + name: pypi + url: https://pypi.org/p/cssselect + + permissions: + id-token: write steps: - uses: actions/checkout@v4 - - name: Set up Python 3.13 
+ - name: Set up Python uses: actions/setup-python@v5 with: python-version: 3.13 - - name: Check Tag - id: check-release-tag + - name: Build run: | - if [[ ${{ github.event.ref }} =~ ^refs/tags/v[0-9]+[.][0-9]+[.][0-9]+(rc[0-9]+|[.]dev[0-9]+)?$ ]]; then - echo ::set-output name=release_tag::true - fi + python -m pip install --upgrade build + python -m build - name: Publish to PyPI - if: steps.check-release-tag.outputs.release_tag == 'true' - run: | - pip install --upgrade setuptools wheel twine - python setup.py sdist bdist_wheel - export TWINE_USERNAME=__token__ - export TWINE_PASSWORD=${{ secrets.PYPI_TOKEN }} - twine upload dist/* + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b1829a6..66f262f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.4 + rev: v0.9.10 hooks: - id: ruff args: [ --fix ] diff --git a/.readthedocs.yml b/.readthedocs.yml index 7d13c50..46f5f14 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -4,11 +4,11 @@ sphinx: configuration: docs/conf.py fail_on_warning: true build: - os: ubuntu-22.04 + os: ubuntu-24.04 tools: # For available versions, see: # https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python - python: "3.12" # Keep in sync with .github/workflows/checks.yml + python: "3.13" # Keep in sync with .github/workflows/checks.yml python: install: - requirements: docs/requirements.txt diff --git a/MANIFEST.in b/MANIFEST.in index 5561683..18022a7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ -include AUTHORS CHANGES LICENSE README.rst tox.ini .coveragerc cssselect/py.typed +include AUTHORS CHANGES LICENSE README.rst tox.ini cssselect/py.typed recursive-include docs * recursive-include tests * prune docs/_build diff --git a/docs/requirements.txt b/docs/requirements.txt index d5476d8..21cb2eb 100644 --- a/docs/requirements.txt +++ 
b/docs/requirements.txt @@ -1,2 +1,2 @@ -sphinx==7.2.6 -sphinx-rtd-theme==2.0.0 +sphinx==8.2.3 +sphinx-rtd-theme==3.0.2 diff --git a/pyproject.toml b/pyproject.toml index 7e43445..fa1a140 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,11 +18,6 @@ exclude_also = [ "if TYPE_CHECKING:", ] -[tool.mypy] -check_untyped_defs = true -ignore_missing_imports = true -no_warn_no_return = true - [tool.pylint.MASTER] persistent = "no" extension-pkg-allow-list = ["lxml"] diff --git a/tox.ini b/tox.ini index 3585406..7746739 100644 --- a/tox.ini +++ b/tox.ini @@ -16,7 +16,7 @@ commands = [testenv:pylint] deps = {[testenv]deps} - pylint==3.3.4 + pylint==3.3.5 commands = pylint {posargs: cssselect setup.py tests docs} @@ -30,8 +30,8 @@ commands = [testenv:typing] deps = {[testenv]deps} - mypy==1.14.1 - types-lxml==2024.12.13 + mypy==1.15.0 + types-lxml==2025.3.4 commands = mypy --strict {posargs: cssselect tests} @@ -43,8 +43,8 @@ skip_install = true [testenv:twinecheck] basepython = python3 deps = - twine==5.1.1 - build==1.2.2 + twine==6.1.0 + build==1.2.2.post1 commands = python -m build --sdist twine check dist/* From b6ccd9cbb4db30a79d49eb2247a8a5276af922ce Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 10 Mar 2025 13:24:00 +0500 Subject: [PATCH 187/208] Add release notes for 1.3.0. --- CHANGES | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/CHANGES b/CHANGES index dc38826..0bf3129 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,21 @@ Changelog ========= +Version 1.3.0 +------------- + +Released on 2025-MM-DD. + +* Dropped support for Python 3.7-3.8, added support for Python 3.12-3.13 and + PyPy 3.10. + +* Removed ``_unicode_safe_getattr()``, deprecated in 1.2.0. + +* Added ``pre-commit`` and formatted the code with ``ruff``. + +* Many CI additions and improvements. 
+ + Version 1.2.0 ------------- From e99c506b8e8be0753250622633df8a68dc76268e Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 10 Mar 2025 14:20:12 +0500 Subject: [PATCH 188/208] =?UTF-8?q?Bump=20version:=201.2.0=20=E2=86=92=201?= =?UTF-8?q?.3.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES | 2 +- cssselect/__init__.py | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index 0bf3129..a6d5f41 100644 --- a/CHANGES +++ b/CHANGES @@ -4,7 +4,7 @@ Changelog Version 1.3.0 ------------- -Released on 2025-MM-DD. +Released on 2025-03-10. * Dropped support for Python 3.7-3.8, added support for Python 3.12-3.13 and PyPy 3.10. diff --git a/cssselect/__init__.py b/cssselect/__init__.py index c53b539..67acaaa 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -32,5 +32,5 @@ "parse", ) -VERSION = "1.2.0" +VERSION = "1.3.0" __version__ = VERSION diff --git a/pyproject.toml b/pyproject.toml index fa1a140..43a0672 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "1.2.0" +current_version = "1.3.0" commit = true tag = true From 0c4bce2ed0967792c60db1b026def45203e4aae0 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 24 Mar 2025 13:20:58 +0500 Subject: [PATCH 189/208] Add non-Linux CI jobs. 
(#154) --- .github/workflows/tests-macos.yml | 27 +++++++++++++++++++ .../workflows/{tests.yml => tests-ubuntu.yml} | 2 +- .github/workflows/tests-windows.yml | 27 +++++++++++++++++++ .pre-commit-config.yaml | 2 +- cssselect/parser.py | 18 ++++++------- cssselect/xpath.py | 26 +++++++++--------- tests/test_cssselect.py | 18 +++++++------ 7 files changed, 89 insertions(+), 31 deletions(-) create mode 100644 .github/workflows/tests-macos.yml rename .github/workflows/{tests.yml => tests-ubuntu.yml} (97%) create mode 100644 .github/workflows/tests-windows.yml diff --git a/.github/workflows/tests-macos.yml b/.github/workflows/tests-macos.yml new file mode 100644 index 0000000..851a40e --- /dev/null +++ b/.github/workflows/tests-macos.yml @@ -0,0 +1,27 @@ +name: macOS +on: [push, pull_request] + +jobs: + tests: + runs-on: macos-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Run tests + run: | + pip install -U pip + pip install -U tox + tox -e py + + - name: Upload coverage report + uses: codecov/codecov-action@v5 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests-ubuntu.yml similarity index 97% rename from .github/workflows/tests.yml rename to .github/workflows/tests-ubuntu.yml index 427c4ad..75a06bd 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests-ubuntu.yml @@ -1,4 +1,4 @@ -name: Tests +name: Ubuntu on: [push, pull_request] jobs: diff --git a/.github/workflows/tests-windows.yml b/.github/workflows/tests-windows.yml new file mode 100644 index 0000000..e56da2e --- /dev/null +++ b/.github/workflows/tests-windows.yml @@ -0,0 +1,27 @@ +name: Windows +on: [push, pull_request] + +jobs: + tests: + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", 
"3.10", "3.11", "3.12", "3.13"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Run tests + run: | + pip install -U pip + pip install -U tox + tox -e py + + - name: Upload coverage report + uses: codecov/codecov-action@v5 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 66f262f..3c92c4d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.10 + rev: v0.11.2 hooks: - id: ruff args: [ --fix ] diff --git a/cssselect/parser.py b/cssselect/parser.py index 13ae959..e970a1b 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -562,7 +562,7 @@ def parse_selector(stream: TokenStream) -> tuple[Tree, PseudoElement | None]: ) if peek.is_delim("+", ">", "~"): # A combinator - combinator = cast(str, stream.next().value) + combinator = cast("str", stream.next().value) stream.skip_whitespace() else: # By exclusion, the last parse_simple_selector() ended @@ -608,7 +608,7 @@ def parse_simple_selector( f"Got pseudo-element ::{pseudo_element} not at the end of a selector" ) if peek.type == "HASH": - result = Hash(result, cast(str, stream.next().value)) + result = Hash(result, cast("str", stream.next().value)) elif peek == ("DELIM", "."): stream.next() result = Class(result, stream.next_ident()) @@ -720,7 +720,7 @@ def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]: ("DELIM", "."), ("DELIM", "*"), ]: - subselector += cast(str, next.value) + subselector += cast("str", next.value) elif next == ("DELIM", ")"): result = parse(subselector) return combinator, result[0] @@ -774,13 +774,13 @@ def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib: stream.skip_whitespace() next = stream.next() if next == ("DELIM", "]"): - return Attrib(selector, namespace, cast(str, attrib), "exists", None) 
+ return Attrib(selector, namespace, cast("str", attrib), "exists", None) if next == ("DELIM", "="): op = "=" elif next.is_delim("^", "$", "*", "~", "|", "!") and ( stream.peek() == ("DELIM", "=") ): - op = cast(str, next.value) + "=" + op = cast("str", next.value) + "=" stream.next() else: raise SelectorSyntaxError(f"Operator expected, got {next}") @@ -792,7 +792,7 @@ def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib: next = stream.next() if next != ("DELIM", "]"): raise SelectorSyntaxError(f"Expected ']', got {next}") - return Attrib(selector, namespace, cast(str, attrib), op, value) + return Attrib(selector, namespace, cast("str", attrib), op, value) def parse_series(tokens: Iterable[Token]) -> tuple[int, int]: @@ -806,7 +806,7 @@ def parse_series(tokens: Iterable[Token]) -> tuple[int, int]: for token in tokens: if token.type == "STRING": raise ValueError("String tokens not allowed in series.") - s = "".join(cast(str, token.value) for token in tokens).strip() + s = "".join(cast("str", token.value) for token in tokens).strip() if s == "odd": return 2, 1 if s == "even": @@ -867,7 +867,7 @@ def value(self) -> str | None: def css(self) -> str: if self.type == "STRING": return repr(self.value) - return cast(str, self.value) + return cast("str", self.value) class EOFToken(Token): @@ -1030,7 +1030,7 @@ def next_ident(self) -> str: next = self.next() if next.type != "IDENT": raise SelectorSyntaxError(f"Expected ident, got {next}") - return cast(str, next.value) + return cast("str", next.value) def next_ident_or_star(self) -> str | None: next = self.next() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 4018bcf..bc47dea 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -14,8 +14,7 @@ from __future__ import annotations import re -from collections.abc import Callable -from typing import TYPE_CHECKING, Optional, cast +from typing import TYPE_CHECKING, cast from cssselect.parser import ( Attrib, @@ -38,6 +37,8 @@ ) if TYPE_CHECKING: + from 
collections.abc import Callable + # typing.Self requires Python 3.11 from typing_extensions import Self @@ -289,7 +290,7 @@ def xpath(self, parsed_selector: Tree) -> XPathExpr: """Translate any parsed selector object.""" type_name = type(parsed_selector).__name__ method = cast( - Optional[Callable[[Tree], XPathExpr]], + "Callable[[Tree], XPathExpr] | None", getattr(self, f"xpath_{type_name.lower()}", None), ) if method is None: @@ -302,7 +303,7 @@ def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr: """Translate a combined selector.""" combinator = self.combinator_mapping[combined.combinator] method = cast( - Callable[[XPathExpr, XPathExpr], XPathExpr], + "Callable[[XPathExpr, XPathExpr], XPathExpr]", getattr(self, f"xpath_{combinator}_combinator"), ) return method(self.xpath(combined.selector), self.xpath(combined.subselector)) @@ -321,10 +322,10 @@ def xpath_relation(self, relation: Relation) -> XPathExpr: subselector = relation.subselector right = self.xpath(subselector.parsed_tree) method = cast( - Callable[[XPathExpr, XPathExpr], XPathExpr], + "Callable[[XPathExpr, XPathExpr], XPathExpr]", getattr( self, - f"xpath_relation_{self.combinator_mapping[cast(str, combinator.value)]}_combinator", + f"xpath_relation_{self.combinator_mapping[cast('str', combinator.value)]}_combinator", ), ) return method(xpath, right) @@ -351,7 +352,7 @@ def xpath_function(self, function: Function) -> XPathExpr: """Translate a functional pseudo-class.""" method_name = "xpath_{}_function".format(function.name.replace("-", "_")) method = cast( - Optional[Callable[[XPathExpr, Function], XPathExpr]], + "Callable[[XPathExpr, Function], XPathExpr] | None", getattr(self, method_name, None), ) if not method: @@ -362,7 +363,8 @@ def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr: """Translate a pseudo-class.""" method_name = "xpath_{}_pseudo".format(pseudo.ident.replace("-", "_")) method = cast( - Optional[Callable[[XPathExpr], XPathExpr]], getattr(self, method_name, 
None) + "Callable[[XPathExpr], XPathExpr] | None", + getattr(self, method_name, None), ) if not method: # TODO: better error message for pseudo-elements? @@ -373,7 +375,7 @@ def xpath_attrib(self, selector: Attrib) -> XPathExpr: """Translate an attribute selector.""" operator = self.attribute_operator_mapping[selector.operator] method = cast( - Callable[[XPathExpr, str, Optional[str]], XPathExpr], + "Callable[[XPathExpr, str, str | None], XPathExpr]", getattr(self, f"xpath_attrib_{operator}"), ) if self.lower_case_attribute_names: @@ -391,7 +393,7 @@ def xpath_attrib(self, selector: Attrib) -> XPathExpr: if selector.value is None: value = None elif self.lower_case_attribute_values: - value = cast(str, selector.value.value).lower() + value = cast("str", selector.value.value).lower() else: value = selector.value.value return method(self.xpath(selector.selector), attrib, value) @@ -645,7 +647,7 @@ def xpath_contains_function( raise ExpressionError( f"Expected a single string or ident for :contains(), got {function.arguments!r}" ) - value = cast(str, function.arguments[0].value) + value = cast("str", function.arguments[0].value) return xpath.add_condition(f"contains(., {self.xpath_literal(value)})") def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: @@ -653,7 +655,7 @@ def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr raise ExpressionError( f"Expected a single string or ident for :lang(), got {function.arguments!r}" ) - value = cast(str, function.arguments[0].value) + value = cast("str", function.arguments[0].value) return xpath.add_condition(f"lang({self.xpath_literal(value)})") # Pseudo: dispatch by pseudo-class name diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index e2e3ba5..2b89b6f 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -728,7 +728,7 @@ def xpath(css: str) -> str: def operator_id(selector: str) -> list[str]: xpath = 
CustomTranslator().css_to_xpath(selector) - items = typing.cast(list["etree._Element"], document.xpath(xpath)) + items = typing.cast("list[etree._Element]", document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] @@ -739,7 +739,9 @@ def operator_id(selector: str) -> list[str]: def test_series(self) -> None: def series(css: str) -> tuple[int, int] | None: (selector,) = parse(f":nth-child({css})") - args = typing.cast(FunctionalPseudoElement, selector.parsed_tree).arguments + args = typing.cast( + "FunctionalPseudoElement", selector.parsed_tree + ).arguments try: return parse_series(args) except ValueError: @@ -771,7 +773,7 @@ def test_lang(self) -> None: def langid(selector: str) -> list[str]: xpath = css_to_xpath(selector) - items = typing.cast(list["etree._Element"], document.xpath(xpath)) + items = typing.cast("list[etree._Element]", document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] @@ -800,7 +802,7 @@ def xpath_pseudo_element( self, xpath: XPathExpr, pseudo_element: PseudoElement ) -> XPathExpr: self.argument_types += typing.cast( - FunctionalPseudoElement, pseudo_element + "FunctionalPseudoElement", pseudo_element ).argument_types() return xpath @@ -827,11 +829,11 @@ def test_select(self) -> None: def select_ids(selector: str, html_only: bool) -> list[str]: xpath = css_to_xpath(selector) - items = typing.cast(list["etree._Element"], document.xpath(xpath)) + items = typing.cast("list[etree._Element]", document.xpath(xpath)) if html_only: assert items == [] xpath = html_css_to_xpath(selector) - items = typing.cast(list["etree._Element"], document.xpath(xpath)) + items = typing.cast("list[etree._Element]", document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] @@ -1065,14 +1067,14 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> list[str]: def test_select_shakespeare(self) -> None: document = 
html.document_fromstring(HTML_SHAKESPEARE) - body = typing.cast(list["etree._Element"], document.xpath("//body"))[0] + body = typing.cast("list[etree._Element]", document.xpath("//body"))[0] css_to_xpath = GenericTranslator().css_to_xpath basestring_ = (str, bytes) def count(selector: str) -> int: xpath = css_to_xpath(selector) - results = typing.cast(list["etree._Element"], body.xpath(xpath)) + results = typing.cast("list[etree._Element]", body.xpath(xpath)) assert not isinstance(results, basestring_) found = set() for item in results: From aae4d793810be3a9dd20fb112af8a5933c8570ed Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 24 Mar 2025 14:03:46 +0500 Subject: [PATCH 190/208] Add support for PyPy3.11. --- .github/workflows/tests-ubuntu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests-ubuntu.yml b/.github/workflows/tests-ubuntu.yml index 75a06bd..5500bf5 100644 --- a/.github/workflows/tests-ubuntu.yml +++ b/.github/workflows/tests-ubuntu.yml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.10"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.10", "pypy3.11"] steps: - uses: actions/checkout@v4 From 93e1277ff43b0d7f3792722dca93aa29db752888 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Mon, 24 Mar 2025 11:07:06 +0100 Subject: [PATCH 191/208] Fix CI badge --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index d62b320..c055295 100644 --- a/README.rst +++ b/README.rst @@ -11,8 +11,8 @@ cssselect: CSS Selectors for Python :target: https://pypi.python.org/pypi/cssselect :alt: Supported Python Versions -.. image:: https://github.com/scrapy/cssselect/actions/workflows/tests.yml/badge.svg - :target: https://github.com/scrapy/cssselect/actions/workflows/tests.yml +.. 
image:: https://github.com/scrapy/cssselect/actions/workflows/tests-ubuntu.yml/badge.svg + :target: https://github.com/scrapy/cssselect/actions/workflows/tests-ubuntu.yml :alt: Tests .. image:: https://img.shields.io/codecov/c/github/scrapy/cssselect/master.svg From b478ce96deddd07bd7bd5311d49fd0b5bbf3f54f Mon Sep 17 00:00:00 2001 From: Marc Mueller <30130371+cdce8p@users.noreply.github.com> Date: Thu, 24 Apr 2025 09:23:55 +0200 Subject: [PATCH 192/208] Modernize packaging (#157) --- MANIFEST.in | 4 ---- pyproject.toml | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ setup.py | 43 ------------------------------------------- tox.ini | 2 +- 4 files changed, 50 insertions(+), 48 deletions(-) delete mode 100644 MANIFEST.in delete mode 100644 setup.py diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 18022a7..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,4 +0,0 @@ -include AUTHORS CHANGES LICENSE README.rst tox.ini cssselect/py.typed -recursive-include docs * -recursive-include tests * -prune docs/_build diff --git a/pyproject.toml b/pyproject.toml index 43a0672..782657e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,52 @@ +[build-system] +build-backend = "hatchling.build" +requires = ["hatchling>=1.27.0"] + +[project] +name = "cssselect" +license = "BSD-3-Clause" +license-files = ["LICENSE", "AUTHORS"] +description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0" +readme = "README.rst" +authors = [{ name = "Ian Bicking", email = "ianb@colorstudy.com" }] +maintainers = [{ name = "Paul Tremberth", email = "paul.tremberth@gmail.com" }] +requires-python = ">=3.9" +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + 
"Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dynamic = ["version"] + +[project.urls] +"Homepage" = "https://github.com/scrapy/cssselect" + +[tool.hatch.version] +path = "cssselect/__init__.py" + +[tool.hatch.build.targets.sdist] +include = [ + "/cssselect", + "/docs", + "/tests", + "/CHANGES", + "/README.rst", + "/tox.ini", +] +exclude = [ + "/docs/_build", +] + +[tool.hatch.build.targets.wheel] +packages = ["cssselect"] + [tool.bumpversion] current_version = "1.3.0" commit = true diff --git a/setup.py b/setup.py deleted file mode 100644 index cb870dd..0000000 --- a/setup.py +++ /dev/null @@ -1,43 +0,0 @@ -import re -from pathlib import Path - -from setuptools import setup - -ROOT = Path(__file__).parent -README = (ROOT / "README.rst").read_text(encoding="utf-8") -INIT_PY = (ROOT / "cssselect" / "__init__.py").read_text(encoding="utf-8") -VERSION = re.search('VERSION = "([^"]+)"', INIT_PY).group(1) - - -setup( - name="cssselect", - version=VERSION, - author="Ian Bicking", - author_email="ianb@colorstudy.com", - maintainer="Paul Tremberth", - maintainer_email="paul.tremberth@gmail.com", - description="cssselect parses CSS3 Selectors and translates them to XPath 1.0", - long_description=README, - long_description_content_type="text/x-rst", - url="https://github.com/scrapy/cssselect", - license="BSD", - packages=["cssselect"], - package_data={ - "cssselect": ["py.typed"], - }, - include_package_data=True, - python_requires=">=3.9", - classifiers=[ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: Implementation :: CPython", - 
"Programming Language :: Python :: Implementation :: PyPy", - ], -) diff --git a/tox.ini b/tox.ini index 7746739..026741a 100644 --- a/tox.ini +++ b/tox.ini @@ -18,7 +18,7 @@ deps = {[testenv]deps} pylint==3.3.5 commands = - pylint {posargs: cssselect setup.py tests docs} + pylint {posargs: cssselect tests docs} [testenv:docs] changedir = docs From efcc78fa0ad0cb895094371f88ab9f864ae80b12 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 5 May 2025 23:19:33 +0500 Subject: [PATCH 193/208] Install -dev libs for lxml for PyPY CI jobs. --- .github/workflows/tests-ubuntu.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/tests-ubuntu.yml b/.github/workflows/tests-ubuntu.yml index 5500bf5..013ca97 100644 --- a/.github/workflows/tests-ubuntu.yml +++ b/.github/workflows/tests-ubuntu.yml @@ -12,6 +12,12 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Install system libraries + if: contains(matrix.python-version, 'pypy') + run: | + sudo apt-get update + sudo apt-get install libxml2-dev libxslt-dev + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: From da0329ce8d0094e17481d49d877d90423e8a1033 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sat, 26 Jul 2025 21:12:17 +0500 Subject: [PATCH 194/208] Add Python 3.14 RC1 to CI. 
--- .github/workflows/tests-macos.yml | 2 +- .github/workflows/tests-ubuntu.yml | 2 +- .github/workflows/tests-windows.yml | 2 +- pyproject.toml | 1 + tox.ini | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests-macos.yml b/.github/workflows/tests-macos.yml index 851a40e..7b1bcb7 100644 --- a/.github/workflows/tests-macos.yml +++ b/.github/workflows/tests-macos.yml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.1"] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/tests-ubuntu.yml b/.github/workflows/tests-ubuntu.yml index 75a06bd..95b0839 100644 --- a/.github/workflows/tests-ubuntu.yml +++ b/.github/workflows/tests-ubuntu.yml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.10"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.1", "pypy3.10"] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/tests-windows.yml b/.github/workflows/tests-windows.yml index e56da2e..11e5917 100644 --- a/.github/workflows/tests-windows.yml +++ b/.github/workflows/tests-windows.yml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.1"] steps: - uses: actions/checkout@v4 diff --git a/pyproject.toml b/pyproject.toml index 782657e..0dc257c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] diff --git a/tox.ini b/tox.ini index 026741a..a0a9413 
100644 --- a/tox.ini +++ b/tox.ini @@ -11,7 +11,7 @@ deps = commands = pytest --cov=cssselect \ --cov-report=term-missing --cov-report=html --cov-report=xml \ - --verbose {posargs: cssselect tests docs} + {posargs: cssselect tests docs} [testenv:pylint] deps = From 6c9d2191048e19fa7a2bb9346647d2096075e523 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sat, 26 Jul 2025 21:22:11 +0500 Subject: [PATCH 195/208] Bump ruff. --- .pre-commit-config.yaml | 4 +- cssselect/parser.py | 98 ++++++++++++++++++++--------------------- docs/conf.py | 2 +- pyproject.toml | 15 ++++++- tests/test_cssselect.py | 51 ++++++++++++--------- 5 files changed, 96 insertions(+), 74 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3c92c4d..a8eebd9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.11.2 + rev: v0.12.5 hooks: - - id: ruff + - id: ruff-check args: [ --fix ] - id: ruff-format diff --git a/cssselect/parser.py b/cssselect/parser.py index e970a1b..5bca712 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -441,9 +441,9 @@ class Hash: Represents selector#id """ - def __init__(self, selector: Tree, id: str) -> None: + def __init__(self, selector: Tree, id_: str) -> None: self.selector = selector - self.id = id + self.id = id_ def __repr__(self) -> str: return f"{self.__class__.__name__}[{self.selector!r}#{self.id}]" @@ -660,13 +660,13 @@ def parse_simple_selector( argument, argument_pseudo_element = parse_simple_selector( stream, inside_negation=True ) - next = stream.next() + next_ = stream.next() if argument_pseudo_element: raise SelectorSyntaxError( - f"Got pseudo-element ::{argument_pseudo_element} inside :not() at {next.pos}" + f"Got pseudo-element ::{argument_pseudo_element} inside :not() at {next_.pos}" ) - if next != ("DELIM", ")"): - raise SelectorSyntaxError(f"Expected ')', got {next}") + if next_ != ("DELIM", ")"): + raise 
SelectorSyntaxError(f"Expected ')', got {next_}") result = Negation(result, argument) elif ident.lower() == "has": combinator, arguments = parse_relative_selector(stream) @@ -687,46 +687,46 @@ def parse_simple_selector( return result, pseudo_element -def parse_arguments(stream: TokenStream) -> list[Token]: +def parse_arguments(stream: TokenStream) -> list[Token]: # noqa: RET503 arguments: list[Token] = [] - while 1: # noqa: RET503 + while 1: stream.skip_whitespace() - next = stream.next() - if next.type in ("IDENT", "STRING", "NUMBER") or next in [ + next_ = stream.next() + if next_.type in ("IDENT", "STRING", "NUMBER") or next_ in [ ("DELIM", "+"), ("DELIM", "-"), ]: - arguments.append(next) - elif next == ("DELIM", ")"): + arguments.append(next_) + elif next_ == ("DELIM", ")"): return arguments else: - raise SelectorSyntaxError(f"Expected an argument, got {next}") + raise SelectorSyntaxError(f"Expected an argument, got {next_}") -def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]: +def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]: # noqa: RET503 stream.skip_whitespace() subselector = "" - next = stream.next() + next_ = stream.next() - if next in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]: - combinator = next + if next_ in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]: + combinator = next_ stream.skip_whitespace() - next = stream.next() + next_ = stream.next() else: combinator = Token("DELIM", " ", pos=0) - while 1: # noqa: RET503 - if next.type in ("IDENT", "STRING", "NUMBER") or next in [ + while 1: + if next_.type in ("IDENT", "STRING", "NUMBER") or next_ in [ ("DELIM", "."), ("DELIM", "*"), ]: - subselector += cast("str", next.value) - elif next == ("DELIM", ")"): + subselector += cast("str", next_.value) + elif next_ == ("DELIM", ")"): result = parse(subselector) return combinator, result[0] else: - raise SelectorSyntaxError(f"Expected an argument, got {next}") - next = 
stream.next() + raise SelectorSyntaxError(f"Expected an argument, got {next_}") + next_ = stream.next() def parse_simple_selector_arguments(stream: TokenStream) -> list[Tree]: @@ -738,16 +738,16 @@ def parse_simple_selector_arguments(stream: TokenStream) -> list[Tree]: f"Got pseudo-element ::{pseudo_element} inside function" ) stream.skip_whitespace() - next = stream.next() - if next in (("EOF", None), ("DELIM", ",")): + next_ = stream.next() + if next_ in (("EOF", None), ("DELIM", ",")): stream.next() stream.skip_whitespace() arguments.append(result) - elif next == ("DELIM", ")"): + elif next_ == ("DELIM", ")"): arguments.append(result) break else: - raise SelectorSyntaxError(f"Expected an argument, got {next}") + raise SelectorSyntaxError(f"Expected an argument, got {next_}") return arguments @@ -772,26 +772,26 @@ def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib: namespace = op = None if op is None: stream.skip_whitespace() - next = stream.next() - if next == ("DELIM", "]"): + next_ = stream.next() + if next_ == ("DELIM", "]"): return Attrib(selector, namespace, cast("str", attrib), "exists", None) - if next == ("DELIM", "="): + if next_ == ("DELIM", "="): op = "=" - elif next.is_delim("^", "$", "*", "~", "|", "!") and ( + elif next_.is_delim("^", "$", "*", "~", "|", "!") and ( stream.peek() == ("DELIM", "=") ): - op = cast("str", next.value) + "=" + op = cast("str", next_.value) + "=" stream.next() else: - raise SelectorSyntaxError(f"Operator expected, got {next}") + raise SelectorSyntaxError(f"Operator expected, got {next_}") stream.skip_whitespace() value = stream.next() if value.type not in ("IDENT", "STRING"): raise SelectorSyntaxError(f"Expected string or ident, got {value}") stream.skip_whitespace() - next = stream.next() - if next != ("DELIM", "]"): - raise SelectorSyntaxError(f"Expected ']', got {next}") + next_ = stream.next() + if next_ != ("DELIM", "]"): + raise SelectorSyntaxError(f"Expected ']', got {next_}") return Attrib(selector, 
namespace, cast("str", attrib), op, value) @@ -1015,9 +1015,9 @@ def next(self) -> Token: assert self.peeked is not None self.used.append(self.peeked) return self.peeked - next = self.next_token() - self.used.append(next) - return next + next_ = self.next_token() + self.used.append(next_) + return next_ def peek(self) -> Token: if not self._peeking: @@ -1027,18 +1027,18 @@ def peek(self) -> Token: return self.peeked def next_ident(self) -> str: - next = self.next() - if next.type != "IDENT": - raise SelectorSyntaxError(f"Expected ident, got {next}") - return cast("str", next.value) + next_ = self.next() + if next_.type != "IDENT": + raise SelectorSyntaxError(f"Expected ident, got {next_}") + return cast("str", next_.value) def next_ident_or_star(self) -> str | None: - next = self.next() - if next.type == "IDENT": - return next.value - if next == ("DELIM", "*"): + next_ = self.next() + if next_.type == "IDENT": + return next_.value + if next_ == ("DELIM", "*"): return None - raise SelectorSyntaxError(f"Expected ident or '*', got {next}") + raise SelectorSyntaxError(f"Expected ident or '*', got {next_}") def skip_whitespace(self) -> None: peek = self.peek() diff --git a/docs/conf.py b/docs/conf.py index ceeb2d2..5713d17 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,7 @@ # General information about the project. 
project = "cssselect" -copyright = "2012-2017, Simon Sapin, Scrapy developers" +project_copyright = "2012-2017, Simon Sapin, Scrapy developers" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/pyproject.toml b/pyproject.toml index 0dc257c..8506c66 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,10 +105,16 @@ testpaths = ["tests"] [tool.ruff.lint] extend-select = [ + # flake8-builtins + "A", + # flake8-async + "ASYNC", # flake8-bugbear "B", # flake8-comprehensions "C4", + # flake8-commas + "COM", # pydocstyle "D", # flake8-future-annotations @@ -131,6 +137,8 @@ extend-select = [ "PIE", # pylint "PL", + # flake8-pytest-style + "PT", # flake8-use-pathlib "PTH", # flake8-pyi @@ -161,6 +169,8 @@ extend-select = [ "YTT", ] ignore = [ + # Trailing comma missing + "COM812", # Missing docstring in public module "D100", # Missing docstring in public class @@ -213,9 +223,10 @@ ignore = [ "RUF012", # Use of `assert` detected "S101", - # Using lxml to parse untrusted data is known to be vulnerable to XML attacks - "S320", ] +[tool.ruff.lint.isort] +split-on-trailing-comma = false + [tool.ruff.lint.pydocstyle] convention = "pep257" diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 2b89b6f..dc67bb7 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -23,6 +23,7 @@ import unittest from typing import TYPE_CHECKING +import pytest from lxml import etree, html from cssselect import ( @@ -268,12 +269,8 @@ def test_pseudo_repr(css: str) -> str: (selector,) = parse("e::foo") assert selector.pseudo_element == "foo" assert tr.selector_to_xpath(selector, prefix="") == "e" - self.assertRaises( - ExpressionError, - tr.selector_to_xpath, - selector, - translate_pseudo_elements=True, - ) + with pytest.raises(ExpressionError): + tr.selector_to_xpath(selector, translate_pseudo_elements=True) # Special test for the unicode symbols and 
':scope' element if check # Errors if use repr() instead of __repr__() @@ -567,19 +564,32 @@ def xpath(css: str) -> str: assert xpath(r"[h\a0 ref]") == ("*[attribute::*[name() = 'h ref']]") # h\xa0ref assert xpath(r"[h\]ref]") == ("*[attribute::*[name() = 'h]ref']]") - self.assertRaises(ExpressionError, xpath, ":fİrst-child") - self.assertRaises(ExpressionError, xpath, ":first-of-type") - self.assertRaises(ExpressionError, xpath, ":only-of-type") - self.assertRaises(ExpressionError, xpath, ":last-of-type") - self.assertRaises(ExpressionError, xpath, ":nth-of-type(1)") - self.assertRaises(ExpressionError, xpath, ":nth-last-of-type(1)") - self.assertRaises(ExpressionError, xpath, ":nth-child(n-)") - self.assertRaises(ExpressionError, xpath, ":after") - self.assertRaises(ExpressionError, xpath, ":lorem-ipsum") - self.assertRaises(ExpressionError, xpath, ":lorem(ipsum)") - self.assertRaises(ExpressionError, xpath, "::lorem-ipsum") - self.assertRaises(TypeError, GenericTranslator().css_to_xpath, 4) - self.assertRaises(TypeError, GenericTranslator().selector_to_xpath, "foo") + with pytest.raises(ExpressionError): + xpath(":fİrst-child") + with pytest.raises(ExpressionError): + xpath(":first-of-type") + with pytest.raises(ExpressionError): + xpath(":only-of-type") + with pytest.raises(ExpressionError): + xpath(":last-of-type") + with pytest.raises(ExpressionError): + xpath(":nth-of-type(1)") + with pytest.raises(ExpressionError): + xpath(":nth-last-of-type(1)") + with pytest.raises(ExpressionError): + xpath(":nth-child(n-)") + with pytest.raises(ExpressionError): + xpath(":after") + with pytest.raises(ExpressionError): + xpath(":lorem-ipsum") + with pytest.raises(ExpressionError): + xpath(":lorem(ipsum)") + with pytest.raises(ExpressionError): + xpath("::lorem-ipsum") + with pytest.raises(TypeError): + GenericTranslator().css_to_xpath(4) # type: ignore[arg-type] + with pytest.raises(TypeError): + GenericTranslator().selector_to_xpath("foo") # type: ignore[arg-type] def 
test_unicode(self) -> None: css = ".a\xc1b" @@ -967,7 +977,8 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> list[str]: assert pcss("span:only-child") == ["foobar-span"] assert pcss("li div:only-child") == ["li-div"] assert pcss("div *:only-child") == ["li-div", "foobar-span"] - self.assertRaises(ExpressionError, pcss, "p *:only-of-type") + with pytest.raises(ExpressionError): + pcss("p *:only-of-type") assert pcss("p:only-of-type") == ["paragraph"] assert pcss("a:empty", "a:EMpty") == ["name-anchor"] assert pcss("li:empty") == ["third-li", "fourth-li", "fifth-li", "sixth-li"] From 0ee48e6317fa5a39ce1b399b3517d3121e742276 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sat, 26 Jul 2025 21:29:50 +0500 Subject: [PATCH 196/208] Bump mypy and pylint. --- cssselect/xpath.py | 8 ++++---- tox.ini | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index bc47dea..96eac3f 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -825,7 +825,7 @@ def __init__(self, xhtml: bool = False) -> None: self.lower_case_element_names = True self.lower_case_attribute_names = True - def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] + def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # FIXME: is this really all the elements? return xpath.add_condition( "(@selected and name(.) = 'option') or " @@ -850,7 +850,7 @@ def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr f"'-'), {arg})]" ) - def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] + def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition( "@href and (name(.) = 'a' or name(.) = 'link' or name(.) 
= 'area')" ) @@ -858,7 +858,7 @@ def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[over # Links are never visited, the implementation for :visited is the same # as in GenericTranslator - def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] + def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # http://www.w3.org/TR/html5/section-index.html#attributes-1 return xpath.add_condition( """ @@ -888,7 +888,7 @@ def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[ # FIXME: in the second half, add "and is not a descendant of that # fieldset element's first legend element child, if any." - def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] + def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # http://www.w3.org/TR/html5/section-index.html#attributes-1 return xpath.add_condition( """ diff --git a/tox.ini b/tox.ini index a0a9413..01794d6 100644 --- a/tox.ini +++ b/tox.ini @@ -16,7 +16,7 @@ commands = [testenv:pylint] deps = {[testenv]deps} - pylint==3.3.5 + pylint==3.3.7 commands = pylint {posargs: cssselect tests docs} @@ -30,8 +30,8 @@ commands = [testenv:typing] deps = {[testenv]deps} - mypy==1.15.0 - types-lxml==2025.3.4 + mypy==1.17.0 + types-lxml==2025.3.30 commands = mypy --strict {posargs: cssselect tests} From 52aabe2e7f974c8eda16190c62450cd4c32efb1e Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sat, 26 Jul 2025 21:30:29 +0500 Subject: [PATCH 197/208] Add more linters. 
--- .git-blame-ignore-revs | 2 +- .github/workflows/checks.yml | 2 +- .pre-commit-config.yaml | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 9d2c8f6..bb4f6e1 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,2 +1,2 @@ # applying pre-commit hooks to the project -e91101b37f82558db84a6b8ee9a6dba1fd2ae0bb \ No newline at end of file +e91101b37f82558db84a6b8ee9a6dba1fd2ae0bb diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 666aaba..1607756 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -35,7 +35,7 @@ jobs: pip install -U pip pip install -U tox tox - + pre-commit: runs-on: ubuntu-latest steps: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a8eebd9..119b328 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,3 +5,18 @@ repos: - id: ruff-check args: [ --fix ] - id: ruff-format +- repo: https://github.com/adamchainz/blacken-docs + rev: 1.19.1 + hooks: + - id: blacken-docs + additional_dependencies: + - black==25.1.0 +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace +- repo: https://github.com/sphinx-contrib/sphinx-lint + rev: v1.0.0 + hooks: + - id: sphinx-lint From db990241a8bea7a7339baf4d007daa1c2fc28812 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sat, 13 Sep 2025 21:57:56 +0500 Subject: [PATCH 198/208] Bump Python 3.14 to RC2. 
--- .github/workflows/tests-macos.yml | 2 +- .github/workflows/tests-ubuntu.yml | 2 +- .github/workflows/tests-windows.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests-macos.yml b/.github/workflows/tests-macos.yml index 7b1bcb7..21af09a 100644 --- a/.github/workflows/tests-macos.yml +++ b/.github/workflows/tests-macos.yml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.1"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.2"] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/tests-ubuntu.yml b/.github/workflows/tests-ubuntu.yml index 6881cf7..d65eadd 100644 --- a/.github/workflows/tests-ubuntu.yml +++ b/.github/workflows/tests-ubuntu.yml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.1", "pypy3.10", "pypy3.11"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.2", "pypy3.10", "pypy3.11"] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/tests-windows.yml b/.github/workflows/tests-windows.yml index 11e5917..658a6eb 100644 --- a/.github/workflows/tests-windows.yml +++ b/.github/workflows/tests-windows.yml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.1"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.2"] steps: - uses: actions/checkout@v4 From 3a66f9568c977014376242c275357711bb6cb078 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sat, 13 Sep 2025 21:58:16 +0500 Subject: [PATCH 199/208] Drop PyPy 3.10. 
--- .github/workflows/tests-ubuntu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests-ubuntu.yml b/.github/workflows/tests-ubuntu.yml index d65eadd..91412b8 100644 --- a/.github/workflows/tests-ubuntu.yml +++ b/.github/workflows/tests-ubuntu.yml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.2", "pypy3.10", "pypy3.11"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.2", "pypy3.11"] steps: - uses: actions/checkout@v4 From 74ba53e3228dde2b4a0e0115a9754fbafe6b1cbf Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sat, 13 Sep 2025 21:58:55 +0500 Subject: [PATCH 200/208] Bump linters. --- .pre-commit-config.yaml | 6 +++--- tox.ini | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 119b328..461b7cf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,18 +1,18 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.5 + rev: v0.13.0 hooks: - id: ruff-check args: [ --fix ] - id: ruff-format - repo: https://github.com/adamchainz/blacken-docs - rev: 1.19.1 + rev: 1.20.0 hooks: - id: blacken-docs additional_dependencies: - black==25.1.0 - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: end-of-file-fixer - id: trailing-whitespace diff --git a/tox.ini b/tox.ini index 01794d6..46e8579 100644 --- a/tox.ini +++ b/tox.ini @@ -16,7 +16,7 @@ commands = [testenv:pylint] deps = {[testenv]deps} - pylint==3.3.7 + pylint==3.3.8 commands = pylint {posargs: cssselect tests docs} @@ -30,8 +30,8 @@ commands = [testenv:typing] deps = {[testenv]deps} - mypy==1.17.0 - types-lxml==2025.3.30 + mypy==1.18.1 + types-lxml==2025.8.25 commands = mypy --strict {posargs: cssselect tests} @@ -43,8 +43,8 @@ skip_install = true [testenv:twinecheck] basepython = python3 deps = - twine==6.1.0 - 
build==1.2.2.post1 + twine==6.2.0 + build==1.3.0 commands = python -m build --sdist twine check dist/* From 0f27bb1bd77c0ac6aa6ff7f8c6f5e7773595a0a5 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sat, 13 Sep 2025 22:01:04 +0500 Subject: [PATCH 201/208] Remove setuptools from test deps. --- tox.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/tox.ini b/tox.ini index 46e8579..452a364 100644 --- a/tox.ini +++ b/tox.ini @@ -6,7 +6,6 @@ deps = lxml>=4.4 pytest-cov>=2.8 pytest>=5.4 - setuptools sybil commands = pytest --cov=cssselect \ From 9112730f04457d58712abb8743ddcca3718ffe85 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sat, 13 Sep 2025 22:02:43 +0500 Subject: [PATCH 202/208] Bump CI actions. --- .github/workflows/checks.yml | 6 +++--- .github/workflows/publish.yml | 4 ++-- .github/workflows/tests-macos.yml | 4 ++-- .github/workflows/tests-ubuntu.yml | 4 ++-- .github/workflows/tests-windows.yml | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 1607756..be003f5 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -22,10 +22,10 @@ jobs: TOXENV: twinecheck steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} @@ -39,5 +39,5 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index ad470a8..3faf4ab 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -16,10 +16,10 @@ jobs: id-token: write steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: 
3.13 diff --git a/.github/workflows/tests-macos.yml b/.github/workflows/tests-macos.yml index 21af09a..9d43f12 100644 --- a/.github/workflows/tests-macos.yml +++ b/.github/workflows/tests-macos.yml @@ -10,10 +10,10 @@ jobs: python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.2"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/tests-ubuntu.yml b/.github/workflows/tests-ubuntu.yml index 91412b8..85db82d 100644 --- a/.github/workflows/tests-ubuntu.yml +++ b/.github/workflows/tests-ubuntu.yml @@ -10,7 +10,7 @@ jobs: python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.2", "pypy3.11"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Install system libraries if: contains(matrix.python-version, 'pypy') || contains(matrix.python-version, '3.14.0-rc') @@ -19,7 +19,7 @@ jobs: sudo apt-get install libxml2-dev libxslt-dev - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/tests-windows.yml b/.github/workflows/tests-windows.yml index 658a6eb..d4139b7 100644 --- a/.github/workflows/tests-windows.yml +++ b/.github/workflows/tests-windows.yml @@ -10,10 +10,10 @@ jobs: python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.2"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} From 2334984decf6fb6b59bd8bea78e552b7a6da8d7e Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 16 Sep 2025 21:36:08 +0500 Subject: [PATCH 203/208] Roll back a backward incompatible fix. 
--- cssselect/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 5bca712..6cbe5d2 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -441,9 +441,9 @@ class Hash: Represents selector#id """ - def __init__(self, selector: Tree, id_: str) -> None: + def __init__(self, selector: Tree, id: str) -> None: # noqa: A002 self.selector = selector - self.id = id_ + self.id = id def __repr__(self) -> str: return f"{self.__class__.__name__}[{self.selector!r}#{self.id}]" From 439ad91e8911e4b366945d1c9eea8db93dd5d368 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 9 Dec 2025 16:10:12 +0500 Subject: [PATCH 204/208] Drop Python 3.9 (#160) --- .github/workflows/checks.yml | 6 +++--- .github/workflows/publish.yml | 2 +- .github/workflows/tests-macos.yml | 2 +- .github/workflows/tests-ubuntu.yml | 4 ++-- .github/workflows/tests-windows.yml | 2 +- .pre-commit-config.yaml | 4 ++-- cssselect/parser.py | 8 ++++---- pyproject.toml | 4 +--- tox.ini | 6 +++--- 9 files changed, 18 insertions(+), 20 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index be003f5..53b1962 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -8,16 +8,16 @@ jobs: fail-fast: false matrix: include: - - python-version: 3.13 + - python-version: 3.14 env: TOXENV: pylint - python-version: 3.13 # Keep in sync with .readthedocs.yml env: TOXENV: docs - - python-version: 3.13 + - python-version: 3.14 env: TOXENV: typing - - python-version: 3.13 + - python-version: 3.14 env: TOXENV: twinecheck diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 3faf4ab..4609175 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -21,7 +21,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v6 with: - python-version: 3.13 + python-version: 3.14 - name: Build run: | diff --git a/.github/workflows/tests-macos.yml 
b/.github/workflows/tests-macos.yml index 9d43f12..be68aac 100644 --- a/.github/workflows/tests-macos.yml +++ b/.github/workflows/tests-macos.yml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.2"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v5 diff --git a/.github/workflows/tests-ubuntu.yml b/.github/workflows/tests-ubuntu.yml index 85db82d..1ab66c2 100644 --- a/.github/workflows/tests-ubuntu.yml +++ b/.github/workflows/tests-ubuntu.yml @@ -7,13 +7,13 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.2", "pypy3.11"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14", "pypy3.11"] steps: - uses: actions/checkout@v5 - name: Install system libraries - if: contains(matrix.python-version, 'pypy') || contains(matrix.python-version, '3.14.0-rc') + if: contains(matrix.python-version, 'pypy') run: | sudo apt-get update sudo apt-get install libxml2-dev libxslt-dev diff --git a/.github/workflows/tests-windows.yml b/.github/workflows/tests-windows.yml index d4139b7..44dc63e 100644 --- a/.github/workflows/tests-windows.yml +++ b/.github/workflows/tests-windows.yml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.2"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v5 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 461b7cf..e553d0a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.13.0 + rev: v0.14.4 hooks: - id: ruff-check args: [ --fix ] @@ -10,7 +10,7 @@ repos: hooks: - id: blacken-docs additional_dependencies: - - black==25.1.0 + - black==25.9.0 - repo: https://github.com/pre-commit/pre-commit-hooks rev: v6.0.0 hooks: diff --git 
a/cssselect/parser.py b/cssselect/parser.py index 6cbe5d2..f969769 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -16,7 +16,7 @@ import operator import re import sys -from typing import TYPE_CHECKING, Literal, Optional, Protocol, Union, cast, overload +from typing import TYPE_CHECKING, Literal, Protocol, TypeAlias, Union, cast, overload if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Sequence @@ -46,7 +46,7 @@ class SelectorSyntaxError(SelectorError, SyntaxError): #### Parsed objects -Tree = Union[ +Tree: TypeAlias = Union[ "Element", "Hash", "Class", @@ -59,7 +59,7 @@ class SelectorSyntaxError(SelectorError, SyntaxError): "SpecificityAdjustment", "CombinedSelector", ] -PseudoElement = Union["FunctionalPseudoElement", str] +PseudoElement: TypeAlias = Union["FunctionalPseudoElement", str] class Selector: @@ -831,7 +831,7 @@ def parse_series(tokens: Iterable[Token]) -> tuple[int, int]: #### Token objects -class Token(tuple[str, Optional[str]]): # noqa: SLOT001 +class Token(tuple[str, str | None]): # noqa: SLOT001 @overload def __new__( cls, diff --git a/pyproject.toml b/pyproject.toml index 8506c66..6b89b39 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,12 +10,11 @@ description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0" readme = "README.rst" authors = [{ name = "Ian Bicking", email = "ianb@colorstudy.com" }] maintainers = [{ name = "Paul Tremberth", email = "paul.tremberth@gmail.com" }] -requires-python = ">=3.9" +requires-python = ">=3.10" classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -65,7 +64,6 @@ exclude_also = [ "def __repr__", "if sys.version_info", "if __name__ == '__main__':", - "if TYPE_CHECKING:", ] [tool.pylint.MASTER] diff --git a/tox.ini 
b/tox.ini index 452a364..949a297 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ envlist = pre-commit,pylint,py,docs,typing [testenv] deps = lxml>=4.4 - pytest-cov>=2.8 + pytest-cov>=7.0.0 pytest>=5.4 sybil commands = @@ -15,7 +15,7 @@ commands = [testenv:pylint] deps = {[testenv]deps} - pylint==3.3.8 + pylint==4.0.2 commands = pylint {posargs: cssselect tests docs} @@ -29,7 +29,7 @@ commands = [testenv:typing] deps = {[testenv]deps} - mypy==1.18.1 + mypy==1.18.2 types-lxml==2025.8.25 commands = mypy --strict {posargs: cssselect tests} From 988a5f48cc413b55ae1e8cd284aa2f4966617c47 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 26 Jan 2026 13:11:37 +0400 Subject: [PATCH 205/208] Bump tools, add actionlint. (#161) * Bump tools, add actionlint. * Bump one more actions/checkout. --- .github/workflows/checks.yml | 6 +++--- .github/workflows/publish.yml | 2 +- .github/workflows/tests-macos.yml | 2 +- .github/workflows/tests-ubuntu.yml | 2 +- .github/workflows/tests-windows.yml | 2 +- .pre-commit-config.yaml | 6 +++++- .readthedocs.yml | 2 +- pyproject.toml | 3 +++ tox.ini | 10 +++++----- 9 files changed, 21 insertions(+), 14 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 53b1962..41ff7e1 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -11,7 +11,7 @@ jobs: - python-version: 3.14 env: TOXENV: pylint - - python-version: 3.13 # Keep in sync with .readthedocs.yml + - python-version: 3.14 # Keep in sync with .readthedocs.yml env: TOXENV: docs - python-version: 3.14 @@ -22,7 +22,7 @@ jobs: TOXENV: twinecheck steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v6 @@ -39,5 +39,5 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml 
index 4609175..526c458 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -16,7 +16,7 @@ jobs: id-token: write steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 diff --git a/.github/workflows/tests-macos.yml b/.github/workflows/tests-macos.yml index be68aac..4947937 100644 --- a/.github/workflows/tests-macos.yml +++ b/.github/workflows/tests-macos.yml @@ -10,7 +10,7 @@ jobs: python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v6 diff --git a/.github/workflows/tests-ubuntu.yml b/.github/workflows/tests-ubuntu.yml index 1ab66c2..1ef905b 100644 --- a/.github/workflows/tests-ubuntu.yml +++ b/.github/workflows/tests-ubuntu.yml @@ -10,7 +10,7 @@ jobs: python-version: ["3.10", "3.11", "3.12", "3.13", "3.14", "pypy3.11"] steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Install system libraries if: contains(matrix.python-version, 'pypy') diff --git a/.github/workflows/tests-windows.yml b/.github/workflows/tests-windows.yml index 44dc63e..24d7ee8 100644 --- a/.github/workflows/tests-windows.yml +++ b/.github/workflows/tests-windows.yml @@ -10,7 +10,7 @@ jobs: python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v6 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e553d0a..81ca890 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,7 +10,7 @@ repos: hooks: - id: blacken-docs additional_dependencies: - - black==25.9.0 + - black==26.1.0 - repo: https://github.com/pre-commit/pre-commit-hooks rev: v6.0.0 hooks: @@ -20,3 +20,7 @@ repos: rev: v1.0.0 hooks: - id: sphinx-lint +- repo: https://github.com/rhysd/actionlint + rev: 
v1.7.10 + hooks: + - id: actionlint diff --git a/.readthedocs.yml b/.readthedocs.yml index 46f5f14..b91642a 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -8,7 +8,7 @@ build: tools: # For available versions, see: # https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python - python: "3.13" # Keep in sync with .github/workflows/checks.yml + python: "3.14" # Keep in sync with .github/workflows/checks.yml python: install: - requirements: docs/requirements.txt diff --git a/pyproject.toml b/pyproject.toml index 6b89b39..e22a153 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,9 @@ exclude_also = [ "if __name__ == '__main__':", ] +[tool.mypy] +strict = true + [tool.pylint.MASTER] persistent = "no" extension-pkg-allow-list = ["lxml"] diff --git a/tox.ini b/tox.ini index 949a297..9ff54cf 100644 --- a/tox.ini +++ b/tox.ini @@ -15,7 +15,7 @@ commands = [testenv:pylint] deps = {[testenv]deps} - pylint==4.0.2 + pylint==4.0.4 commands = pylint {posargs: cssselect tests docs} @@ -29,10 +29,10 @@ commands = [testenv:typing] deps = {[testenv]deps} - mypy==1.18.2 - types-lxml==2025.8.25 + mypy==1.19.1 + types-lxml==2026.1.1 commands = - mypy --strict {posargs: cssselect tests} + mypy {posargs: cssselect tests} [testenv:pre-commit] deps = pre-commit @@ -43,7 +43,7 @@ skip_install = true basepython = python3 deps = twine==6.2.0 - build==1.3.0 + build==1.4.0 commands = python -m build --sdist twine check dist/* From b4dc0ce3fa3f9e68074f4945a43e9762914e88c8 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 28 Jan 2026 19:38:00 +0500 Subject: [PATCH 206/208] Release notes for 1.4.0. --- CHANGES | 13 +++++++++++++ docs/conf.py | 2 +- pyproject.toml | 6 ++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index a6d5f41..ccb5980 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,19 @@ Changelog ========= +Version 1.4.0 +------------- + +Unreleased. + +* Dropped support for Python 3.9 and PyPy 3.10. 
+ +* Added support for Python 3.14 and PyPy 3.11. + +* Switched the build system to ``hatchling``. + +* CI fixes and improvements. + Version 1.3.0 ------------- diff --git a/docs/conf.py b/docs/conf.py index 5713d17..da3f023 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -32,7 +32,7 @@ templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = ".rst" +source_suffix = {".rst": "restructuredtext"} # The encoding of source files. # source_encoding = 'utf-8-sig' diff --git a/pyproject.toml b/pyproject.toml index e22a153..2b36a9e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,12 @@ tag = true [[tool.bumpversion.files]] filename = "cssselect/__init__.py" +[[tool.bumpversion.files]] +filename = "NEWS" +search = "^Unreleased\\.$" +replace = "Released on ({now:%Y-%m-%d})" +regex = true + [tool.coverage.run] branch = true source = ["cssselect"] From cede767169d8d2c1125ad4dd95e139441cb08778 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 28 Jan 2026 20:16:03 +0500 Subject: [PATCH 207/208] Fix bumpversion configuration for the changelog. --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2b36a9e..5ee907a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,9 +56,9 @@ tag = true filename = "cssselect/__init__.py" [[tool.bumpversion.files]] -filename = "NEWS" +filename = "CHANGES" search = "^Unreleased\\.$" -replace = "Released on ({now:%Y-%m-%d})" +replace = "Released on {now:%Y-%m-%d}." 
regex = true [tool.coverage.run] From 743c6e524ee534ed34fd4d4121ee629fee5cddec Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Thu, 29 Jan 2026 11:59:11 +0500 Subject: [PATCH 208/208] =?UTF-8?q?Bump=20version:=201.3.0=20=E2=86=92=201?= =?UTF-8?q?.4.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES | 2 +- cssselect/__init__.py | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index ccb5980..5ca2959 100644 --- a/CHANGES +++ b/CHANGES @@ -4,7 +4,7 @@ Changelog Version 1.4.0 ------------- -Unreleased. +Released on 2026-01-29. * Dropped support for Python 3.9 and PyPy 3.10. diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 67acaaa..59d62df 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -32,5 +32,5 @@ "parse", ) -VERSION = "1.3.0" +VERSION = "1.4.0" __version__ = VERSION diff --git a/pyproject.toml b/pyproject.toml index 5ee907a..c7c54a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ exclude = [ packages = ["cssselect"] [tool.bumpversion] -current_version = "1.3.0" +current_version = "1.4.0" commit = true tag = true