From c44c595ade63e274ba2e17a08b79ec06747032d5 Mon Sep 17 00:00:00 2001 From: annbgn Date: Mon, 21 Jun 2021 13:37:17 +0300 Subject: [PATCH 01/12] add support for :has() --- cssselect/parser.py | 45 +++++++++++++++++++++++++++++++++++++++++ cssselect/xpath.py | 6 ++++++ tests/test_cssselect.py | 17 ++++++++++++++++ 3 files changed, 68 insertions(+) diff --git a/cssselect/parser.py b/cssselect/parser.py index 7125030..e28ad83 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -250,6 +250,30 @@ def specificity(self): return a1 + a2, b1 + b2, c1 + c2 +class Relation(object): + """ + Represents selector:has(subselector) + """ + def __init__(self, selector, subselector): + self.selector = selector + self.subselector = subselector + + def __repr__(self): + return '%s[%r:has(%r)]' % ( + self.__class__.__name__, self.selector, self.subselector) + + def canonical(self): + subsel = self.subselector.canonical() + if len(subsel) > 1: + subsel = subsel.lstrip('*') + return '%s:has(%s)' % (self.selector.canonical(), subsel) + + def specificity(self): + a1, b1, c1 = self.selector.specificity() + a2, b2, c2 = self.subselector.specificity() + return a1 + a2, b1 + b2, c1 + c2 + + class Attrib(object): """ Represents selector[namespace|attrib operator value] @@ -538,6 +562,9 @@ def parse_simple_selector(stream, inside_negation=False): if next != ('DELIM', ')'): raise SelectorSyntaxError("Expected ')', got %s" % (next,)) result = Negation(result, argument) + elif ident.lower() == 'has': + arguments = parse_relative_selector(stream) + result = Relation(result, arguments) else: result = Function(result, ident, parse_arguments(stream)) else: @@ -564,6 +591,24 @@ def parse_arguments(stream): "Expected an argument, got %s" % (next,)) +def parse_relative_selector(stream): + arguments = [] + stream.skip_whitespace() + next = stream.next() + if next in [('DELIM', '+'), ('DELIM', '-'), ('DELIM', '>'), ('DELIM', '~')]: + arguments.append(next) + while 1: + stream.skip_whitespace() + next = stream.next() + if next.type in ('IDENT', 'STRING', 'NUMBER'): + arguments.append(Element(element=next.value)) + elif next == ('DELIM', ')'): + return arguments + else: + raise SelectorSyntaxError( + "Expected an argument, got %s" % (next,)) + + def parse_attrib(selector, stream): stream.skip_whitespace() attrib = stream.next_ident_or_star() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index a8722bb..a6f600f 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -272,6 +272,12 @@ def xpath_negation(self, negation): else: return xpath.add_condition('0') + def xpath_relation(self, relation): + xpath = self.xpath(relation.selector) + combinator, subselector, *_ = relation.subselector + method = getattr(self, 'xpath_%s_combinator' % self.combinator_mapping[combinator.value]) + return method(xpath, self.xpath(subselector)) + def xpath_function(self, function): """Translate a functional pseudo-class.""" method = 'xpath_%s_function' % function.name.replace('-', '_') diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index d6969f2..6611e86 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -266,6 +266,13 @@ def specificity(css): assert specificity(':not(:empty)') == (0, 1, 0) assert specificity(':not(#foo)') == (1, 0, 0) + # assert specificity(':has(*)') == (0, 0, 0) + # assert specificity(':has(foo)') == (0, 0, 1) + # assert specificity(':has(.foo)') == (0, 1, 0) + # assert specificity(':has([foo])') == (0, 1, 0) + # assert specificity(':has(:empty)') == (0, 1, 0) + # assert specificity(':has(#foo)') == (1, 0, 0) + assert specificity('foo:empty') == (0, 1, 1) assert specificity('foo:before') == (0, 0, 2) assert specificity('foo::before') == (0, 0, 2) @@ -300,6 +307,12 @@ def css2css(css, res=None): css2css(':not(*[foo])', ':not([foo])') css2css(':not(:empty)') css2css(':not(#foo)') + # css2css(':has(*)') + # css2css(':has(foo)') + # css2css(':has(*.foo)', ':has(.foo)') + # css2css(':has(*[foo])', ':has([foo])') + # css2css(':has(:empty)') + # css2css(':has(#foo)') css2css('foo:empty') css2css('foo::before') css2css('foo:empty::before') @@ -492,6 +505,7 @@ def xpath(css): "e[not(count(preceding-sibling::*) mod 2 = 0)]") assert xpath('e:nOT(*)') == ( "e[0]") # never matches + assert xpath('e:has(> f)') == 'e/f' assert xpath('e f') == ( "e/descendant-or-self::*/f") assert xpath('e > f') == ( @@ -863,6 +877,9 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol :Not(li[class])') == [ 'first-li', 'second-li', 'li-div', 'fifth-li', 'sixth-li', 'seventh-li'] + # assert pcss('link:has(*)') == [] + # assert pcss('link:has([href])') == ['link-href'] + # assert pcss('ol:has(div)') == ['first-ol'] assert pcss('ol.a.b.c > li.c:nth-child(3)') == ['third-li'] # Invalid characters in XPath element names, should not crash From c4ef8c892dbae8f25f92ded262300c0faabf3abf Mon Sep 17 00:00:00 2001 From: annbgn <47499658+annbgn@users.noreply.github.com> Date: Tue, 22 Jun 2021 23:09:31 +0300 Subject: [PATCH 02/12] allow :has() arguments start not solely from a combinator Co-authored-by: Eugenio Lacuesta <1731933+elacuesta@users.noreply.github.com> --- cssselect/parser.py | 2 ++ cssselect/xpath.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index e28ad83..ac70f00 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -597,6 +597,8 @@ def parse_relative_selector(stream): next = stream.next() if next in [('DELIM', '+'), ('DELIM', '-'), ('DELIM', '>'), ('DELIM', '~')]: arguments.append(next) + elif next.type in ('IDENT', 'STRING', 'NUMBER'): + arguments.append(Element(element=next.value)) while 1: stream.skip_whitespace() next = stream.next() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index a6f600f..0673b38 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -274,7 +274,7 @@ def xpath_negation(self, negation): def xpath_relation(self, relation): xpath = self.xpath(relation.selector) - combinator, subselector, *_ = relation.subselector + combinator, subselector = relation.subselector method = getattr(self, 'xpath_%s_combinator' % self.combinator_mapping[combinator.value]) return method(xpath, self.xpath(subselector)) From 7bcc7e0f8138413e2a4632d01bd30ff5a53c02d6 Mon Sep 17 00:00:00 2001 From: annbgn Date: Tue, 6 Jul 2021 01:37:52 +0300 Subject: [PATCH 03/12] expand tests on relative selectors --- cssselect/parser.py | 4 +++- cssselect/xpath.py | 39 ++++++++++++++++++++++++++++++++++----- tests/test_cssselect.py | 17 +++++++++-------- 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index ac70f00..92d3dfb 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -270,7 +270,9 @@ def canonical(self): def specificity(self): a1, b1, c1 = self.selector.specificity() - a2, b2, c2 = self.subselector.specificity() + a2 = b2 = c2 = 0 + if self.subselector: + a2, b2, c2 = self.subselector[-1].specificity() return a1 + a2, b1 + b2, c1 + c2 diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 0673b38..f60f83d 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -14,6 +14,7 @@ import sys import re +import copy from cssselect.parser import parse, parse_series, SelectorError @@ -76,13 +77,13 @@ def add_star_prefix(self): """ self.path += '*/' - def join(self, combiner, other): + def join(self, combiner, other, closing_combiner=None): path = _unicode(self) + combiner # Any "star prefix" is redundant when joining. if other.path != '*/': path += other.path self.path = path - self.element = other.element + self.element = other.element + closing_combiner if closing_combiner else other.element self.condition = other.condition return self @@ -274,9 +275,14 @@ def xpath_negation(self, negation): def xpath_relation(self, relation): xpath = self.xpath(relation.selector) - combinator, subselector = relation.subselector - method = getattr(self, 'xpath_%s_combinator' % self.combinator_mapping[combinator.value]) - return method(xpath, self.xpath(subselector)) + combinator, *subselector = relation.subselector + if not subselector: + combinator.value = ' ' + right = self.xpath(combinator) + else: + right = self.xpath(subselector[0]) + method = getattr(self, 'xpath_relation_%s_combinator' % self.combinator_mapping[combinator.value]) + return method(xpath, right) def xpath_function(self, function): """Translate a functional pseudo-class.""" @@ -375,6 +381,29 @@ def xpath_indirect_adjacent_combinator(self, left, right): """right is a sibling after left, immediately or not""" return left.join('/following-sibling::', right) + def xpath_relation_descendant_combinator(self, left, right): + """right is a child, grand-child or further descendant of left; select left""" + return left.join('/descendant-or-self::', right, closing_combiner='/ancestor-or-self::' + left.element) + + def xpath_relation_child_combinator(self, left, right): + """right is an immediate child of left; select left""" + return left.join('[./', right, closing_combiner=']') + + def xpath_relation_direct_adjacent_combinator(self, left, right): + """right is a sibling immediately after left; select left""" + left_copy = copy.copy(left) + xpath = left.join('/following-sibling::', right) + xpath.add_name_test() + xpath.add_condition('position() = 1') + + xpath = xpath.join('/preceding-sibling::', left_copy) + xpath.add_name_test() + return xpath.add_condition('position() = 1') + + def xpath_relation_indirect_adjacent_combinator(self, left, right): + """right is a sibling after left, immediately or not; select left""" + return left.join('/following-sibling::', right, closing_combiner='/preceding-sibling::'+left.element) + # Function: dispatch by function/pseudo-class name diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 6611e86..fd6c06d 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -266,12 +266,10 @@ def specificity(css): assert specificity(':not(:empty)') == (0, 1, 0) assert specificity(':not(#foo)') == (1, 0, 0) - # assert specificity(':has(*)') == (0, 0, 0) - # assert specificity(':has(foo)') == (0, 0, 1) - # assert specificity(':has(.foo)') == (0, 1, 0) - # assert specificity(':has([foo])') == (0, 1, 0) - # assert specificity(':has(:empty)') == (0, 1, 0) - # assert specificity(':has(#foo)') == (1, 0, 0) + assert specificity(':has(*)') == (0, 0, 0) + assert specificity(':has(foo)') == (0, 0, 1) + assert specificity(':has(> foo)') == (0, 0, 1) + assert specificity('foo:empty') == (0, 1, 1) assert specificity('foo:before') == (0, 0, 2) @@ -504,8 +502,11 @@ def xpath(css): assert xpath('e:not(:nth-child(odd))') == ( "e[not(count(preceding-sibling::*) mod 2 = 0)]") assert xpath('e:nOT(*)') == ( - "e[0]") # never matches - assert xpath('e:has(> f)') == 'e/f' + "e[0]") # never matches + assert xpath('e:has(> f)') == 'e[./f]' + assert xpath('e:has(f)') == 'e/descendant-or-self::f/ancestor-or-self::e' + assert xpath('e:has(~ f)') == 'e/following-sibling::f/preceding-sibling::e' + assert xpath('e:has(+ f)') == "e/following-sibling::*[(name() = 'f') and (position() = 1)]/preceding-sibling::*[(name() = 'e') and (position() = 1)]" assert xpath('e f') == ( "e/descendant-or-self::*/f") assert xpath('e > f') == ( From 7b03ae2184338664e585d7b5d39c9fec9f47c298 Mon Sep 17 00:00:00 2001 From: annbgn Date: Mon, 12 Jul 2021 23:38:01 +0300 Subject: [PATCH 04/12] run formatter --- cssselect/parser.py | 25 ++++++++++++++----------- cssselect/xpath.py | 34 +++++++++++++++++++++++----------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 92d3dfb..11a47bb 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -254,19 +254,23 @@ class Relation(object): """ Represents selector:has(subselector) """ + def __init__(self, selector, subselector): self.selector = selector self.subselector = subselector def __repr__(self): - return '%s[%r:has(%r)]' % ( - self.__class__.__name__, self.selector, self.subselector) + return "%s[%r:has(%r)]" % ( + self.__class__.__name__, + self.selector, + self.subselector, + ) def canonical(self): subsel = self.subselector.canonical() if len(subsel) > 1: - subsel = subsel.lstrip('*') - return '%s:has(%s)' % (self.selector.canonical(), subsel) + subsel = subsel.lstrip("*") + return "%s:has(%s)" % (self.selector.canonical(), subsel) def specificity(self): a1, b1, c1 = self.selector.specificity() @@ -564,7 +568,7 @@ def parse_simple_selector(stream, inside_negation=False): if next != ('DELIM', ')'): raise SelectorSyntaxError("Expected ')', got %s" % (next,)) result = Negation(result, argument) - elif ident.lower() == 'has': + elif ident.lower() == "has": arguments = parse_relative_selector(stream) result = Relation(result, arguments) else: @@ -586,25 +590,24 @@ def parse_arguments(stream): if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [ ('DELIM', '+'), ('DELIM', '-')]: arguments.append(next) - elif next == ('DELIM', ')'): + elif next == ("DELIM", ")"): return arguments else: - raise SelectorSyntaxError( - "Expected an argument, got %s" % (next,)) + raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) def parse_relative_selector(stream): arguments = [] stream.skip_whitespace() next = stream.next() - if next in [('DELIM', '+'), ('DELIM', '-'), ('DELIM', '>'), ('DELIM', '~')]: + if next in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]: arguments.append(next) - elif next.type in ('IDENT', 'STRING', 'NUMBER'): + elif next.type in ("IDENT", "STRING", "NUMBER"): arguments.append(Element(element=next.value)) while 1: stream.skip_whitespace() next = stream.next() - if next.type in ('IDENT', 'STRING', 'NUMBER'): + if next.type in ("IDENT", "STRING", "NUMBER"): arguments.append(Element(element=next.value)) elif next == ('DELIM', ')'): return arguments diff --git a/cssselect/xpath.py b/cssselect/xpath.py index f60f83d..05e9be2 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -83,7 +83,9 @@ def join(self, combiner, other, closing_combiner=None): if other.path != '*/': path += other.path self.path = path - self.element = other.element + closing_combiner if closing_combiner else other.element + self.element = ( + other.element + closing_combiner if closing_combiner else other.element + ) self.condition = other.condition return self @@ -277,11 +279,14 @@ def xpath_relation(self, relation): xpath = self.xpath(relation.selector) combinator, *subselector = relation.subselector if not subselector: - combinator.value = ' ' + combinator.value = " " right = self.xpath(combinator) else: right = self.xpath(subselector[0]) - method = getattr(self, 'xpath_relation_%s_combinator' % self.combinator_mapping[combinator.value]) + method = getattr( + self, + "xpath_relation_%s_combinator" % self.combinator_mapping[combinator.value], + ) return method(xpath, right) def xpath_function(self, function): @@ -383,27 +388,34 @@ def xpath_indirect_adjacent_combinator(self, left, right): def xpath_relation_descendant_combinator(self, left, right): """right is a child, grand-child or further descendant of left; select left""" - return left.join('/descendant-or-self::', right, closing_combiner='/ancestor-or-self::' + left.element) + return left.join( + "/descendant-or-self::", + right, + closing_combiner="/ancestor-or-self::" + left.element, + ) def xpath_relation_child_combinator(self, left, right): """right is an immediate child of left; select left""" - return left.join('[./', right, closing_combiner=']') + return left.join("[./", right, closing_combiner="]") def xpath_relation_direct_adjacent_combinator(self, left, right): """right is a sibling immediately after left; select left""" left_copy = copy.copy(left) - xpath = left.join('/following-sibling::', right) + xpath = left.join("/following-sibling::", right) xpath.add_name_test() - xpath.add_condition('position() = 1') + xpath.add_condition("position() = 1") - xpath = xpath.join('/preceding-sibling::', left_copy) + xpath = xpath.join("/preceding-sibling::", left_copy) xpath.add_name_test() - return xpath.add_condition('position() = 1') + return xpath.add_condition("position() = 1") def xpath_relation_indirect_adjacent_combinator(self, left, right): """right is a sibling after left, immediately or not; select left""" - return left.join('/following-sibling::', right, closing_combiner='/preceding-sibling::'+left.element) - + return left.join( + "/following-sibling::", + right, + closing_combiner="/preceding-sibling::" + left.element, + ) # Function: dispatch by function/pseudo-class name From 62f737bc2d46b08cd5c9084992d6a3d757c6c4df Mon Sep 17 00:00:00 2001 From: annbgn Date: Fri, 16 Jul 2021 09:34:39 +0300 Subject: [PATCH 05/12] fix review remarks --- cssselect/parser.py | 5 ++++- cssselect/xpath.py | 12 ++---------- tests/test_cssselect.py | 16 +++++----------- 3 files changed, 11 insertions(+), 22 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 11a47bb..77033a3 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -267,7 +267,10 @@ def __repr__(self): ) def canonical(self): - subsel = self.subselector.canonical() + if not self.subselector: + subsel = '*' + else: + subsel = self.subselector[0].canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") return "%s:has(%s)" % (self.selector.canonical(), subsel) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 05e9be2..c6c2a4a 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -388,11 +388,7 @@ def xpath_indirect_adjacent_combinator(self, left, right): def xpath_relation_descendant_combinator(self, left, right): """right is a child, grand-child or further descendant of left; select left""" - return left.join( - "/descendant-or-self::", - right, - closing_combiner="/ancestor-or-self::" + left.element, - ) + return left.join("[descendant::", right, closing_combiner="]") def xpath_relation_child_combinator(self, left, right): """right is an immediate child of left; select left""" @@ -411,11 +407,7 @@ def xpath_relation_direct_adjacent_combinator(self, left, right): def xpath_relation_indirect_adjacent_combinator(self, left, right): """right is a sibling after left, immediately or not; select left""" - return left.join( - "/following-sibling::", - right, - closing_combiner="/preceding-sibling::" + left.element, - ) + return left.join("[following-sibling::", right, closing_combiner="]") # Function: dispatch by function/pseudo-class name diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index fd6c06d..e4c4b58 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -305,12 +305,8 @@ def css2css(css, res=None): css2css(':not(*[foo])', ':not([foo])') css2css(':not(:empty)') css2css(':not(#foo)') - # css2css(':has(*)') - # css2css(':has(foo)') - # css2css(':has(*.foo)', ':has(.foo)') - # css2css(':has(*[foo])', ':has([foo])') - # css2css(':has(:empty)') - # css2css(':has(#foo)') + css2css(':has(*)') + css2css(':has(foo)') css2css('foo:empty') css2css('foo::before') css2css('foo:empty::before') @@ -504,8 +500,8 @@ def xpath(css): assert xpath('e:nOT(*)') == ( "e[0]") # never matches assert xpath('e:has(> f)') == 'e[./f]' - assert xpath('e:has(f)') == 'e/descendant-or-self::f/ancestor-or-self::e' - assert xpath('e:has(~ f)') == 'e/following-sibling::f/preceding-sibling::e' + assert xpath('e:has(f)') == 'e[descendant::f]' + assert xpath('e:has(~ f)') == 'e[following-sibling::f]' assert xpath('e:has(+ f)') == "e/following-sibling::*[(name() = 'f') and (position() = 1)]/preceding-sibling::*[(name() = 'e') and (position() = 1)]" assert xpath('e f') == ( "e/descendant-or-self::*/f") @@ -878,9 +874,7 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol :Not(li[class])') == [ 'first-li', 'second-li', 'li-div', 'fifth-li', 'sixth-li', 'seventh-li'] - # assert pcss('link:has(*)') == [] - # assert pcss('link:has([href])') == ['link-href'] - # assert pcss('ol:has(div)') == ['first-ol'] + assert pcss('ol:has(div)') == ['first-ol'] assert pcss('ol.a.b.c > li.c:nth-child(3)') == ['third-li'] # Invalid characters in XPath element names, should not crash From 47f3c11c552c9d8f4d13d63880cafc0a386ddcfd Mon Sep 17 00:00:00 2001 From: annbgn Date: Fri, 16 Jul 2021 20:23:23 +0300 Subject: [PATCH 06/12] fix lint --- tests/test_cssselect.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 3c1240c..dd099c2 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -270,9 +270,9 @@ def specificity(css): assert specificity(':not(:empty)') == (0, 1, 0) assert specificity(':not(#foo)') == (1, 0, 0) - assert specificity(':has(*)') == (0, 0, 0) - assert specificity(':has(foo)') == (0, 0, 1) - assert specificity(':has(> foo)') == (0, 0, 1) + assert specificity(":has(*)") == (0, 0, 0) + assert specificity(":has(foo)") == (0, 0, 1) + assert specificity(":has(> foo)") == (0, 0, 1) assert specificity(':is(.foo, #bar)') == (1, 0, 0) assert specificity(':is(:hover, :visited)') == (0, 1, 0) @@ -311,8 +311,8 @@ def css2css(css, res=None): css2css(':not(*[foo])', ':not([foo])') css2css(':not(:empty)') css2css(':not(#foo)') - css2css(':has(*)') - css2css(':has(foo)') + css2css(":has(*)") + css2css(":has(foo)") css2css(':is(#bar, .foo)') css2css(':is(:focused, :visited)') css2css('foo:empty') @@ -511,10 +511,14 @@ def xpath(css): "e[not(count(preceding-sibling::*) mod 2 = 0)]") assert xpath('e:nOT(*)') == ( "e[0]") # never matches - assert xpath('e:has(> f)') == 'e[./f]' - assert xpath('e:has(f)') == 'e[descendant::f]' - assert xpath('e:has(~ f)') == 'e[following-sibling::f]' - assert xpath('e:has(+ f)') == "e/following-sibling::*[(name() = 'f') and (position() = 1)]/preceding-sibling::*[(name() = 'e') and (position() = 1)]" + assert xpath("e:has(> f)") == "e[./f]" + assert xpath("e:has(f)") == "e[descendant::f]" + assert xpath("e:has(~ f)") == "e[following-sibling::f]" + assert ( + xpath("e:has(+ f)") + == "e/following-sibling::*[(name() = 'f') and (position() = 1)]" + "/preceding-sibling::*[(name() = 'e') and (position() = 1)]" + ) assert xpath('e f') == ( "e/descendant-or-self::*/f") assert xpath('e > f') == ( @@ -886,7 +890,7 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol :Not(li[class])') == [ 'first-li', 'second-li', 'li-div', 'fifth-li', 'sixth-li', 'seventh-li'] - assert pcss('ol:has(div)') == ['first-ol'] + assert pcss("ol:has(div)") == ["first-ol"] assert pcss(':is(#first-li, #second-li)') == [ 'first-li', 'second-li'] assert pcss('a:is(#name-anchor, #tag-anchor)') == [ From b64eacf664280bd720c68d7c8a55688a356d722a Mon Sep 17 00:00:00 2001 From: annbgn Date: Wed, 21 Jul 2021 13:27:39 +0300 Subject: [PATCH 07/12] simplify test + run ```darker master HEAD``` --- cssselect/parser.py | 29 +++++++++++++++-------------- cssselect/xpath.py | 20 ++++++-------------- tests/test_cssselect.py | 3 +-- 3 files changed, 22 insertions(+), 30 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index d7770a1..9d48dc7 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -268,7 +268,7 @@ def __repr__(self): def canonical(self): if not self.subselector: - subsel = '*' + subsel = "*" else: subsel = self.subselector[0].canonical() if len(subsel) > 1: @@ -287,22 +287,24 @@ class Matching(object): """ Represents selector:is(selector_list) """ + def __init__(self, selector, selector_list): self.selector = selector self.selector_list = selector_list def __repr__(self): - return '%s[%r:is(%s)]' % ( - self.__class__.__name__, self.selector, ", ".join( - map(repr, self.selector_list))) + return "%s[%r:is(%s)]" % ( + self.__class__.__name__, + self.selector, + ", ".join(map(repr, self.selector_list)), + ) def canonical(self): selector_arguments = [] for s in self.selector_list: selarg = s.canonical() - selector_arguments.append(selarg.lstrip('*')) - return '%s:is(%s)' % (self.selector.canonical(), - ", ".join(map(str, selector_arguments))) + selector_arguments.append(selarg.lstrip("*")) + return "%s:is(%s)" % (self.selector.canonical(), ", ".join(map(str, selector_arguments))) def specificity(self): return max([x.specificity() for x in self.selector_list]) @@ -600,7 +602,7 @@ def parse_simple_selector(stream, inside_negation=False): elif ident.lower() == "has": arguments = parse_relative_selector(stream) result = Relation(result, arguments) - elif ident.lower() in ('matches', 'is'): + elif ident.lower() in ("matches", "is"): selectors = parse_simple_selector_arguments(stream) result = Matching(result, selectors) else: @@ -654,20 +656,19 @@ def parse_simple_selector_arguments(stream): result, pseudo_element = parse_simple_selector(stream, True) if pseudo_element: raise SelectorSyntaxError( - 'Got pseudo-element ::%s inside function' - % (pseudo_element, )) + "Got pseudo-element ::%s inside function" % (pseudo_element,) + ) stream.skip_whitespace() next = stream.next() - if next in (('EOF', None), ('DELIM', ',')): + if next in (("EOF", None), ("DELIM", ",")): stream.next() stream.skip_whitespace() arguments.append(result) - elif next == ('DELIM', ')'): + elif next == ("DELIM", ")"): arguments.append(result) break else: - raise SelectorSyntaxError( - "Expected an argument, got %s" % (next,)) + raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) return arguments diff --git a/cssselect/xpath.py b/cssselect/xpath.py index f8930b1..13bc590 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -55,9 +55,9 @@ def __str__(self): def __repr__(self): return '%s[%s]' % (self.__class__.__name__, self) - def add_condition(self, condition, conjuction='and'): + def add_condition(self, condition, conjuction="and"): if self.condition: - self.condition = '(%s) %s (%s)' % (self.condition, conjuction, condition) + self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition) else: self.condition = condition return self @@ -83,9 +83,7 @@ def join(self, combiner, other, closing_combiner=None): if other.path != '*/': path += other.path self.path = path - self.element = ( - other.element + closing_combiner if closing_combiner else other.element - ) + self.element = other.element + closing_combiner if closing_combiner else other.element self.condition = other.condition return self @@ -295,7 +293,7 @@ def xpath_matching(self, matching): for e in exprs: e.add_name_test() if e.condition: - xpath.add_condition(e.condition, 'or') + xpath.add_condition(e.condition, "or") return xpath def xpath_function(self, function): @@ -405,14 +403,8 @@ def xpath_relation_child_combinator(self, left, right): def xpath_relation_direct_adjacent_combinator(self, left, right): """right is a sibling immediately after left; select left""" - left_copy = copy.copy(left) - xpath = left.join("/following-sibling::", right) - xpath.add_name_test() - xpath.add_condition("position() = 1") - - xpath = xpath.join("/preceding-sibling::", left_copy) - xpath.add_name_test() - return xpath.add_condition("position() = 1") + xpath = left.add_condition("following-sibling::{}[position() = 1]".format(right.element)) + return xpath def xpath_relation_indirect_adjacent_combinator(self, left, right): """right is a sibling after left, immediately or not; select left""" diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index dd099c2..e3e4761 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -516,8 +516,7 @@ def xpath(css): assert xpath("e:has(~ f)") == "e[following-sibling::f]" assert ( xpath("e:has(+ f)") - == "e/following-sibling::*[(name() = 'f') and (position() = 1)]" - "/preceding-sibling::*[(name() = 'e') and (position() = 1)]" + == "e[following-sibling::f[position() = 1]]" ) assert xpath('e f') == ( "e/descendant-or-self::*/f") From 72bd7762df6961f39cd8a18939cbc61b97b71a84 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Fri, 23 Jul 2021 10:47:23 -0300 Subject: [PATCH 08/12] Revert xpath translation change to make it consistent --- cssselect/xpath.py | 4 +++- tests/test_cssselect.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 13bc590..d7a2203 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -403,7 +403,9 @@ def xpath_relation_child_combinator(self, left, right): def xpath_relation_direct_adjacent_combinator(self, left, right): """right is a sibling immediately after left; select left""" - xpath = left.add_condition("following-sibling::{}[position() = 1]".format(right.element)) + xpath = left.add_condition( + "following-sibling::*[(name() = '{}') and (position() = 1)]".format(right.element) + ) return xpath def xpath_relation_indirect_adjacent_combinator(self, left, right): diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index e3e4761..5552b78 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -516,7 +516,7 @@ def xpath(css): assert xpath("e:has(~ f)") == "e[following-sibling::f]" assert ( xpath("e:has(+ f)") - == "e[following-sibling::f[position() = 1]]" + == "e[following-sibling::*[(name() = 'f') and (position() = 1)]]" ) assert xpath('e f') == ( "e/descendant-or-self::*/f") From 41a0f7f3cd5abf3228fdf50218e6c3c4ed5db46b Mon Sep 17 00:00:00 2001 From: annbgn Date: Sun, 25 Jul 2021 21:43:44 +0300 Subject: [PATCH 09/12] add test, expand :has() to accept more complex arguments, remove useless ifs --- cssselect/parser.py | 38 ++++++++++++++++++++++---------------- cssselect/xpath.py | 9 +++------ tests/test_cssselect.py | 11 +++++++++++ 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 9d48dc7..43d55eb 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -255,8 +255,9 @@ class Relation(object): Represents selector:has(subselector) """ - def __init__(self, selector, subselector): + def __init__(self, selector, combinator, subselector): self.selector = selector + self.combinator = combinator self.subselector = subselector def __repr__(self): @@ -267,19 +268,20 @@ def __repr__(self): ) def canonical(self): - if not self.subselector: - subsel = "*" - else: + try: subsel = self.subselector[0].canonical() + except TypeError: + subsel = self.subselector.canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") return "%s:has(%s)" % (self.selector.canonical(), subsel) def specificity(self): a1, b1, c1 = self.selector.specificity() - a2 = b2 = c2 = 0 - if self.subselector: + try: a2, b2, c2 = self.subselector[-1].specificity() + except TypeError: + a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -600,8 +602,8 @@ def parse_simple_selector(stream, inside_negation=False): raise SelectorSyntaxError("Expected ')', got %s" % (next,)) result = Negation(result, argument) elif ident.lower() == "has": - arguments = parse_relative_selector(stream) - result = Relation(result, arguments) + combinator, arguments = parse_relative_selector(stream) + result = Relation(result, combinator, arguments) elif ident.lower() in ("matches", "is"): selectors = parse_simple_selector_arguments(stream) result = Matching(result, selectors) @@ -631,23 +633,27 @@ def parse_arguments(stream): def parse_relative_selector(stream): - arguments = [] stream.skip_whitespace() + subselector = "" next = stream.next() + if next in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]: - arguments.append(next) - elif next.type in ("IDENT", "STRING", "NUMBER"): - arguments.append(Element(element=next.value)) - while 1: + combinator = next stream.skip_whitespace() next = stream.next() - if next.type in ("IDENT", "STRING", "NUMBER"): - arguments.append(Element(element=next.value)) + else: + combinator = Token("DELIM", " ", pos=0) + + while 1: + if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "."), ("DELIM", "*")]: + subselector += next.value elif next == ('DELIM', ')'): - return arguments + result = parse(subselector) + return combinator, result[0] else: raise SelectorSyntaxError( "Expected an argument, got %s" % (next,)) + next = stream.next() def parse_simple_selector_arguments(stream): diff --git a/cssselect/xpath.py b/cssselect/xpath.py index d7a2203..82c03f1 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -275,12 +275,9 @@ def xpath_negation(self, negation): def xpath_relation(self, relation): xpath = self.xpath(relation.selector) - combinator, *subselector = relation.subselector - if not subselector: - combinator.value = " " - right = self.xpath(combinator) - else: - right = self.xpath(subselector[0]) + combinator = relation.combinator + subselector = relation.subselector + right = self.xpath(subselector.parsed_tree) method = getattr( self, "xpath_relation_%s_combinator" % self.combinator_mapping[combinator.value], diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 5552b78..78f2558 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -145,6 +145,8 @@ def parse_many(first, *others): 'Hash[Element[div]#foobar]'] assert parse_many('div:not(div.foo)') == [ 'Negation[Element[div]:not(Class[Element[div].foo])]'] + assert parse_many('div:has(div.foo)') == [ + 'Relation[Element[div]:has(Selector[Class[Element[div].foo]])]'] assert parse_many('div:is(.foo, #bar)') == [ 'Matching[Element[div]:is(Class[Element[*].foo], Hash[Element[*]#bar])]'] assert parse_many(':is(:hover, :visited)') == [ @@ -272,6 +274,7 @@ def specificity(css): assert specificity(":has(*)") == (0, 0, 0) assert specificity(":has(foo)") == (0, 0, 1) + assert specificity(":has(.foo)") == (0, 1, 0) assert specificity(":has(> foo)") == (0, 0, 1) assert specificity(':is(.foo, #bar)') == (1, 0, 0) @@ -313,6 +316,7 @@ def css2css(css, res=None): css2css(':not(#foo)') css2css(":has(*)") css2css(":has(foo)") + css2css(':has(*.foo)', ':has(.foo)') css2css(':is(#bar, .foo)') css2css(':is(:focused, :visited)') css2css('foo:empty') @@ -400,6 +404,12 @@ def get_error(css): ) assert get_error('> div p') == ("Expected selector, got ' at 0>") + # Unsupported :has() with several arguments + assert get_error(':has(a, b)') == ( + "Expected an argument, got ") + assert get_error(':has()') == ( + "Expected selector, got ") + def test_translation(self): def xpath(css): return _unicode(GenericTranslator().css_to_xpath(css, prefix='')) @@ -889,6 +899,7 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol :Not(li[class])') == [ 'first-li', 'second-li', 'li-div', 'fifth-li', 'sixth-li', 'seventh-li'] + assert pcss('link:has(*)') == [] assert pcss("ol:has(div)") == ["first-ol"] assert pcss(':is(#first-li, #second-li)') == [ 'first-li', 'second-li'] From 9d16efa21bd72214b4a6de343f3aaab030ba1c98 Mon Sep 17 00:00:00 2001 From: annbgn Date: Tue, 3 Aug 2021 22:34:38 +0300 Subject: [PATCH 10/12] run black --- cssselect/parser.py | 6 ++---- cssselect/xpath.py | 2 +- tests/test_cssselect.py | 9 +++------ 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 74268c8..f1ccf98 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -661,16 +661,14 @@ def parse_relative_selector(stream): while 1: if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "."), ("DELIM", "*")]: subselector += next.value - elif next == ('DELIM', ')'): + elif next == ("DELIM", ")"): result = parse(subselector) return combinator, result[0] else: - raise SelectorSyntaxError( - "Expected an argument, got %s" % (next,)) + raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) next = stream.next() - def parse_simple_selector_arguments(stream): arguments = [] while 1: diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 0485ce9..b9ff1d2 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -386,7 +386,7 @@ def xpath_direct_adjacent_combinator(self, left, right): def xpath_indirect_adjacent_combinator(self, left, right): """right is a sibling after left, immediately or not""" - return left.join('/following-sibling::', right) + return left.join("/following-sibling::", right) def xpath_relation_descendant_combinator(self, left, right): """right is a child, grand-child or further descendant of left; select left""" diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index ba64f6f..88fe821 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -391,10 +391,8 @@ def get_error(css): assert get_error("> div p") == ("Expected selector, got ' at 0>") # Unsupported :has() with several arguments - assert get_error(':has(a, b)') == ( - "Expected an argument, got ") - assert get_error(':has()') == ( - "Expected selector, got ") + assert get_error(":has(a, b)") == ("Expected an argument, got ") + assert get_error(":has()") == ("Expected selector, got ") def test_translation(self): def xpath(css): @@ -474,8 +472,7 @@ def xpath(css): assert xpath("e:has(f)") == "e[descendant::f]" assert xpath("e:has(~ f)") == "e[following-sibling::f]" assert ( - xpath("e:has(+ f)") - == "e[following-sibling::*[(name() = 'f') and (position() = 1)]]" + xpath("e:has(+ f)") == "e[following-sibling::*[(name() = 'f') and (position() = 1)]]" ) assert xpath('e:contains("foo")') == ("e[contains(., 'foo')]") assert xpath("e:ConTains(foo)") == ("e[contains(., 'foo')]") From 0e37bf28d202c26e85cbee9ff1ac408d747e652a Mon Sep 17 00:00:00 2001 From: annbgn Date: Wed, 4 Aug 2021 08:35:46 +0300 Subject: [PATCH 11/12] add an xpath test, similar to parse_many --- tests/test_cssselect.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 88fe821..10aa6ea 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -468,6 +468,10 @@ def xpath(css): assert xpath("e:EmPTY") == ("e[not(*) and not(string-length())]") assert xpath("e:root") == ("e[not(parent::*)]") assert xpath("e:hover") == ("e[0]") # never matches + assert ( + xpath("div:has(div.foo)") == "div[descendant::div]" + "[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]" + ) assert xpath("e:has(> f)") == "e[./f]" assert xpath("e:has(f)") == "e[descendant::f]" assert xpath("e:has(~ f)") == "e[following-sibling::f]" From b4cbd4eb2ef70bc25244180cdbbd0453a2c07b69 Mon Sep 17 00:00:00 2001 From: annbgn Date: Tue, 17 Aug 2021 07:53:23 +0300 Subject: [PATCH 12/12] fix xpath translating for > combinator --- cssselect/xpath.py | 15 +++++++++++---- tests/test_cssselect.py | 4 ++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index b9ff1d2..9bdcb7f 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -76,14 +76,21 @@ def add_star_prefix(self): """ self.path += "*/" - def join(self, combiner, other, closing_combiner=None): + def join(self, combiner, other, closing_combiner=None, has_inner_condition=False): path = _unicode(self) + combiner # Any "star prefix" is redundant when joining. if other.path != "*/": path += other.path self.path = path - self.element = other.element + closing_combiner if closing_combiner else other.element - self.condition = other.condition + if not has_inner_condition: + self.element = other.element + closing_combiner if closing_combiner else other.element + self.condition = other.condition + else: + self.element = other.element + if other.condition: + self.element += "[" + other.condition + "]" + if closing_combiner: + self.element += closing_combiner return self @@ -390,7 +397,7 @@ def xpath_indirect_adjacent_combinator(self, left, right): def xpath_relation_descendant_combinator(self, left, right): """right is a child, grand-child or further descendant of left; select left""" - return left.join("[descendant::", right, closing_combiner="]") + return left.join("[descendant::", right, closing_combiner="]", has_inner_condition=True) def xpath_relation_child_combinator(self, left, right): """right is an immediate child of left; select left""" diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 10aa6ea..6c0f29a 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -469,8 +469,8 @@ def xpath(css): assert xpath("e:root") == ("e[not(parent::*)]") assert xpath("e:hover") == ("e[0]") # never matches assert ( - xpath("div:has(div.foo)") == "div[descendant::div]" - "[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]" + xpath("div:has(bar.foo)") == "div[descendant::bar" + "[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]]" ) assert xpath("e:has(> f)") == "e[./f]" assert xpath("e:has(f)") == "e[descendant::f]"