From 5c225087343f783edc04ba37066fcbb1066c089e Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Fri, 31 Jan 2025 19:42:31 +0500 Subject: [PATCH 01/10] Add pyupgrade. --- .pre-commit-config.yaml | 5 ++ cssselect/__init__.py | 1 - cssselect/parser.py | 118 ++++++++++++++++++++-------------------- cssselect/xpath.py | 28 +++++----- docs/conf.py | 1 - setup.py | 2 - tests/test_cssselect.py | 61 ++++++++++----------- 7 files changed, 105 insertions(+), 111 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ab99544..c0afc8d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,3 +16,8 @@ repos: rev: 5.13.2 hooks: - id: isort +- repo: https://github.com/asottile/pyupgrade + rev: v3.19.1 + hooks: + - id: pyupgrade + args: [--py39-plus, --keep-percent-format] diff --git a/cssselect/__init__.py b/cssselect/__init__.py index a59995c..8e0782b 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ CSS Selectors based on XPath ============================ diff --git a/cssselect/parser.py b/cssselect/parser.py index 354713d..156de53 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ cssselect.parser ================ @@ -12,11 +11,14 @@ """ +from __future__ import annotations + import operator import re import sys import typing -from typing import Iterable, Iterator, List, Optional, Sequence, Tuple, Union +from collections.abc import Iterable, Iterator, Sequence +from typing import Optional, Union def ascii_lower(string: str) -> str: @@ -67,9 +69,7 @@ class Selector: """ - def __init__( - self, tree: Tree, pseudo_element: Optional[PseudoElement] = None - ) -> None: + def __init__(self, tree: Tree, pseudo_element: PseudoElement | None = None) -> None: self.parsed_tree = tree if pseudo_element is not None and not isinstance( pseudo_element, FunctionalPseudoElement @@ -119,7 +119,7 @@ def canonical(self) -> str: res = res.lstrip("*") return res - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: """Return the specificity_ of this selector as a tuple of 3 integers. .. _specificity: http://www.w3.org/TR/selectors/#specificity @@ -146,7 +146,7 @@ def __repr__(self) -> str: def canonical(self) -> str: return "%s.%s" % (self.selector.canonical(), self.class_name) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -170,7 +170,7 @@ class FunctionalPseudoElement: """ - def __init__(self, name: str, arguments: Sequence["Token"]): + def __init__(self, name: str, arguments: Sequence[Token]): self.name = ascii_lower(name) self.arguments = arguments @@ -181,7 +181,7 @@ def __repr__(self) -> str: [token.value for token in self.arguments], ) - def argument_types(self) -> List[str]: + def argument_types(self) -> list[str]: return [token.type for token in self.arguments] def canonical(self) -> str: @@ -194,7 +194,7 @@ class Function: Represents selector:name(expr) """ - def __init__(self, selector: Tree, name: str, arguments: Sequence["Token"]) -> None: + def __init__(self, selector: Tree, name: str, arguments: Sequence[Token]) -> None: self.selector = selector self.name = ascii_lower(name) self.arguments = arguments @@ -207,14 +207,14 @@ def __repr__(self) -> str: [token.value for token in self.arguments], ) - def argument_types(self) -> List[str]: + def argument_types(self) -> list[str]: return [token.type for token in self.arguments] def canonical(self) -> str: args = "".join(token.css() for token in self.arguments) return "%s:%s(%s)" % (self.selector.canonical(), self.name, args) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -235,7 +235,7 @@ def __repr__(self) -> str: def canonical(self) -> str: return "%s:%s" % (self.selector.canonical(), self.ident) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -263,7 +263,7 @@ def canonical(self) -> str: subsel = subsel.lstrip("*") return "%s:not(%s)" % (self.selector.canonical(), subsel) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -274,7 +274,7 @@ class Relation: Represents selector:has(subselector) """ - def __init__(self, selector: Tree, combinator: "Token", subselector: Selector): + def __init__(self, selector: Tree, combinator: Token, subselector: Selector): self.selector = selector self.combinator = combinator self.subselector = subselector @@ -295,7 +295,7 @@ def canonical(self) -> str: subsel = subsel.lstrip("*") return "%s:has(%s)" % (self.selector.canonical(), subsel) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() try: a2, b2, c2 = self.subselector[-1].specificity() # type: ignore @@ -330,7 +330,7 @@ def canonical(self) -> str: ", ".join(map(str, selector_arguments)), ) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: return max(x.specificity() for x in self.selector_list) @@ -340,7 +340,7 @@ class SpecificityAdjustment: Same as selector:is(selector_list), but its specificity is always 0 """ - def __init__(self, selector: Tree, selector_list: List[Tree]): + def __init__(self, selector: Tree, selector_list: list[Tree]): self.selector = selector self.selector_list = selector_list @@ -361,7 +361,7 @@ def canonical(self) -> str: ", ".join(map(str, selector_arguments)), ) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: return 0, 0, 0 @@ -374,9 +374,9 @@ class Attrib: def __init__( self, selector: Tree, - namespace: Optional[str], + namespace: str | None, attrib: str, - operator: 'typing.Literal["exists"]', + operator: typing.Literal["exists"], value: None, ) -> None: ... @@ -384,19 +384,19 @@ def __init__( def __init__( self, selector: Tree, - namespace: Optional[str], + namespace: str | None, attrib: str, operator: str, - value: "Token", + value: Token, ) -> None: ... def __init__( self, selector: Tree, - namespace: Optional[str], + namespace: str | None, attrib: str, operator: str, - value: Optional["Token"], + value: Token | None, ) -> None: self.selector = selector self.namespace = namespace @@ -437,7 +437,7 @@ def canonical(self) -> str: return "%s[%s]" % (self.selector.canonical(), op) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -452,7 +452,7 @@ class Element: """ def __init__( - self, namespace: Optional[str] = None, element: Optional[str] = None + self, namespace: str | None = None, element: str | None = None ) -> None: self.namespace = namespace self.element = element @@ -466,7 +466,7 @@ def canonical(self) -> str: element = "%s|%s" % (self.namespace, element) return element - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: if self.element: return 0, 0, 1 else: @@ -488,7 +488,7 @@ def __repr__(self) -> str: def canonical(self) -> str: return "%s#%s" % (self.selector.canonical(), self.id) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() a += 1 return a, b, c @@ -519,7 +519,7 @@ def canonical(self) -> str: subsel = subsel.lstrip("*") return "%s %s %s" % (self.selector.canonical(), self.combinator, subsel) - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -539,7 +539,7 @@ def specificity(self) -> Tuple[int, int, int]: ) -def parse(css: str) -> List[Selector]: +def parse(css: str) -> list[Selector]: """Parse a CSS *group of selectors*. If you don't care about pseudo-elements or selector specificity, @@ -581,7 +581,7 @@ def parse(css: str) -> List[Selector]: # raise -def parse_selector_group(stream: "TokenStream") -> Iterator[Selector]: +def parse_selector_group(stream: TokenStream) -> Iterator[Selector]: stream.skip_whitespace() while 1: yield Selector(*parse_selector(stream)) @@ -592,7 +592,7 @@ def parse_selector_group(stream: "TokenStream") -> Iterator[Selector]: break -def parse_selector(stream: "TokenStream") -> Tuple[Tree, Optional[PseudoElement]]: +def parse_selector(stream: TokenStream) -> tuple[Tree, PseudoElement | None]: result, pseudo_element = parse_simple_selector(stream) while 1: stream.skip_whitespace() @@ -617,8 +617,8 @@ def parse_selector(stream: "TokenStream") -> Tuple[Tree, Optional[PseudoElement] def parse_simple_selector( - stream: "TokenStream", inside_negation: bool = False -) -> Tuple[Tree, Optional[PseudoElement]]: + stream: TokenStream, inside_negation: bool = False +) -> tuple[Tree, PseudoElement | None]: stream.skip_whitespace() selector_start = len(stream.used) peek = stream.peek() @@ -637,7 +637,7 @@ def parse_simple_selector( else: element = namespace = None result: Tree = Element(namespace, element) - pseudo_element: Optional[PseudoElement] = None + pseudo_element: PseudoElement | None = None while 1: peek = stream.peek() if ( @@ -732,8 +732,8 @@ def parse_simple_selector( return result, pseudo_element -def parse_arguments(stream: "TokenStream") -> List["Token"]: - arguments: List["Token"] = [] +def parse_arguments(stream: TokenStream) -> list[Token]: + arguments: list[Token] = [] while 1: stream.skip_whitespace() next = stream.next() @@ -748,7 +748,7 @@ def parse_arguments(stream: "TokenStream") -> List["Token"]: raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) -def parse_relative_selector(stream: "TokenStream") -> Tuple["Token", Selector]: +def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]: stream.skip_whitespace() subselector = "" next = stream.next() @@ -774,7 +774,7 @@ def parse_relative_selector(stream: "TokenStream") -> Tuple["Token", Selector]: next = stream.next() -def parse_simple_selector_arguments(stream: "TokenStream") -> List[Tree]: +def parse_simple_selector_arguments(stream: TokenStream) -> list[Tree]: arguments = [] while 1: result, pseudo_element = parse_simple_selector(stream, True) @@ -796,13 +796,13 @@ def parse_simple_selector_arguments(stream: "TokenStream") -> List[Tree]: return arguments -def parse_attrib(selector: Tree, stream: "TokenStream") -> Attrib: +def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib: stream.skip_whitespace() attrib = stream.next_ident_or_star() if attrib is None and stream.peek() != ("DELIM", "|"): raise SelectorSyntaxError("Expected '|', got %s" % (stream.peek(),)) - namespace: Optional[str] - op: Optional[str] + namespace: str | None + op: str | None if stream.peek() == ("DELIM", "|"): stream.next() if stream.peek() == ("DELIM", "="): @@ -840,7 +840,7 @@ def parse_attrib(selector: Tree, stream: "TokenStream") -> Attrib: return Attrib(selector, namespace, typing.cast(str, attrib), op, value) -def parse_series(tokens: Iterable["Token"]) -> Tuple[int, int]: +def parse_series(tokens: Iterable[Token]) -> tuple[int, int]: """ Parses the arguments for :nth-child() and friends. @@ -880,21 +880,19 @@ def parse_series(tokens: Iterable["Token"]) -> Tuple[int, int]: #### Token objects -class Token(Tuple[str, Optional[str]]): +class Token(tuple[str, Optional[str]]): @typing.overload def __new__( cls, - type_: 'typing.Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"]', + type_: typing.Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"], value: str, pos: int, - ) -> "Token": ... + ) -> Token: ... @typing.overload - def __new__( - cls, type_: 'typing.Literal["EOF"]', value: None, pos: int - ) -> "Token": ... + def __new__(cls, type_: typing.Literal["EOF"], value: None, pos: int) -> Token: ... - def __new__(cls, type_: str, value: Optional[str], pos: int) -> "Token": + def __new__(cls, type_: str, value: str | None, pos: int) -> Token: obj = tuple.__new__(cls, (type_, value)) obj.pos = pos return obj @@ -912,7 +910,7 @@ def type(self) -> str: return self[0] @property - def value(self) -> Optional[str]: + def value(self) -> str | None: return self[1] def css(self) -> str: @@ -923,7 +921,7 @@ def css(self) -> str: class EOFToken(Token): - def __new__(cls, pos: int) -> "EOFToken": + def __new__(cls, pos: int) -> EOFToken: return typing.cast("EOFToken", Token.__new__(cls, "EOF", None, pos)) def __repr__(self) -> str: @@ -947,10 +945,10 @@ class TokenMacros: class MatchFunc(typing.Protocol): def __call__( self, string: str, pos: int = ..., endpos: int = ... - ) -> Optional["re.Match[str]"]: ... + ) -> re.Match[str] | None: ... -def _compile(pattern: str) -> "MatchFunc": +def _compile(pattern: str) -> MatchFunc: return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match @@ -971,7 +969,7 @@ def _compile(pattern: str) -> "MatchFunc": _replace_simple = operator.methodcaller("group", 1) -def _replace_unicode(match: "re.Match[str]") -> str: +def _replace_unicode(match: re.Match[str]) -> str: codepoint = int(match.group(1), 16) if codepoint > sys.maxunicode: codepoint = 0xFFFD @@ -1056,11 +1054,11 @@ def tokenize(s: str) -> Iterator[Token]: class TokenStream: - def __init__(self, tokens: Iterable[Token], source: Optional[str] = None) -> None: - self.used: List[Token] = [] + def __init__(self, tokens: Iterable[Token], source: str | None = None) -> None: + self.used: list[Token] = [] self.tokens = iter(tokens) self.source = source - self.peeked: Optional[Token] = None + self.peeked: Token | None = None self._peeking = False self.next_token = self.tokens.__next__ @@ -1086,7 +1084,7 @@ def next_ident(self) -> str: raise SelectorSyntaxError("Expected ident, got %s" % (next,)) return typing.cast(str, next.value) - def next_ident_or_star(self) -> Optional[str]: + def next_ident_or_star(self) -> str | None: next = self.next() if next.type == "IDENT": return next.value diff --git a/cssselect/xpath.py b/cssselect/xpath.py index ee59f89..1b8f3b4 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ cssselect.xpath =============== @@ -12,9 +11,10 @@ """ +from __future__ import annotations + import re import typing -from typing import Optional from cssselect.parser import ( Attrib, @@ -65,7 +65,7 @@ def __str__(self) -> str: def __repr__(self) -> str: return "%s[%s]" % (self.__class__.__name__, self) - def add_condition(self, condition: str, conjuction: str = "and") -> "XPathExpr": + def add_condition(self, condition: str, conjuction: str = "and") -> XPathExpr: if self.condition: self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition) else: @@ -91,10 +91,10 @@ def add_star_prefix(self) -> None: def join( self, combiner: str, - other: "XPathExpr", - closing_combiner: Optional[str] = None, + other: XPathExpr, + closing_combiner: str | None = None, has_inner_condition: bool = False, - ) -> "XPathExpr": + ) -> XPathExpr: path = str(self) + combiner # Any "star prefix" is redundant when joining. if other.path != "*/": @@ -713,21 +713,21 @@ def pseudo_never_matches(self, xpath: XPathExpr) -> XPathExpr: # Attrib: dispatch by attribute operator def xpath_attrib_exists( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert not value xpath.add_condition(name) return xpath def xpath_attrib_equals( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert value is not None xpath.add_condition("%s = %s" % (name, self.xpath_literal(value))) return xpath def xpath_attrib_different( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert value is not None # FIXME: this seems like a weird hack... @@ -740,7 +740,7 @@ def xpath_attrib_different( return xpath def xpath_attrib_includes( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value and is_non_whitespace(value): xpath.add_condition( @@ -752,7 +752,7 @@ def xpath_attrib_includes( return xpath def xpath_attrib_dashmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert value is not None # Weird, but true... @@ -769,7 +769,7 @@ def xpath_attrib_dashmatch( return xpath def xpath_attrib_prefixmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value: xpath.add_condition( @@ -780,7 +780,7 @@ def xpath_attrib_prefixmatch( return xpath def xpath_attrib_suffixmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value: # Oddly there is a starts-with in XPath 1.0, but not ends-with @@ -793,7 +793,7 @@ def xpath_attrib_suffixmatch( return xpath def xpath_attrib_substringmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value: # Attribute selectors are case sensitive diff --git a/docs/conf.py b/docs/conf.py index aa5ae22..8188208 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # cssselect documentation build configuration file, created by # sphinx-quickstart on Tue Mar 27 14:20:34 2012. diff --git a/setup.py b/setup.py index 4c5d49d..9ce9ae3 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - import os.path import re diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 32c1683..ee0a4d3 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """ Tests for cssselect =================== @@ -17,10 +16,12 @@ """ +from __future__ import annotations + import sys import typing import unittest -from typing import List, Optional, Sequence, Tuple +from collections.abc import Sequence from lxml import etree, html @@ -70,7 +71,7 @@ def test_tokenizer(self) -> None: ] def test_parser(self) -> None: - def repr_parse(css: str) -> List[str]: + def repr_parse(css: str) -> list[str]: selectors = parse(css) for selector in selectors: assert selector.pseudo_element is None @@ -79,7 +80,7 @@ def repr_parse(css: str) -> List[str]: for selector in selectors ] - def parse_many(first: str, *others: str) -> List[str]: + def parse_many(first: str, *others: str) -> list[str]: result = repr_parse(first) for other in others: assert repr_parse(other) == result @@ -185,8 +186,8 @@ def parse_many(first: str, *others: str) -> List[str]: ] def test_pseudo_elements(self) -> None: - def parse_pseudo(css: str) -> List[Tuple[str, Optional[str]]]: - result: List[Tuple[str, Optional[str]]] = [] + def parse_pseudo(css: str) -> list[tuple[str, str | None]]: + result: list[tuple[str, str | None]] = [] for selector in parse(css): pseudo = selector.pseudo_element pseudo = str(pseudo) if pseudo else pseudo @@ -196,7 +197,7 @@ def parse_pseudo(css: str) -> List[Tuple[str, Optional[str]]]: result.append((selector_as_str, pseudo)) return result - def parse_one(css: str) -> Tuple[str, Optional[str]]: + def parse_one(css: str) -> tuple[str, str | None]: result = parse_pseudo(css) assert len(result) == 1 return result[0] @@ -280,7 +281,7 @@ def test_pseudo_repr(css: str) -> str: assert test_pseudo_repr(":scope") == "Pseudo[Element[*]:scope]" def test_specificity(self) -> None: - def specificity(css: str) -> Tuple[int, int, int]: + def specificity(css: str) -> tuple[int, int, int]: selectors = parse(css) assert len(selectors) == 1 return selectors[0].specificity() @@ -326,7 +327,7 @@ def specificity(css: str) -> Tuple[int, int, int]: ) def test_css_export(self) -> None: - def css2css(css: str, res: Optional[str] = None) -> None: + def css2css(css: str, res: str | None = None) -> None: selectors = parse(css) assert len(selectors) == 1 assert selectors[0].canonical() == (res or css) @@ -365,7 +366,7 @@ def css2css(css: str, res: Optional[str] = None) -> None: css2css("foo > *") def test_parse_errors(self) -> None: - def get_error(css: str) -> Optional[str]: + def get_error(css: str) -> str | None: try: parse(css) except SelectorSyntaxError: @@ -725,13 +726,11 @@ def xpath(css: str) -> str: assert str(XPathExpr("", "", condition="@href")) == "[@href]" document = etree.fromstring(OPERATOR_PRECEDENCE_IDS) - sort_key = dict( - (el, count) for count, el in enumerate(document.iter()) - ).__getitem__ + sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__ - def operator_id(selector: str) -> List[str]: + def operator_id(selector: str) -> list[str]: xpath = CustomTranslator().css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast(list["etree._Element"], document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] @@ -740,7 +739,7 @@ def operator_id(selector: str) -> List[str]: assert operator_id("[href]:first-or-second") == ["second"] def test_series(self) -> None: - def series(css: str) -> Optional[Tuple[int, int]]: + def series(css: str) -> tuple[int, int] | None: (selector,) = parse(":nth-child(%s)" % css) args = typing.cast(FunctionalPseudoElement, selector.parsed_tree).arguments try: @@ -769,14 +768,12 @@ def series(css: str) -> Optional[Tuple[int, int]]: def test_lang(self) -> None: document = etree.fromstring(XMLLANG_IDS) - sort_key = dict( - (el, count) for count, el in enumerate(document.iter()) - ).__getitem__ + sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__ css_to_xpath = GenericTranslator().css_to_xpath - def langid(selector: str) -> List[str]: + def langid(selector: str) -> list[str]: xpath = css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast(list["etree._Element"], document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] @@ -799,7 +796,7 @@ def langid(selector: str) -> List[str]: def test_argument_types(self) -> None: class CustomTranslator(GenericTranslator): def __init__(self) -> None: - self.argument_types: List[str] = [] + self.argument_types: list[str] = [] def xpath_pseudo_element( self, xpath: XPathExpr, pseudo_element: PseudoElement @@ -809,12 +806,12 @@ def xpath_pseudo_element( ).argument_types() return xpath - def argument_types(css: str) -> List[str]: + def argument_types(css: str) -> list[str]: translator = CustomTranslator() translator.css_to_xpath(css) return translator.argument_types - mappings: List[Tuple[str, List[str]]] = [ + mappings: list[tuple[str, list[str]]] = [ ("", []), ("ident", ["IDENT"]), ('"string"', ["STRING"]), @@ -826,23 +823,21 @@ def argument_types(css: str) -> List[str]: def test_select(self) -> None: document = etree.fromstring(HTML_IDS) - sort_key = dict( - (el, count) for count, el in enumerate(document.iter()) - ).__getitem__ + sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__ css_to_xpath = GenericTranslator().css_to_xpath html_css_to_xpath = HTMLTranslator().css_to_xpath - def select_ids(selector: str, html_only: bool) -> List[str]: + def select_ids(selector: str, html_only: bool) -> list[str]: xpath = css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast(list["etree._Element"], document.xpath(xpath)) if html_only: assert items == [] xpath = html_css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast(list["etree._Element"], document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] - def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: + def pcss(main: str, *selectors: str, **kwargs: bool) -> list[str]: html_only = kwargs.pop("html_only", False) result = select_ids(main, html_only) for selector in selectors: @@ -1072,14 +1067,14 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: def test_select_shakespeare(self) -> None: document = html.document_fromstring(HTML_SHAKESPEARE) - body = typing.cast(List["etree._Element"], document.xpath("//body"))[0] + body = typing.cast(list["etree._Element"], document.xpath("//body"))[0] css_to_xpath = GenericTranslator().css_to_xpath basestring_ = (str, bytes) def count(selector: str) -> int: xpath = css_to_xpath(selector) - results = typing.cast(List["etree._Element"], body.xpath(xpath)) + results = typing.cast(list["etree._Element"], body.xpath(xpath)) assert not isinstance(results, basestring_) found = set() for item in results: From 1fd34e55b724c5621b82238c5ecefcaae9d10ff4 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Fri, 31 Jan 2025 20:15:10 +0500 Subject: [PATCH 02/10] Bump tool versions. --- .github/workflows/tests.yml | 2 +- tox.ini | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 70b6c77..427c4ad 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -24,4 +24,4 @@ jobs: tox -e py - name: Upload coverage report - run: bash <(curl -s https://codecov.io/bash) + uses: codecov/codecov-action@v5 diff --git a/tox.ini b/tox.ini index 616d223..3585406 100644 --- a/tox.ini +++ b/tox.ini @@ -16,7 +16,7 @@ commands = [testenv:pylint] deps = {[testenv]deps} - pylint==3.3.1 + pylint==3.3.4 commands = pylint {posargs: cssselect setup.py tests docs} @@ -30,8 +30,8 @@ commands = [testenv:typing] deps = {[testenv]deps} - mypy==1.11.2 - types-lxml==2024.9.16 + mypy==1.14.1 + types-lxml==2024.12.13 commands = mypy --strict {posargs: cssselect tests} From 5b4c304ba1035ead2eb5502c2667ad4861dd0ceb Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Fri, 31 Jan 2025 21:08:30 +0500 Subject: [PATCH 03/10] Migrate to ruff. --- .bandit.yml | 6 -- .flake8 | 16 ----- .isort.cfg | 2 - .pre-commit-config.yaml | 26 ++------ cssselect/__init__.py | 16 ++--- cssselect/parser.py | 127 +++++++++++++++++++--------------------- cssselect/xpath.py | 46 ++++++--------- docs/conf.py | 8 +-- pyproject.toml | 125 +++++++++++++++++++++++++++++++++++++-- setup.cfg | 11 ---- setup.py | 10 ++-- tests/test_cssselect.py | 31 +++++----- 12 files changed, 232 insertions(+), 192 deletions(-) delete mode 100644 .bandit.yml delete mode 100644 .flake8 delete mode 100644 .isort.cfg diff --git a/.bandit.yml b/.bandit.yml deleted file mode 100644 index 4f60a02..0000000 --- a/.bandit.yml +++ /dev/null @@ -1,6 +0,0 @@ -skips: -- B101 -- B311 -- B320 -- B410 -exclude_dirs: ['tests'] diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 2417f2e..0000000 --- a/.flake8 +++ /dev/null @@ -1,16 +0,0 @@ -[flake8] -max-line-length = 99 -ignore = - W503 - # too many leading '#' for block comment - E266 - E704 -exclude = - .git - .tox - venv* - - # pending revision - docs/conf.py -per-file-ignores = - cssselect/__init__.py:F401 diff --git a/.isort.cfg b/.isort.cfg deleted file mode 100644 index 6860bdb..0000000 --- a/.isort.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[settings] -profile = black \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c0afc8d..b1829a6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,23 +1,7 @@ repos: -- repo: https://github.com/PyCQA/bandit - rev: 1.7.10 +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.9.4 hooks: - - id: bandit - args: [-r, -c, .bandit.yml] -- repo: https://github.com/PyCQA/flake8 - rev: 7.1.1 - hooks: - - id: flake8 -- repo: https://github.com/psf/black.git - rev: 24.10.0 - hooks: - - id: black -- repo: https://github.com/pycqa/isort - rev: 5.13.2 - hooks: - - id: isort -- repo: https://github.com/asottile/pyupgrade - rev: v3.19.1 - hooks: - - id: pyupgrade - args: [--py39-plus, --keep-percent-format] + - id: ruff + args: [ --fix ] + - id: ruff-format diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 8e0782b..c53b539 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -1,14 +1,14 @@ """ - CSS Selectors based on XPath - ============================ +CSS Selectors based on XPath +============================ - This module supports selecting XML/HTML elements based on CSS selectors. - See the `CSSSelector` class for details. +This module supports selecting XML/HTML elements based on CSS selectors. +See the `CSSSelector` class for details. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. """ @@ -26,10 +26,10 @@ "FunctionalPseudoElement", "GenericTranslator", "HTMLTranslator", - "parse", "Selector", "SelectorError", "SelectorSyntaxError", + "parse", ) VERSION = "1.2.0" diff --git a/cssselect/parser.py b/cssselect/parser.py index 156de53..d4678ef 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -1,13 +1,13 @@ """ - cssselect.parser - ================ +cssselect.parser +================ - Tokenizer, parser and parsed objects for CSS selectors. +Tokenizer, parser and parsed objects for CSS selectors. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. """ @@ -17,8 +17,13 @@ import re import sys import typing -from collections.abc import Iterable, Iterator, Sequence -from typing import Optional, Union +from typing import TYPE_CHECKING, Optional, Union + +if TYPE_CHECKING: + from collections.abc import Iterable, Iterator, Sequence + + # typing.Self requires Python 3.11 + from typing_extensions import Self def ascii_lower(string: str) -> str: @@ -288,7 +293,7 @@ def __repr__(self) -> str: def canonical(self) -> str: try: - subsel = self.subselector[0].canonical() # type: ignore + subsel = self.subselector[0].canonical() # type: ignore[index] except TypeError: subsel = self.subselector.canonical() if len(subsel) > 1: @@ -298,7 +303,7 @@ def canonical(self) -> str: def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() try: - a2, b2, c2 = self.subselector[-1].specificity() # type: ignore + a2, b2, c2 = self.subselector[-1].specificity() # type: ignore[index] except TypeError: a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -411,14 +416,13 @@ def __repr__(self) -> str: attrib = self.attrib if self.operator == "exists": return "%s[%r[%s]]" % (self.__class__.__name__, self.selector, attrib) - else: - return "%s[%r[%s %s %r]]" % ( - self.__class__.__name__, - self.selector, - attrib, - self.operator, - typing.cast("Token", self.value).value, - ) + return "%s[%r[%s %s %r]]" % ( + self.__class__.__name__, + self.selector, + attrib, + self.operator, + typing.cast("Token", self.value).value, + ) def canonical(self) -> str: if self.namespace: @@ -469,8 +473,7 @@ def canonical(self) -> str: def specificity(self) -> tuple[int, int, int]: if self.element: return 0, 0, 1 - else: - return 0, 0, 0 + return 0, 0, 0 class Hash: @@ -502,10 +505,7 @@ def __init__(self, selector: Tree, combinator: str, subselector: Tree) -> None: self.subselector = subselector def __repr__(self) -> str: - if self.combinator == " ": - comb = "" - else: - comb = self.combinator + comb = "" if self.combinator == " " else self.combinator return "%s[%r %s %r]" % ( self.__class__.__name__, self.selector, @@ -680,21 +680,20 @@ def parse_simple_selector( continue if stream.peek() != ("DELIM", "("): result = Pseudo(result, ident) - if repr(result) == "Pseudo[Element[*]:scope]": - if not ( - len(stream.used) == 2 - or (len(stream.used) == 3 and stream.used[0].type == "S") - or (len(stream.used) >= 3 and stream.used[-3].is_delim(",")) - or ( - len(stream.used) >= 4 - and stream.used[-3].type == "S" - and stream.used[-4].is_delim(",") - ) - ): - raise SelectorSyntaxError( - 'Got immediate child pseudo-element ":scope" ' - "not at the start of a selector" - ) + if repr(result) == "Pseudo[Element[*]:scope]" and not ( + len(stream.used) == 2 + or (len(stream.used) == 3 and stream.used[0].type == "S") + or (len(stream.used) >= 3 and stream.used[-3].is_delim(",")) + or ( + len(stream.used) >= 4 + and stream.used[-3].type == "S" + and stream.used[-4].is_delim(",") + ) + ): + raise SelectorSyntaxError( + 'Got immediate child pseudo-element ":scope" ' + "not at the start of a selector" + ) continue stream.next() stream.skip_whitespace() @@ -734,7 +733,7 @@ def parse_simple_selector( def parse_arguments(stream: TokenStream) -> list[Token]: arguments: list[Token] = [] - while 1: + while 1: # noqa: RET503 stream.skip_whitespace() next = stream.next() if next.type in ("IDENT", "STRING", "NUMBER") or next in [ @@ -760,7 +759,7 @@ def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]: else: combinator = Token("DELIM", " ", pos=0) - while 1: + while 1: # noqa: RET503 if next.type in ("IDENT", "STRING", "NUMBER") or next in [ ("DELIM", "."), ("DELIM", "*"), @@ -820,7 +819,7 @@ def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib: next = stream.next() if next == ("DELIM", "]"): return Attrib(selector, namespace, typing.cast(str, attrib), "exists", None) - elif next == ("DELIM", "="): + if next == ("DELIM", "="): op = "=" elif next.is_delim("^", "$", "*", "~", "|", "!") and ( stream.peek() == ("DELIM", "=") @@ -854,9 +853,9 @@ def parse_series(tokens: Iterable[Token]) -> tuple[int, int]: s = "".join(typing.cast(str, token.value) for token in tokens).strip() if s == "odd": return 2, 1 - elif s == "even": + if s == "even": return 2, 0 - elif s == "n": + if s == "n": return 1, 0 if "n" not in s: # Just b @@ -865,34 +864,30 @@ def parse_series(tokens: Iterable[Token]) -> tuple[int, int]: a_as_int: int if not a: a_as_int = 1 - elif a == "-" or a == "+": + elif a in {"-", "+"}: a_as_int = int(a + "1") else: a_as_int = int(a) - b_as_int: int - if not b: - b_as_int = 0 - else: - b_as_int = int(b) + b_as_int = int(b) if b else 0 return a_as_int, b_as_int #### Token objects -class Token(tuple[str, Optional[str]]): +class Token(tuple[str, Optional[str]]): # noqa: SLOT001 @typing.overload def __new__( cls, type_: typing.Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"], value: str, pos: int, - ) -> Token: ... + ) -> Self: ... @typing.overload - def __new__(cls, type_: typing.Literal["EOF"], value: None, pos: int) -> Token: ... + def __new__(cls, type_: typing.Literal["EOF"], value: None, pos: int) -> Self: ... - def __new__(cls, type_: str, value: str | None, pos: int) -> Token: + def __new__(cls, type_: str, value: str | None, pos: int) -> Self: obj = tuple.__new__(cls, (type_, value)) obj.pos = pos return obj @@ -916,13 +911,12 @@ def value(self) -> str | None: def css(self) -> str: if self.type == "STRING": return repr(self.value) - else: - return typing.cast(str, self.value) + return typing.cast(str, self.value) class EOFToken(Token): - def __new__(cls, pos: int) -> EOFToken: - return typing.cast("EOFToken", Token.__new__(cls, "EOF", None, pos)) + def __new__(cls, pos: int) -> Self: + return Token.__new__(cls, "EOF", None, pos) def __repr__(self) -> str: return "<%s at %i>" % (self.type, self.pos) @@ -962,7 +956,7 @@ def _compile(pattern: str) -> MatchFunc: } _sub_simple_escape = re.compile(r"\\(.)").sub -_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub +_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.IGNORECASE).sub _sub_newline_escape = re.compile(r"\\(?:\n|\r\n|\r|\f)").sub # Same as r'\1', but faster on CPython @@ -978,8 +972,7 @@ def _replace_unicode(match: re.Match[str]) -> str: def unescape_ident(value: str) -> str: value = _sub_unicode_escape(_replace_unicode, value) - value = _sub_simple_escape(_replace_simple, value) - return value + return _sub_simple_escape(_replace_simple, value) def tokenize(s: str) -> Iterator[Token]: @@ -1067,10 +1060,9 @@ def next(self) -> Token: self._peeking = False self.used.append(typing.cast(Token, self.peeked)) return typing.cast(Token, self.peeked) - else: - next = self.next_token() - self.used.append(next) - return next + next = self.next_token() + self.used.append(next) + return next def peek(self) -> Token: if not self._peeking: @@ -1088,10 +1080,9 @@ def next_ident_or_star(self) -> str | None: next = self.next() if next.type == "IDENT": return next.value - elif next == ("DELIM", "*"): + if next == ("DELIM", "*"): return None - else: - raise SelectorSyntaxError("Expected ident or '*', got %s" % (next,)) + raise SelectorSyntaxError("Expected ident or '*', got %s" % (next,)) def skip_whitespace(self) -> None: peek = self.peek() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 1b8f3b4..0d54aa6 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -1,13 +1,13 @@ """ - cssselect.xpath - =============== +cssselect.xpath +=============== - Translation of parsed CSS selectors to XPath expressions. +Translation of parsed CSS selectors to XPath expressions. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. """ @@ -276,7 +276,7 @@ def xpath_literal(s: str) -> str: else: s = "concat(%s)" % ",".join( [ - (("'" in part) and '"%s"' or "'%s'") % part + ((("'" in part) and '"%s"') or "'%s'") % part for part in split_at_single_quotes(s) if part ] @@ -308,8 +308,7 @@ def xpath_negation(self, negation: Negation) -> XPathExpr: sub_xpath.add_name_test() if sub_xpath.condition: return xpath.add_condition("not(%s)" % sub_xpath.condition) - else: - return xpath.add_condition("0") + return xpath.add_condition("0") def xpath_relation(self, relation: Relation) -> XPathExpr: xpath = self.xpath(relation.selector) @@ -459,12 +458,9 @@ def xpath_relation_direct_adjacent_combinator( self, left: XPathExpr, right: XPathExpr ) -> XPathExpr: """right is a sibling immediately after left; select left""" - xpath = left.add_condition( - "following-sibling::*[(name() = '{}') and (position() = 1)]".format( - right.element - ) + return left.add_condition( + f"following-sibling::*[(name() = '{right.element}') and (position() = 1)]" ) - return xpath def xpath_relation_indirect_adjacent_combinator( self, left: XPathExpr, right: XPathExpr @@ -483,8 +479,8 @@ def xpath_nth_child_function( ) -> XPathExpr: try: a, b = parse_series(function.arguments) - except ValueError: - raise ExpressionError("Invalid series: '%r'" % function.arguments) + except ValueError as ex: + raise ExpressionError("Invalid series: '%r'" % function.arguments) from ex # From https://www.w3.org/TR/css3-selectors/#structural-pseudos: # @@ -546,10 +542,7 @@ def xpath_nth_child_function( # `add_name_test` boolean is inverted and somewhat counter-intuitive: # # nth_of_type() calls nth_child(add_name_test=False) - if add_name_test: - nodetest = "*" - else: - nodetest = "%s" % xpath.element + nodetest = "*" if add_name_test else "%s" % xpath.element # count siblings before or after the element if not last: @@ -604,10 +597,7 @@ def xpath_nth_child_function( expressions.append("%s mod %s = 0" % (left, a)) - if len(expressions) > 1: - template = "(%s)" - else: - template = "%s" + template = "(%s)" if len(expressions) > 1 else "%s" xpath.add_condition( " and ".join(template % expression for expression in expressions) ) @@ -831,7 +821,7 @@ def __init__(self, xhtml: bool = False) -> None: self.lower_case_element_names = True self.lower_case_attribute_names = True - def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] # FIXME: is this really all the elements? return xpath.add_condition( "(@selected and name(.) = 'option') or " @@ -857,7 +847,7 @@ def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr % (self.lang_attribute, self.xpath_literal(value.lower() + "-")) ) - def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] return xpath.add_condition( "@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area')" ) @@ -865,7 +855,7 @@ def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore # Links are never visited, the implementation for :visited is the same # as in GenericTranslator - def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] # http://www.w3.org/TR/html5/section-index.html#attributes-1 return xpath.add_condition( """ @@ -895,7 +885,7 @@ def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore # FIXME: in the second half, add "and is not a descendant of that # fieldset element's first legend element child, if any." - def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] # http://www.w3.org/TR/html5/section-index.html#attributes-1 return xpath.add_condition( """ diff --git a/docs/conf.py b/docs/conf.py index 8188208..470373b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,9 +11,8 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import os import re -import sys +from pathlib import Path # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -50,10 +49,7 @@ # built documents. # # The full version, including alpha/beta/rc tags. -with open( - os.path.join(os.path.dirname(__file__), "..", "cssselect", "__init__.py") -) as init_file: - init_py = init_file.read() +init_py = (Path(__file__).parent.parent / "cssselect" / "__init__.py").read_text() release = re.search('VERSION = "([^"]+)"', init_py).group(1) # The short X.Y version. version = release.rstrip("dev") diff --git a/pyproject.toml b/pyproject.toml index 261fe3e..058ea32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,124 @@ -[tool.isort] -profile = "black" -multi_line_output = 3 - [tool.mypy] check_untyped_defs = true ignore_missing_imports = true no_warn_no_return = true -[tool.black] -target-version = ["py38", "py39", "py310", "py311", "py312"] \ No newline at end of file +[tool.ruff.lint] +extend-select = [ + # flake8-bugbear + "B", + # flake8-comprehensions + "C4", + # pydocstyle + "D", + # flake8-future-annotations + "FA", + # flynt + "FLY", + # refurb + "FURB", + # isort + "I", + # flake8-implicit-str-concat + "ISC", + # flake8-logging + "LOG", + # Perflint + "PERF", + # pygrep-hooks + "PGH", + # flake8-pie + "PIE", + # pylint + "PL", + # flake8-use-pathlib + "PTH", + # flake8-pyi + "PYI", + # flake8-quotes + "Q", + # flake8-return + "RET", + # flake8-raise + "RSE", + # Ruff-specific rules + "RUF", + # flake8-bandit + "S", + # flake8-simplify + "SIM", + # flake8-slots + "SLOT", + # flake8-debugger + "T10", + # flake8-type-checking + "TC", + # pyupgrade + "UP", + # pycodestyle warnings + "W", + # flake8-2020 + "YTT", +] +ignore = [ + # Missing docstring in public module + "D100", + # Missing docstring in public class + "D101", + # Missing docstring in public method + "D102", + # Missing docstring in public function + "D103", + # Missing docstring in public package + "D104", + # Missing docstring in magic method + "D105", + # Missing docstring in public nested class + "D106", + # Missing docstring in __init__ + "D107", + # One-line docstring should fit on one line with quotes + "D200", + # No blank lines allowed after function docstring + "D202", + # 1 blank line required between summary line and description + "D205", + # Multi-line docstring closing quotes should be on a separate line + "D209", + # First line should end with a period + "D400", + # First line should be in imperative mood; try rephrasing + "D401", + # First line should not be the function's "signature" + "D402", + # First word of the first line should be properly capitalized + "D403", + # Too many return statements + "PLR0911", + # Too many branches + "PLR0912", + # Too many arguments in function definition + "PLR0913", + # Too many statements + "PLR0915", + # Magic value used in comparison + "PLR2004", + # String contains ambiguous {}. + "RUF001", + # Docstring contains ambiguous {}. + "RUF002", + # Comment contains ambiguous {}. + "RUF003", + # Mutable class attributes should be annotated with `typing.ClassVar` + "RUF012", + # Use of `assert` detected + "S101", + # Using lxml to parse untrusted data is known to be vulnerable to XML attacks + "S320", + + + "UP031", +] + +[tool.ruff.lint.pydocstyle] +convention = "pep257" diff --git a/setup.cfg b/setup.cfg index b8c93b1..25c6497 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,13 +1,2 @@ -[build_sphinx] -source-dir = docs -build-dir = docs/_build -#all_files = 1 - -[upload_sphinx] # Sphinx-PyPI-upload -upload-dir = docs/_build/html - [tool:pytest] testpaths = tests - -[bdist_wheel] -universal = 1 diff --git a/setup.py b/setup.py index 9ce9ae3..cb870dd 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,11 @@ -import os.path import re +from pathlib import Path from setuptools import setup -ROOT = os.path.dirname(__file__) -with open(os.path.join(ROOT, "README.rst")) as readme_file: - README = readme_file.read() -with open(os.path.join(ROOT, "cssselect", "__init__.py")) as init_file: - INIT_PY = init_file.read() +ROOT = Path(__file__).parent +README = (ROOT / "README.rst").read_text(encoding="utf-8") +INIT_PY = (ROOT / "cssselect" / "__init__.py").read_text(encoding="utf-8") VERSION = re.search('VERSION = "([^"]+)"', INIT_PY).group(1) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index ee0a4d3..fc56c41 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -1,18 +1,18 @@ #!/usr/bin/env python """ - Tests for cssselect - =================== +Tests for cssselect +=================== - These tests can be run either by py.test or by the standard library's - unittest. They use plain ``assert`` statements and do little reporting - themselves in case of failure. +These tests can be run either by py.test or by the standard library's +unittest. They use plain ``assert`` statements and do little reporting +themselves in case of failure. - Use py.test to get fancy error reporting and assert introspection. +Use py.test to get fancy error reporting and assert introspection. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. """ @@ -21,7 +21,7 @@ import sys import typing import unittest -from collections.abc import Sequence +from typing import TYPE_CHECKING from lxml import etree, html @@ -42,6 +42,9 @@ ) from cssselect.xpath import XPathExpr +if TYPE_CHECKING: + from collections.abc import Sequence + class TestCssselect(unittest.TestCase): def test_tokenizer(self) -> None: @@ -453,7 +456,7 @@ def xpath(css: str) -> str: assert xpath("e[foo|bar]") == "e[@foo:bar]" assert xpath('e[foo="bar"]') == "e[@foo = 'bar']" assert xpath('e[foo~="bar"]') == ( - "e[@foo and contains(" "concat(' ', normalize-space(@foo), ' '), ' bar ')]" + "e[@foo and contains(concat(' ', normalize-space(@foo), ' '), ' bar ')]" ) assert xpath('e[foo^="bar"]') == ("e[@foo and starts-with(@foo, 'bar')]") assert xpath('e[foo$="bar"]') == ( @@ -461,7 +464,7 @@ def xpath(css: str) -> str: ) assert xpath('e[foo*="bar"]') == ("e[@foo and contains(@foo, 'bar')]") assert xpath('e[hreflang|="en"]') == ( - "e[@hreflang and (" "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]" + "e[@hreflang and (@hreflang = 'en' or starts-with(@hreflang, 'en-'))]" ) # --- nth-* and nth-last-* ------------------------------------- @@ -720,7 +723,7 @@ def xpath(css: str) -> str: ) assert xpath(":scope") == "descendant-or-self::*[1]" assert xpath(":first-or-second[href]") == ( - "descendant-or-self::*[(@id = 'first' or @id = 'second') " "and (@href)]" + "descendant-or-self::*[(@id = 'first' or @id = 'second') and (@href)]" ) assert str(XPathExpr("", "", condition="@href")) == "[@href]" @@ -1522,7 +1525,7 @@ def count(selector: str) -> int: -""" # noqa: W191,E101 +""" if __name__ == "__main__": From 74399aadaf807e281cd3e9bbba3188eb33e32323 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Fri, 31 Jan 2025 22:12:15 +0500 Subject: [PATCH 04/10] Simplify typing casts. --- cssselect/parser.py | 9 +++++--- cssselect/xpath.py | 53 +++++++++++++++++++++++++++++---------------- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index d4678ef..35400d1 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -416,12 +416,13 @@ def __repr__(self) -> str: attrib = self.attrib if self.operator == "exists": return "%s[%r[%s]]" % (self.__class__.__name__, self.selector, attrib) + assert self.value is not None return "%s[%r[%s %s %r]]" % ( self.__class__.__name__, self.selector, attrib, self.operator, - typing.cast("Token", self.value).value, + self.value.value, ) def canonical(self) -> str: @@ -433,10 +434,11 @@ def canonical(self) -> str: if self.operator == "exists": op = attrib else: + assert self.value is not None op = "%s%s%s" % ( attrib, self.operator, - typing.cast("Token", self.value).css(), + self.value.css(), ) return "%s[%s]" % (self.selector.canonical(), op) @@ -1058,7 +1060,8 @@ def __init__(self, tokens: Iterable[Token], source: str | None = None) -> None: def next(self) -> Token: if self._peeking: self._peeking = False - self.used.append(typing.cast(Token, self.peeked)) + assert self.peeked is not None + self.used.append(self.peeked) return typing.cast(Token, self.peeked) next = self.next_token() self.used.append(next) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 0d54aa6..60d70f1 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -15,6 +15,8 @@ import re import typing +from collections.abc import Callable +from typing import Optional from cssselect.parser import ( Attrib, @@ -286,22 +288,26 @@ def xpath_literal(s: str) -> str: def xpath(self, parsed_selector: Tree) -> XPathExpr: """Translate any parsed selector object.""" type_name = type(parsed_selector).__name__ - method = getattr(self, "xpath_%s" % type_name.lower(), None) + method = typing.cast( + Optional[Callable[[Tree], XPathExpr]], + getattr(self, "xpath_%s" % type_name.lower(), None), + ) if method is None: raise ExpressionError("%s is not supported." % type_name) - return typing.cast(XPathExpr, method(parsed_selector)) + return method(parsed_selector) # Dispatched by parsed object type def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr: """Translate a combined selector.""" combinator = self.combinator_mapping[combined.combinator] - method = getattr(self, "xpath_%s_combinator" % combinator) - return typing.cast( - XPathExpr, - method(self.xpath(combined.selector), self.xpath(combined.subselector)), + method = typing.cast( + Callable[[XPathExpr, XPathExpr], XPathExpr], + getattr(self, "xpath_%s_combinator" % combinator), ) + return method(self.xpath(combined.selector), self.xpath(combined.subselector)) + def xpath_negation(self, negation: Negation) -> XPathExpr: xpath = self.xpath(negation.selector) sub_xpath = self.xpath(negation.subselector) @@ -315,12 +321,15 @@ def xpath_relation(self, relation: Relation) -> XPathExpr: combinator = relation.combinator subselector = relation.subselector right = self.xpath(subselector.parsed_tree) - method = getattr( - self, - "xpath_relation_%s_combinator" - % self.combinator_mapping[typing.cast(str, combinator.value)], + method = typing.cast( + Callable[[XPathExpr, XPathExpr], XPathExpr], + getattr( + self, + "xpath_relation_%s_combinator" + % self.combinator_mapping[typing.cast(str, combinator.value)], + ), ) - return typing.cast(XPathExpr, method(xpath, right)) + return method(xpath, right) def xpath_matching(self, matching: Matching) -> XPathExpr: xpath = self.xpath(matching.selector) @@ -343,24 +352,32 @@ def xpath_specificityadjustment(self, matching: SpecificityAdjustment) -> XPathE def xpath_function(self, function: Function) -> XPathExpr: """Translate a functional pseudo-class.""" method_name = "xpath_%s_function" % function.name.replace("-", "_") - method = getattr(self, method_name, None) + method = typing.cast( + Optional[Callable[[XPathExpr, Function], XPathExpr]], + getattr(self, method_name, None), + ) if not method: raise ExpressionError("The pseudo-class :%s() is unknown" % function.name) - return typing.cast(XPathExpr, method(self.xpath(function.selector), function)) + return method(self.xpath(function.selector), function) def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr: """Translate a pseudo-class.""" method_name = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_") - method = getattr(self, method_name, None) + method = typing.cast( + Optional[Callable[[XPathExpr], XPathExpr]], getattr(self, method_name, None) + ) if not method: # TODO: better error message for pseudo-elements? raise ExpressionError("The pseudo-class :%s is unknown" % pseudo.ident) - return typing.cast(XPathExpr, method(self.xpath(pseudo.selector))) + return method(self.xpath(pseudo.selector)) def xpath_attrib(self, selector: Attrib) -> XPathExpr: """Translate an attribute selector.""" operator = self.attribute_operator_mapping[selector.operator] - method = getattr(self, "xpath_attrib_%s" % operator) + method = typing.cast( + Callable[[XPathExpr, str, Optional[str]], XPathExpr], + getattr(self, "xpath_attrib_%s" % operator), + ) if self.lower_case_attribute_names: name = selector.attrib.lower() else: @@ -379,9 +396,7 @@ def xpath_attrib(self, selector: Attrib) -> XPathExpr: value = typing.cast(str, selector.value.value).lower() else: value = selector.value.value - return typing.cast( - XPathExpr, method(self.xpath(selector.selector), attrib, value) - ) + return method(self.xpath(selector.selector), attrib, value) def xpath_class(self, class_selector: Class) -> XPathExpr: """Translate a class selector.""" From 0a69c6099bb5ed2058af0516215c4addf666fc2e Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Fri, 31 Jan 2025 22:14:42 +0500 Subject: [PATCH 05/10] Cleanup pylintrc. --- pylintrc | 18 ++++++------------ tests/test_cssselect.py | 2 +- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/pylintrc b/pylintrc index 5a4647b..08ba7b1 100644 --- a/pylintrc +++ b/pylintrc @@ -1,21 +1,19 @@ [MASTER] persistent=no +extension-pkg-allow-list=lxml [MESSAGES CONTROL] -disable=assignment-from-no-return, - c-extension-no-member, - consider-using-f-string, - consider-using-in, +enable=useless-suppression +disable=consider-using-f-string, + duplicate-string-formatting-argument, fixme, - inconsistent-return-statements, invalid-name, + line-too-long, missing-class-docstring, missing-function-docstring, missing-module-docstring, - multiple-imports, - no-else-return, no-member, - raise-missing-from, + not-callable, redefined-builtin, redefined-outer-name, too-few-public-methods, @@ -26,8 +24,4 @@ disable=assignment-from-no-return, too-many-positional-arguments, too-many-public-methods, too-many-statements, - undefined-variable, - unidiomatic-typecheck, - unspecified-encoding, unused-argument, - unused-import, diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index fc56c41..f46fb67 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -195,7 +195,7 @@ def parse_pseudo(css: str) -> list[tuple[str, str | None]]: pseudo = selector.pseudo_element pseudo = str(pseudo) if pseudo else pseudo # No Symbol here - assert pseudo is None or type(pseudo) is str + assert pseudo is None or isinstance(pseudo, str) selector_as_str = repr(selector.parsed_tree).replace("(u'", "('") result.append((selector_as_str, pseudo)) return result From 57a3b7e1dbcd9f647466d1fa2a3154981ef372ba Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Fri, 31 Jan 2025 22:39:23 +0500 Subject: [PATCH 06/10] More cleanup. --- cssselect/parser.py | 50 +++++++++++++++---------------- cssselect/xpath.py | 65 ++++++++++++++++++++--------------------- pylintrc | 1 - tests/test_cssselect.py | 10 ++----- 4 files changed, 58 insertions(+), 68 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 35400d1..d16751f 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -16,8 +16,7 @@ import operator import re import sys -import typing -from typing import TYPE_CHECKING, Optional, Union +from typing import TYPE_CHECKING, Literal, Optional, Protocol, Union, cast, overload if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Sequence @@ -375,17 +374,17 @@ class Attrib: Represents selector[namespace|attrib operator value] """ - @typing.overload + @overload def __init__( self, selector: Tree, namespace: str | None, attrib: str, - operator: typing.Literal["exists"], + operator: Literal["exists"], value: None, ) -> None: ... - @typing.overload + @overload def __init__( self, selector: Tree, @@ -607,7 +606,7 @@ def parse_selector(stream: TokenStream) -> tuple[Tree, PseudoElement | None]: ) if peek.is_delim("+", ">", "~"): # A combinator - combinator = typing.cast(str, stream.next().value) + combinator = cast(str, stream.next().value) stream.skip_whitespace() else: # By exclusion, the last parse_simple_selector() ended @@ -653,7 +652,7 @@ def parse_simple_selector( "Got pseudo-element ::%s not at the end of a selector" % pseudo_element ) if peek.type == "HASH": - result = Hash(result, typing.cast(str, stream.next().value)) + result = Hash(result, cast(str, stream.next().value)) elif peek == ("DELIM", "."): stream.next() result = Class(result, stream.next_ident()) @@ -766,7 +765,7 @@ def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]: ("DELIM", "."), ("DELIM", "*"), ]: - subselector += typing.cast(str, next.value) + subselector += cast(str, next.value) elif next == ("DELIM", ")"): result = parse(subselector) return combinator, result[0] @@ -820,13 +819,13 @@ def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib: stream.skip_whitespace() next = stream.next() if next == ("DELIM", "]"): - return Attrib(selector, namespace, typing.cast(str, attrib), "exists", None) + return Attrib(selector, namespace, cast(str, attrib), "exists", None) if next == ("DELIM", "="): op = "=" elif next.is_delim("^", "$", "*", "~", "|", "!") and ( stream.peek() == ("DELIM", "=") ): - op = typing.cast(str, next.value) + "=" + op = cast(str, next.value) + "=" stream.next() else: raise SelectorSyntaxError("Operator expected, got %s" % (next,)) @@ -838,7 +837,7 @@ def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib: next = stream.next() if next != ("DELIM", "]"): raise SelectorSyntaxError("Expected ']', got %s" % (next,)) - return Attrib(selector, namespace, typing.cast(str, attrib), op, value) + return Attrib(selector, namespace, cast(str, attrib), op, value) def parse_series(tokens: Iterable[Token]) -> tuple[int, int]: @@ -852,7 +851,7 @@ def parse_series(tokens: Iterable[Token]) -> tuple[int, int]: for token in tokens: if token.type == "STRING": raise ValueError("String tokens not allowed in series.") - s = "".join(typing.cast(str, token.value) for token in tokens).strip() + s = "".join(cast(str, token.value) for token in tokens).strip() if s == "odd": return 2, 1 if s == "even": @@ -878,16 +877,16 @@ def parse_series(tokens: Iterable[Token]) -> tuple[int, int]: class Token(tuple[str, Optional[str]]): # noqa: SLOT001 - @typing.overload + @overload def __new__( cls, - type_: typing.Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"], + type_: Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"], value: str, pos: int, ) -> Self: ... - @typing.overload - def __new__(cls, type_: typing.Literal["EOF"], value: None, pos: int) -> Self: ... + @overload + def __new__(cls, type_: Literal["EOF"], value: None, pos: int) -> Self: ... def __new__(cls, type_: str, value: str | None, pos: int) -> Self: obj = tuple.__new__(cls, (type_, value)) @@ -913,7 +912,7 @@ def value(self) -> str | None: def css(self) -> str: if self.type == "STRING": return repr(self.value) - return typing.cast(str, self.value) + return cast(str, self.value) class EOFToken(Token): @@ -936,12 +935,10 @@ class TokenMacros: nmstart = "[_a-z]|%s|%s" % (escape, nonascii) -if typing.TYPE_CHECKING: - - class MatchFunc(typing.Protocol): - def __call__( - self, string: str, pos: int = ..., endpos: int = ... - ) -> re.Match[str] | None: ... +class MatchFunc(Protocol): + def __call__( + self, string: str, pos: int = ..., endpos: int = ... + ) -> re.Match[str] | None: ... def _compile(pattern: str) -> MatchFunc: @@ -1062,7 +1059,7 @@ def next(self) -> Token: self._peeking = False assert self.peeked is not None self.used.append(self.peeked) - return typing.cast(Token, self.peeked) + return self.peeked next = self.next_token() self.used.append(next) return next @@ -1071,13 +1068,14 @@ def peek(self) -> Token: if not self._peeking: self.peeked = self.next_token() self._peeking = True - return typing.cast(Token, self.peeked) + assert self.peeked is not None + return self.peeked def next_ident(self) -> str: next = self.next() if next.type != "IDENT": raise SelectorSyntaxError("Expected ident, got %s" % (next,)) - return typing.cast(str, next.value) + return cast(str, next.value) def next_ident_or_star(self) -> str | None: next = self.next() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 60d70f1..e68c51c 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -14,9 +14,8 @@ from __future__ import annotations import re -import typing from collections.abc import Callable -from typing import Optional +from typing import TYPE_CHECKING, Optional, cast from cssselect.parser import ( Attrib, @@ -38,6 +37,10 @@ parse_series, ) +if TYPE_CHECKING: + # typing.Self requires Python 3.11 + from typing_extensions import Self + class ExpressionError(SelectorError, RuntimeError): """Unknown or unsupported selector (eg. pseudo-class).""" @@ -67,7 +70,7 @@ def __str__(self) -> str: def __repr__(self) -> str: return "%s[%s]" % (self.__class__.__name__, self) - def add_condition(self, condition: str, conjuction: str = "and") -> XPathExpr: + def add_condition(self, condition: str, conjuction: str = "and") -> Self: if self.condition: self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition) else: @@ -96,7 +99,7 @@ def join( other: XPathExpr, closing_combiner: str | None = None, has_inner_condition: bool = False, - ) -> XPathExpr: + ) -> Self: path = str(self) + combiner # Any "star prefix" is redundant when joining. if other.path != "*/": @@ -276,19 +279,18 @@ def xpath_literal(s: str) -> str: elif '"' not in s: s = '"%s"' % s else: - s = "concat(%s)" % ",".join( - [ - ((("'" in part) and '"%s"') or "'%s'") % part - for part in split_at_single_quotes(s) - if part - ] - ) + parts_quoted = [ + f'"{part}"' if "'" in part else f"'{part}'" + for part in split_at_single_quotes(s) + if part + ] + s = "concat({})".format(",".join(parts_quoted)) return s def xpath(self, parsed_selector: Tree) -> XPathExpr: """Translate any parsed selector object.""" type_name = type(parsed_selector).__name__ - method = typing.cast( + method = cast( Optional[Callable[[Tree], XPathExpr]], getattr(self, "xpath_%s" % type_name.lower(), None), ) @@ -301,7 +303,7 @@ def xpath(self, parsed_selector: Tree) -> XPathExpr: def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr: """Translate a combined selector.""" combinator = self.combinator_mapping[combined.combinator] - method = typing.cast( + method = cast( Callable[[XPathExpr, XPathExpr], XPathExpr], getattr(self, "xpath_%s_combinator" % combinator), ) @@ -321,12 +323,12 @@ def xpath_relation(self, relation: Relation) -> XPathExpr: combinator = relation.combinator subselector = relation.subselector right = self.xpath(subselector.parsed_tree) - method = typing.cast( + method = cast( Callable[[XPathExpr, XPathExpr], XPathExpr], getattr( self, "xpath_relation_%s_combinator" - % self.combinator_mapping[typing.cast(str, combinator.value)], + % self.combinator_mapping[cast(str, combinator.value)], ), ) return method(xpath, right) @@ -352,7 +354,7 @@ def xpath_specificityadjustment(self, matching: SpecificityAdjustment) -> XPathE def xpath_function(self, function: Function) -> XPathExpr: """Translate a functional pseudo-class.""" method_name = "xpath_%s_function" % function.name.replace("-", "_") - method = typing.cast( + method = cast( Optional[Callable[[XPathExpr, Function], XPathExpr]], getattr(self, method_name, None), ) @@ -363,7 +365,7 @@ def xpath_function(self, function: Function) -> XPathExpr: def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr: """Translate a pseudo-class.""" method_name = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_") - method = typing.cast( + method = cast( Optional[Callable[[XPathExpr], XPathExpr]], getattr(self, method_name, None) ) if not method: @@ -374,7 +376,7 @@ def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr: def xpath_attrib(self, selector: Attrib) -> XPathExpr: """Translate an attribute selector.""" operator = self.attribute_operator_mapping[selector.operator] - method = typing.cast( + method = cast( Callable[[XPathExpr, str, Optional[str]], XPathExpr], getattr(self, "xpath_attrib_%s" % operator), ) @@ -393,7 +395,7 @@ def xpath_attrib(self, selector: Attrib) -> XPathExpr: if selector.value is None: value = None elif self.lower_case_attribute_values: - value = typing.cast(str, selector.value.value).lower() + value = cast(str, selector.value.value).lower() else: value = selector.value.value return method(self.xpath(selector.selector), attrib, value) @@ -649,7 +651,7 @@ def xpath_contains_function( "Expected a single string or ident for :contains(), got %r" % function.arguments ) - value = typing.cast(str, function.arguments[0].value) + value = cast(str, function.arguments[0].value) return xpath.add_condition("contains(., %s)" % self.xpath_literal(value)) def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: @@ -658,7 +660,7 @@ def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr "Expected a single string or ident for :lang(), got %r" % function.arguments ) - value = typing.cast(str, function.arguments[0].value) + value = cast(str, function.arguments[0].value) return xpath.add_condition("lang(%s)" % (self.xpath_literal(value))) # Pseudo: dispatch by pseudo-class name @@ -748,9 +750,9 @@ def xpath_attrib_includes( self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value and is_non_whitespace(value): + arg = self.xpath_literal(" " + value + " ") xpath.add_condition( - "%s and contains(concat(' ', normalize-space(%s), ' '), %s)" - % (name, name, self.xpath_literal(" " + value + " ")) + f"{name} and contains(concat(' ', normalize-space({name}), ' '), {arg})" ) else: xpath.add_condition("0") @@ -760,16 +762,11 @@ def xpath_attrib_dashmatch( self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert value is not None + arg = self.xpath_literal(value) + arg_dash = self.xpath_literal(value + "-") # Weird, but true... xpath.add_condition( - "%s and (%s = %s or starts-with(%s, %s))" - % ( - name, - name, - self.xpath_literal(value), - name, - self.xpath_literal(value + "-"), - ) + f"{name} and ({name} = {arg} or starts-with({name}, {arg_dash}))" ) return xpath @@ -853,13 +850,13 @@ def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr ) value = function.arguments[0].value assert value + arg = self.xpath_literal(value.lower() + "-") return xpath.add_condition( "ancestor-or-self::*[@lang][1][starts-with(concat(" # XPath 1.0 has no lower-case function... - "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " + f"translate(@{self.lang_attribute}, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " "'abcdefghijklmnopqrstuvwxyz'), " - "'-'), %s)]" - % (self.lang_attribute, self.xpath_literal(value.lower() + "-")) + f"'-'), {arg})]" ) def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] diff --git a/pylintrc b/pylintrc index 08ba7b1..43fb62b 100644 --- a/pylintrc +++ b/pylintrc @@ -5,7 +5,6 @@ extension-pkg-allow-list=lxml [MESSAGES CONTROL] enable=useless-suppression disable=consider-using-f-string, - duplicate-string-formatting-argument, fixme, invalid-name, line-too-long, diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index f46fb67..0a95f92 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -78,10 +78,7 @@ def repr_parse(css: str) -> list[str]: selectors = parse(css) for selector in selectors: assert selector.pseudo_element is None - return [ - repr(selector.parsed_tree).replace("(u'", "('") - for selector in selectors - ] + return [repr(selector.parsed_tree) for selector in selectors] def parse_many(first: str, *others: str) -> list[str]: result = repr_parse(first) @@ -196,7 +193,7 @@ def parse_pseudo(css: str) -> list[tuple[str, str | None]]: pseudo = str(pseudo) if pseudo else pseudo # No Symbol here assert pseudo is None or isinstance(pseudo, str) - selector_as_str = repr(selector.parsed_tree).replace("(u'", "('") + selector_as_str = repr(selector.parsed_tree) result.append((selector_as_str, pseudo)) return result @@ -373,8 +370,7 @@ def get_error(css: str) -> str | None: try: parse(css) except SelectorSyntaxError: - # Py2, Py3, ... - return str(sys.exc_info()[1]).replace("(u'", "('") + return str(sys.exc_info()[1]) return None assert get_error("attributes(href)/html/body/a") == ( From 32045eb492ff927b4039983ba997e1cbf7abf0ab Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sun, 2 Feb 2025 19:17:21 +0500 Subject: [PATCH 07/10] Move tool configs to pyproject.toml. --- .bumpversion.cfg | 6 ------ .coveragerc | 10 --------- pylintrc | 26 ---------------------- pyproject.toml | 56 +++++++++++++++++++++++++++++++++++++++++++++++- setup.cfg | 2 -- 5 files changed, 55 insertions(+), 45 deletions(-) delete mode 100644 .bumpversion.cfg delete mode 100644 .coveragerc delete mode 100644 pylintrc delete mode 100644 setup.cfg diff --git a/.bumpversion.cfg b/.bumpversion.cfg deleted file mode 100644 index 56cfabc..0000000 --- a/.bumpversion.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[bumpversion] -current_version = 1.2.0 -commit = True -tag = True - -[bumpversion:file:cssselect/__init__.py] diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index ed1fac6..0000000 --- a/.coveragerc +++ /dev/null @@ -1,10 +0,0 @@ -[run] -branch = True -source = cssselect - -[report] -exclude_lines = - pragma: no cover - def __repr__ - if sys.version_info - if __name__ == '__main__': diff --git a/pylintrc b/pylintrc deleted file mode 100644 index 43fb62b..0000000 --- a/pylintrc +++ /dev/null @@ -1,26 +0,0 @@ -[MASTER] -persistent=no -extension-pkg-allow-list=lxml - -[MESSAGES CONTROL] -enable=useless-suppression -disable=consider-using-f-string, - fixme, - invalid-name, - line-too-long, - missing-class-docstring, - missing-function-docstring, - missing-module-docstring, - no-member, - not-callable, - redefined-builtin, - redefined-outer-name, - too-few-public-methods, - too-many-arguments, - too-many-branches, - too-many-function-args, - too-many-lines, - too-many-positional-arguments, - too-many-public-methods, - too-many-statements, - unused-argument, diff --git a/pyproject.toml b/pyproject.toml index 058ea32..b38bfdd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,62 @@ +[tool.bumpversion] +current_version = "1.2.0" +commit = true +tag = true + +[[tool.bumpversion.files]] +filename = "cssselect/__init__.py" + +[tool.coverage.run] +branch = true +source = ["cssselect"] + +[tool.coverage.report] +exclude_also = [ + "def __repr__", + "if sys.version_info", + "if __name__ == '__main__':", +] + [tool.mypy] check_untyped_defs = true ignore_missing_imports = true no_warn_no_return = true +[tool.pylint.MASTER] +persistent = "no" +extension-pkg-allow-list = ["lxml"] + +[tool.pylint."MESSAGES CONTROL"] +enable = [ + "useless-suppression", +] +disable = [ + "consider-using-f-string", + "fixme", + "invalid-name", + "line-too-long", + "missing-class-docstring", + "missing-function-docstring", + "missing-module-docstring", + "no-member", + "not-callable", + "redefined-builtin", + "redefined-outer-name", + "too-few-public-methods", + "too-many-arguments", + "too-many-branches", + "too-many-function-args", + "too-many-lines", + "too-many-locals", + "too-many-positional-arguments", + "too-many-public-methods", + "too-many-statements", + "unused-argument", +] + +[tool.pytest.ini_options] +testpaths = ["tests"] + [tool.ruff.lint] extend-select = [ # flake8-bugbear @@ -116,7 +170,7 @@ ignore = [ # Using lxml to parse untrusted data is known to be vulnerable to XML attacks "S320", - + # TODO: Use format specifiers instead of percent format "UP031", ] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 25c6497..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[tool:pytest] -testpaths = tests From b8836d8595e19535c10f78f18cd1b04f55ec6c2b Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sun, 2 Feb 2025 19:27:36 +0500 Subject: [PATCH 08/10] Update the nitpicky config. --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 470373b..ceeb2d2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -253,5 +253,5 @@ nitpicky = True nitpick_ignore = [ # explicitly not a part of the public API - ("py:class", "cssselect.parser.Token"), + ("py:class", "Token"), ] From ac9b0ebf9dfba8fe216c769e70cfd31be239936a Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sun, 2 Feb 2025 19:29:00 +0500 Subject: [PATCH 09/10] Exclude TYPE_CHECKING blocks from coverage. --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index b38bfdd..5ddbeb6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ exclude_also = [ "def __repr__", "if sys.version_info", "if __name__ == '__main__':", + "if TYPE_CHECKING:", ] [tool.mypy] From 65d5b7d52ac596a21e2f54282775c3b7318329c0 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sun, 2 Feb 2025 19:35:17 +0500 Subject: [PATCH 10/10] Remove an extra newline. --- cssselect/xpath.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index e68c51c..e9d1065 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -307,7 +307,6 @@ def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr: Callable[[XPathExpr, XPathExpr], XPathExpr], getattr(self, "xpath_%s_combinator" % combinator), ) - return method(self.xpath(combined.selector), self.xpath(combined.subselector)) def xpath_negation(self, negation: Negation) -> XPathExpr: