diff --git a/.bandit.yml b/.bandit.yml deleted file mode 100644 index 7fcde04..0000000 --- a/.bandit.yml +++ /dev/null @@ -1,2 +0,0 @@ -skips: -- B101 diff --git a/.bumpversion.cfg b/.bumpversion.cfg deleted file mode 100644 index 56cfabc..0000000 --- a/.bumpversion.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[bumpversion] -current_version = 1.2.0 -commit = True -tag = True - -[bumpversion:file:cssselect/__init__.py] diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index ed1fac6..0000000 --- a/.coveragerc +++ /dev/null @@ -1,10 +0,0 @@ -[run] -branch = True -source = cssselect - -[report] -exclude_lines = - pragma: no cover - def __repr__ - if sys.version_info - if __name__ == '__main__': diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 8b0608f..0000000 --- a/.flake8 +++ /dev/null @@ -1,15 +0,0 @@ -[flake8] -max-line-length = 99 -ignore = - W503 - # too many leading '#' for block comment - E266 -exclude = - .git - .tox - venv* - - # pending revision - docs/conf.py -per-file-ignores = - cssselect/__init__.py:F401 diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..bb4f6e1 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +# applying pre-commit hooks to the project +e91101b37f82558db84a6b8ee9a6dba1fd2ae0bb diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 847d788..41ff7e1 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -5,32 +5,27 @@ jobs: checks: runs-on: ubuntu-latest strategy: + fail-fast: false matrix: include: - - python-version: 3.12 - env: - TOXENV: black - - python-version: 3.12 - env: - TOXENV: flake8 - - python-version: 3.12 + - python-version: 3.14 env: TOXENV: pylint - - python-version: 3.12 - env: - TOXENV: security - - python-version: 3.12 + - python-version: 3.14 # Keep in sync with .readthedocs.yml env: TOXENV: docs - - python-version: 3.12 + - python-version: 3.14 env: TOXENV: typing + - python-version: 3.14 + env: + TOXENV: twinecheck steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} @@ -40,3 +35,9 @@ jobs: pip install -U pip pip install -U tox tox + + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 67d9c5a..526c458 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,31 +1,32 @@ name: Publish -on: [push] +on: + push: + tags: + - 'v[0-9]+.[0-9]+.[0-9]+' jobs: publish: runs-on: ubuntu-latest - if: startsWith(github.event.ref, 'refs/tags/') + + environment: + name: pypi + url: https://pypi.org/p/cssselect + + permissions: + id-token: write steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - - name: Set up Python 3.12 - uses: actions/setup-python@v4 + - name: Set up Python + uses: actions/setup-python@v6 with: - python-version: 3.12 + python-version: 3.14 - - name: Check Tag - id: check-release-tag + - name: Build run: | - if [[ ${{ github.event.ref }} =~ ^refs/tags/v[0-9]+[.][0-9]+[.][0-9]+(rc[0-9]+|[.]dev[0-9]+)?$ ]]; then - echo ::set-output name=release_tag::true - fi + python -m pip install --upgrade build + python -m build - name: Publish to PyPI - if: steps.check-release-tag.outputs.release_tag == 'true' - run: | - pip install --upgrade setuptools wheel twine - python setup.py sdist bdist_wheel - export TWINE_USERNAME=__token__ - export TWINE_PASSWORD=${{ secrets.PYPI_TOKEN }} - twine upload dist/* + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests-macos.yml similarity index 59% rename from .github/workflows/tests.yml rename to .github/workflows/tests-macos.yml index 0de2aa2..4947937 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests-macos.yml @@ -1,18 +1,19 @@ -name: Tests +name: macOS on: [push, pull_request] jobs: tests: - runs-on: ubuntu-latest + runs-on: macos-latest strategy: + fail-fast: false matrix: - python-version: [3.8, 3.9, "3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} @@ -23,4 +24,4 @@ jobs: tox -e py - name: Upload coverage report - run: bash <(curl -s https://codecov.io/bash) + uses: codecov/codecov-action@v5 diff --git a/.github/workflows/tests-ubuntu.yml b/.github/workflows/tests-ubuntu.yml new file mode 100644 index 0000000..1ef905b --- /dev/null +++ b/.github/workflows/tests-ubuntu.yml @@ -0,0 +1,33 @@ +name: Ubuntu +on: [push, pull_request] + +jobs: + tests: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14", "pypy3.11"] + + steps: + - uses: actions/checkout@v6 + + - name: Install system libraries + if: contains(matrix.python-version, 'pypy') + run: | + sudo apt-get update + sudo apt-get install libxml2-dev libxslt-dev + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + + - name: Run tests + run: | + pip install -U pip + pip install -U tox + tox -e py + + - name: Upload coverage report + uses: codecov/codecov-action@v5 diff --git a/.github/workflows/tests-windows.yml b/.github/workflows/tests-windows.yml new file mode 100644 index 0000000..24d7ee8 --- /dev/null +++ b/.github/workflows/tests-windows.yml @@ -0,0 +1,27 @@ +name: Windows +on: [push, pull_request] + +jobs: + tests: + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] + + steps: + - uses: actions/checkout@v6 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + + - name: Run tests + run: | + pip install -U pip + pip install -U tox + tox -e py + + - name: Upload coverage report + uses: codecov/codecov-action@v5 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..81ca890 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,26 @@ +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.14.4 + hooks: + - id: ruff-check + args: [ --fix ] + - id: ruff-format +- repo: https://github.com/adamchainz/blacken-docs + rev: 1.20.0 + hooks: + - id: blacken-docs + additional_dependencies: + - black==26.1.0 +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace +- repo: https://github.com/sphinx-contrib/sphinx-lint + rev: v1.0.0 + hooks: + - id: sphinx-lint +- repo: https://github.com/rhysd/actionlint + rev: v1.7.10 + hooks: + - id: actionlint diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..b91642a --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,15 @@ +version: 2 +formats: all +sphinx: + configuration: docs/conf.py + fail_on_warning: true +build: + os: ubuntu-24.04 + tools: + # For available versions, see: + # https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python + python: "3.14" # Keep in sync with .github/workflows/checks.yml +python: + install: + - requirements: docs/requirements.txt + - path: . diff --git a/CHANGES b/CHANGES index dc38826..5ca2959 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,34 @@ Changelog ========= +Version 1.4.0 +------------- + +Released on 2026-01-29. + +* Dropped support for Python 3.9 and PyPy 3.10. + +* Added support for Python 3.14 and PyPy 3.11. + +* Switched the build system to ``hatchling``. + +* CI fixes and improvements. + +Version 1.3.0 +------------- + +Released on 2025-03-10. + +* Dropped support for Python 3.7-3.8, added support for Python 3.12-3.13 and + PyPy 3.10. + +* Removed ``_unicode_safe_getattr()``, deprecated in 1.2.0. + +* Added ``pre-commit`` and formatted the code with ``ruff``. + +* Many CI additions and improvements. + + Version 1.2.0 ------------- diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 7fc2933..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,4 +0,0 @@ -include AUTHORS CHANGES LICENSE README.rst tox.ini .coveragerc py.typed -recursive-include docs * -recursive-include tests * -prune docs/_build diff --git a/README.rst b/README.rst index d62b320..c055295 100644 --- a/README.rst +++ b/README.rst @@ -11,8 +11,8 @@ cssselect: CSS Selectors for Python :target: https://pypi.python.org/pypi/cssselect :alt: Supported Python Versions -.. image:: https://github.com/scrapy/cssselect/actions/workflows/tests.yml/badge.svg - :target: https://github.com/scrapy/cssselect/actions/workflows/tests.yml +.. image:: https://github.com/scrapy/cssselect/actions/workflows/tests-ubuntu.yml/badge.svg + :target: https://github.com/scrapy/cssselect/actions/workflows/tests-ubuntu.yml :alt: Tests .. image:: https://img.shields.io/codecov/c/github/scrapy/cssselect/master.svg diff --git a/cssselect/__init__.py b/cssselect/__init__.py index 77f028b..59d62df 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -1,37 +1,36 @@ -# -*- coding: utf-8 -*- """ - CSS Selectors based on XPath - ============================ +CSS Selectors based on XPath +============================ - This module supports selecting XML/HTML elements based on CSS selectors. - See the `CSSSelector` class for details. +This module supports selecting XML/HTML elements based on CSS selectors. +See the `CSSSelector` class for details. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. """ from cssselect.parser import ( - parse, - Selector, FunctionalPseudoElement, + Selector, SelectorError, SelectorSyntaxError, + parse, ) -from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError +from cssselect.xpath import ExpressionError, GenericTranslator, HTMLTranslator __all__ = ( "ExpressionError", "FunctionalPseudoElement", "GenericTranslator", "HTMLTranslator", - "parse", "Selector", "SelectorError", "SelectorSyntaxError", + "parse", ) -VERSION = "1.2.0" +VERSION = "1.4.0" __version__ = VERSION diff --git a/cssselect/parser.py b/cssselect/parser.py index 25a650c..f969769 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -1,22 +1,28 @@ -# -*- coding: utf-8 -*- """ - cssselect.parser - ================ +cssselect.parser +================ - Tokenizer, parser and parsed objects for CSS selectors. +Tokenizer, parser and parsed objects for CSS selectors. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. """ -import sys -import re +from __future__ import annotations + import operator -import typing -from typing import Iterable, Iterator, List, Optional, Sequence, Tuple, Union +import re +import sys +from typing import TYPE_CHECKING, Literal, Protocol, TypeAlias, Union, cast, overload + +if TYPE_CHECKING: + from collections.abc import Iterable, Iterator, Sequence + + # typing.Self requires Python 3.11 + from typing_extensions import Self def ascii_lower(string: str) -> str: @@ -40,7 +46,7 @@ class SelectorSyntaxError(SelectorError, SyntaxError): #### Parsed objects -Tree = Union[ +Tree: TypeAlias = Union[ "Element", "Hash", "Class", @@ -53,7 +59,7 @@ class SelectorSyntaxError(SelectorError, SyntaxError): "SpecificityAdjustment", "CombinedSelector", ] -PseudoElement = Union["FunctionalPseudoElement", str] +PseudoElement: TypeAlias = Union["FunctionalPseudoElement", str] class Selector: @@ -67,9 +73,11 @@ class Selector: """ - def __init__(self, tree: Tree, pseudo_element: Optional[PseudoElement] = None) -> None: + def __init__(self, tree: Tree, pseudo_element: PseudoElement | None = None) -> None: self.parsed_tree = tree - if pseudo_element is not None and not isinstance(pseudo_element, FunctionalPseudoElement): + if pseudo_element is not None and not isinstance( + pseudo_element, FunctionalPseudoElement + ): pseudo_element = ascii_lower(pseudo_element) #: A :class:`FunctionalPseudoElement`, #: or the identifier for the pseudo-element as a string, @@ -97,25 +105,25 @@ def __repr__(self) -> str: if isinstance(self.pseudo_element, FunctionalPseudoElement): pseudo_element = repr(self.pseudo_element) elif self.pseudo_element: - pseudo_element = "::%s" % self.pseudo_element + pseudo_element = f"::{self.pseudo_element}" else: pseudo_element = "" - return "%s[%r%s]" % (self.__class__.__name__, self.parsed_tree, pseudo_element) + return f"{self.__class__.__name__}[{self.parsed_tree!r}{pseudo_element}]" def canonical(self) -> str: """Return a CSS representation for this selector (a string)""" if isinstance(self.pseudo_element, FunctionalPseudoElement): - pseudo_element = "::%s" % self.pseudo_element.canonical() + pseudo_element = f"::{self.pseudo_element.canonical()}" elif self.pseudo_element: - pseudo_element = "::%s" % self.pseudo_element + pseudo_element = f"::{self.pseudo_element}" else: pseudo_element = "" - res = "%s%s" % (self.parsed_tree.canonical(), pseudo_element) + res = f"{self.parsed_tree.canonical()}{pseudo_element}" if len(res) > 1: res = res.lstrip("*") return res - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: """Return the specificity_ of this selector as a tuple of 3 integers. .. _specificity: http://www.w3.org/TR/selectors/#specificity @@ -137,12 +145,12 @@ def __init__(self, selector: Tree, class_name: str) -> None: self.class_name = class_name def __repr__(self) -> str: - return "%s[%r.%s]" % (self.__class__.__name__, self.selector, self.class_name) + return f"{self.__class__.__name__}[{self.selector!r}.{self.class_name}]" def canonical(self) -> str: - return "%s.%s" % (self.selector.canonical(), self.class_name) + return f"{self.selector.canonical()}.{self.class_name}" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -166,23 +174,20 @@ class FunctionalPseudoElement: """ - def __init__(self, name: str, arguments: Sequence["Token"]): + def __init__(self, name: str, arguments: Sequence[Token]): self.name = ascii_lower(name) self.arguments = arguments def __repr__(self) -> str: - return "%s[::%s(%r)]" % ( - self.__class__.__name__, - self.name, - [token.value for token in self.arguments], - ) + token_values = [token.value for token in self.arguments] + return f"{self.__class__.__name__}[::{self.name}({token_values!r})]" - def argument_types(self) -> List[str]: + def argument_types(self) -> list[str]: return [token.type for token in self.arguments] def canonical(self) -> str: args = "".join(token.css() for token in self.arguments) - return "%s(%s)" % (self.name, args) + return f"{self.name}({args})" class Function: @@ -190,27 +195,23 @@ class Function: Represents selector:name(expr) """ - def __init__(self, selector: Tree, name: str, arguments: Sequence["Token"]) -> None: + def __init__(self, selector: Tree, name: str, arguments: Sequence[Token]) -> None: self.selector = selector self.name = ascii_lower(name) self.arguments = arguments def __repr__(self) -> str: - return "%s[%r:%s(%r)]" % ( - self.__class__.__name__, - self.selector, - self.name, - [token.value for token in self.arguments], - ) + token_values = [token.value for token in self.arguments] + return f"{self.__class__.__name__}[{self.selector!r}:{self.name}({token_values!r})]" - def argument_types(self) -> List[str]: + def argument_types(self) -> list[str]: return [token.type for token in self.arguments] def canonical(self) -> str: args = "".join(token.css() for token in self.arguments) - return "%s:%s(%s)" % (self.selector.canonical(), self.name, args) + return f"{self.selector.canonical()}:{self.name}({args})" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -226,12 +227,12 @@ def __init__(self, selector: Tree, ident: str) -> None: self.ident = ascii_lower(ident) def __repr__(self) -> str: - return "%s[%r:%s]" % (self.__class__.__name__, self.selector, self.ident) + return f"{self.__class__.__name__}[{self.selector!r}:{self.ident}]" def canonical(self) -> str: - return "%s:%s" % (self.selector.canonical(), self.ident) + return f"{self.selector.canonical()}:{self.ident}" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -247,15 +248,15 @@ def __init__(self, selector: Tree, subselector: Tree) -> None: self.subselector = subselector def __repr__(self) -> str: - return "%s[%r:not(%r)]" % (self.__class__.__name__, self.selector, self.subselector) + return f"{self.__class__.__name__}[{self.selector!r}:not({self.subselector!r})]" def canonical(self) -> str: subsel = self.subselector.canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") - return "%s:not(%s)" % (self.selector.canonical(), subsel) + return f"{self.selector.canonical()}:not({subsel})" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -266,31 +267,27 @@ class Relation: Represents selector:has(subselector) """ - def __init__(self, selector: Tree, combinator: "Token", subselector: Selector): + def __init__(self, selector: Tree, combinator: Token, subselector: Selector): self.selector = selector self.combinator = combinator self.subselector = subselector def __repr__(self) -> str: - return "%s[%r:has(%r)]" % ( - self.__class__.__name__, - self.selector, - self.subselector, - ) + return f"{self.__class__.__name__}[{self.selector!r}:has({self.subselector!r})]" def canonical(self) -> str: try: - subsel = self.subselector[0].canonical() # type: ignore + subsel = self.subselector[0].canonical() # type: ignore[index] except TypeError: subsel = self.subselector.canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") - return "%s:has(%s)" % (self.selector.canonical(), subsel) + return f"{self.selector.canonical()}:has({subsel})" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() try: - a2, b2, c2 = self.subselector[-1].specificity() # type: ignore + a2, b2, c2 = self.subselector[-1].specificity() # type: ignore[index] except TypeError: a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -306,20 +303,18 @@ def __init__(self, selector: Tree, selector_list: Iterable[Tree]): self.selector_list = selector_list def __repr__(self) -> str: - return "%s[%r:is(%s)]" % ( - self.__class__.__name__, - self.selector, - ", ".join(map(repr, self.selector_list)), - ) + args_str = ", ".join(repr(s) for s in self.selector_list) + return f"{self.__class__.__name__}[{self.selector!r}:is({args_str})]" def canonical(self) -> str: selector_arguments = [] for s in self.selector_list: selarg = s.canonical() selector_arguments.append(selarg.lstrip("*")) - return "%s:is(%s)" % (self.selector.canonical(), ", ".join(map(str, selector_arguments))) + args_str = ", ".join(str(s) for s in selector_arguments) + return f"{self.selector.canonical()}:is({args_str})" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: return max(x.specificity() for x in self.selector_list) @@ -329,28 +324,23 @@ class SpecificityAdjustment: Same as selector:is(selector_list), but its specificity is always 0 """ - def __init__(self, selector: Tree, selector_list: List[Tree]): + def __init__(self, selector: Tree, selector_list: list[Tree]): self.selector = selector self.selector_list = selector_list def __repr__(self) -> str: - return "%s[%r:where(%s)]" % ( - self.__class__.__name__, - self.selector, - ", ".join(map(repr, self.selector_list)), - ) + args_str = ", ".join(repr(s) for s in self.selector_list) + return f"{self.__class__.__name__}[{self.selector!r}:where({args_str})]" def canonical(self) -> str: selector_arguments = [] for s in self.selector_list: selarg = s.canonical() selector_arguments.append(selarg.lstrip("*")) - return "%s:where(%s)" % ( - self.selector.canonical(), - ", ".join(map(str, selector_arguments)), - ) + args_str = ", ".join(str(s) for s in selector_arguments) + return f"{self.selector.canonical()}:where({args_str})" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: return 0, 0, 0 @@ -359,30 +349,33 @@ class Attrib: Represents selector[namespace|attrib operator value] """ - @typing.overload + @overload def __init__( self, selector: Tree, - namespace: Optional[str], + namespace: str | None, attrib: str, - operator: 'typing.Literal["exists"]', + operator: Literal["exists"], value: None, - ) -> None: - ... + ) -> None: ... - @typing.overload + @overload def __init__( - self, selector: Tree, namespace: Optional[str], attrib: str, operator: str, value: "Token" - ) -> None: - ... + self, + selector: Tree, + namespace: str | None, + attrib: str, + operator: str, + value: Token, + ) -> None: ... def __init__( self, selector: Tree, - namespace: Optional[str], + namespace: str | None, attrib: str, operator: str, - value: Optional["Token"], + value: Token | None, ) -> None: self.selector = selector self.namespace = namespace @@ -391,35 +384,24 @@ def __init__( self.value = value def __repr__(self) -> str: - if self.namespace: - attrib = "%s|%s" % (self.namespace, self.attrib) - else: - attrib = self.attrib + attrib = f"{self.namespace}|{self.attrib}" if self.namespace else self.attrib if self.operator == "exists": - return "%s[%r[%s]]" % (self.__class__.__name__, self.selector, attrib) - else: - return "%s[%r[%s %s %r]]" % ( - self.__class__.__name__, - self.selector, - attrib, - self.operator, - typing.cast("Token", self.value).value, - ) + return f"{self.__class__.__name__}[{self.selector!r}[{attrib}]]" + assert self.value is not None + return f"{self.__class__.__name__}[{self.selector!r}[{attrib} {self.operator} {self.value.value!r}]]" def canonical(self) -> str: - if self.namespace: - attrib = "%s|%s" % (self.namespace, self.attrib) - else: - attrib = self.attrib + attrib = f"{self.namespace}|{self.attrib}" if self.namespace else self.attrib if self.operator == "exists": op = attrib else: - op = "%s%s%s" % (attrib, self.operator, typing.cast("Token", self.value).css()) + assert self.value is not None + op = f"{attrib}{self.operator}{self.value.css()}" - return "%s[%s]" % (self.selector.canonical(), op) + return f"{self.selector.canonical()}[{op}]" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -433,24 +415,25 @@ class Element: """ - def __init__(self, namespace: Optional[str] = None, element: Optional[str] = None) -> None: + def __init__( + self, namespace: str | None = None, element: str | None = None + ) -> None: self.namespace = namespace self.element = element def __repr__(self) -> str: - return "%s[%s]" % (self.__class__.__name__, self.canonical()) + return f"{self.__class__.__name__}[{self.canonical()}]" def canonical(self) -> str: element = self.element or "*" if self.namespace: - element = "%s|%s" % (self.namespace, element) + element = f"{self.namespace}|{element}" return element - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: if self.element: return 0, 0, 1 - else: - return 0, 0, 0 + return 0, 0, 0 class Hash: @@ -458,17 +441,17 @@ class Hash: Represents selector#id """ - def __init__(self, selector: Tree, id: str) -> None: + def __init__(self, selector: Tree, id: str) -> None: # noqa: A002 self.selector = selector self.id = id def __repr__(self) -> str: - return "%s[%r#%s]" % (self.__class__.__name__, self.selector, self.id) + return f"{self.__class__.__name__}[{self.selector!r}#{self.id}]" def canonical(self) -> str: - return "%s#%s" % (self.selector.canonical(), self.id) + return f"{self.selector.canonical()}#{self.id}" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() a += 1 return a, b, c @@ -482,19 +465,18 @@ def __init__(self, selector: Tree, combinator: str, subselector: Tree) -> None: self.subselector = subselector def __repr__(self) -> str: - if self.combinator == " ": - comb = "" - else: - comb = self.combinator - return "%s[%r %s %r]" % (self.__class__.__name__, self.selector, comb, self.subselector) + comb = "" if self.combinator == " " else self.combinator + return ( + f"{self.__class__.__name__}[{self.selector!r} {comb} {self.subselector!r}]" + ) def canonical(self) -> str: subsel = self.subselector.canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") - return "%s %s %s" % (self.selector.canonical(), self.combinator, subsel) + return f"{self.selector.canonical()} {self.combinator} {subsel}" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -509,10 +491,12 @@ def specificity(self) -> Tuple[int, int, int]: _id_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$") # foo.bar or .bar -_class_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$") +_class_re = re.compile( + r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$" +) -def parse(css: str) -> List[Selector]: +def parse(css: str) -> list[Selector]: """Parse a CSS *group of selectors*. If you don't care about pseudo-elements or selector specificity, @@ -536,7 +520,9 @@ def parse(css: str) -> List[Selector]: return [Selector(Hash(Element(element=match.group(1) or None), match.group(2)))] match = _class_re.match(css) if match is not None: - return [Selector(Class(Element(element=match.group(1) or None), match.group(2)))] + return [ + Selector(Class(Element(element=match.group(1) or None), match.group(2))) + ] stream = TokenStream(tokenize(css)) stream.source = css @@ -552,7 +538,7 @@ def parse(css: str) -> List[Selector]: # raise -def parse_selector_group(stream: "TokenStream") -> Iterator[Selector]: +def parse_selector_group(stream: TokenStream) -> Iterator[Selector]: stream.skip_whitespace() while 1: yield Selector(*parse_selector(stream)) @@ -563,7 +549,7 @@ def parse_selector_group(stream: "TokenStream") -> Iterator[Selector]: break -def parse_selector(stream: "TokenStream") -> Tuple[Tree, Optional[PseudoElement]]: +def parse_selector(stream: TokenStream) -> tuple[Tree, PseudoElement | None]: result, pseudo_element = parse_simple_selector(stream) while 1: stream.skip_whitespace() @@ -572,11 +558,11 @@ def parse_selector(stream: "TokenStream") -> Tuple[Tree, Optional[PseudoElement] break if pseudo_element: raise SelectorSyntaxError( - "Got pseudo-element ::%s not at the end of a selector" % pseudo_element + f"Got pseudo-element ::{pseudo_element} not at the end of a selector" ) if peek.is_delim("+", ">", "~"): # A combinator - combinator = typing.cast(str, stream.next().value) + combinator = cast("str", stream.next().value) stream.skip_whitespace() else: # By exclusion, the last parse_simple_selector() ended @@ -588,8 +574,8 @@ def parse_selector(stream: "TokenStream") -> Tuple[Tree, Optional[PseudoElement] def parse_simple_selector( - stream: "TokenStream", inside_negation: bool = False -) -> Tuple[Tree, Optional[PseudoElement]]: + stream: TokenStream, inside_negation: bool = False +) -> tuple[Tree, PseudoElement | None]: stream.skip_whitespace() selector_start = len(stream.used) peek = stream.peek() @@ -608,7 +594,7 @@ def parse_simple_selector( else: element = namespace = None result: Tree = Element(namespace, element) - pseudo_element: Optional[PseudoElement] = None + pseudo_element: PseudoElement | None = None while 1: peek = stream.peek() if ( @@ -619,10 +605,10 @@ def parse_simple_selector( break if pseudo_element: raise SelectorSyntaxError( - "Got pseudo-element ::%s not at the end of a selector" % pseudo_element + f"Got pseudo-element ::{pseudo_element} not at the end of a selector" ) if peek.type == "HASH": - result = Hash(result, typing.cast(str, stream.next().value)) + result = Hash(result, cast("str", stream.next().value)) elif peek == ("DELIM", "."): stream.next() result = Class(result, stream.next_ident()) @@ -651,21 +637,20 @@ def parse_simple_selector( continue if stream.peek() != ("DELIM", "("): result = Pseudo(result, ident) - if repr(result) == "Pseudo[Element[*]:scope]": - if not ( - len(stream.used) == 2 - or (len(stream.used) == 3 and stream.used[0].type == "S") - or (len(stream.used) >= 3 and stream.used[-3].is_delim(",")) - or ( - len(stream.used) >= 4 - and stream.used[-3].type == "S" - and stream.used[-4].is_delim(",") - ) - ): - raise SelectorSyntaxError( - 'Got immediate child pseudo-element ":scope" ' - "not at the start of a selector" - ) + if repr(result) == "Pseudo[Element[*]:scope]" and not ( + len(stream.used) == 2 + or (len(stream.used) == 3 and stream.used[0].type == "S") + or (len(stream.used) >= 3 and stream.used[-3].is_delim(",")) + or ( + len(stream.used) >= 4 + and stream.used[-3].type == "S" + and stream.used[-4].is_delim(",") + ) + ): + raise SelectorSyntaxError( + 'Got immediate child pseudo-element ":scope" ' + "not at the start of a selector" + ) continue stream.next() stream.skip_whitespace() @@ -675,14 +660,13 @@ def parse_simple_selector( argument, argument_pseudo_element = parse_simple_selector( stream, inside_negation=True ) - next = stream.next() + next_ = stream.next() if argument_pseudo_element: raise SelectorSyntaxError( - "Got pseudo-element ::%s inside :not() at %s" - % (argument_pseudo_element, next.pos) + f"Got pseudo-element ::{argument_pseudo_element} inside :not() at {next_.pos}" ) - if next != ("DELIM", ")"): - raise SelectorSyntaxError("Expected ')', got %s" % (next,)) + if next_ != ("DELIM", ")"): + raise SelectorSyntaxError(f"Expected ')', got {next_}") result = Negation(result, argument) elif ident.lower() == "has": combinator, arguments = parse_relative_selector(stream) @@ -697,77 +681,83 @@ def parse_simple_selector( else: result = Function(result, ident, parse_arguments(stream)) else: - raise SelectorSyntaxError("Expected selector, got %s" % (peek,)) + raise SelectorSyntaxError(f"Expected selector, got {peek}") if len(stream.used) == selector_start: - raise SelectorSyntaxError("Expected selector, got %s" % (stream.peek(),)) + raise SelectorSyntaxError(f"Expected selector, got {stream.peek()}") return result, pseudo_element -def parse_arguments(stream: "TokenStream") -> List["Token"]: - arguments: List["Token"] = [] +def parse_arguments(stream: TokenStream) -> list[Token]: # noqa: RET503 + arguments: list[Token] = [] while 1: stream.skip_whitespace() - next = stream.next() - if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "+"), ("DELIM", "-")]: - arguments.append(next) - elif next == ("DELIM", ")"): + next_ = stream.next() + if next_.type in ("IDENT", "STRING", "NUMBER") or next_ in [ + ("DELIM", "+"), + ("DELIM", "-"), + ]: + arguments.append(next_) + elif next_ == ("DELIM", ")"): return arguments else: - raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) + raise SelectorSyntaxError(f"Expected an argument, got {next_}") -def parse_relative_selector(stream: "TokenStream") -> Tuple["Token", Selector]: +def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]: # noqa: RET503 stream.skip_whitespace() subselector = "" - next = stream.next() + next_ = stream.next() - if next in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]: - combinator = next + if next_ in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]: + combinator = next_ stream.skip_whitespace() - next = stream.next() + next_ = stream.next() else: combinator = Token("DELIM", " ", pos=0) while 1: - if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "."), ("DELIM", "*")]: - subselector += typing.cast(str, next.value) - elif next == ("DELIM", ")"): + if next_.type in ("IDENT", "STRING", "NUMBER") or next_ in [ + ("DELIM", "."), + ("DELIM", "*"), + ]: + subselector += cast("str", next_.value) + elif next_ == ("DELIM", ")"): result = parse(subselector) return combinator, result[0] else: - raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) - next = stream.next() + raise SelectorSyntaxError(f"Expected an argument, got {next_}") + next_ = stream.next() -def parse_simple_selector_arguments(stream: "TokenStream") -> List[Tree]: +def parse_simple_selector_arguments(stream: TokenStream) -> list[Tree]: arguments = [] while 1: result, pseudo_element = parse_simple_selector(stream, True) if pseudo_element: raise SelectorSyntaxError( - "Got pseudo-element ::%s inside function" % (pseudo_element,) + f"Got pseudo-element ::{pseudo_element} inside function" ) stream.skip_whitespace() - next = stream.next() - if next in (("EOF", None), ("DELIM", ",")): + next_ = stream.next() + if next_ in (("EOF", None), ("DELIM", ",")): stream.next() stream.skip_whitespace() arguments.append(result) - elif next == ("DELIM", ")"): + elif next_ == ("DELIM", ")"): arguments.append(result) break else: - raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) + raise SelectorSyntaxError(f"Expected an argument, got {next_}") return arguments -def parse_attrib(selector: Tree, stream: "TokenStream") -> Attrib: +def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib: stream.skip_whitespace() attrib = stream.next_ident_or_star() if attrib is None and stream.peek() != ("DELIM", "|"): - raise SelectorSyntaxError("Expected '|', got %s" % (stream.peek(),)) - namespace: Optional[str] - op: Optional[str] + raise SelectorSyntaxError(f"Expected '|', got {stream.peek()}") + namespace: str | None + op: str | None if stream.peek() == ("DELIM", "|"): stream.next() if stream.peek() == ("DELIM", "="): @@ -782,28 +772,30 @@ def parse_attrib(selector: Tree, stream: "TokenStream") -> Attrib: namespace = op = None if op is None: stream.skip_whitespace() - next = stream.next() - if next == ("DELIM", "]"): - return Attrib(selector, namespace, typing.cast(str, attrib), "exists", None) - elif next == ("DELIM", "="): + next_ = stream.next() + if next_ == ("DELIM", "]"): + return Attrib(selector, namespace, cast("str", attrib), "exists", None) + if next_ == ("DELIM", "="): op = "=" - elif next.is_delim("^", "$", "*", "~", "|", "!") and (stream.peek() == ("DELIM", "=")): - op = typing.cast(str, next.value) + "=" + elif next_.is_delim("^", "$", "*", "~", "|", "!") and ( + stream.peek() == ("DELIM", "=") + ): + op = cast("str", next_.value) + "=" stream.next() else: - raise SelectorSyntaxError("Operator expected, got %s" % (next,)) + raise SelectorSyntaxError(f"Operator expected, got {next_}") stream.skip_whitespace() value = stream.next() if value.type not in ("IDENT", "STRING"): - raise SelectorSyntaxError("Expected string or ident, got %s" % (value,)) + raise SelectorSyntaxError(f"Expected string or ident, got {value}") stream.skip_whitespace() - next = stream.next() - if next != ("DELIM", "]"): - raise SelectorSyntaxError("Expected ']', got %s" % (next,)) - return Attrib(selector, namespace, typing.cast(str, attrib), op, value) + next_ = stream.next() + if next_ != ("DELIM", "]"): + raise SelectorSyntaxError(f"Expected ']', got {next_}") + return Attrib(selector, namespace, cast("str", attrib), op, value) -def parse_series(tokens: Iterable["Token"]) -> Tuple[int, int]: +def parse_series(tokens: Iterable[Token]) -> tuple[int, int]: """ Parses the arguments for :nth-child() and friends. @@ -814,12 +806,12 @@ def parse_series(tokens: Iterable["Token"]) -> Tuple[int, int]: for token in tokens: if token.type == "STRING": raise ValueError("String tokens not allowed in series.") - s = "".join(typing.cast(str, token.value) for token in tokens).strip() + s = "".join(cast("str", token.value) for token in tokens).strip() if s == "odd": return 2, 1 - elif s == "even": + if s == "even": return 2, 0 - elif s == "n": + if s == "n": return 1, 0 if "n" not in s: # Just b @@ -828,42 +820,36 @@ def parse_series(tokens: Iterable["Token"]) -> Tuple[int, int]: a_as_int: int if not a: a_as_int = 1 - elif a == "-" or a == "+": + elif a in {"-", "+"}: a_as_int = int(a + "1") else: a_as_int = int(a) - b_as_int: int - if not b: - b_as_int = 0 - else: - b_as_int = int(b) + b_as_int = int(b) if b else 0 return a_as_int, b_as_int #### Token objects -class Token(Tuple[str, Optional[str]]): - @typing.overload +class Token(tuple[str, str | None]): # noqa: SLOT001 + @overload def __new__( cls, - type_: 'typing.Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"]', + type_: Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"], value: str, pos: int, - ) -> "Token": - ... + ) -> Self: ... - @typing.overload - def __new__(cls, type_: 'typing.Literal["EOF"]', value: None, pos: int) -> "Token": - ... + @overload + def __new__(cls, type_: Literal["EOF"], value: None, pos: int) -> Self: ... - def __new__(cls, type_: str, value: Optional[str], pos: int) -> "Token": + def __new__(cls, type_: str, value: str | None, pos: int) -> Self: obj = tuple.__new__(cls, (type_, value)) obj.pos = pos return obj def __repr__(self) -> str: - return "<%s '%s' at %i>" % (self.type, self.value, self.pos) + return f"<{self.type} '{self.value}' at {self.pos}>" def is_delim(self, *values: str) -> bool: return self.type == "DELIM" and self.value in values @@ -875,22 +861,21 @@ def type(self) -> str: return self[0] @property - def value(self) -> Optional[str]: + def value(self) -> str | None: return self[1] def css(self) -> str: if self.type == "STRING": return repr(self.value) - else: - return typing.cast(str, self.value) + return cast("str", self.value) class EOFToken(Token): - def __new__(cls, pos: int) -> "EOFToken": - return typing.cast("EOFToken", Token.__new__(cls, "EOF", None, pos)) + def __new__(cls, pos: int) -> Self: + return Token.__new__(cls, "EOF", None, pos) def __repr__(self) -> str: - return "<%s at %i>" % (self.type, self.pos) + return f"<{self.type} at {self.pos}>" #### Tokenizer @@ -901,20 +886,17 @@ class TokenMacros: escape = unicode_escape + r"|\\[^\n\r\f0-9a-f]" string_escape = r"\\(?:\n|\r\n|\r|\f)|" + escape nonascii = r"[^\0-\177]" - nmchar = "[_a-z0-9-]|%s|%s" % (escape, nonascii) - nmstart = "[_a-z]|%s|%s" % (escape, nonascii) - + nmchar = f"[_a-z0-9-]|{escape}|{nonascii}" + nmstart = f"[_a-z]|{escape}|{nonascii}" -if typing.TYPE_CHECKING: - class MatchFunc(typing.Protocol): - def __call__( - self, string: str, pos: int = ..., endpos: int = ... - ) -> Optional["re.Match[str]"]: - ... +class MatchFunc(Protocol): + def __call__( + self, string: str, pos: int = ..., endpos: int = ... + ) -> re.Match[str] | None: ... -def _compile(pattern: str) -> "MatchFunc": +def _compile(pattern: str) -> MatchFunc: return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match @@ -928,14 +910,14 @@ def _compile(pattern: str) -> "MatchFunc": } _sub_simple_escape = re.compile(r"\\(.)").sub -_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub +_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.IGNORECASE).sub _sub_newline_escape = re.compile(r"\\(?:\n|\r\n|\r|\f)").sub # Same as r'\1', but faster on CPython _replace_simple = operator.methodcaller("group", 1) -def _replace_unicode(match: "re.Match[str]") -> str: +def _replace_unicode(match: re.Match[str]) -> str: codepoint = int(match.group(1), 16) if codepoint > sys.maxunicode: codepoint = 0xFFFD @@ -944,8 +926,7 @@ def _replace_unicode(match: "re.Match[str]") -> str: def unescape_ident(value: str) -> str: value = _sub_unicode_escape(_replace_unicode, value) - value = _sub_simple_escape(_replace_simple, value) - return value + return _sub_simple_escape(_replace_simple, value) def tokenize(s: str) -> Iterator[Token]: @@ -970,7 +951,8 @@ def tokenize(s: str) -> Iterator[Token]: match = _match_hash(s, pos=pos) if match: value = _sub_simple_escape( - _replace_simple, _sub_unicode_escape(_replace_unicode, match.group()[1:]) + _replace_simple, + _sub_unicode_escape(_replace_unicode, match.group()[1:]), ) yield Token("HASH", value, pos) pos = match.end() @@ -982,12 +964,14 @@ def tokenize(s: str) -> Iterator[Token]: assert match, "Should have found at least an empty match" end_pos = match.end() if end_pos == len_s: - raise SelectorSyntaxError("Unclosed string at %s" % pos) + raise SelectorSyntaxError(f"Unclosed string at {pos}") if s[end_pos] != quote: - raise SelectorSyntaxError("Invalid string at %s" % pos) + raise SelectorSyntaxError(f"Invalid string at {pos}") value = _sub_simple_escape( _replace_simple, - _sub_unicode_escape(_replace_unicode, _sub_newline_escape("", match.group())), + _sub_unicode_escape( + _replace_unicode, _sub_newline_escape("", match.group()) + ), ) yield Token("STRING", value, pos) pos = end_pos + 1 @@ -1017,44 +1001,44 @@ def tokenize(s: str) -> Iterator[Token]: class TokenStream: - def __init__(self, tokens: Iterable[Token], source: Optional[str] = None) -> None: - self.used: List[Token] = [] + def __init__(self, tokens: Iterable[Token], source: str | None = None) -> None: + self.used: list[Token] = [] self.tokens = iter(tokens) self.source = source - self.peeked: Optional[Token] = None + self.peeked: Token | None = None self._peeking = False self.next_token = self.tokens.__next__ def next(self) -> Token: if self._peeking: self._peeking = False - self.used.append(typing.cast(Token, self.peeked)) - return typing.cast(Token, self.peeked) - else: - next = self.next_token() - self.used.append(next) - return next + assert self.peeked is not None + self.used.append(self.peeked) + return self.peeked + next_ = self.next_token() + self.used.append(next_) + return next_ def peek(self) -> Token: if not self._peeking: self.peeked = self.next_token() self._peeking = True - return typing.cast(Token, self.peeked) + assert self.peeked is not None + return self.peeked def next_ident(self) -> str: - next = self.next() - if next.type != "IDENT": - raise SelectorSyntaxError("Expected ident, got %s" % (next,)) - return typing.cast(str, next.value) - - def next_ident_or_star(self) -> Optional[str]: - next = self.next() - if next.type == "IDENT": - return next.value - elif next == ("DELIM", "*"): + next_ = self.next() + if next_.type != "IDENT": + raise SelectorSyntaxError(f"Expected ident, got {next_}") + return cast("str", next_.value) + + def next_ident_or_star(self) -> str | None: + next_ = self.next() + if next_.type == "IDENT": + return next_.value + if next_ == ("DELIM", "*"): return None - else: - raise SelectorSyntaxError("Expected ident or '*', got %s" % (next,)) + raise SelectorSyntaxError(f"Expected ident or '*', got {next_}") def skip_whitespace(self) -> None: peek = self.peek() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index fd28c47..96eac3f 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -1,52 +1,46 @@ -# -*- coding: utf-8 -*- """ - cssselect.xpath - =============== +cssselect.xpath +=============== - Translation of parsed CSS selectors to XPath expressions. +Translation of parsed CSS selectors to XPath expressions. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. """ +from __future__ import annotations + import re -import typing -import warnings -from typing import Optional +from typing import TYPE_CHECKING, cast from cssselect.parser import ( - parse, - parse_series, - PseudoElement, - Selector, - SelectorError, - Tree, - Element, - Hash, + Attrib, Class, + CombinedSelector, + Element, Function, - Pseudo, - Attrib, + Hash, + Matching, Negation, + Pseudo, + PseudoElement, Relation, - Matching, + Selector, + SelectorError, SpecificityAdjustment, - CombinedSelector, + Tree, + parse, + parse_series, ) +if TYPE_CHECKING: + from collections.abc import Callable -@typing.no_type_check -def _unicode_safe_getattr(obj, name, default=None): - warnings.warn( - "_unicode_safe_getattr is deprecated and will be removed in the" - " next release, use getattr() instead", - DeprecationWarning, - stacklevel=2, - ) - return getattr(obj, name, default) + # typing.Self requires Python 3.11 + from typing_extensions import Self class ExpressionError(SelectorError, RuntimeError): @@ -58,7 +52,11 @@ class ExpressionError(SelectorError, RuntimeError): class XPathExpr: def __init__( - self, path: str = "", element: str = "*", condition: str = "", star_prefix: bool = False + self, + path: str = "", + element: str = "*", + condition: str = "", + star_prefix: bool = False, ) -> None: self.path = path self.element = element @@ -67,15 +65,15 @@ def __init__( def __str__(self) -> str: path = str(self.path) + str(self.element) if self.condition: - path += "[%s]" % self.condition + path += f"[{self.condition}]" return path def __repr__(self) -> str: - return "%s[%s]" % (self.__class__.__name__, self) + return f"{self.__class__.__name__}[{self}]" - def add_condition(self, condition: str, conjuction: str = "and") -> "XPathExpr": + def add_condition(self, condition: str, conjuction: str = "and") -> Self: if self.condition: - self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition) + self.condition = f"({self.condition}) {conjuction} ({condition})" else: self.condition = condition return self @@ -84,7 +82,7 @@ def add_name_test(self) -> None: if self.element == "*": # We weren't doing a test anyway return - self.add_condition("name() = %s" % GenericTranslator.xpath_literal(self.element)) + self.add_condition(f"name() = {GenericTranslator.xpath_literal(self.element)}") self.element = "*" def add_star_prefix(self) -> None: @@ -97,17 +95,19 @@ def add_star_prefix(self) -> None: def join( self, combiner: str, - other: "XPathExpr", - closing_combiner: Optional[str] = None, + other: XPathExpr, + closing_combiner: str | None = None, has_inner_condition: bool = False, - ) -> "XPathExpr": + ) -> Self: path = str(self) + combiner # Any "star prefix" is redundant when joining. if other.path != "*/": path += other.path self.path = path if not has_inner_condition: - self.element = other.element + closing_combiner if closing_combiner else other.element + self.element = ( + other.element + closing_combiner if closing_combiner else other.element + ) self.condition = other.condition else: self.element = other.element @@ -252,14 +252,16 @@ def selector_to_xpath( """ tree = getattr(selector, "parsed_tree", None) if not tree: - raise TypeError("Expected a parsed selector, got %r" % (selector,)) + raise TypeError(f"Expected a parsed selector, got {selector!r}") xpath = self.xpath(tree) assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return' if translate_pseudo_elements and selector.pseudo_element: xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) return (prefix or "") + str(xpath) - def xpath_pseudo_element(self, xpath: XPathExpr, pseudo_element: PseudoElement) -> XPathExpr: + def xpath_pseudo_element( + self, xpath: XPathExpr, pseudo_element: PseudoElement + ) -> XPathExpr: """Translate a pseudo-element. Defaults to not supporting pseudo-elements at all, @@ -272,57 +274,61 @@ def xpath_pseudo_element(self, xpath: XPathExpr, pseudo_element: PseudoElement) def xpath_literal(s: str) -> str: s = str(s) if "'" not in s: - s = "'%s'" % s + s = f"'{s}'" elif '"' not in s: - s = '"%s"' % s + s = f'"{s}"' else: - s = "concat(%s)" % ",".join( - [ - (("'" in part) and '"%s"' or "'%s'") % part - for part in split_at_single_quotes(s) - if part - ] - ) + parts_quoted = [ + f'"{part}"' if "'" in part else f"'{part}'" + for part in split_at_single_quotes(s) + if part + ] + s = "concat({})".format(",".join(parts_quoted)) return s def xpath(self, parsed_selector: Tree) -> XPathExpr: """Translate any parsed selector object.""" type_name = type(parsed_selector).__name__ - method = getattr(self, "xpath_%s" % type_name.lower(), None) + method = cast( + "Callable[[Tree], XPathExpr] | None", + getattr(self, f"xpath_{type_name.lower()}", None), + ) if method is None: - raise ExpressionError("%s is not supported." % type_name) - return typing.cast(XPathExpr, method(parsed_selector)) + raise ExpressionError(f"{type_name} is not supported.") + return method(parsed_selector) # Dispatched by parsed object type def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr: """Translate a combined selector.""" combinator = self.combinator_mapping[combined.combinator] - method = getattr(self, "xpath_%s_combinator" % combinator) - return typing.cast( - XPathExpr, method(self.xpath(combined.selector), self.xpath(combined.subselector)) + method = cast( + "Callable[[XPathExpr, XPathExpr], XPathExpr]", + getattr(self, f"xpath_{combinator}_combinator"), ) + return method(self.xpath(combined.selector), self.xpath(combined.subselector)) def xpath_negation(self, negation: Negation) -> XPathExpr: xpath = self.xpath(negation.selector) sub_xpath = self.xpath(negation.subselector) sub_xpath.add_name_test() if sub_xpath.condition: - return xpath.add_condition("not(%s)" % sub_xpath.condition) - else: - return xpath.add_condition("0") + return xpath.add_condition(f"not({sub_xpath.condition})") + return xpath.add_condition("0") def xpath_relation(self, relation: Relation) -> XPathExpr: xpath = self.xpath(relation.selector) combinator = relation.combinator subselector = relation.subselector right = self.xpath(subselector.parsed_tree) - method = getattr( - self, - "xpath_relation_%s_combinator" - % self.combinator_mapping[typing.cast(str, combinator.value)], + method = cast( + "Callable[[XPathExpr, XPathExpr], XPathExpr]", + getattr( + self, + f"xpath_relation_{self.combinator_mapping[cast('str', combinator.value)]}_combinator", + ), ) - return typing.cast(XPathExpr, method(xpath, right)) + return method(xpath, right) def xpath_matching(self, matching: Matching) -> XPathExpr: xpath = self.xpath(matching.selector) @@ -344,44 +350,53 @@ def xpath_specificityadjustment(self, matching: SpecificityAdjustment) -> XPathE def xpath_function(self, function: Function) -> XPathExpr: """Translate a functional pseudo-class.""" - method_name = "xpath_%s_function" % function.name.replace("-", "_") - method = getattr(self, method_name, None) + method_name = "xpath_{}_function".format(function.name.replace("-", "_")) + method = cast( + "Callable[[XPathExpr, Function], XPathExpr] | None", + getattr(self, method_name, None), + ) if not method: - raise ExpressionError("The pseudo-class :%s() is unknown" % function.name) - return typing.cast(XPathExpr, method(self.xpath(function.selector), function)) + raise ExpressionError(f"The pseudo-class :{function.name}() is unknown") + return method(self.xpath(function.selector), function) def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr: """Translate a pseudo-class.""" - method_name = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_") - method = getattr(self, method_name, None) + method_name = "xpath_{}_pseudo".format(pseudo.ident.replace("-", "_")) + method = cast( + "Callable[[XPathExpr], XPathExpr] | None", + getattr(self, method_name, None), + ) if not method: # TODO: better error message for pseudo-elements? - raise ExpressionError("The pseudo-class :%s is unknown" % pseudo.ident) - return typing.cast(XPathExpr, method(self.xpath(pseudo.selector))) + raise ExpressionError(f"The pseudo-class :{pseudo.ident} is unknown") + return method(self.xpath(pseudo.selector)) def xpath_attrib(self, selector: Attrib) -> XPathExpr: """Translate an attribute selector.""" operator = self.attribute_operator_mapping[selector.operator] - method = getattr(self, "xpath_attrib_%s" % operator) + method = cast( + "Callable[[XPathExpr, str, str | None], XPathExpr]", + getattr(self, f"xpath_attrib_{operator}"), + ) if self.lower_case_attribute_names: name = selector.attrib.lower() else: name = selector.attrib safe = is_safe_name(name) if selector.namespace: - name = "%s:%s" % (selector.namespace, name) + name = f"{selector.namespace}:{name}" safe = safe and is_safe_name(selector.namespace) if safe: attrib = "@" + name else: - attrib = "attribute::*[name() = %s]" % self.xpath_literal(name) + attrib = f"attribute::*[name() = {self.xpath_literal(name)}]" if selector.value is None: value = None elif self.lower_case_attribute_values: - value = typing.cast(str, selector.value.value).lower() + value = cast("str", selector.value.value).lower() else: value = selector.value.value - return typing.cast(XPathExpr, method(self.xpath(selector.selector), attrib, value)) + return method(self.xpath(selector.selector), attrib, value) def xpath_class(self, class_selector: Class) -> XPathExpr: """Translate a class selector.""" @@ -407,7 +422,7 @@ def xpath_element(self, selector: Element) -> XPathExpr: if selector.namespace: # Namespace prefixes are case-sensitive. # http://www.w3.org/TR/css3-namespace/#prefixes - element = "%s:%s" % (selector.namespace, element) + element = f"{selector.namespace}:{element}" safe = safe and bool(is_safe_name(selector.namespace)) xpath = self.xpathexpr_cls(element=element) if not safe: @@ -416,7 +431,9 @@ def xpath_element(self, selector: Element) -> XPathExpr: # CombinedSelector: dispatch by combinator - def xpath_descendant_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: + def xpath_descendant_combinator( + self, left: XPathExpr, right: XPathExpr + ) -> XPathExpr: """right is a child, grand-child or further descendant of left""" return left.join("/descendant-or-self::*/", right) @@ -424,21 +441,31 @@ def xpath_child_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr """right is an immediate child of left""" return left.join("/", right) - def xpath_direct_adjacent_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: + def xpath_direct_adjacent_combinator( + self, left: XPathExpr, right: XPathExpr + ) -> XPathExpr: """right is a sibling immediately after left""" xpath = left.join("/following-sibling::", right) xpath.add_name_test() return xpath.add_condition("position() = 1") - def xpath_indirect_adjacent_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: + def xpath_indirect_adjacent_combinator( + self, left: XPathExpr, right: XPathExpr + ) -> XPathExpr: """right is a sibling after left, immediately or not""" return left.join("/following-sibling::", right) - def xpath_relation_descendant_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: + def xpath_relation_descendant_combinator( + self, left: XPathExpr, right: XPathExpr + ) -> XPathExpr: """right is a child, grand-child or further descendant of left; select left""" - return left.join("[descendant::", right, closing_combiner="]", has_inner_condition=True) + return left.join( + "[descendant::", right, closing_combiner="]", has_inner_condition=True + ) - def xpath_relation_child_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr: + def xpath_relation_child_combinator( + self, left: XPathExpr, right: XPathExpr + ) -> XPathExpr: """right is an immediate child of left; select left""" return left.join("[./", right, closing_combiner="]") @@ -446,10 +473,9 @@ def xpath_relation_direct_adjacent_combinator( self, left: XPathExpr, right: XPathExpr ) -> XPathExpr: """right is a sibling immediately after left; select left""" - xpath = left.add_condition( - "following-sibling::*[(name() = '{}') and (position() = 1)]".format(right.element) + return left.add_condition( + f"following-sibling::*[(name() = '{right.element}') and (position() = 1)]" ) - return xpath def xpath_relation_indirect_adjacent_combinator( self, left: XPathExpr, right: XPathExpr @@ -460,12 +486,16 @@ def xpath_relation_indirect_adjacent_combinator( # Function: dispatch by function/pseudo-class name def xpath_nth_child_function( - self, xpath: XPathExpr, function: Function, last: bool = False, add_name_test: bool = True + self, + xpath: XPathExpr, + function: Function, + last: bool = False, + add_name_test: bool = True, ) -> XPathExpr: try: a, b = parse_series(function.arguments) - except ValueError: - raise ExpressionError("Invalid series: '%r'" % function.arguments) + except ValueError as ex: + raise ExpressionError(f"Invalid series: '{function.arguments!r}'") from ex # From https://www.w3.org/TR/css3-selectors/#structural-pseudos: # @@ -527,23 +557,20 @@ def xpath_nth_child_function( # `add_name_test` boolean is inverted and somewhat counter-intuitive: # # nth_of_type() calls nth_child(add_name_test=False) - if add_name_test: - nodetest = "*" - else: - nodetest = "%s" % xpath.element + nodetest = "*" if add_name_test else f"{xpath.element}" # count siblings before or after the element if not last: - siblings_count = "count(preceding-sibling::%s)" % nodetest + siblings_count = f"count(preceding-sibling::{nodetest})" else: - siblings_count = "count(following-sibling::%s)" % nodetest + siblings_count = f"count(following-sibling::{nodetest})" # special case of fixed position: nth-*(0n+b) # if a == 0: # ~~~~~~~~~~ # count(***-sibling::***) = b-1 if a == 0: - return xpath.add_condition("%s = %s" % (siblings_count, b_min_1)) + return xpath.add_condition(f"{siblings_count} = {b_min_1}") expressions = [] @@ -552,12 +579,12 @@ def xpath_nth_child_function( # so if a>0, and (b-1)<=0, an "n" exists to satisfy this, # therefore, the predicate is only interesting if (b-1)>0 if b_min_1 > 0: - expressions.append("%s >= %s" % (siblings_count, b_min_1)) + expressions.append(f"{siblings_count} >= {b_min_1}") else: # if a<0, and (b-1)<0, no "n" satisfies this, # this is tested above as an early exist condition # otherwise, - expressions.append("%s <= %s" % (siblings_count, b_min_1)) + expressions.append(f"{siblings_count} <= {b_min_1}") # operations modulo 1 or -1 are simpler, one only needs to verify: # @@ -580,48 +607,56 @@ def xpath_nth_child_function( b_neg = (-b_min_1) % abs(a) if b_neg != 0: - b_neg_as_str = "+%s" % b_neg - left = "(%s %s)" % (left, b_neg_as_str) + left = f"({left} +{b_neg})" - expressions.append("%s mod %s = 0" % (left, a)) + expressions.append(f"{left} mod {a} = 0") - if len(expressions) > 1: - template = "(%s)" - else: - template = "%s" - xpath.add_condition(" and ".join(template % expression for expression in expressions)) + template = "(%s)" if len(expressions) > 1 else "%s" + xpath.add_condition( + " and ".join(template % expression for expression in expressions) + ) return xpath - def xpath_nth_last_child_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: + def xpath_nth_last_child_function( + self, xpath: XPathExpr, function: Function + ) -> XPathExpr: return self.xpath_nth_child_function(xpath, function, last=True) - def xpath_nth_of_type_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: + def xpath_nth_of_type_function( + self, xpath: XPathExpr, function: Function + ) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:nth-of-type() is not implemented") return self.xpath_nth_child_function(xpath, function, add_name_test=False) - def xpath_nth_last_of_type_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: + def xpath_nth_last_of_type_function( + self, xpath: XPathExpr, function: Function + ) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:nth-of-type() is not implemented") - return self.xpath_nth_child_function(xpath, function, last=True, add_name_test=False) + return self.xpath_nth_child_function( + xpath, function, last=True, add_name_test=False + ) - def xpath_contains_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: + def xpath_contains_function( + self, xpath: XPathExpr, function: Function + ) -> XPathExpr: # Defined there, removed in later drafts: # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :contains(), got %r" % function.arguments + f"Expected a single string or ident for :contains(), got {function.arguments!r}" ) - value = typing.cast(str, function.arguments[0].value) - return xpath.add_condition("contains(., %s)" % self.xpath_literal(value)) + value = cast("str", function.arguments[0].value) + return xpath.add_condition(f"contains(., {self.xpath_literal(value)})") def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :lang(), got %r" % function.arguments + f"Expected a single string or ident for :lang(), got {function.arguments!r}" ) - value = typing.cast(str, function.arguments[0].value) - return xpath.add_condition("lang(%s)" % (self.xpath_literal(value))) + value = cast("str", function.arguments[0].value) + return xpath.add_condition(f"lang({self.xpath_literal(value)})") # Pseudo: dispatch by pseudo-class name @@ -645,12 +680,12 @@ def xpath_last_child_pseudo(self, xpath: XPathExpr) -> XPathExpr: def xpath_first_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:first-of-type is not implemented") - return xpath.add_condition("count(preceding-sibling::%s) = 0" % xpath.element) + return xpath.add_condition(f"count(preceding-sibling::{xpath.element}) = 0") def xpath_last_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:last-of-type is not implemented") - return xpath.add_condition("count(following-sibling::%s) = 0" % xpath.element) + return xpath.add_condition(f"count(following-sibling::{xpath.element}) = 0") def xpath_only_child_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition("count(parent::*/child::*) = 1") @@ -658,7 +693,7 @@ def xpath_only_child_pseudo(self, xpath: XPathExpr) -> XPathExpr: def xpath_only_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:only-of-type is not implemented") - return xpath.add_condition("count(parent::*/child::%s) = 1" % xpath.element) + return xpath.add_condition(f"count(parent::*/child::{xpath.element}) = 1") def xpath_empty_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition("not(*) and not(string-length())") @@ -679,81 +714,85 @@ def pseudo_never_matches(self, xpath: XPathExpr) -> XPathExpr: # Attrib: dispatch by attribute operator - def xpath_attrib_exists(self, xpath: XPathExpr, name: str, value: Optional[str]) -> XPathExpr: + def xpath_attrib_exists( + self, xpath: XPathExpr, name: str, value: str | None + ) -> XPathExpr: assert not value xpath.add_condition(name) return xpath - def xpath_attrib_equals(self, xpath: XPathExpr, name: str, value: Optional[str]) -> XPathExpr: + def xpath_attrib_equals( + self, xpath: XPathExpr, name: str, value: str | None + ) -> XPathExpr: assert value is not None - xpath.add_condition("%s = %s" % (name, self.xpath_literal(value))) + xpath.add_condition(f"{name} = {self.xpath_literal(value)}") return xpath def xpath_attrib_different( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert value is not None # FIXME: this seems like a weird hack... if value: - xpath.add_condition("not(%s) or %s != %s" % (name, name, self.xpath_literal(value))) + xpath.add_condition(f"not({name}) or {name} != {self.xpath_literal(value)}") else: - xpath.add_condition("%s != %s" % (name, self.xpath_literal(value))) + xpath.add_condition(f"{name} != {self.xpath_literal(value)}") return xpath def xpath_attrib_includes( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value and is_non_whitespace(value): + arg = self.xpath_literal(" " + value + " ") xpath.add_condition( - "%s and contains(concat(' ', normalize-space(%s), ' '), %s)" - % (name, name, self.xpath_literal(" " + value + " ")) + f"{name} and contains(concat(' ', normalize-space({name}), ' '), {arg})" ) else: xpath.add_condition("0") return xpath def xpath_attrib_dashmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert value is not None + arg = self.xpath_literal(value) + arg_dash = self.xpath_literal(value + "-") # Weird, but true... xpath.add_condition( - "%s and (%s = %s or starts-with(%s, %s))" - % (name, name, self.xpath_literal(value), name, self.xpath_literal(value + "-")) + f"{name} and ({name} = {arg} or starts-with({name}, {arg_dash}))" ) return xpath def xpath_attrib_prefixmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value: xpath.add_condition( - "%s and starts-with(%s, %s)" % (name, name, self.xpath_literal(value)) + f"{name} and starts-with({name}, {self.xpath_literal(value)})" ) else: xpath.add_condition("0") return xpath def xpath_attrib_suffixmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value: # Oddly there is a starts-with in XPath 1.0, but not ends-with xpath.add_condition( - "%s and substring(%s, string-length(%s)-%s) = %s" - % (name, name, name, len(value) - 1, self.xpath_literal(value)) + f"{name} and substring({name}, string-length({name})-{len(value) - 1}) = {self.xpath_literal(value)}" ) else: xpath.add_condition("0") return xpath def xpath_attrib_substringmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value: # Attribute selectors are case sensitive xpath.add_condition( - "%s and contains(%s, %s)" % (name, name, self.xpath_literal(value)) + f"{name} and contains({name}, {self.xpath_literal(value)})" ) else: xpath.add_condition("0") @@ -786,7 +825,7 @@ def __init__(self, xhtml: bool = False) -> None: self.lower_case_element_names = True self.lower_case_attribute_names = True - def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # FIXME: is this really all the elements? return xpath.add_condition( "(@selected and name(.) = 'option') or " @@ -798,19 +837,20 @@ def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :lang(), got %r" % function.arguments + f"Expected a single string or ident for :lang(), got {function.arguments!r}" ) value = function.arguments[0].value assert value + arg = self.xpath_literal(value.lower() + "-") return xpath.add_condition( "ancestor-or-self::*[@lang][1][starts-with(concat(" # XPath 1.0 has no lower-case function... - "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " + f"translate(@{self.lang_attribute}, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " "'abcdefghijklmnopqrstuvwxyz'), " - "'-'), %s)]" % (self.lang_attribute, self.xpath_literal(value.lower() + "-")) + f"'-'), {arg})]" ) - def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition( "@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area')" ) @@ -818,7 +858,7 @@ def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore # Links are never visited, the implementation for :visited is the same # as in GenericTranslator - def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # http://www.w3.org/TR/html5/section-index.html#attributes-1 return xpath.add_condition( """ @@ -848,7 +888,7 @@ def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore # FIXME: in the second half, add "and is not a descendant of that # fieldset element's first legend element child, if any." - def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # http://www.w3.org/TR/html5/section-index.html#attributes-1 return xpath.add_condition( """ diff --git a/docs/conf.py b/docs/conf.py index 5524479..da3f023 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # cssselect documentation build configuration file, created by # sphinx-quickstart on Tue Mar 27 14:20:34 2012. @@ -12,217 +11,210 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os, re +import re +from pathlib import Path # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +# sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', - 'sphinx.ext.doctest'] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.intersphinx", "sphinx.ext.doctest"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = {".rst": "restructuredtext"} # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'cssselect' -copyright = '2012-2017, Simon Sapin, Scrapy developers' +project = "cssselect" +project_copyright = "2012-2017, Simon Sapin, Scrapy developers" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The full version, including alpha/beta/rc tags. -with open(os.path.join(os.path.dirname(__file__), '..', 'cssselect', '__init__.py')) as init_file: - init_py = init_file.read() +init_py = (Path(__file__).parent.parent / "cssselect" / "__init__.py").read_text() release = re.search('VERSION = "([^"]+)"', init_py).group(1) # The short X.Y version. -version = release.rstrip('dev') +version = release.rstrip("dev") # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'classic' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -#html_static_path = ['_static'] +# html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'cssselectdoc' +htmlhelp_basename = "cssselectdoc" # -- Options for LaTeX output -------------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'cssselect.tex', 'cssselect Documentation', - 'Simon Sapin', 'manual'), + ("index", "cssselect.tex", "cssselect Documentation", "Simon Sapin", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'cssselect', 'cssselect Documentation', - ['Simon Sapin'], 1) -] +man_pages = [("index", "cssselect", "cssselect Documentation", ["Simon Sapin"], 1)] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ @@ -231,23 +223,29 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'cssselect', 'cssselect Documentation', - 'Simon Sapin', 'cssselect', 'One line description of project.', - 'Miscellaneous'), + ( + "index", + "cssselect", + "cssselect Documentation", + "Simon Sapin", + "cssselect", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} +intersphinx_mapping = {"python": ("https://docs.python.org/3", None)} # --- Nitpicking options ------------------------------------------------------ @@ -255,5 +253,5 @@ nitpicky = True nitpick_ignore = [ # explicitly not a part of the public API - ('py:class', 'cssselect.parser.Token'), + ("py:class", "Token"), ] diff --git a/docs/conftest.py b/docs/conftest.py index 9d16bb7..a71d108 100644 --- a/docs/conftest.py +++ b/docs/conftest.py @@ -3,6 +3,7 @@ from sybil import Sybil from sybil.parsers.doctest import DocTestParser from sybil.parsers.skip import skip + try: # sybil 3.0.0+ from sybil.parsers.codeblock import PythonCodeBlockParser @@ -13,8 +14,8 @@ pytest_collect_file = Sybil( parsers=[ DocTestParser(optionflags=ELLIPSIS | NORMALIZE_WHITESPACE), - PythonCodeBlockParser(future_imports=['print_function']), + PythonCodeBlockParser(future_imports=["print_function"]), skip, ], - pattern='*.rst', + pattern="*.rst", ).pytest() diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..21cb2eb --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +sphinx==8.2.3 +sphinx-rtd-theme==3.0.2 diff --git a/pylintrc b/pylintrc deleted file mode 100644 index e35425e..0000000 --- a/pylintrc +++ /dev/null @@ -1,32 +0,0 @@ -[MASTER] -persistent=no - -[MESSAGES CONTROL] -disable=assignment-from-no-return, - c-extension-no-member, - consider-using-f-string, - consider-using-in, - fixme, - inconsistent-return-statements, - invalid-name, - missing-class-docstring, - missing-function-docstring, - missing-module-docstring, - multiple-imports, - no-else-return, - no-member, - raise-missing-from, - redefined-builtin, - redefined-outer-name, - too-few-public-methods, - too-many-arguments, - too-many-branches, - too-many-function-args, - too-many-lines, - too-many-public-methods, - too-many-statements, - undefined-variable, - unidiomatic-typecheck, - unspecified-encoding, - unused-argument, - unused-import, diff --git a/pyproject.toml b/pyproject.toml index 57a5583..c7c54a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,239 @@ -[tool.black] -line-length = 99 +[build-system] +build-backend = "hatchling.build" +requires = ["hatchling>=1.27.0"] + +[project] +name = "cssselect" +license = "BSD-3-Clause" +license-files = ["LICENSE", "AUTHORS"] +description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0" +readme = "README.rst" +authors = [{ name = "Ian Bicking", email = "ianb@colorstudy.com" }] +maintainers = [{ name = "Paul Tremberth", email = "paul.tremberth@gmail.com" }] +requires-python = ">=3.10" +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dynamic = ["version"] + +[project.urls] +"Homepage" = "https://github.com/scrapy/cssselect" + +[tool.hatch.version] +path = "cssselect/__init__.py" + +[tool.hatch.build.targets.sdist] +include = [ + "/cssselect", + "/docs", + "/tests", + "/CHANGES", + "/README.rst", + "/tox.ini", +] +exclude = [ + "/docs/_build", +] + +[tool.hatch.build.targets.wheel] +packages = ["cssselect"] + +[tool.bumpversion] +current_version = "1.4.0" +commit = true +tag = true + +[[tool.bumpversion.files]] +filename = "cssselect/__init__.py" + +[[tool.bumpversion.files]] +filename = "CHANGES" +search = "^Unreleased\\.$" +replace = "Released on {now:%Y-%m-%d}." +regex = true + +[tool.coverage.run] +branch = true +source = ["cssselect"] + +[tool.coverage.report] +exclude_also = [ + "def __repr__", + "if sys.version_info", + "if __name__ == '__main__':", +] + +[tool.mypy] +strict = true + +[tool.pylint.MASTER] +persistent = "no" +extension-pkg-allow-list = ["lxml"] + +[tool.pylint."MESSAGES CONTROL"] +enable = [ + "useless-suppression", +] +disable = [ + "consider-using-f-string", + "fixme", + "invalid-name", + "line-too-long", + "missing-class-docstring", + "missing-function-docstring", + "missing-module-docstring", + "no-member", + "not-callable", + "redefined-builtin", + "redefined-outer-name", + "too-few-public-methods", + "too-many-arguments", + "too-many-branches", + "too-many-function-args", + "too-many-lines", + "too-many-locals", + "too-many-positional-arguments", + "too-many-public-methods", + "too-many-statements", + "unused-argument", +] + +[tool.pytest.ini_options] +testpaths = ["tests"] + +[tool.ruff.lint] +extend-select = [ + # flake8-builtins + "A", + # flake8-async + "ASYNC", + # flake8-bugbear + "B", + # flake8-comprehensions + "C4", + # flake8-commas + "COM", + # pydocstyle + "D", + # flake8-future-annotations + "FA", + # flynt + "FLY", + # refurb + "FURB", + # isort + "I", + # flake8-implicit-str-concat + "ISC", + # flake8-logging + "LOG", + # Perflint + "PERF", + # pygrep-hooks + "PGH", + # flake8-pie + "PIE", + # pylint + "PL", + # flake8-pytest-style + "PT", + # flake8-use-pathlib + "PTH", + # flake8-pyi + "PYI", + # flake8-quotes + "Q", + # flake8-return + "RET", + # flake8-raise + "RSE", + # Ruff-specific rules + "RUF", + # flake8-bandit + "S", + # flake8-simplify + "SIM", + # flake8-slots + "SLOT", + # flake8-debugger + "T10", + # flake8-type-checking + "TC", + # pyupgrade + "UP", + # pycodestyle warnings + "W", + # flake8-2020 + "YTT", +] +ignore = [ + # Trailing comma missing + "COM812", + # Missing docstring in public module + "D100", + # Missing docstring in public class + "D101", + # Missing docstring in public method + "D102", + # Missing docstring in public function + "D103", + # Missing docstring in public package + "D104", + # Missing docstring in magic method + "D105", + # Missing docstring in public nested class + "D106", + # Missing docstring in __init__ + "D107", + # One-line docstring should fit on one line with quotes + "D200", + # No blank lines allowed after function docstring + "D202", + # 1 blank line required between summary line and description + "D205", + # Multi-line docstring closing quotes should be on a separate line + "D209", + # First line should end with a period + "D400", + # First line should be in imperative mood; try rephrasing + "D401", + # First line should not be the function's "signature" + "D402", + # First word of the first line should be properly capitalized + "D403", + # Too many return statements + "PLR0911", + # Too many branches + "PLR0912", + # Too many arguments in function definition + "PLR0913", + # Too many statements + "PLR0915", + # Magic value used in comparison + "PLR2004", + # String contains ambiguous {}. + "RUF001", + # Docstring contains ambiguous {}. + "RUF002", + # Comment contains ambiguous {}. + "RUF003", + # Mutable class attributes should be annotated with `typing.ClassVar` + "RUF012", + # Use of `assert` detected + "S101", +] + +[tool.ruff.lint.isort] +split-on-trailing-comma = false + +[tool.ruff.lint.pydocstyle] +convention = "pep257" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index b8c93b1..0000000 --- a/setup.cfg +++ /dev/null @@ -1,13 +0,0 @@ -[build_sphinx] -source-dir = docs -build-dir = docs/_build -#all_files = 1 - -[upload_sphinx] # Sphinx-PyPI-upload -upload-dir = docs/_build/html - -[tool:pytest] -testpaths = tests - -[bdist_wheel] -universal = 1 diff --git a/setup.py b/setup.py deleted file mode 100644 index f7b51eb..0000000 --- a/setup.py +++ /dev/null @@ -1,46 +0,0 @@ -# -*- coding: utf-8 -*- - -import re -import os.path - -from setuptools import setup - - -ROOT = os.path.dirname(__file__) -with open(os.path.join(ROOT, "README.rst")) as readme_file: - README = readme_file.read() -with open(os.path.join(ROOT, "cssselect", "__init__.py")) as init_file: - INIT_PY = init_file.read() -VERSION = re.search('VERSION = "([^"]+)"', INIT_PY).group(1) - - -setup( - name="cssselect", - version=VERSION, - author="Ian Bicking", - author_email="ianb@colorstudy.com", - maintainer="Paul Tremberth", - maintainer_email="paul.tremberth@gmail.com", - description="cssselect parses CSS3 Selectors and translates them to XPath 1.0", - long_description=README, - url="https://github.com/scrapy/cssselect", - license="BSD", - packages=["cssselect"], - test_suite="cssselect.tests", - package_data={ - "cssselect": ["py.typed"], - }, - include_package_data=True, - python_requires=">=3.8", - classifiers=[ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - ], -) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 2c9e94c..dc67bb7 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -1,49 +1,58 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """ - Tests for cssselect - =================== +Tests for cssselect +=================== - These tests can be run either by py.test or by the standard library's - unittest. They use plain ``assert`` statements and do little reporting - themselves in case of failure. +These tests can be run either by py.test or by the standard library's +unittest. They use plain ``assert`` statements and do little reporting +themselves in case of failure. - Use py.test to get fancy error reporting and assert introspection. +Use py.test to get fancy error reporting and assert introspection. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. """ +from __future__ import annotations + import sys import typing import unittest -from typing import List, Optional, Sequence, Tuple +from typing import TYPE_CHECKING +import pytest from lxml import etree, html + from cssselect import ( - parse, + ExpressionError, GenericTranslator, HTMLTranslator, SelectorSyntaxError, - ExpressionError, + parse, ) from cssselect.parser import ( - tokenize, - parse_series, - PseudoElement, - FunctionalPseudoElement, Function, + FunctionalPseudoElement, + PseudoElement, Token, + parse_series, + tokenize, ) from cssselect.xpath import XPathExpr +if TYPE_CHECKING: + from collections.abc import Sequence + class TestCssselect(unittest.TestCase): def test_tokenizer(self) -> None: - tokens = [str(item) for item in tokenize(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)')] + tokens = [ + str(item) + for item in tokenize(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)') + ] assert tokens == [ "", "", @@ -66,13 +75,13 @@ def test_tokenizer(self) -> None: ] def test_parser(self) -> None: - def repr_parse(css: str) -> List[str]: + def repr_parse(css: str) -> list[str]: selectors = parse(css) for selector in selectors: assert selector.pseudo_element is None - return [repr(selector.parsed_tree).replace("(u'", "('") for selector in selectors] + return [repr(selector.parsed_tree) for selector in selectors] - def parse_many(first: str, *others: str) -> List[str]: + def parse_many(first: str, *others: str) -> list[str]: result = repr_parse(first) for other in others: assert repr_parse(other) == result @@ -95,7 +104,9 @@ def parse_many(first: str, *others: str) -> List[str]: "div\r>\n\n\n.foo", "div\f>\f.foo", ) == ["CombinedSelector[Element[div] > Class[Element[*].foo]]"] - assert parse_many("td.foo,.bar", "td.foo, .bar", "td.foo\t\r\n\f ,\t\r\n\f .bar") == [ + assert parse_many( + "td.foo,.bar", "td.foo, .bar", "td.foo\t\r\n\f ,\t\r\n\f .bar" + ) == [ "Class[Element[td].foo]", "Class[Element[*].bar]", ] @@ -123,11 +134,15 @@ def parse_many(first: str, *others: str) -> List[str]: assert parse_many("a[hreflang |= 'en']", "a[hreflang|=en]") == [ "Attrib[Element[a][hreflang |= 'en']]" ] - assert parse_many("div:nth-child(10)") == ["Function[Element[div]:nth-child(['10'])]"] + assert parse_many("div:nth-child(10)") == [ + "Function[Element[div]:nth-child(['10'])]" + ] assert parse_many(":nth-child(2n+2)") == [ "Function[Element[*]:nth-child(['2', 'n', '+2'])]" ] - assert parse_many("div:nth-of-type(10)") == ["Function[Element[div]:nth-of-type(['10'])]"] + assert parse_many("div:nth-of-type(10)") == [ + "Function[Element[div]:nth-of-type(['10'])]" + ] assert parse_many("div div:nth-of-type(10) .aclass") == [ "CombinedSelector[CombinedSelector[Element[div] " "Function[Element[div]:nth-of-type(['10'])]] " @@ -135,7 +150,9 @@ def parse_many(first: str, *others: str) -> List[str]: ] assert parse_many("label:only") == ["Pseudo[Element[label]:only]"] assert parse_many("a:lang(fr)") == ["Function[Element[a]:lang(['fr'])]"] - assert parse_many('div:contains("foo")') == ["Function[Element[div]:contains(['foo'])]"] + assert parse_many('div:contains("foo")') == [ + "Function[Element[div]:contains(['foo'])]" + ] assert parse_many("div#foobar") == ["Hash[Element[div]#foobar]"] assert parse_many("div:not(div.foo)") == [ "Negation[Element[div]:not(Class[Element[div].foo])]" @@ -170,18 +187,18 @@ def parse_many(first: str, *others: str) -> List[str]: ] def test_pseudo_elements(self) -> None: - def parse_pseudo(css: str) -> List[Tuple[str, Optional[str]]]: - result: List[Tuple[str, Optional[str]]] = [] + def parse_pseudo(css: str) -> list[tuple[str, str | None]]: + result: list[tuple[str, str | None]] = [] for selector in parse(css): pseudo = selector.pseudo_element pseudo = str(pseudo) if pseudo else pseudo # No Symbol here - assert pseudo is None or type(pseudo) is str - selector_as_str = repr(selector.parsed_tree).replace("(u'", "('") + assert pseudo is None or isinstance(pseudo, str) + selector_as_str = repr(selector.parsed_tree) result.append((selector_as_str, pseudo)) return result - def parse_one(css: str) -> Tuple[str, Optional[str]]: + def parse_one(css: str) -> tuple[str, str | None]: result = parse_pseudo(css) assert len(result) == 1 return result[0] @@ -242,7 +259,7 @@ def test_pseudo_repr(css: str) -> str: # Special cases for CSS 2.1 pseudo-elements are ignored by default for pseudo in ("after", "before", "first-line", "first-letter"): - (selector,) = parse("e:%s" % pseudo) + (selector,) = parse(f"e:{pseudo}") assert selector.pseudo_element == pseudo assert GenericTranslator().selector_to_xpath(selector, prefix="") == "e" @@ -252,9 +269,8 @@ def test_pseudo_repr(css: str) -> str: (selector,) = parse("e::foo") assert selector.pseudo_element == "foo" assert tr.selector_to_xpath(selector, prefix="") == "e" - self.assertRaises( - ExpressionError, tr.selector_to_xpath, selector, translate_pseudo_elements=True - ) + with pytest.raises(ExpressionError): + tr.selector_to_xpath(selector, translate_pseudo_elements=True) # Special test for the unicode symbols and ':scope' element if check # Errors if use repr() instead of __repr__() @@ -262,7 +278,7 @@ def test_pseudo_repr(css: str) -> str: assert test_pseudo_repr(":scope") == "Pseudo[Element[*]:scope]" def test_specificity(self) -> None: - def specificity(css: str) -> Tuple[int, int, int]: + def specificity(css: str) -> tuple[int, int, int]: selectors = parse(css) assert len(selectors) == 1 return selectors[0].specificity() @@ -301,10 +317,14 @@ def specificity(css: str) -> Tuple[int, int, int]: assert specificity("foo::before") == (0, 0, 2) assert specificity("foo:empty::before") == (0, 1, 2) - assert specificity("#lorem + foo#ipsum:first-child > bar:first-line") == (2, 1, 3) + assert specificity("#lorem + foo#ipsum:first-child > bar:first-line") == ( + 2, + 1, + 3, + ) def test_css_export(self) -> None: - def css2css(css: str, res: Optional[str] = None) -> None: + def css2css(css: str, res: str | None = None) -> None: selectors = parse(css) assert len(selectors) == 1 assert selectors[0].canonical() == (res or css) @@ -343,18 +363,19 @@ def css2css(css: str, res: Optional[str] = None) -> None: css2css("foo > *") def test_parse_errors(self) -> None: - def get_error(css: str) -> Optional[str]: + def get_error(css: str) -> str | None: try: parse(css) except SelectorSyntaxError: - # Py2, Py3, ... - return str(sys.exc_info()[1]).replace("(u'", "('") + return str(sys.exc_info()[1]) return None assert get_error("attributes(href)/html/body/a") == ( "Expected selector, got " ) - assert get_error("attributes(href)") == ("Expected selector, got ") + assert get_error("attributes(href)") == ( + "Expected selector, got " + ) assert get_error("html/body/a") == ("Expected selector, got ") assert get_error(" ") == ("Expected selector, got ") assert get_error("div, ") == ("Expected selector, got ") @@ -369,10 +390,14 @@ def get_error(css: str) -> Optional[str]: assert get_error("[*]") == ("Expected '|', got ") assert get_error("[foo|]") == ("Expected ident, got ") assert get_error("[#]") == ("Expected ident or '*', got ") - assert get_error("[foo=#]") == ("Expected string or ident, got ") + assert get_error("[foo=#]") == ( + "Expected string or ident, got " + ) assert get_error("[href]a") == ("Expected selector, got ") assert get_error("[rel=stylesheet]") is None - assert get_error("[rel:stylesheet]") == ("Operator expected, got ") + assert get_error("[rel:stylesheet]") == ( + "Operator expected, got " + ) assert get_error("[rel=stylesheet") == ("Expected ']', got ") assert get_error(":lang(fr)") is None assert get_error(":lang(fr") == ("Expected an argument, got ") @@ -386,12 +411,20 @@ def get_error(css: str) -> Optional[str]: assert get_error("li:before a") == ( "Got pseudo-element ::before not at the end of a selector" ) - assert get_error(":not(:before)") == ("Got pseudo-element ::before inside :not() at 12") + assert get_error(":not(:before)") == ( + "Got pseudo-element ::before inside :not() at 12" + ) assert get_error(":not(:not(a))") == ("Got nested :not()") - assert get_error(":is(:before)") == ("Got pseudo-element ::before inside function") + assert get_error(":is(:before)") == ( + "Got pseudo-element ::before inside function" + ) assert get_error(":is(a b)") == ("Expected an argument, got ") - assert get_error(":where(:before)") == ("Got pseudo-element ::before inside function") - assert get_error(":where(a b)") == ("Expected an argument, got ") + assert get_error(":where(:before)") == ( + "Got pseudo-element ::before inside function" + ) + assert get_error(":where(a b)") == ( + "Expected an argument, got " + ) assert get_error(":scope > div :scope header") == ( 'Got immediate child pseudo-element ":scope" not at the start of a selector' ) @@ -416,7 +449,7 @@ def xpath(css: str) -> str: assert xpath("e[foo|bar]") == "e[@foo:bar]" assert xpath('e[foo="bar"]') == "e[@foo = 'bar']" assert xpath('e[foo~="bar"]') == ( - "e[@foo and contains(" "concat(' ', normalize-space(@foo), ' '), ' bar ')]" + "e[@foo and contains(concat(' ', normalize-space(@foo), ' '), ' bar ')]" ) assert xpath('e[foo^="bar"]') == ("e[@foo and starts-with(@foo, 'bar')]") assert xpath('e[foo$="bar"]') == ( @@ -424,7 +457,7 @@ def xpath(css: str) -> str: ) assert xpath('e[foo*="bar"]') == ("e[@foo and contains(@foo, 'bar')]") assert xpath('e[hreflang|="en"]') == ( - "e[@hreflang and (" "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]" + "e[@hreflang and (@hreflang = 'en' or starts-with(@hreflang, 'en-'))]" ) # --- nth-* and nth-last-* ------------------------------------- @@ -446,19 +479,29 @@ def xpath(css: str) -> str: "e[(count(preceding-sibling::*) >= 1) and " "((count(preceding-sibling::*) +2) mod 3 = 0)]" ) - assert xpath("e:nth-child(3n-2)") == ("e[count(preceding-sibling::*) mod 3 = 0]") + assert xpath("e:nth-child(3n-2)") == ( + "e[count(preceding-sibling::*) mod 3 = 0]" + ) assert xpath("e:nth-child(-n+6)") == ("e[count(preceding-sibling::*) <= 5]") assert xpath("e:nth-last-child(1)") == ("e[count(following-sibling::*) = 0]") - assert xpath("e:nth-last-child(2n)") == ("e[(count(following-sibling::*) +1) mod 2 = 0]") - assert xpath("e:nth-last-child(2n+1)") == ("e[count(following-sibling::*) mod 2 = 0]") + assert xpath("e:nth-last-child(2n)") == ( + "e[(count(following-sibling::*) +1) mod 2 = 0]" + ) + assert xpath("e:nth-last-child(2n+1)") == ( + "e[count(following-sibling::*) mod 2 = 0]" + ) assert xpath("e:nth-last-child(2n+2)") == ( "e[(count(following-sibling::*) >= 1) and " "((count(following-sibling::*) +1) mod 2 = 0)]" ) - assert xpath("e:nth-last-child(3n+1)") == ("e[count(following-sibling::*) mod 3 = 0]") + assert xpath("e:nth-last-child(3n+1)") == ( + "e[count(following-sibling::*) mod 3 = 0]" + ) # represents the two last e elements - assert xpath("e:nth-last-child(-n+2)") == ("e[count(following-sibling::*) <= 1]") + assert xpath("e:nth-last-child(-n+2)") == ( + "e[count(following-sibling::*) <= 1]" + ) assert xpath("e:nth-of-type(1)") == ("e[count(preceding-sibling::e) = 0]") assert xpath("e:nth-last-of-type(1)") == ("e[count(following-sibling::e) = 0]") @@ -486,24 +529,32 @@ def xpath(css: str) -> str: assert xpath("e:has(f)") == "e[descendant::f]" assert xpath("e:has(~ f)") == "e[following-sibling::f]" assert ( - xpath("e:has(+ f)") == "e[following-sibling::*[(name() = 'f') and (position() = 1)]]" + xpath("e:has(+ f)") + == "e[following-sibling::*[(name() = 'f') and (position() = 1)]]" ) assert xpath('e:contains("foo")') == ("e[contains(., 'foo')]") assert xpath("e:ConTains(foo)") == ("e[contains(., 'foo')]") assert xpath("e.warning") == ( - "e[@class and contains(" "concat(' ', normalize-space(@class), ' '), ' warning ')]" + "e[@class and contains(" + "concat(' ', normalize-space(@class), ' '), ' warning ')]" ) assert xpath("e#myid") == ("e[@id = 'myid']") - assert xpath("e:not(:nth-child(odd))") == ("e[not(count(preceding-sibling::*) mod 2 = 0)]") + assert xpath("e:not(:nth-child(odd))") == ( + "e[not(count(preceding-sibling::*) mod 2 = 0)]" + ) assert xpath("e:nOT(*)") == ("e[0]") # never matches assert xpath("e f") == ("e/descendant-or-self::*/f") assert xpath("e > f") == ("e/f") - assert xpath("e + f") == ("e/following-sibling::*[(name() = 'f') and (position() = 1)]") + assert xpath("e + f") == ( + "e/following-sibling::*[(name() = 'f') and (position() = 1)]" + ) assert xpath("e ~ f") == ("e/following-sibling::f") assert xpath("e ~ f:nth-child(3)") == ( "e/following-sibling::f[count(preceding-sibling::*) = 2]" ) - assert xpath("div#container p") == ("div[@id = 'container']/descendant-or-self::*/p") + assert xpath("div#container p") == ( + "div[@id = 'container']/descendant-or-self::*/p" + ) assert xpath("e:where(foo)") == "e[name() = 'foo']" assert xpath("e:where(foo, bar)") == "e[(name() = 'foo') or (name() = 'bar')]" @@ -513,19 +564,32 @@ def xpath(css: str) -> str: assert xpath(r"[h\a0 ref]") == ("*[attribute::*[name() = 'h ref']]") # h\xa0ref assert xpath(r"[h\]ref]") == ("*[attribute::*[name() = 'h]ref']]") - self.assertRaises(ExpressionError, xpath, ":fİrst-child") - self.assertRaises(ExpressionError, xpath, ":first-of-type") - self.assertRaises(ExpressionError, xpath, ":only-of-type") - self.assertRaises(ExpressionError, xpath, ":last-of-type") - self.assertRaises(ExpressionError, xpath, ":nth-of-type(1)") - self.assertRaises(ExpressionError, xpath, ":nth-last-of-type(1)") - self.assertRaises(ExpressionError, xpath, ":nth-child(n-)") - self.assertRaises(ExpressionError, xpath, ":after") - self.assertRaises(ExpressionError, xpath, ":lorem-ipsum") - self.assertRaises(ExpressionError, xpath, ":lorem(ipsum)") - self.assertRaises(ExpressionError, xpath, "::lorem-ipsum") - self.assertRaises(TypeError, GenericTranslator().css_to_xpath, 4) - self.assertRaises(TypeError, GenericTranslator().selector_to_xpath, "foo") + with pytest.raises(ExpressionError): + xpath(":fİrst-child") + with pytest.raises(ExpressionError): + xpath(":first-of-type") + with pytest.raises(ExpressionError): + xpath(":only-of-type") + with pytest.raises(ExpressionError): + xpath(":last-of-type") + with pytest.raises(ExpressionError): + xpath(":nth-of-type(1)") + with pytest.raises(ExpressionError): + xpath(":nth-last-of-type(1)") + with pytest.raises(ExpressionError): + xpath(":nth-child(n-)") + with pytest.raises(ExpressionError): + xpath(":after") + with pytest.raises(ExpressionError): + xpath(":lorem-ipsum") + with pytest.raises(ExpressionError): + xpath(":lorem(ipsum)") + with pytest.raises(ExpressionError): + xpath("::lorem-ipsum") + with pytest.raises(TypeError): + GenericTranslator().css_to_xpath(4) # type: ignore[arg-type] + with pytest.raises(TypeError): + GenericTranslator().selector_to_xpath("foo") # type: ignore[arg-type] def test_unicode(self) -> None: css = ".a\xc1b" @@ -539,10 +603,18 @@ def test_unicode(self) -> None: def test_quoting(self) -> None: css_to_xpath = GenericTranslator().css_to_xpath - assert css_to_xpath('*[aval="\'"]') == ("""descendant-or-self::*[@aval = "'"]""") - assert css_to_xpath("*[aval=\"'''\"]") == ("""descendant-or-self::*[@aval = "'''"]""") - assert css_to_xpath("*[aval='\"']") == ("""descendant-or-self::*[@aval = '"']""") - assert css_to_xpath('*[aval=\'"""\']') == ('''descendant-or-self::*[@aval = '"""']''') + assert css_to_xpath('*[aval="\'"]') == ( + """descendant-or-self::*[@aval = "'"]""" + ) + assert css_to_xpath("*[aval=\"'''\"]") == ( + """descendant-or-self::*[@aval = "'''"]""" + ) + assert css_to_xpath("*[aval='\"']") == ( + """descendant-or-self::*[@aval = '"']""" + ) + assert css_to_xpath('*[aval=\'"""\']') == ( + '''descendant-or-self::*[@aval = '"""']''' + ) assert css_to_xpath(':scope > div[dataimg=""]') == ( "descendant-or-self::*[1]/div[@dataimg = '']" ) @@ -569,33 +641,35 @@ def xpath_pseudo_element( self, xpath: XPathExpr, pseudo_element: PseudoElement ) -> XPathExpr: if isinstance(pseudo_element, FunctionalPseudoElement): - method_name = "xpath_%s_functional_pseudo_element" % ( + method_name = "xpath_{}_functional_pseudo_element".format( pseudo_element.name.replace("-", "_") ) method = getattr(self, method_name, None) if not method: raise ExpressionError( - "The functional pseudo-element ::%s() is unknown" % pseudo_element.name + f"The functional pseudo-element ::{pseudo_element.name}() is unknown" ) xpath = method(xpath, pseudo_element.arguments) else: - method_name = "xpath_%s_simple_pseudo_element" % ( + method_name = "xpath_{}_simple_pseudo_element".format( pseudo_element.replace("-", "_") ) method = getattr(self, method_name, None) if not method: raise ExpressionError( - "The pseudo-element ::%s is unknown" % pseudo_element + f"The pseudo-element ::{pseudo_element} is unknown" ) xpath = method(xpath) return xpath # functional pseudo-class: # elements that have a certain number of attributes - def xpath_nb_attr_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: + def xpath_nb_attr_function( + self, xpath: XPathExpr, function: Function + ) -> XPathExpr: assert function.arguments[0].value nb_attributes = int(function.arguments[0].value) - return xpath.add_condition("count(@*)=%d" % nb_attributes) + return xpath.add_condition(f"count(@*)={nb_attributes}") # pseudo-class: # elements that have 5 attributes @@ -609,14 +683,16 @@ def xpath_attr_functional_pseudo_element( ) -> XPathExpr: attribute_name = arguments[0].value other = XPathExpr( - "@%s" % attribute_name, + f"@{attribute_name}", "", ) return xpath.join("/", other) # pseudo-element: # element's text() nodes - def xpath_text_node_simple_pseudo_element(self, xpath: XPathExpr) -> XPathExpr: + def xpath_text_node_simple_pseudo_element( + self, xpath: XPathExpr + ) -> XPathExpr: other = XPathExpr( "text()", "", @@ -625,7 +701,9 @@ def xpath_text_node_simple_pseudo_element(self, xpath: XPathExpr) -> XPathExpr: # pseudo-element: # element's href attribute - def xpath_attr_href_simple_pseudo_element(self, xpath: XPathExpr) -> XPathExpr: + def xpath_attr_href_simple_pseudo_element( + self, xpath: XPathExpr + ) -> XPathExpr: other = XPathExpr( "@href", "", @@ -650,17 +728,17 @@ def xpath(css: str) -> str: ) assert xpath(":scope") == "descendant-or-self::*[1]" assert xpath(":first-or-second[href]") == ( - "descendant-or-self::*[(@id = 'first' or @id = 'second') " "and (@href)]" + "descendant-or-self::*[(@id = 'first' or @id = 'second') and (@href)]" ) assert str(XPathExpr("", "", condition="@href")) == "[@href]" document = etree.fromstring(OPERATOR_PRECEDENCE_IDS) - sort_key = dict((el, count) for count, el in enumerate(document.iter())).__getitem__ + sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__ - def operator_id(selector: str) -> List[str]: + def operator_id(selector: str) -> list[str]: xpath = CustomTranslator().css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast("list[etree._Element]", document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] @@ -669,9 +747,11 @@ def operator_id(selector: str) -> List[str]: assert operator_id("[href]:first-or-second") == ["second"] def test_series(self) -> None: - def series(css: str) -> Optional[Tuple[int, int]]: - (selector,) = parse(":nth-child(%s)" % css) - args = typing.cast(FunctionalPseudoElement, selector.parsed_tree).arguments + def series(css: str) -> tuple[int, int] | None: + (selector,) = parse(f":nth-child({css})") + args = typing.cast( + "FunctionalPseudoElement", selector.parsed_tree + ).arguments try: return parse_series(args) except ValueError: @@ -698,12 +778,12 @@ def series(css: str) -> Optional[Tuple[int, int]]: def test_lang(self) -> None: document = etree.fromstring(XMLLANG_IDS) - sort_key = dict((el, count) for count, el in enumerate(document.iter())).__getitem__ + sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__ css_to_xpath = GenericTranslator().css_to_xpath - def langid(selector: str) -> List[str]: + def langid(selector: str) -> list[str]: xpath = css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast("list[etree._Element]", document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] @@ -714,28 +794,34 @@ def langid(selector: str) -> List[str]: assert langid(":lang(ru)") == ["sixth"] assert langid(":lang('ZH')") == ["eighth"] assert langid(":lang(de) :lang(zh)") == ["eighth"] - assert langid(":lang(en), :lang(zh)") == ["first", "second", "third", "fourth", "eighth"] + assert langid(":lang(en), :lang(zh)") == [ + "first", + "second", + "third", + "fourth", + "eighth", + ] assert langid(":lang(es)") == [] def test_argument_types(self) -> None: class CustomTranslator(GenericTranslator): def __init__(self) -> None: - self.argument_types: List[str] = [] + self.argument_types: list[str] = [] def xpath_pseudo_element( self, xpath: XPathExpr, pseudo_element: PseudoElement ) -> XPathExpr: self.argument_types += typing.cast( - FunctionalPseudoElement, pseudo_element + "FunctionalPseudoElement", pseudo_element ).argument_types() return xpath - def argument_types(css: str) -> List[str]: + def argument_types(css: str) -> list[str]: translator = CustomTranslator() translator.css_to_xpath(css) return translator.argument_types - mappings: List[Tuple[str, List[str]]] = [ + mappings: list[tuple[str, list[str]]] = [ ("", []), ("ident", ["IDENT"]), ('"string"', ["STRING"]), @@ -747,21 +833,21 @@ def argument_types(css: str) -> List[str]: def test_select(self) -> None: document = etree.fromstring(HTML_IDS) - sort_key = dict((el, count) for count, el in enumerate(document.iter())).__getitem__ + sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__ css_to_xpath = GenericTranslator().css_to_xpath html_css_to_xpath = HTMLTranslator().css_to_xpath - def select_ids(selector: str, html_only: bool) -> List[str]: + def select_ids(selector: str, html_only: bool) -> list[str]: xpath = css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast("list[etree._Element]", document.xpath(xpath)) if html_only: assert items == [] xpath = html_css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast("list[etree._Element]", document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] - def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: + def pcss(main: str, *selectors: str, **kwargs: bool) -> list[str]: html_only = kwargs.pop("html_only", False) result = select_ids(main, html_only) for selector in selectors: @@ -769,7 +855,14 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: return result all_ids = pcss("*") - assert all_ids[:6] == ["html", "nil", "link-href", "link-nohref", "nil", "outer-div"] + assert all_ids[:6] == [ + "html", + "nil", + "link-href", + "link-nohref", + "nil", + "outer-div", + ] assert all_ids[-1:] == ["foobar-span"] assert pcss("div") == ["outer-div", "li-div", "foobar-div"] assert pcss("DIV", html_only=True) == [ @@ -780,7 +873,9 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: assert pcss("div div") == ["li-div"] assert pcss("div, div div") == ["outer-div", "li-div", "foobar-div"] assert pcss("a[name]") == ["name-anchor"] - assert pcss("a[NAme]", html_only=True) == ["name-anchor"] # case-insensitive in HTML: + assert pcss("a[NAme]", html_only=True) == [ + "name-anchor" + ] # case-insensitive in HTML: assert pcss("a[rel]") == ["tag-anchor", "nofollow-anchor"] assert pcss('a[rel="tag"]') == ["tag-anchor"] assert pcss('a[href*="localhost"]') == ["tag-anchor"] @@ -798,7 +893,10 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: assert pcss('*[lang|="en"]', '[lang|="en-US"]') == [] assert pcss('*[lang|="e"]') == [] # ... :lang() is not. - assert pcss(':lang("EN")', "*:lang(en-US)", html_only=True) == ["second-li", "li-div"] + assert pcss(':lang("EN")', "*:lang(en-US)", html_only=True) == [ + "second-li", + "li-div", + ] assert pcss(':lang("e")', html_only=True) == [] assert pcss(":scope > div") == [] assert pcss(":scope body") == ["nil"] @@ -852,7 +950,11 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: "seventh-li", ] assert pcss("li:nth-last-child(2n+2)") == ["second-li", "fourth-li", "sixth-li"] - assert pcss("li:nth-last-child(3n+1)") == ["first-li", "fourth-li", "seventh-li"] + assert pcss("li:nth-last-child(3n+1)") == [ + "first-li", + "fourth-li", + "seventh-li", + ] assert pcss("ol:first-of-type") == ["first-ol"] assert pcss("ol:nth-child(1)") == [] assert pcss("ol:nth-of-type(2)") == ["second-ol"] @@ -875,7 +977,8 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: assert pcss("span:only-child") == ["foobar-span"] assert pcss("li div:only-child") == ["li-div"] assert pcss("div *:only-child") == ["li-div", "foobar-span"] - self.assertRaises(ExpressionError, pcss, "p *:only-of-type") + with pytest.raises(ExpressionError): + pcss("p *:only-of-type") assert pcss("p:only-of-type") == ["paragraph"] assert pcss("a:empty", "a:EMpty") == ["name-anchor"] assert pcss("li:empty") == ["third-li", "fourth-li", "fifth-li", "sixth-li"] @@ -901,7 +1004,10 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: assert pcss('*:contains("E")') == [] # case-sensitive assert pcss(".a", ".b", "*.a", "ol.a") == ["first-ol"] assert pcss(".c", "*.c") == ["first-ol", "third-li", "fourth-li"] - assert pcss("ol *.c", "ol li.c", "li ~ li.c", "ol > li.c") == ["third-li", "fourth-li"] + assert pcss("ol *.c", "ol li.c", "li ~ li.c", "ol > li.c") == [ + "third-li", + "fourth-li", + ] assert pcss("#first-li", "li#first-li", "*#first-li") == ["first-li"] assert pcss("li div", "li > div", "div div") == ["li-div"] assert pcss("div > div") == [] @@ -972,14 +1078,14 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: def test_select_shakespeare(self) -> None: document = html.document_fromstring(HTML_SHAKESPEARE) - body = typing.cast(List["etree._Element"], document.xpath("//body"))[0] + body = typing.cast("list[etree._Element]", document.xpath("//body"))[0] css_to_xpath = GenericTranslator().css_to_xpath basestring_ = (str, bytes) def count(selector: str) -> int: xpath = css_to_xpath(selector) - results = typing.cast(List["etree._Element"], body.xpath(xpath)) + results = typing.cast("list[etree._Element]", body.xpath(xpath)) assert not isinstance(results, basestring_) found = set() for item in results: @@ -1427,7 +1533,7 @@ def count(selector: str) -> int: -""" # noqa: W191,E101 +""" if __name__ == "__main__": diff --git a/tox.ini b/tox.ini index c618dfb..9ff54cf 100644 --- a/tox.ini +++ b/tox.ini @@ -1,55 +1,49 @@ [tox] -envlist = black,flake8,pylint,security,py,docs +envlist = pre-commit,pylint,py,docs,typing [testenv] deps = lxml>=4.4 - pytest-cov>=2.8 + pytest-cov>=7.0.0 pytest>=5.4 - setuptools sybil commands = pytest --cov=cssselect \ --cov-report=term-missing --cov-report=html --cov-report=xml \ - --verbose {posargs: cssselect tests docs} - -[testenv:black] -deps = - black==22.10.0 -commands = - black --check {posargs: cssselect setup.py tests} - -[testenv:flake8] -deps = - flake8==6.1.0 -commands = - flake8 {posargs: cssselect setup.py tests docs/conf.py} + {posargs: cssselect tests docs} [testenv:pylint] deps = {[testenv]deps} - pylint==3.0.0 + pylint==4.0.4 commands = - pylint {posargs: cssselect setup.py tests docs} - -[testenv:security] -deps = - bandit -commands = - bandit -r -c .bandit.yml {posargs: cssselect} + pylint {posargs: cssselect tests docs} [testenv:docs] changedir = docs deps = - sphinx - sphinx_rtd_theme + -r docs/requirements.txt commands = sphinx-build -W -b html . {envtmpdir}/html [testenv:typing] deps = {[testenv]deps} - lxml-stubs==0.4.0 - mypy==0.982 + mypy==1.19.1 + types-lxml==2026.1.1 +commands = + mypy {posargs: cssselect tests} + +[testenv:pre-commit] +deps = pre-commit +commands = pre-commit run --all-files --show-diff-on-failure +skip_install = true + +[testenv:twinecheck] +basepython = python3 +deps = + twine==6.2.0 + build==1.4.0 commands = - mypy --strict {posargs: cssselect tests} + python -m build --sdist + twine check dist/*