diff --git a/.bandit.yml b/.bandit.yml deleted file mode 100644 index 4f60a02..0000000 --- a/.bandit.yml +++ /dev/null @@ -1,6 +0,0 @@ -skips: -- B101 -- B311 -- B320 -- B410 -exclude_dirs: ['tests'] diff --git a/.bumpversion.cfg b/.bumpversion.cfg deleted file mode 100644 index 56cfabc..0000000 --- a/.bumpversion.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[bumpversion] -current_version = 1.2.0 -commit = True -tag = True - -[bumpversion:file:cssselect/__init__.py] diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index ed1fac6..0000000 --- a/.coveragerc +++ /dev/null @@ -1,10 +0,0 @@ -[run] -branch = True -source = cssselect - -[report] -exclude_lines = - pragma: no cover - def __repr__ - if sys.version_info - if __name__ == '__main__': diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 2417f2e..0000000 --- a/.flake8 +++ /dev/null @@ -1,16 +0,0 @@ -[flake8] -max-line-length = 99 -ignore = - W503 - # too many leading '#' for block comment - E266 - E704 -exclude = - .git - .tox - venv* - - # pending revision - docs/conf.py -per-file-ignores = - cssselect/__init__.py:F401 diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 9d2c8f6..bb4f6e1 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,2 +1,2 @@ # applying pre-commit hooks to the project -e91101b37f82558db84a6b8ee9a6dba1fd2ae0bb \ No newline at end of file +e91101b37f82558db84a6b8ee9a6dba1fd2ae0bb diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 5b6cfbf..41ff7e1 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -5,23 +5,27 @@ jobs: checks: runs-on: ubuntu-latest strategy: + fail-fast: false matrix: include: - - python-version: 3.12 + - python-version: 3.14 env: TOXENV: pylint - - python-version: 3.12 + - python-version: 3.14 # Keep in sync with .readthedocs.yml env: TOXENV: docs - - python-version: 3.12 + - python-version: 3.14 env: TOXENV: typing + - python-version: 3.14 + env: + TOXENV: twinecheck steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} @@ -31,9 +35,9 @@ jobs: pip install -U pip pip install -U tox tox - + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: pre-commit/action@v3.0.0 + - uses: actions/checkout@v6 + - uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 67d9c5a..526c458 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,31 +1,32 @@ name: Publish -on: [push] +on: + push: + tags: + - 'v[0-9]+.[0-9]+.[0-9]+' jobs: publish: runs-on: ubuntu-latest - if: startsWith(github.event.ref, 'refs/tags/') + + environment: + name: pypi + url: https://pypi.org/p/cssselect + + permissions: + id-token: write steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - - name: Set up Python 3.12 - uses: actions/setup-python@v4 + - name: Set up Python + uses: actions/setup-python@v6 with: - python-version: 3.12 + python-version: 3.14 - - name: Check Tag - id: check-release-tag + - name: Build run: | - if [[ ${{ github.event.ref }} =~ ^refs/tags/v[0-9]+[.][0-9]+[.][0-9]+(rc[0-9]+|[.]dev[0-9]+)?$ ]]; then - echo ::set-output name=release_tag::true - fi + python -m pip install --upgrade build + python -m build - name: Publish to PyPI - if: steps.check-release-tag.outputs.release_tag == 'true' - run: | - pip install --upgrade setuptools wheel twine - python setup.py sdist bdist_wheel - export TWINE_USERNAME=__token__ - export TWINE_PASSWORD=${{ secrets.PYPI_TOKEN }} - twine upload dist/* + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests-macos.yml similarity index 59% rename from .github/workflows/tests.yml rename to .github/workflows/tests-macos.yml index a1a0524..4947937 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests-macos.yml @@ -1,18 +1,19 @@ -name: Tests +name: macOS on: [push, pull_request] jobs: tests: - runs-on: ubuntu-latest + runs-on: macos-latest strategy: + fail-fast: false matrix: - python-version: [3.8, 3.9, "3.10", "3.11", "3.12", "pypy3.9", "pypy3.10"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} @@ -23,4 +24,4 @@ jobs: tox -e py - name: Upload coverage report - run: bash <(curl -s https://codecov.io/bash) + uses: codecov/codecov-action@v5 diff --git a/.github/workflows/tests-ubuntu.yml b/.github/workflows/tests-ubuntu.yml new file mode 100644 index 0000000..1ef905b --- /dev/null +++ b/.github/workflows/tests-ubuntu.yml @@ -0,0 +1,33 @@ +name: Ubuntu +on: [push, pull_request] + +jobs: + tests: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14", "pypy3.11"] + + steps: + - uses: actions/checkout@v6 + + - name: Install system libraries + if: contains(matrix.python-version, 'pypy') + run: | + sudo apt-get update + sudo apt-get install libxml2-dev libxslt-dev + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + + - name: Run tests + run: | + pip install -U pip + pip install -U tox + tox -e py + + - name: Upload coverage report + uses: codecov/codecov-action@v5 diff --git a/.github/workflows/tests-windows.yml b/.github/workflows/tests-windows.yml new file mode 100644 index 0000000..24d7ee8 --- /dev/null +++ b/.github/workflows/tests-windows.yml @@ -0,0 +1,27 @@ +name: Windows +on: [push, pull_request] + +jobs: + tests: + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] + + steps: + - uses: actions/checkout@v6 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + + - name: Run tests + run: | + pip install -U pip + pip install -U tox + tox -e py + + - name: Upload coverage report + uses: codecov/codecov-action@v5 diff --git a/.isort.cfg b/.isort.cfg deleted file mode 100644 index 6860bdb..0000000 --- a/.isort.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[settings] -profile = black \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a27d3db..81ca890 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,18 +1,26 @@ repos: -- repo: https://github.com/PyCQA/bandit - rev: 1.7.8 +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.14.4 hooks: - - id: bandit - args: [-r, -c, .bandit.yml] -- repo: https://github.com/PyCQA/flake8 - rev: 7.0.0 + - id: ruff-check + args: [ --fix ] + - id: ruff-format +- repo: https://github.com/adamchainz/blacken-docs + rev: 1.20.0 hooks: - - id: flake8 -- repo: https://github.com/psf/black.git - rev: 24.3.0 + - id: blacken-docs + additional_dependencies: + - black==26.1.0 +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 hooks: - - id: black -- repo: https://github.com/pycqa/isort - rev: 5.13.2 + - id: end-of-file-fixer + - id: trailing-whitespace +- repo: https://github.com/sphinx-contrib/sphinx-lint + rev: v1.0.0 hooks: - - id: isort \ No newline at end of file + - id: sphinx-lint +- repo: https://github.com/rhysd/actionlint + rev: v1.7.10 + hooks: + - id: actionlint diff --git a/.readthedocs.yml b/.readthedocs.yml index 7d13c50..b91642a 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -4,11 +4,11 @@ sphinx: configuration: docs/conf.py fail_on_warning: true build: - os: ubuntu-22.04 + os: ubuntu-24.04 tools: # For available versions, see: # https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python - python: "3.12" # Keep in sync with .github/workflows/checks.yml + python: "3.14" # Keep in sync with .github/workflows/checks.yml python: install: - requirements: docs/requirements.txt diff --git a/CHANGES b/CHANGES index dc38826..5ca2959 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,34 @@ Changelog ========= +Version 1.4.0 +------------- + +Released on 2026-01-29. + +* Dropped support for Python 3.9 and PyPy 3.10. + +* Added support for Python 3.14 and PyPy 3.11. + +* Switched the build system to ``hatchling``. + +* CI fixes and improvements. + +Version 1.3.0 +------------- + +Released on 2025-03-10. + +* Dropped support for Python 3.7-3.8, added support for Python 3.12-3.13 and + PyPy 3.10. + +* Removed ``_unicode_safe_getattr()``, deprecated in 1.2.0. + +* Added ``pre-commit`` and formatted the code with ``ruff``. + +* Many CI additions and improvements. + + Version 1.2.0 ------------- diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 7fc2933..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,4 +0,0 @@ -include AUTHORS CHANGES LICENSE README.rst tox.ini .coveragerc py.typed -recursive-include docs * -recursive-include tests * -prune docs/_build diff --git a/README.rst b/README.rst index d62b320..c055295 100644 --- a/README.rst +++ b/README.rst @@ -11,8 +11,8 @@ cssselect: CSS Selectors for Python :target: https://pypi.python.org/pypi/cssselect :alt: Supported Python Versions -.. image:: https://github.com/scrapy/cssselect/actions/workflows/tests.yml/badge.svg - :target: https://github.com/scrapy/cssselect/actions/workflows/tests.yml +.. image:: https://github.com/scrapy/cssselect/actions/workflows/tests-ubuntu.yml/badge.svg + :target: https://github.com/scrapy/cssselect/actions/workflows/tests-ubuntu.yml :alt: Tests .. image:: https://img.shields.io/codecov/c/github/scrapy/cssselect/master.svg diff --git a/cssselect/__init__.py b/cssselect/__init__.py index a59995c..59d62df 100644 --- a/cssselect/__init__.py +++ b/cssselect/__init__.py @@ -1,15 +1,14 @@ -# -*- coding: utf-8 -*- """ - CSS Selectors based on XPath - ============================ +CSS Selectors based on XPath +============================ - This module supports selecting XML/HTML elements based on CSS selectors. - See the `CSSSelector` class for details. +This module supports selecting XML/HTML elements based on CSS selectors. +See the `CSSSelector` class for details. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. """ @@ -27,11 +26,11 @@ "FunctionalPseudoElement", "GenericTranslator", "HTMLTranslator", - "parse", "Selector", "SelectorError", "SelectorSyntaxError", + "parse", ) -VERSION = "1.2.0" +VERSION = "1.4.0" __version__ = VERSION diff --git a/cssselect/parser.py b/cssselect/parser.py index 354713d..f969769 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -1,22 +1,28 @@ -# -*- coding: utf-8 -*- """ - cssselect.parser - ================ +cssselect.parser +================ - Tokenizer, parser and parsed objects for CSS selectors. +Tokenizer, parser and parsed objects for CSS selectors. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. """ +from __future__ import annotations + import operator import re import sys -import typing -from typing import Iterable, Iterator, List, Optional, Sequence, Tuple, Union +from typing import TYPE_CHECKING, Literal, Protocol, TypeAlias, Union, cast, overload + +if TYPE_CHECKING: + from collections.abc import Iterable, Iterator, Sequence + + # typing.Self requires Python 3.11 + from typing_extensions import Self def ascii_lower(string: str) -> str: @@ -40,7 +46,7 @@ class SelectorSyntaxError(SelectorError, SyntaxError): #### Parsed objects -Tree = Union[ +Tree: TypeAlias = Union[ "Element", "Hash", "Class", @@ -53,7 +59,7 @@ class SelectorSyntaxError(SelectorError, SyntaxError): "SpecificityAdjustment", "CombinedSelector", ] -PseudoElement = Union["FunctionalPseudoElement", str] +PseudoElement: TypeAlias = Union["FunctionalPseudoElement", str] class Selector: @@ -67,9 +73,7 @@ class Selector: """ - def __init__( - self, tree: Tree, pseudo_element: Optional[PseudoElement] = None - ) -> None: + def __init__(self, tree: Tree, pseudo_element: PseudoElement | None = None) -> None: self.parsed_tree = tree if pseudo_element is not None and not isinstance( pseudo_element, FunctionalPseudoElement @@ -101,25 +105,25 @@ def __repr__(self) -> str: if isinstance(self.pseudo_element, FunctionalPseudoElement): pseudo_element = repr(self.pseudo_element) elif self.pseudo_element: - pseudo_element = "::%s" % self.pseudo_element + pseudo_element = f"::{self.pseudo_element}" else: pseudo_element = "" - return "%s[%r%s]" % (self.__class__.__name__, self.parsed_tree, pseudo_element) + return f"{self.__class__.__name__}[{self.parsed_tree!r}{pseudo_element}]" def canonical(self) -> str: """Return a CSS representation for this selector (a string)""" if isinstance(self.pseudo_element, FunctionalPseudoElement): - pseudo_element = "::%s" % self.pseudo_element.canonical() + pseudo_element = f"::{self.pseudo_element.canonical()}" elif self.pseudo_element: - pseudo_element = "::%s" % self.pseudo_element + pseudo_element = f"::{self.pseudo_element}" else: pseudo_element = "" - res = "%s%s" % (self.parsed_tree.canonical(), pseudo_element) + res = f"{self.parsed_tree.canonical()}{pseudo_element}" if len(res) > 1: res = res.lstrip("*") return res - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: """Return the specificity_ of this selector as a tuple of 3 integers. .. _specificity: http://www.w3.org/TR/selectors/#specificity @@ -141,12 +145,12 @@ def __init__(self, selector: Tree, class_name: str) -> None: self.class_name = class_name def __repr__(self) -> str: - return "%s[%r.%s]" % (self.__class__.__name__, self.selector, self.class_name) + return f"{self.__class__.__name__}[{self.selector!r}.{self.class_name}]" def canonical(self) -> str: - return "%s.%s" % (self.selector.canonical(), self.class_name) + return f"{self.selector.canonical()}.{self.class_name}" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -170,23 +174,20 @@ class FunctionalPseudoElement: """ - def __init__(self, name: str, arguments: Sequence["Token"]): + def __init__(self, name: str, arguments: Sequence[Token]): self.name = ascii_lower(name) self.arguments = arguments def __repr__(self) -> str: - return "%s[::%s(%r)]" % ( - self.__class__.__name__, - self.name, - [token.value for token in self.arguments], - ) + token_values = [token.value for token in self.arguments] + return f"{self.__class__.__name__}[::{self.name}({token_values!r})]" - def argument_types(self) -> List[str]: + def argument_types(self) -> list[str]: return [token.type for token in self.arguments] def canonical(self) -> str: args = "".join(token.css() for token in self.arguments) - return "%s(%s)" % (self.name, args) + return f"{self.name}({args})" class Function: @@ -194,27 +195,23 @@ class Function: Represents selector:name(expr) """ - def __init__(self, selector: Tree, name: str, arguments: Sequence["Token"]) -> None: + def __init__(self, selector: Tree, name: str, arguments: Sequence[Token]) -> None: self.selector = selector self.name = ascii_lower(name) self.arguments = arguments def __repr__(self) -> str: - return "%s[%r:%s(%r)]" % ( - self.__class__.__name__, - self.selector, - self.name, - [token.value for token in self.arguments], - ) + token_values = [token.value for token in self.arguments] + return f"{self.__class__.__name__}[{self.selector!r}:{self.name}({token_values!r})]" - def argument_types(self) -> List[str]: + def argument_types(self) -> list[str]: return [token.type for token in self.arguments] def canonical(self) -> str: args = "".join(token.css() for token in self.arguments) - return "%s:%s(%s)" % (self.selector.canonical(), self.name, args) + return f"{self.selector.canonical()}:{self.name}({args})" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -230,12 +227,12 @@ def __init__(self, selector: Tree, ident: str) -> None: self.ident = ascii_lower(ident) def __repr__(self) -> str: - return "%s[%r:%s]" % (self.__class__.__name__, self.selector, self.ident) + return f"{self.__class__.__name__}[{self.selector!r}:{self.ident}]" def canonical(self) -> str: - return "%s:%s" % (self.selector.canonical(), self.ident) + return f"{self.selector.canonical()}:{self.ident}" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -251,19 +248,15 @@ def __init__(self, selector: Tree, subselector: Tree) -> None: self.subselector = subselector def __repr__(self) -> str: - return "%s[%r:not(%r)]" % ( - self.__class__.__name__, - self.selector, - self.subselector, - ) + return f"{self.__class__.__name__}[{self.selector!r}:not({self.subselector!r})]" def canonical(self) -> str: subsel = self.subselector.canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") - return "%s:not(%s)" % (self.selector.canonical(), subsel) + return f"{self.selector.canonical()}:not({subsel})" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -274,31 +267,27 @@ class Relation: Represents selector:has(subselector) """ - def __init__(self, selector: Tree, combinator: "Token", subselector: Selector): + def __init__(self, selector: Tree, combinator: Token, subselector: Selector): self.selector = selector self.combinator = combinator self.subselector = subselector def __repr__(self) -> str: - return "%s[%r:has(%r)]" % ( - self.__class__.__name__, - self.selector, - self.subselector, - ) + return f"{self.__class__.__name__}[{self.selector!r}:has({self.subselector!r})]" def canonical(self) -> str: try: - subsel = self.subselector[0].canonical() # type: ignore + subsel = self.subselector[0].canonical() # type: ignore[index] except TypeError: subsel = self.subselector.canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") - return "%s:has(%s)" % (self.selector.canonical(), subsel) + return f"{self.selector.canonical()}:has({subsel})" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() try: - a2, b2, c2 = self.subselector[-1].specificity() # type: ignore + a2, b2, c2 = self.subselector[-1].specificity() # type: ignore[index] except TypeError: a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -314,23 +303,18 @@ def __init__(self, selector: Tree, selector_list: Iterable[Tree]): self.selector_list = selector_list def __repr__(self) -> str: - return "%s[%r:is(%s)]" % ( - self.__class__.__name__, - self.selector, - ", ".join(map(repr, self.selector_list)), - ) + args_str = ", ".join(repr(s) for s in self.selector_list) + return f"{self.__class__.__name__}[{self.selector!r}:is({args_str})]" def canonical(self) -> str: selector_arguments = [] for s in self.selector_list: selarg = s.canonical() selector_arguments.append(selarg.lstrip("*")) - return "%s:is(%s)" % ( - self.selector.canonical(), - ", ".join(map(str, selector_arguments)), - ) + args_str = ", ".join(str(s) for s in selector_arguments) + return f"{self.selector.canonical()}:is({args_str})" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: return max(x.specificity() for x in self.selector_list) @@ -340,28 +324,23 @@ class SpecificityAdjustment: Same as selector:is(selector_list), but its specificity is always 0 """ - def __init__(self, selector: Tree, selector_list: List[Tree]): + def __init__(self, selector: Tree, selector_list: list[Tree]): self.selector = selector self.selector_list = selector_list def __repr__(self) -> str: - return "%s[%r:where(%s)]" % ( - self.__class__.__name__, - self.selector, - ", ".join(map(repr, self.selector_list)), - ) + args_str = ", ".join(repr(s) for s in self.selector_list) + return f"{self.__class__.__name__}[{self.selector!r}:where({args_str})]" def canonical(self) -> str: selector_arguments = [] for s in self.selector_list: selarg = s.canonical() selector_arguments.append(selarg.lstrip("*")) - return "%s:where(%s)" % ( - self.selector.canonical(), - ", ".join(map(str, selector_arguments)), - ) + args_str = ", ".join(str(s) for s in selector_arguments) + return f"{self.selector.canonical()}:where({args_str})" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: return 0, 0, 0 @@ -370,33 +349,33 @@ class Attrib: Represents selector[namespace|attrib operator value] """ - @typing.overload + @overload def __init__( self, selector: Tree, - namespace: Optional[str], + namespace: str | None, attrib: str, - operator: 'typing.Literal["exists"]', + operator: Literal["exists"], value: None, ) -> None: ... - @typing.overload + @overload def __init__( self, selector: Tree, - namespace: Optional[str], + namespace: str | None, attrib: str, operator: str, - value: "Token", + value: Token, ) -> None: ... def __init__( self, selector: Tree, - namespace: Optional[str], + namespace: str | None, attrib: str, operator: str, - value: Optional["Token"], + value: Token | None, ) -> None: self.selector = selector self.namespace = namespace @@ -405,39 +384,24 @@ def __init__( self.value = value def __repr__(self) -> str: - if self.namespace: - attrib = "%s|%s" % (self.namespace, self.attrib) - else: - attrib = self.attrib + attrib = f"{self.namespace}|{self.attrib}" if self.namespace else self.attrib if self.operator == "exists": - return "%s[%r[%s]]" % (self.__class__.__name__, self.selector, attrib) - else: - return "%s[%r[%s %s %r]]" % ( - self.__class__.__name__, - self.selector, - attrib, - self.operator, - typing.cast("Token", self.value).value, - ) + return f"{self.__class__.__name__}[{self.selector!r}[{attrib}]]" + assert self.value is not None + return f"{self.__class__.__name__}[{self.selector!r}[{attrib} {self.operator} {self.value.value!r}]]" def canonical(self) -> str: - if self.namespace: - attrib = "%s|%s" % (self.namespace, self.attrib) - else: - attrib = self.attrib + attrib = f"{self.namespace}|{self.attrib}" if self.namespace else self.attrib if self.operator == "exists": op = attrib else: - op = "%s%s%s" % ( - attrib, - self.operator, - typing.cast("Token", self.value).css(), - ) + assert self.value is not None + op = f"{attrib}{self.operator}{self.value.css()}" - return "%s[%s]" % (self.selector.canonical(), op) + return f"{self.selector.canonical()}[{op}]" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() b += 1 return a, b, c @@ -452,25 +416,24 @@ class Element: """ def __init__( - self, namespace: Optional[str] = None, element: Optional[str] = None + self, namespace: str | None = None, element: str | None = None ) -> None: self.namespace = namespace self.element = element def __repr__(self) -> str: - return "%s[%s]" % (self.__class__.__name__, self.canonical()) + return f"{self.__class__.__name__}[{self.canonical()}]" def canonical(self) -> str: element = self.element or "*" if self.namespace: - element = "%s|%s" % (self.namespace, element) + element = f"{self.namespace}|{element}" return element - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: if self.element: return 0, 0, 1 - else: - return 0, 0, 0 + return 0, 0, 0 class Hash: @@ -478,17 +441,17 @@ class Hash: Represents selector#id """ - def __init__(self, selector: Tree, id: str) -> None: + def __init__(self, selector: Tree, id: str) -> None: # noqa: A002 self.selector = selector self.id = id def __repr__(self) -> str: - return "%s[%r#%s]" % (self.__class__.__name__, self.selector, self.id) + return f"{self.__class__.__name__}[{self.selector!r}#{self.id}]" def canonical(self) -> str: - return "%s#%s" % (self.selector.canonical(), self.id) + return f"{self.selector.canonical()}#{self.id}" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a, b, c = self.selector.specificity() a += 1 return a, b, c @@ -502,24 +465,18 @@ def __init__(self, selector: Tree, combinator: str, subselector: Tree) -> None: self.subselector = subselector def __repr__(self) -> str: - if self.combinator == " ": - comb = "" - else: - comb = self.combinator - return "%s[%r %s %r]" % ( - self.__class__.__name__, - self.selector, - comb, - self.subselector, + comb = "" if self.combinator == " " else self.combinator + return ( + f"{self.__class__.__name__}[{self.selector!r} {comb} {self.subselector!r}]" ) def canonical(self) -> str: subsel = self.subselector.canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") - return "%s %s %s" % (self.selector.canonical(), self.combinator, subsel) + return f"{self.selector.canonical()} {self.combinator} {subsel}" - def specificity(self) -> Tuple[int, int, int]: + def specificity(self) -> tuple[int, int, int]: a1, b1, c1 = self.selector.specificity() a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -539,7 +496,7 @@ def specificity(self) -> Tuple[int, int, int]: ) -def parse(css: str) -> List[Selector]: +def parse(css: str) -> list[Selector]: """Parse a CSS *group of selectors*. If you don't care about pseudo-elements or selector specificity, @@ -581,7 +538,7 @@ def parse(css: str) -> List[Selector]: # raise -def parse_selector_group(stream: "TokenStream") -> Iterator[Selector]: +def parse_selector_group(stream: TokenStream) -> Iterator[Selector]: stream.skip_whitespace() while 1: yield Selector(*parse_selector(stream)) @@ -592,7 +549,7 @@ def parse_selector_group(stream: "TokenStream") -> Iterator[Selector]: break -def parse_selector(stream: "TokenStream") -> Tuple[Tree, Optional[PseudoElement]]: +def parse_selector(stream: TokenStream) -> tuple[Tree, PseudoElement | None]: result, pseudo_element = parse_simple_selector(stream) while 1: stream.skip_whitespace() @@ -601,11 +558,11 @@ def parse_selector(stream: "TokenStream") -> Tuple[Tree, Optional[PseudoElement] break if pseudo_element: raise SelectorSyntaxError( - "Got pseudo-element ::%s not at the end of a selector" % pseudo_element + f"Got pseudo-element ::{pseudo_element} not at the end of a selector" ) if peek.is_delim("+", ">", "~"): # A combinator - combinator = typing.cast(str, stream.next().value) + combinator = cast("str", stream.next().value) stream.skip_whitespace() else: # By exclusion, the last parse_simple_selector() ended @@ -617,8 +574,8 @@ def parse_selector(stream: "TokenStream") -> Tuple[Tree, Optional[PseudoElement] def parse_simple_selector( - stream: "TokenStream", inside_negation: bool = False -) -> Tuple[Tree, Optional[PseudoElement]]: + stream: TokenStream, inside_negation: bool = False +) -> tuple[Tree, PseudoElement | None]: stream.skip_whitespace() selector_start = len(stream.used) peek = stream.peek() @@ -637,7 +594,7 @@ def parse_simple_selector( else: element = namespace = None result: Tree = Element(namespace, element) - pseudo_element: Optional[PseudoElement] = None + pseudo_element: PseudoElement | None = None while 1: peek = stream.peek() if ( @@ -648,10 +605,10 @@ def parse_simple_selector( break if pseudo_element: raise SelectorSyntaxError( - "Got pseudo-element ::%s not at the end of a selector" % pseudo_element + f"Got pseudo-element ::{pseudo_element} not at the end of a selector" ) if peek.type == "HASH": - result = Hash(result, typing.cast(str, stream.next().value)) + result = Hash(result, cast("str", stream.next().value)) elif peek == ("DELIM", "."): stream.next() result = Class(result, stream.next_ident()) @@ -680,21 +637,20 @@ def parse_simple_selector( continue if stream.peek() != ("DELIM", "("): result = Pseudo(result, ident) - if repr(result) == "Pseudo[Element[*]:scope]": - if not ( - len(stream.used) == 2 - or (len(stream.used) == 3 and stream.used[0].type == "S") - or (len(stream.used) >= 3 and stream.used[-3].is_delim(",")) - or ( - len(stream.used) >= 4 - and stream.used[-3].type == "S" - and stream.used[-4].is_delim(",") - ) - ): - raise SelectorSyntaxError( - 'Got immediate child pseudo-element ":scope" ' - "not at the start of a selector" - ) + if repr(result) == "Pseudo[Element[*]:scope]" and not ( + len(stream.used) == 2 + or (len(stream.used) == 3 and stream.used[0].type == "S") + or (len(stream.used) >= 3 and stream.used[-3].is_delim(",")) + or ( + len(stream.used) >= 4 + and stream.used[-3].type == "S" + and stream.used[-4].is_delim(",") + ) + ): + raise SelectorSyntaxError( + 'Got immediate child pseudo-element ":scope" ' + "not at the start of a selector" + ) continue stream.next() stream.skip_whitespace() @@ -704,14 +660,13 @@ def parse_simple_selector( argument, argument_pseudo_element = parse_simple_selector( stream, inside_negation=True ) - next = stream.next() + next_ = stream.next() if argument_pseudo_element: raise SelectorSyntaxError( - "Got pseudo-element ::%s inside :not() at %s" - % (argument_pseudo_element, next.pos) + f"Got pseudo-element ::{argument_pseudo_element} inside :not() at {next_.pos}" ) - if next != ("DELIM", ")"): - raise SelectorSyntaxError("Expected ')', got %s" % (next,)) + if next_ != ("DELIM", ")"): + raise SelectorSyntaxError(f"Expected ')', got {next_}") result = Negation(result, argument) elif ident.lower() == "has": combinator, arguments = parse_relative_selector(stream) @@ -726,83 +681,83 @@ def parse_simple_selector( else: result = Function(result, ident, parse_arguments(stream)) else: - raise SelectorSyntaxError("Expected selector, got %s" % (peek,)) + raise SelectorSyntaxError(f"Expected selector, got {peek}") if len(stream.used) == selector_start: - raise SelectorSyntaxError("Expected selector, got %s" % (stream.peek(),)) + raise SelectorSyntaxError(f"Expected selector, got {stream.peek()}") return result, pseudo_element -def parse_arguments(stream: "TokenStream") -> List["Token"]: - arguments: List["Token"] = [] +def parse_arguments(stream: TokenStream) -> list[Token]: # noqa: RET503 + arguments: list[Token] = [] while 1: stream.skip_whitespace() - next = stream.next() - if next.type in ("IDENT", "STRING", "NUMBER") or next in [ + next_ = stream.next() + if next_.type in ("IDENT", "STRING", "NUMBER") or next_ in [ ("DELIM", "+"), ("DELIM", "-"), ]: - arguments.append(next) - elif next == ("DELIM", ")"): + arguments.append(next_) + elif next_ == ("DELIM", ")"): return arguments else: - raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) + raise SelectorSyntaxError(f"Expected an argument, got {next_}") -def parse_relative_selector(stream: "TokenStream") -> Tuple["Token", Selector]: +def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]: # noqa: RET503 stream.skip_whitespace() subselector = "" - next = stream.next() + next_ = stream.next() - if next in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]: - combinator = next + if next_ in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]: + combinator = next_ stream.skip_whitespace() - next = stream.next() + next_ = stream.next() else: combinator = Token("DELIM", " ", pos=0) while 1: - if next.type in ("IDENT", "STRING", "NUMBER") or next in [ + if next_.type in ("IDENT", "STRING", "NUMBER") or next_ in [ ("DELIM", "."), ("DELIM", "*"), ]: - subselector += typing.cast(str, next.value) - elif next == ("DELIM", ")"): + subselector += cast("str", next_.value) + elif next_ == ("DELIM", ")"): result = parse(subselector) return combinator, result[0] else: - raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) - next = stream.next() + raise SelectorSyntaxError(f"Expected an argument, got {next_}") + next_ = stream.next() -def parse_simple_selector_arguments(stream: "TokenStream") -> List[Tree]: +def parse_simple_selector_arguments(stream: TokenStream) -> list[Tree]: arguments = [] while 1: result, pseudo_element = parse_simple_selector(stream, True) if pseudo_element: raise SelectorSyntaxError( - "Got pseudo-element ::%s inside function" % (pseudo_element,) + f"Got pseudo-element ::{pseudo_element} inside function" ) stream.skip_whitespace() - next = stream.next() - if next in (("EOF", None), ("DELIM", ",")): + next_ = stream.next() + if next_ in (("EOF", None), ("DELIM", ",")): stream.next() stream.skip_whitespace() arguments.append(result) - elif next == ("DELIM", ")"): + elif next_ == ("DELIM", ")"): arguments.append(result) break else: - raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) + raise SelectorSyntaxError(f"Expected an argument, got {next_}") return arguments -def parse_attrib(selector: Tree, stream: "TokenStream") -> Attrib: +def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib: stream.skip_whitespace() attrib = stream.next_ident_or_star() if attrib is None and stream.peek() != ("DELIM", "|"): - raise SelectorSyntaxError("Expected '|', got %s" % (stream.peek(),)) - namespace: Optional[str] - op: Optional[str] + raise SelectorSyntaxError(f"Expected '|', got {stream.peek()}") + namespace: str | None + op: str | None if stream.peek() == ("DELIM", "|"): stream.next() if stream.peek() == ("DELIM", "="): @@ -817,30 +772,30 @@ def parse_attrib(selector: Tree, stream: "TokenStream") -> Attrib: namespace = op = None if op is None: stream.skip_whitespace() - next = stream.next() - if next == ("DELIM", "]"): - return Attrib(selector, namespace, typing.cast(str, attrib), "exists", None) - elif next == ("DELIM", "="): + next_ = stream.next() + if next_ == ("DELIM", "]"): + return Attrib(selector, namespace, cast("str", attrib), "exists", None) + if next_ == ("DELIM", "="): op = "=" - elif next.is_delim("^", "$", "*", "~", "|", "!") and ( + elif next_.is_delim("^", "$", "*", "~", "|", "!") and ( stream.peek() == ("DELIM", "=") ): - op = typing.cast(str, next.value) + "=" + op = cast("str", next_.value) + "=" stream.next() else: - raise SelectorSyntaxError("Operator expected, got %s" % (next,)) + raise SelectorSyntaxError(f"Operator expected, got {next_}") stream.skip_whitespace() value = stream.next() if value.type not in ("IDENT", "STRING"): - raise SelectorSyntaxError("Expected string or ident, got %s" % (value,)) + raise SelectorSyntaxError(f"Expected string or ident, got {value}") stream.skip_whitespace() - next = stream.next() - if next != ("DELIM", "]"): - raise SelectorSyntaxError("Expected ']', got %s" % (next,)) - return Attrib(selector, namespace, typing.cast(str, attrib), op, value) + next_ = stream.next() + if next_ != ("DELIM", "]"): + raise SelectorSyntaxError(f"Expected ']', got {next_}") + return Attrib(selector, namespace, cast("str", attrib), op, value) -def parse_series(tokens: Iterable["Token"]) -> Tuple[int, int]: +def parse_series(tokens: Iterable[Token]) -> tuple[int, int]: """ Parses the arguments for :nth-child() and friends. @@ -851,12 +806,12 @@ def parse_series(tokens: Iterable["Token"]) -> Tuple[int, int]: for token in tokens: if token.type == "STRING": raise ValueError("String tokens not allowed in series.") - s = "".join(typing.cast(str, token.value) for token in tokens).strip() + s = "".join(cast("str", token.value) for token in tokens).strip() if s == "odd": return 2, 1 - elif s == "even": + if s == "even": return 2, 0 - elif s == "n": + if s == "n": return 1, 0 if "n" not in s: # Just b @@ -865,42 +820,36 @@ def parse_series(tokens: Iterable["Token"]) -> Tuple[int, int]: a_as_int: int if not a: a_as_int = 1 - elif a == "-" or a == "+": + elif a in {"-", "+"}: a_as_int = int(a + "1") else: a_as_int = int(a) - b_as_int: int - if not b: - b_as_int = 0 - else: - b_as_int = int(b) + b_as_int = int(b) if b else 0 return a_as_int, b_as_int #### Token objects -class Token(Tuple[str, Optional[str]]): - @typing.overload +class Token(tuple[str, str | None]): # noqa: SLOT001 + @overload def __new__( cls, - type_: 'typing.Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"]', + type_: Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"], value: str, pos: int, - ) -> "Token": ... + ) -> Self: ... - @typing.overload - def __new__( - cls, type_: 'typing.Literal["EOF"]', value: None, pos: int - ) -> "Token": ... + @overload + def __new__(cls, type_: Literal["EOF"], value: None, pos: int) -> Self: ... - def __new__(cls, type_: str, value: Optional[str], pos: int) -> "Token": + def __new__(cls, type_: str, value: str | None, pos: int) -> Self: obj = tuple.__new__(cls, (type_, value)) obj.pos = pos return obj def __repr__(self) -> str: - return "<%s '%s' at %i>" % (self.type, self.value, self.pos) + return f"<{self.type} '{self.value}' at {self.pos}>" def is_delim(self, *values: str) -> bool: return self.type == "DELIM" and self.value in values @@ -912,22 +861,21 @@ def type(self) -> str: return self[0] @property - def value(self) -> Optional[str]: + def value(self) -> str | None: return self[1] def css(self) -> str: if self.type == "STRING": return repr(self.value) - else: - return typing.cast(str, self.value) + return cast("str", self.value) class EOFToken(Token): - def __new__(cls, pos: int) -> "EOFToken": - return typing.cast("EOFToken", Token.__new__(cls, "EOF", None, pos)) + def __new__(cls, pos: int) -> Self: + return Token.__new__(cls, "EOF", None, pos) def __repr__(self) -> str: - return "<%s at %i>" % (self.type, self.pos) + return f"<{self.type} at {self.pos}>" #### Tokenizer @@ -938,19 +886,17 @@ class TokenMacros: escape = unicode_escape + r"|\\[^\n\r\f0-9a-f]" string_escape = r"\\(?:\n|\r\n|\r|\f)|" + escape nonascii = r"[^\0-\177]" - nmchar = "[_a-z0-9-]|%s|%s" % (escape, nonascii) - nmstart = "[_a-z]|%s|%s" % (escape, nonascii) - + nmchar = f"[_a-z0-9-]|{escape}|{nonascii}" + nmstart = f"[_a-z]|{escape}|{nonascii}" -if typing.TYPE_CHECKING: - class MatchFunc(typing.Protocol): - def __call__( - self, string: str, pos: int = ..., endpos: int = ... - ) -> Optional["re.Match[str]"]: ... +class MatchFunc(Protocol): + def __call__( + self, string: str, pos: int = ..., endpos: int = ... + ) -> re.Match[str] | None: ... -def _compile(pattern: str) -> "MatchFunc": +def _compile(pattern: str) -> MatchFunc: return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match @@ -964,14 +910,14 @@ def _compile(pattern: str) -> "MatchFunc": } _sub_simple_escape = re.compile(r"\\(.)").sub -_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub +_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.IGNORECASE).sub _sub_newline_escape = re.compile(r"\\(?:\n|\r\n|\r|\f)").sub # Same as r'\1', but faster on CPython _replace_simple = operator.methodcaller("group", 1) -def _replace_unicode(match: "re.Match[str]") -> str: +def _replace_unicode(match: re.Match[str]) -> str: codepoint = int(match.group(1), 16) if codepoint > sys.maxunicode: codepoint = 0xFFFD @@ -980,8 +926,7 @@ def _replace_unicode(match: "re.Match[str]") -> str: def unescape_ident(value: str) -> str: value = _sub_unicode_escape(_replace_unicode, value) - value = _sub_simple_escape(_replace_simple, value) - return value + return _sub_simple_escape(_replace_simple, value) def tokenize(s: str) -> Iterator[Token]: @@ -1019,9 +964,9 @@ def tokenize(s: str) -> Iterator[Token]: assert match, "Should have found at least an empty match" end_pos = match.end() if end_pos == len_s: - raise SelectorSyntaxError("Unclosed string at %s" % pos) + raise SelectorSyntaxError(f"Unclosed string at {pos}") if s[end_pos] != quote: - raise SelectorSyntaxError("Invalid string at %s" % pos) + raise SelectorSyntaxError(f"Invalid string at {pos}") value = _sub_simple_escape( _replace_simple, _sub_unicode_escape( @@ -1056,44 +1001,44 @@ def tokenize(s: str) -> Iterator[Token]: class TokenStream: - def __init__(self, tokens: Iterable[Token], source: Optional[str] = None) -> None: - self.used: List[Token] = [] + def __init__(self, tokens: Iterable[Token], source: str | None = None) -> None: + self.used: list[Token] = [] self.tokens = iter(tokens) self.source = source - self.peeked: Optional[Token] = None + self.peeked: Token | None = None self._peeking = False self.next_token = self.tokens.__next__ def next(self) -> Token: if self._peeking: self._peeking = False - self.used.append(typing.cast(Token, self.peeked)) - return typing.cast(Token, self.peeked) - else: - next = self.next_token() - self.used.append(next) - return next + assert self.peeked is not None + self.used.append(self.peeked) + return self.peeked + next_ = self.next_token() + self.used.append(next_) + return next_ def peek(self) -> Token: if not self._peeking: self.peeked = self.next_token() self._peeking = True - return typing.cast(Token, self.peeked) + assert self.peeked is not None + return self.peeked def next_ident(self) -> str: - next = self.next() - if next.type != "IDENT": - raise SelectorSyntaxError("Expected ident, got %s" % (next,)) - return typing.cast(str, next.value) - - def next_ident_or_star(self) -> Optional[str]: - next = self.next() - if next.type == "IDENT": - return next.value - elif next == ("DELIM", "*"): + next_ = self.next() + if next_.type != "IDENT": + raise SelectorSyntaxError(f"Expected ident, got {next_}") + return cast("str", next_.value) + + def next_ident_or_star(self) -> str | None: + next_ = self.next() + if next_.type == "IDENT": + return next_.value + if next_ == ("DELIM", "*"): return None - else: - raise SelectorSyntaxError("Expected ident or '*', got %s" % (next,)) + raise SelectorSyntaxError(f"Expected ident or '*', got {next_}") def skip_whitespace(self) -> None: peek = self.peek() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 4255f66..96eac3f 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -1,21 +1,20 @@ -# -*- coding: utf-8 -*- """ - cssselect.xpath - =============== +cssselect.xpath +=============== - Translation of parsed CSS selectors to XPath expressions. +Translation of parsed CSS selectors to XPath expressions. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. """ +from __future__ import annotations + import re -import typing -import warnings -from typing import Optional +from typing import TYPE_CHECKING, cast from cssselect.parser import ( Attrib, @@ -37,16 +36,11 @@ parse_series, ) +if TYPE_CHECKING: + from collections.abc import Callable -@typing.no_type_check -def _unicode_safe_getattr(obj, name, default=None): - warnings.warn( - "_unicode_safe_getattr is deprecated and will be removed in the" - " next release, use getattr() instead", - DeprecationWarning, - stacklevel=2, - ) - return getattr(obj, name, default) + # typing.Self requires Python 3.11 + from typing_extensions import Self class ExpressionError(SelectorError, RuntimeError): @@ -71,15 +65,15 @@ def __init__( def __str__(self) -> str: path = str(self.path) + str(self.element) if self.condition: - path += "[%s]" % self.condition + path += f"[{self.condition}]" return path def __repr__(self) -> str: - return "%s[%s]" % (self.__class__.__name__, self) + return f"{self.__class__.__name__}[{self}]" - def add_condition(self, condition: str, conjuction: str = "and") -> "XPathExpr": + def add_condition(self, condition: str, conjuction: str = "and") -> Self: if self.condition: - self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition) + self.condition = f"({self.condition}) {conjuction} ({condition})" else: self.condition = condition return self @@ -88,9 +82,7 @@ def add_name_test(self) -> None: if self.element == "*": # We weren't doing a test anyway return - self.add_condition( - "name() = %s" % GenericTranslator.xpath_literal(self.element) - ) + self.add_condition(f"name() = {GenericTranslator.xpath_literal(self.element)}") self.element = "*" def add_star_prefix(self) -> None: @@ -103,10 +95,10 @@ def add_star_prefix(self) -> None: def join( self, combiner: str, - other: "XPathExpr", - closing_combiner: Optional[str] = None, + other: XPathExpr, + closing_combiner: str | None = None, has_inner_condition: bool = False, - ) -> "XPathExpr": + ) -> Self: path = str(self) + combiner # Any "star prefix" is redundant when joining. if other.path != "*/": @@ -260,7 +252,7 @@ def selector_to_xpath( """ tree = getattr(selector, "parsed_tree", None) if not tree: - raise TypeError("Expected a parsed selector, got %r" % (selector,)) + raise TypeError(f"Expected a parsed selector, got {selector!r}") xpath = self.xpath(tree) assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return' if translate_pseudo_elements and selector.pseudo_element: @@ -282,58 +274,61 @@ def xpath_pseudo_element( def xpath_literal(s: str) -> str: s = str(s) if "'" not in s: - s = "'%s'" % s + s = f"'{s}'" elif '"' not in s: - s = '"%s"' % s + s = f'"{s}"' else: - s = "concat(%s)" % ",".join( - [ - (("'" in part) and '"%s"' or "'%s'") % part - for part in split_at_single_quotes(s) - if part - ] - ) + parts_quoted = [ + f'"{part}"' if "'" in part else f"'{part}'" + for part in split_at_single_quotes(s) + if part + ] + s = "concat({})".format(",".join(parts_quoted)) return s def xpath(self, parsed_selector: Tree) -> XPathExpr: """Translate any parsed selector object.""" type_name = type(parsed_selector).__name__ - method = getattr(self, "xpath_%s" % type_name.lower(), None) + method = cast( + "Callable[[Tree], XPathExpr] | None", + getattr(self, f"xpath_{type_name.lower()}", None), + ) if method is None: - raise ExpressionError("%s is not supported." % type_name) - return typing.cast(XPathExpr, method(parsed_selector)) + raise ExpressionError(f"{type_name} is not supported.") + return method(parsed_selector) # Dispatched by parsed object type def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr: """Translate a combined selector.""" combinator = self.combinator_mapping[combined.combinator] - method = getattr(self, "xpath_%s_combinator" % combinator) - return typing.cast( - XPathExpr, - method(self.xpath(combined.selector), self.xpath(combined.subselector)), + method = cast( + "Callable[[XPathExpr, XPathExpr], XPathExpr]", + getattr(self, f"xpath_{combinator}_combinator"), ) + return method(self.xpath(combined.selector), self.xpath(combined.subselector)) def xpath_negation(self, negation: Negation) -> XPathExpr: xpath = self.xpath(negation.selector) sub_xpath = self.xpath(negation.subselector) sub_xpath.add_name_test() if sub_xpath.condition: - return xpath.add_condition("not(%s)" % sub_xpath.condition) - else: - return xpath.add_condition("0") + return xpath.add_condition(f"not({sub_xpath.condition})") + return xpath.add_condition("0") def xpath_relation(self, relation: Relation) -> XPathExpr: xpath = self.xpath(relation.selector) combinator = relation.combinator subselector = relation.subselector right = self.xpath(subselector.parsed_tree) - method = getattr( - self, - "xpath_relation_%s_combinator" - % self.combinator_mapping[typing.cast(str, combinator.value)], + method = cast( + "Callable[[XPathExpr, XPathExpr], XPathExpr]", + getattr( + self, + f"xpath_relation_{self.combinator_mapping[cast('str', combinator.value)]}_combinator", + ), ) - return typing.cast(XPathExpr, method(xpath, right)) + return method(xpath, right) def xpath_matching(self, matching: Matching) -> XPathExpr: xpath = self.xpath(matching.selector) @@ -355,46 +350,53 @@ def xpath_specificityadjustment(self, matching: SpecificityAdjustment) -> XPathE def xpath_function(self, function: Function) -> XPathExpr: """Translate a functional pseudo-class.""" - method_name = "xpath_%s_function" % function.name.replace("-", "_") - method = getattr(self, method_name, None) + method_name = "xpath_{}_function".format(function.name.replace("-", "_")) + method = cast( + "Callable[[XPathExpr, Function], XPathExpr] | None", + getattr(self, method_name, None), + ) if not method: - raise ExpressionError("The pseudo-class :%s() is unknown" % function.name) - return typing.cast(XPathExpr, method(self.xpath(function.selector), function)) + raise ExpressionError(f"The pseudo-class :{function.name}() is unknown") + return method(self.xpath(function.selector), function) def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr: """Translate a pseudo-class.""" - method_name = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_") - method = getattr(self, method_name, None) + method_name = "xpath_{}_pseudo".format(pseudo.ident.replace("-", "_")) + method = cast( + "Callable[[XPathExpr], XPathExpr] | None", + getattr(self, method_name, None), + ) if not method: # TODO: better error message for pseudo-elements? - raise ExpressionError("The pseudo-class :%s is unknown" % pseudo.ident) - return typing.cast(XPathExpr, method(self.xpath(pseudo.selector))) + raise ExpressionError(f"The pseudo-class :{pseudo.ident} is unknown") + return method(self.xpath(pseudo.selector)) def xpath_attrib(self, selector: Attrib) -> XPathExpr: """Translate an attribute selector.""" operator = self.attribute_operator_mapping[selector.operator] - method = getattr(self, "xpath_attrib_%s" % operator) + method = cast( + "Callable[[XPathExpr, str, str | None], XPathExpr]", + getattr(self, f"xpath_attrib_{operator}"), + ) if self.lower_case_attribute_names: name = selector.attrib.lower() else: name = selector.attrib safe = is_safe_name(name) if selector.namespace: - name = "%s:%s" % (selector.namespace, name) + name = f"{selector.namespace}:{name}" safe = safe and is_safe_name(selector.namespace) if safe: attrib = "@" + name else: - attrib = "attribute::*[name() = %s]" % self.xpath_literal(name) + attrib = f"attribute::*[name() = {self.xpath_literal(name)}]" if selector.value is None: value = None elif self.lower_case_attribute_values: - value = typing.cast(str, selector.value.value).lower() + value = cast("str", selector.value.value).lower() else: value = selector.value.value - return typing.cast( - XPathExpr, method(self.xpath(selector.selector), attrib, value) - ) + return method(self.xpath(selector.selector), attrib, value) def xpath_class(self, class_selector: Class) -> XPathExpr: """Translate a class selector.""" @@ -420,7 +422,7 @@ def xpath_element(self, selector: Element) -> XPathExpr: if selector.namespace: # Namespace prefixes are case-sensitive. # http://www.w3.org/TR/css3-namespace/#prefixes - element = "%s:%s" % (selector.namespace, element) + element = f"{selector.namespace}:{element}" safe = safe and bool(is_safe_name(selector.namespace)) xpath = self.xpathexpr_cls(element=element) if not safe: @@ -471,12 +473,9 @@ def xpath_relation_direct_adjacent_combinator( self, left: XPathExpr, right: XPathExpr ) -> XPathExpr: """right is a sibling immediately after left; select left""" - xpath = left.add_condition( - "following-sibling::*[(name() = '{}') and (position() = 1)]".format( - right.element - ) + return left.add_condition( + f"following-sibling::*[(name() = '{right.element}') and (position() = 1)]" ) - return xpath def xpath_relation_indirect_adjacent_combinator( self, left: XPathExpr, right: XPathExpr @@ -495,8 +494,8 @@ def xpath_nth_child_function( ) -> XPathExpr: try: a, b = parse_series(function.arguments) - except ValueError: - raise ExpressionError("Invalid series: '%r'" % function.arguments) + except ValueError as ex: + raise ExpressionError(f"Invalid series: '{function.arguments!r}'") from ex # From https://www.w3.org/TR/css3-selectors/#structural-pseudos: # @@ -558,23 +557,20 @@ def xpath_nth_child_function( # `add_name_test` boolean is inverted and somewhat counter-intuitive: # # nth_of_type() calls nth_child(add_name_test=False) - if add_name_test: - nodetest = "*" - else: - nodetest = "%s" % xpath.element + nodetest = "*" if add_name_test else f"{xpath.element}" # count siblings before or after the element if not last: - siblings_count = "count(preceding-sibling::%s)" % nodetest + siblings_count = f"count(preceding-sibling::{nodetest})" else: - siblings_count = "count(following-sibling::%s)" % nodetest + siblings_count = f"count(following-sibling::{nodetest})" # special case of fixed position: nth-*(0n+b) # if a == 0: # ~~~~~~~~~~ # count(***-sibling::***) = b-1 if a == 0: - return xpath.add_condition("%s = %s" % (siblings_count, b_min_1)) + return xpath.add_condition(f"{siblings_count} = {b_min_1}") expressions = [] @@ -583,12 +579,12 @@ def xpath_nth_child_function( # so if a>0, and (b-1)<=0, an "n" exists to satisfy this, # therefore, the predicate is only interesting if (b-1)>0 if b_min_1 > 0: - expressions.append("%s >= %s" % (siblings_count, b_min_1)) + expressions.append(f"{siblings_count} >= {b_min_1}") else: # if a<0, and (b-1)<0, no "n" satisfies this, # this is tested above as an early exist condition # otherwise, - expressions.append("%s <= %s" % (siblings_count, b_min_1)) + expressions.append(f"{siblings_count} <= {b_min_1}") # operations modulo 1 or -1 are simpler, one only needs to verify: # @@ -611,15 +607,11 @@ def xpath_nth_child_function( b_neg = (-b_min_1) % abs(a) if b_neg != 0: - b_neg_as_str = "+%s" % b_neg - left = "(%s %s)" % (left, b_neg_as_str) + left = f"({left} +{b_neg})" - expressions.append("%s mod %s = 0" % (left, a)) + expressions.append(f"{left} mod {a} = 0") - if len(expressions) > 1: - template = "(%s)" - else: - template = "%s" + template = "(%s)" if len(expressions) > 1 else "%s" xpath.add_condition( " and ".join(template % expression for expression in expressions) ) @@ -653,20 +645,18 @@ def xpath_contains_function( # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :contains(), got %r" - % function.arguments + f"Expected a single string or ident for :contains(), got {function.arguments!r}" ) - value = typing.cast(str, function.arguments[0].value) - return xpath.add_condition("contains(., %s)" % self.xpath_literal(value)) + value = cast("str", function.arguments[0].value) + return xpath.add_condition(f"contains(., {self.xpath_literal(value)})") def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :lang(), got %r" - % function.arguments + f"Expected a single string or ident for :lang(), got {function.arguments!r}" ) - value = typing.cast(str, function.arguments[0].value) - return xpath.add_condition("lang(%s)" % (self.xpath_literal(value))) + value = cast("str", function.arguments[0].value) + return xpath.add_condition(f"lang({self.xpath_literal(value)})") # Pseudo: dispatch by pseudo-class name @@ -690,12 +680,12 @@ def xpath_last_child_pseudo(self, xpath: XPathExpr) -> XPathExpr: def xpath_first_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:first-of-type is not implemented") - return xpath.add_condition("count(preceding-sibling::%s) = 0" % xpath.element) + return xpath.add_condition(f"count(preceding-sibling::{xpath.element}) = 0") def xpath_last_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:last-of-type is not implemented") - return xpath.add_condition("count(following-sibling::%s) = 0" % xpath.element) + return xpath.add_condition(f"count(following-sibling::{xpath.element}) = 0") def xpath_only_child_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition("count(parent::*/child::*) = 1") @@ -703,7 +693,7 @@ def xpath_only_child_pseudo(self, xpath: XPathExpr) -> XPathExpr: def xpath_only_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr: if xpath.element == "*": raise ExpressionError("*:only-of-type is not implemented") - return xpath.add_condition("count(parent::*/child::%s) = 1" % xpath.element) + return xpath.add_condition(f"count(parent::*/child::{xpath.element}) = 1") def xpath_empty_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition("not(*) and not(string-length())") @@ -725,92 +715,84 @@ def pseudo_never_matches(self, xpath: XPathExpr) -> XPathExpr: # Attrib: dispatch by attribute operator def xpath_attrib_exists( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert not value xpath.add_condition(name) return xpath def xpath_attrib_equals( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert value is not None - xpath.add_condition("%s = %s" % (name, self.xpath_literal(value))) + xpath.add_condition(f"{name} = {self.xpath_literal(value)}") return xpath def xpath_attrib_different( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert value is not None # FIXME: this seems like a weird hack... if value: - xpath.add_condition( - "not(%s) or %s != %s" % (name, name, self.xpath_literal(value)) - ) + xpath.add_condition(f"not({name}) or {name} != {self.xpath_literal(value)}") else: - xpath.add_condition("%s != %s" % (name, self.xpath_literal(value))) + xpath.add_condition(f"{name} != {self.xpath_literal(value)}") return xpath def xpath_attrib_includes( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value and is_non_whitespace(value): + arg = self.xpath_literal(" " + value + " ") xpath.add_condition( - "%s and contains(concat(' ', normalize-space(%s), ' '), %s)" - % (name, name, self.xpath_literal(" " + value + " ")) + f"{name} and contains(concat(' ', normalize-space({name}), ' '), {arg})" ) else: xpath.add_condition("0") return xpath def xpath_attrib_dashmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: assert value is not None + arg = self.xpath_literal(value) + arg_dash = self.xpath_literal(value + "-") # Weird, but true... xpath.add_condition( - "%s and (%s = %s or starts-with(%s, %s))" - % ( - name, - name, - self.xpath_literal(value), - name, - self.xpath_literal(value + "-"), - ) + f"{name} and ({name} = {arg} or starts-with({name}, {arg_dash}))" ) return xpath def xpath_attrib_prefixmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value: xpath.add_condition( - "%s and starts-with(%s, %s)" % (name, name, self.xpath_literal(value)) + f"{name} and starts-with({name}, {self.xpath_literal(value)})" ) else: xpath.add_condition("0") return xpath def xpath_attrib_suffixmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value: # Oddly there is a starts-with in XPath 1.0, but not ends-with xpath.add_condition( - "%s and substring(%s, string-length(%s)-%s) = %s" - % (name, name, name, len(value) - 1, self.xpath_literal(value)) + f"{name} and substring({name}, string-length({name})-{len(value) - 1}) = {self.xpath_literal(value)}" ) else: xpath.add_condition("0") return xpath def xpath_attrib_substringmatch( - self, xpath: XPathExpr, name: str, value: Optional[str] + self, xpath: XPathExpr, name: str, value: str | None ) -> XPathExpr: if value: # Attribute selectors are case sensitive xpath.add_condition( - "%s and contains(%s, %s)" % (name, name, self.xpath_literal(value)) + f"{name} and contains({name}, {self.xpath_literal(value)})" ) else: xpath.add_condition("0") @@ -843,7 +825,7 @@ def __init__(self, xhtml: bool = False) -> None: self.lower_case_element_names = True self.lower_case_attribute_names = True - def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # FIXME: is this really all the elements? return xpath.add_condition( "(@selected and name(.) = 'option') or " @@ -855,21 +837,20 @@ def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr: if function.argument_types() not in (["STRING"], ["IDENT"]): raise ExpressionError( - "Expected a single string or ident for :lang(), got %r" - % function.arguments + f"Expected a single string or ident for :lang(), got {function.arguments!r}" ) value = function.arguments[0].value assert value + arg = self.xpath_literal(value.lower() + "-") return xpath.add_condition( "ancestor-or-self::*[@lang][1][starts-with(concat(" # XPath 1.0 has no lower-case function... - "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " + f"translate(@{self.lang_attribute}, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " "'abcdefghijklmnopqrstuvwxyz'), " - "'-'), %s)]" - % (self.lang_attribute, self.xpath_literal(value.lower() + "-")) + f"'-'), {arg})]" ) - def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition( "@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area')" ) @@ -877,7 +858,7 @@ def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore # Links are never visited, the implementation for :visited is the same # as in GenericTranslator - def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # http://www.w3.org/TR/html5/section-index.html#attributes-1 return xpath.add_condition( """ @@ -907,7 +888,7 @@ def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore # FIXME: in the second half, add "and is not a descendant of that # fieldset element's first legend element child, if any." - def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore + def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # http://www.w3.org/TR/html5/section-index.html#attributes-1 return xpath.add_condition( """ diff --git a/docs/conf.py b/docs/conf.py index aa5ae22..da3f023 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # cssselect documentation build configuration file, created by # sphinx-quickstart on Tue Mar 27 14:20:34 2012. @@ -12,9 +11,8 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import os import re -import sys +from pathlib import Path # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -34,7 +32,7 @@ templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = ".rst" +source_suffix = {".rst": "restructuredtext"} # The encoding of source files. # source_encoding = 'utf-8-sig' @@ -44,17 +42,14 @@ # General information about the project. project = "cssselect" -copyright = "2012-2017, Simon Sapin, Scrapy developers" +project_copyright = "2012-2017, Simon Sapin, Scrapy developers" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The full version, including alpha/beta/rc tags. -with open( - os.path.join(os.path.dirname(__file__), "..", "cssselect", "__init__.py") -) as init_file: - init_py = init_file.read() +init_py = (Path(__file__).parent.parent / "cssselect" / "__init__.py").read_text() release = re.search('VERSION = "([^"]+)"', init_py).group(1) # The short X.Y version. version = release.rstrip("dev") @@ -258,5 +253,5 @@ nitpicky = True nitpick_ignore = [ # explicitly not a part of the public API - ("py:class", "cssselect.parser.Token"), + ("py:class", "Token"), ] diff --git a/docs/requirements.txt b/docs/requirements.txt index d5476d8..21cb2eb 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,2 @@ -sphinx==7.2.6 -sphinx-rtd-theme==2.0.0 +sphinx==8.2.3 +sphinx-rtd-theme==3.0.2 diff --git a/pylintrc b/pylintrc deleted file mode 100644 index e35425e..0000000 --- a/pylintrc +++ /dev/null @@ -1,32 +0,0 @@ -[MASTER] -persistent=no - -[MESSAGES CONTROL] -disable=assignment-from-no-return, - c-extension-no-member, - consider-using-f-string, - consider-using-in, - fixme, - inconsistent-return-statements, - invalid-name, - missing-class-docstring, - missing-function-docstring, - missing-module-docstring, - multiple-imports, - no-else-return, - no-member, - raise-missing-from, - redefined-builtin, - redefined-outer-name, - too-few-public-methods, - too-many-arguments, - too-many-branches, - too-many-function-args, - too-many-lines, - too-many-public-methods, - too-many-statements, - undefined-variable, - unidiomatic-typecheck, - unspecified-encoding, - unused-argument, - unused-import, diff --git a/pyproject.toml b/pyproject.toml index 261fe3e..c7c54a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,239 @@ -[tool.isort] -profile = "black" -multi_line_output = 3 +[build-system] +build-backend = "hatchling.build" +requires = ["hatchling>=1.27.0"] + +[project] +name = "cssselect" +license = "BSD-3-Clause" +license-files = ["LICENSE", "AUTHORS"] +description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0" +readme = "README.rst" +authors = [{ name = "Ian Bicking", email = "ianb@colorstudy.com" }] +maintainers = [{ name = "Paul Tremberth", email = "paul.tremberth@gmail.com" }] +requires-python = ">=3.10" +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dynamic = ["version"] + +[project.urls] +"Homepage" = "https://github.com/scrapy/cssselect" + +[tool.hatch.version] +path = "cssselect/__init__.py" + +[tool.hatch.build.targets.sdist] +include = [ + "/cssselect", + "/docs", + "/tests", + "/CHANGES", + "/README.rst", + "/tox.ini", +] +exclude = [ + "/docs/_build", +] + +[tool.hatch.build.targets.wheel] +packages = ["cssselect"] + +[tool.bumpversion] +current_version = "1.4.0" +commit = true +tag = true + +[[tool.bumpversion.files]] +filename = "cssselect/__init__.py" + +[[tool.bumpversion.files]] +filename = "CHANGES" +search = "^Unreleased\\.$" +replace = "Released on {now:%Y-%m-%d}." +regex = true + +[tool.coverage.run] +branch = true +source = ["cssselect"] + +[tool.coverage.report] +exclude_also = [ + "def __repr__", + "if sys.version_info", + "if __name__ == '__main__':", +] [tool.mypy] -check_untyped_defs = true -ignore_missing_imports = true -no_warn_no_return = true +strict = true + +[tool.pylint.MASTER] +persistent = "no" +extension-pkg-allow-list = ["lxml"] + +[tool.pylint."MESSAGES CONTROL"] +enable = [ + "useless-suppression", +] +disable = [ + "consider-using-f-string", + "fixme", + "invalid-name", + "line-too-long", + "missing-class-docstring", + "missing-function-docstring", + "missing-module-docstring", + "no-member", + "not-callable", + "redefined-builtin", + "redefined-outer-name", + "too-few-public-methods", + "too-many-arguments", + "too-many-branches", + "too-many-function-args", + "too-many-lines", + "too-many-locals", + "too-many-positional-arguments", + "too-many-public-methods", + "too-many-statements", + "unused-argument", +] + +[tool.pytest.ini_options] +testpaths = ["tests"] + +[tool.ruff.lint] +extend-select = [ + # flake8-builtins + "A", + # flake8-async + "ASYNC", + # flake8-bugbear + "B", + # flake8-comprehensions + "C4", + # flake8-commas + "COM", + # pydocstyle + "D", + # flake8-future-annotations + "FA", + # flynt + "FLY", + # refurb + "FURB", + # isort + "I", + # flake8-implicit-str-concat + "ISC", + # flake8-logging + "LOG", + # Perflint + "PERF", + # pygrep-hooks + "PGH", + # flake8-pie + "PIE", + # pylint + "PL", + # flake8-pytest-style + "PT", + # flake8-use-pathlib + "PTH", + # flake8-pyi + "PYI", + # flake8-quotes + "Q", + # flake8-return + "RET", + # flake8-raise + "RSE", + # Ruff-specific rules + "RUF", + # flake8-bandit + "S", + # flake8-simplify + "SIM", + # flake8-slots + "SLOT", + # flake8-debugger + "T10", + # flake8-type-checking + "TC", + # pyupgrade + "UP", + # pycodestyle warnings + "W", + # flake8-2020 + "YTT", +] +ignore = [ + # Trailing comma missing + "COM812", + # Missing docstring in public module + "D100", + # Missing docstring in public class + "D101", + # Missing docstring in public method + "D102", + # Missing docstring in public function + "D103", + # Missing docstring in public package + "D104", + # Missing docstring in magic method + "D105", + # Missing docstring in public nested class + "D106", + # Missing docstring in __init__ + "D107", + # One-line docstring should fit on one line with quotes + "D200", + # No blank lines allowed after function docstring + "D202", + # 1 blank line required between summary line and description + "D205", + # Multi-line docstring closing quotes should be on a separate line + "D209", + # First line should end with a period + "D400", + # First line should be in imperative mood; try rephrasing + "D401", + # First line should not be the function's "signature" + "D402", + # First word of the first line should be properly capitalized + "D403", + # Too many return statements + "PLR0911", + # Too many branches + "PLR0912", + # Too many arguments in function definition + "PLR0913", + # Too many statements + "PLR0915", + # Magic value used in comparison + "PLR2004", + # String contains ambiguous {}. + "RUF001", + # Docstring contains ambiguous {}. + "RUF002", + # Comment contains ambiguous {}. + "RUF003", + # Mutable class attributes should be annotated with `typing.ClassVar` + "RUF012", + # Use of `assert` detected + "S101", +] + +[tool.ruff.lint.isort] +split-on-trailing-comma = false -[tool.black] -target-version = ["py38", "py39", "py310", "py311", "py312"] \ No newline at end of file +[tool.ruff.lint.pydocstyle] +convention = "pep257" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index b8c93b1..0000000 --- a/setup.cfg +++ /dev/null @@ -1,13 +0,0 @@ -[build_sphinx] -source-dir = docs -build-dir = docs/_build -#all_files = 1 - -[upload_sphinx] # Sphinx-PyPI-upload -upload-dir = docs/_build/html - -[tool:pytest] -testpaths = tests - -[bdist_wheel] -universal = 1 diff --git a/setup.py b/setup.py deleted file mode 100644 index f01a174..0000000 --- a/setup.py +++ /dev/null @@ -1,47 +0,0 @@ -# -*- coding: utf-8 -*- - -import os.path -import re - -from setuptools import setup - -ROOT = os.path.dirname(__file__) -with open(os.path.join(ROOT, "README.rst")) as readme_file: - README = readme_file.read() -with open(os.path.join(ROOT, "cssselect", "__init__.py")) as init_file: - INIT_PY = init_file.read() -VERSION = re.search('VERSION = "([^"]+)"', INIT_PY).group(1) - - -setup( - name="cssselect", - version=VERSION, - author="Ian Bicking", - author_email="ianb@colorstudy.com", - maintainer="Paul Tremberth", - maintainer_email="paul.tremberth@gmail.com", - description="cssselect parses CSS3 Selectors and translates them to XPath 1.0", - long_description=README, - url="https://github.com/scrapy/cssselect", - license="BSD", - packages=["cssselect"], - test_suite="cssselect.tests", - package_data={ - "cssselect": ["py.typed"], - }, - include_package_data=True, - python_requires=">=3.8", - classifiers=[ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", - ], -) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 32c1683..dc67bb7 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -1,27 +1,29 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """ - Tests for cssselect - =================== +Tests for cssselect +=================== - These tests can be run either by py.test or by the standard library's - unittest. They use plain ``assert`` statements and do little reporting - themselves in case of failure. +These tests can be run either by py.test or by the standard library's +unittest. They use plain ``assert`` statements and do little reporting +themselves in case of failure. - Use py.test to get fancy error reporting and assert introspection. +Use py.test to get fancy error reporting and assert introspection. - :copyright: (c) 2007-2012 Ian Bicking and contributors. - See AUTHORS for more details. - :license: BSD, see LICENSE for more details. +:copyright: (c) 2007-2012 Ian Bicking and contributors. +See AUTHORS for more details. +:license: BSD, see LICENSE for more details. """ +from __future__ import annotations + import sys import typing import unittest -from typing import List, Optional, Sequence, Tuple +from typing import TYPE_CHECKING +import pytest from lxml import etree, html from cssselect import ( @@ -41,6 +43,9 @@ ) from cssselect.xpath import XPathExpr +if TYPE_CHECKING: + from collections.abc import Sequence + class TestCssselect(unittest.TestCase): def test_tokenizer(self) -> None: @@ -70,16 +75,13 @@ def test_tokenizer(self) -> None: ] def test_parser(self) -> None: - def repr_parse(css: str) -> List[str]: + def repr_parse(css: str) -> list[str]: selectors = parse(css) for selector in selectors: assert selector.pseudo_element is None - return [ - repr(selector.parsed_tree).replace("(u'", "('") - for selector in selectors - ] + return [repr(selector.parsed_tree) for selector in selectors] - def parse_many(first: str, *others: str) -> List[str]: + def parse_many(first: str, *others: str) -> list[str]: result = repr_parse(first) for other in others: assert repr_parse(other) == result @@ -185,18 +187,18 @@ def parse_many(first: str, *others: str) -> List[str]: ] def test_pseudo_elements(self) -> None: - def parse_pseudo(css: str) -> List[Tuple[str, Optional[str]]]: - result: List[Tuple[str, Optional[str]]] = [] + def parse_pseudo(css: str) -> list[tuple[str, str | None]]: + result: list[tuple[str, str | None]] = [] for selector in parse(css): pseudo = selector.pseudo_element pseudo = str(pseudo) if pseudo else pseudo # No Symbol here - assert pseudo is None or type(pseudo) is str - selector_as_str = repr(selector.parsed_tree).replace("(u'", "('") + assert pseudo is None or isinstance(pseudo, str) + selector_as_str = repr(selector.parsed_tree) result.append((selector_as_str, pseudo)) return result - def parse_one(css: str) -> Tuple[str, Optional[str]]: + def parse_one(css: str) -> tuple[str, str | None]: result = parse_pseudo(css) assert len(result) == 1 return result[0] @@ -257,7 +259,7 @@ def test_pseudo_repr(css: str) -> str: # Special cases for CSS 2.1 pseudo-elements are ignored by default for pseudo in ("after", "before", "first-line", "first-letter"): - (selector,) = parse("e:%s" % pseudo) + (selector,) = parse(f"e:{pseudo}") assert selector.pseudo_element == pseudo assert GenericTranslator().selector_to_xpath(selector, prefix="") == "e" @@ -267,12 +269,8 @@ def test_pseudo_repr(css: str) -> str: (selector,) = parse("e::foo") assert selector.pseudo_element == "foo" assert tr.selector_to_xpath(selector, prefix="") == "e" - self.assertRaises( - ExpressionError, - tr.selector_to_xpath, - selector, - translate_pseudo_elements=True, - ) + with pytest.raises(ExpressionError): + tr.selector_to_xpath(selector, translate_pseudo_elements=True) # Special test for the unicode symbols and ':scope' element if check # Errors if use repr() instead of __repr__() @@ -280,7 +278,7 @@ def test_pseudo_repr(css: str) -> str: assert test_pseudo_repr(":scope") == "Pseudo[Element[*]:scope]" def test_specificity(self) -> None: - def specificity(css: str) -> Tuple[int, int, int]: + def specificity(css: str) -> tuple[int, int, int]: selectors = parse(css) assert len(selectors) == 1 return selectors[0].specificity() @@ -326,7 +324,7 @@ def specificity(css: str) -> Tuple[int, int, int]: ) def test_css_export(self) -> None: - def css2css(css: str, res: Optional[str] = None) -> None: + def css2css(css: str, res: str | None = None) -> None: selectors = parse(css) assert len(selectors) == 1 assert selectors[0].canonical() == (res or css) @@ -365,12 +363,11 @@ def css2css(css: str, res: Optional[str] = None) -> None: css2css("foo > *") def test_parse_errors(self) -> None: - def get_error(css: str) -> Optional[str]: + def get_error(css: str) -> str | None: try: parse(css) except SelectorSyntaxError: - # Py2, Py3, ... - return str(sys.exc_info()[1]).replace("(u'", "('") + return str(sys.exc_info()[1]) return None assert get_error("attributes(href)/html/body/a") == ( @@ -452,7 +449,7 @@ def xpath(css: str) -> str: assert xpath("e[foo|bar]") == "e[@foo:bar]" assert xpath('e[foo="bar"]') == "e[@foo = 'bar']" assert xpath('e[foo~="bar"]') == ( - "e[@foo and contains(" "concat(' ', normalize-space(@foo), ' '), ' bar ')]" + "e[@foo and contains(concat(' ', normalize-space(@foo), ' '), ' bar ')]" ) assert xpath('e[foo^="bar"]') == ("e[@foo and starts-with(@foo, 'bar')]") assert xpath('e[foo$="bar"]') == ( @@ -460,7 +457,7 @@ def xpath(css: str) -> str: ) assert xpath('e[foo*="bar"]') == ("e[@foo and contains(@foo, 'bar')]") assert xpath('e[hreflang|="en"]') == ( - "e[@hreflang and (" "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]" + "e[@hreflang and (@hreflang = 'en' or starts-with(@hreflang, 'en-'))]" ) # --- nth-* and nth-last-* ------------------------------------- @@ -567,19 +564,32 @@ def xpath(css: str) -> str: assert xpath(r"[h\a0 ref]") == ("*[attribute::*[name() = 'h ref']]") # h\xa0ref assert xpath(r"[h\]ref]") == ("*[attribute::*[name() = 'h]ref']]") - self.assertRaises(ExpressionError, xpath, ":fİrst-child") - self.assertRaises(ExpressionError, xpath, ":first-of-type") - self.assertRaises(ExpressionError, xpath, ":only-of-type") - self.assertRaises(ExpressionError, xpath, ":last-of-type") - self.assertRaises(ExpressionError, xpath, ":nth-of-type(1)") - self.assertRaises(ExpressionError, xpath, ":nth-last-of-type(1)") - self.assertRaises(ExpressionError, xpath, ":nth-child(n-)") - self.assertRaises(ExpressionError, xpath, ":after") - self.assertRaises(ExpressionError, xpath, ":lorem-ipsum") - self.assertRaises(ExpressionError, xpath, ":lorem(ipsum)") - self.assertRaises(ExpressionError, xpath, "::lorem-ipsum") - self.assertRaises(TypeError, GenericTranslator().css_to_xpath, 4) - self.assertRaises(TypeError, GenericTranslator().selector_to_xpath, "foo") + with pytest.raises(ExpressionError): + xpath(":fİrst-child") + with pytest.raises(ExpressionError): + xpath(":first-of-type") + with pytest.raises(ExpressionError): + xpath(":only-of-type") + with pytest.raises(ExpressionError): + xpath(":last-of-type") + with pytest.raises(ExpressionError): + xpath(":nth-of-type(1)") + with pytest.raises(ExpressionError): + xpath(":nth-last-of-type(1)") + with pytest.raises(ExpressionError): + xpath(":nth-child(n-)") + with pytest.raises(ExpressionError): + xpath(":after") + with pytest.raises(ExpressionError): + xpath(":lorem-ipsum") + with pytest.raises(ExpressionError): + xpath(":lorem(ipsum)") + with pytest.raises(ExpressionError): + xpath("::lorem-ipsum") + with pytest.raises(TypeError): + GenericTranslator().css_to_xpath(4) # type: ignore[arg-type] + with pytest.raises(TypeError): + GenericTranslator().selector_to_xpath("foo") # type: ignore[arg-type] def test_unicode(self) -> None: css = ".a\xc1b" @@ -631,24 +641,23 @@ def xpath_pseudo_element( self, xpath: XPathExpr, pseudo_element: PseudoElement ) -> XPathExpr: if isinstance(pseudo_element, FunctionalPseudoElement): - method_name = "xpath_%s_functional_pseudo_element" % ( + method_name = "xpath_{}_functional_pseudo_element".format( pseudo_element.name.replace("-", "_") ) method = getattr(self, method_name, None) if not method: raise ExpressionError( - "The functional pseudo-element ::%s() is unknown" - % pseudo_element.name + f"The functional pseudo-element ::{pseudo_element.name}() is unknown" ) xpath = method(xpath, pseudo_element.arguments) else: - method_name = "xpath_%s_simple_pseudo_element" % ( + method_name = "xpath_{}_simple_pseudo_element".format( pseudo_element.replace("-", "_") ) method = getattr(self, method_name, None) if not method: raise ExpressionError( - "The pseudo-element ::%s is unknown" % pseudo_element + f"The pseudo-element ::{pseudo_element} is unknown" ) xpath = method(xpath) return xpath @@ -660,7 +669,7 @@ def xpath_nb_attr_function( ) -> XPathExpr: assert function.arguments[0].value nb_attributes = int(function.arguments[0].value) - return xpath.add_condition("count(@*)=%d" % nb_attributes) + return xpath.add_condition(f"count(@*)={nb_attributes}") # pseudo-class: # elements that have 5 attributes @@ -674,7 +683,7 @@ def xpath_attr_functional_pseudo_element( ) -> XPathExpr: attribute_name = arguments[0].value other = XPathExpr( - "@%s" % attribute_name, + f"@{attribute_name}", "", ) return xpath.join("/", other) @@ -719,19 +728,17 @@ def xpath(css: str) -> str: ) assert xpath(":scope") == "descendant-or-self::*[1]" assert xpath(":first-or-second[href]") == ( - "descendant-or-self::*[(@id = 'first' or @id = 'second') " "and (@href)]" + "descendant-or-self::*[(@id = 'first' or @id = 'second') and (@href)]" ) assert str(XPathExpr("", "", condition="@href")) == "[@href]" document = etree.fromstring(OPERATOR_PRECEDENCE_IDS) - sort_key = dict( - (el, count) for count, el in enumerate(document.iter()) - ).__getitem__ + sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__ - def operator_id(selector: str) -> List[str]: + def operator_id(selector: str) -> list[str]: xpath = CustomTranslator().css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast("list[etree._Element]", document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] @@ -740,9 +747,11 @@ def operator_id(selector: str) -> List[str]: assert operator_id("[href]:first-or-second") == ["second"] def test_series(self) -> None: - def series(css: str) -> Optional[Tuple[int, int]]: - (selector,) = parse(":nth-child(%s)" % css) - args = typing.cast(FunctionalPseudoElement, selector.parsed_tree).arguments + def series(css: str) -> tuple[int, int] | None: + (selector,) = parse(f":nth-child({css})") + args = typing.cast( + "FunctionalPseudoElement", selector.parsed_tree + ).arguments try: return parse_series(args) except ValueError: @@ -769,14 +778,12 @@ def series(css: str) -> Optional[Tuple[int, int]]: def test_lang(self) -> None: document = etree.fromstring(XMLLANG_IDS) - sort_key = dict( - (el, count) for count, el in enumerate(document.iter()) - ).__getitem__ + sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__ css_to_xpath = GenericTranslator().css_to_xpath - def langid(selector: str) -> List[str]: + def langid(selector: str) -> list[str]: xpath = css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast("list[etree._Element]", document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] @@ -799,22 +806,22 @@ def langid(selector: str) -> List[str]: def test_argument_types(self) -> None: class CustomTranslator(GenericTranslator): def __init__(self) -> None: - self.argument_types: List[str] = [] + self.argument_types: list[str] = [] def xpath_pseudo_element( self, xpath: XPathExpr, pseudo_element: PseudoElement ) -> XPathExpr: self.argument_types += typing.cast( - FunctionalPseudoElement, pseudo_element + "FunctionalPseudoElement", pseudo_element ).argument_types() return xpath - def argument_types(css: str) -> List[str]: + def argument_types(css: str) -> list[str]: translator = CustomTranslator() translator.css_to_xpath(css) return translator.argument_types - mappings: List[Tuple[str, List[str]]] = [ + mappings: list[tuple[str, list[str]]] = [ ("", []), ("ident", ["IDENT"]), ('"string"', ["STRING"]), @@ -826,23 +833,21 @@ def argument_types(css: str) -> List[str]: def test_select(self) -> None: document = etree.fromstring(HTML_IDS) - sort_key = dict( - (el, count) for count, el in enumerate(document.iter()) - ).__getitem__ + sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__ css_to_xpath = GenericTranslator().css_to_xpath html_css_to_xpath = HTMLTranslator().css_to_xpath - def select_ids(selector: str, html_only: bool) -> List[str]: + def select_ids(selector: str, html_only: bool) -> list[str]: xpath = css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast("list[etree._Element]", document.xpath(xpath)) if html_only: assert items == [] xpath = html_css_to_xpath(selector) - items = typing.cast(List["etree._Element"], document.xpath(xpath)) + items = typing.cast("list[etree._Element]", document.xpath(xpath)) items.sort(key=sort_key) return [element.get("id", "nil") for element in items] - def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: + def pcss(main: str, *selectors: str, **kwargs: bool) -> list[str]: html_only = kwargs.pop("html_only", False) result = select_ids(main, html_only) for selector in selectors: @@ -972,7 +977,8 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: assert pcss("span:only-child") == ["foobar-span"] assert pcss("li div:only-child") == ["li-div"] assert pcss("div *:only-child") == ["li-div", "foobar-span"] - self.assertRaises(ExpressionError, pcss, "p *:only-of-type") + with pytest.raises(ExpressionError): + pcss("p *:only-of-type") assert pcss("p:only-of-type") == ["paragraph"] assert pcss("a:empty", "a:EMpty") == ["name-anchor"] assert pcss("li:empty") == ["third-li", "fourth-li", "fifth-li", "sixth-li"] @@ -1072,14 +1078,14 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> List[str]: def test_select_shakespeare(self) -> None: document = html.document_fromstring(HTML_SHAKESPEARE) - body = typing.cast(List["etree._Element"], document.xpath("//body"))[0] + body = typing.cast("list[etree._Element]", document.xpath("//body"))[0] css_to_xpath = GenericTranslator().css_to_xpath basestring_ = (str, bytes) def count(selector: str) -> int: xpath = css_to_xpath(selector) - results = typing.cast(List["etree._Element"], body.xpath(xpath)) + results = typing.cast("list[etree._Element]", body.xpath(xpath)) assert not isinstance(results, basestring_) found = set() for item in results: @@ -1527,7 +1533,7 @@ def count(selector: str) -> int: -""" # noqa: W191,E101 +""" if __name__ == "__main__": diff --git a/tox.ini b/tox.ini index 6831d3f..9ff54cf 100644 --- a/tox.ini +++ b/tox.ini @@ -4,21 +4,20 @@ envlist = pre-commit,pylint,py,docs,typing [testenv] deps = lxml>=4.4 - pytest-cov>=2.8 + pytest-cov>=7.0.0 pytest>=5.4 - setuptools sybil commands = pytest --cov=cssselect \ --cov-report=term-missing --cov-report=html --cov-report=xml \ - --verbose {posargs: cssselect tests docs} + {posargs: cssselect tests docs} [testenv:pylint] deps = {[testenv]deps} - pylint==3.0.0 + pylint==4.0.4 commands = - pylint {posargs: cssselect setup.py tests docs} + pylint {posargs: cssselect tests docs} [testenv:docs] changedir = docs @@ -30,12 +29,21 @@ commands = [testenv:typing] deps = {[testenv]deps} - lxml-stubs==0.4.0 - mypy==0.982 + mypy==1.19.1 + types-lxml==2026.1.1 commands = - mypy --strict {posargs: cssselect tests} + mypy {posargs: cssselect tests} [testenv:pre-commit] deps = pre-commit commands = pre-commit run --all-files --show-diff-on-failure -skip_install = true \ No newline at end of file +skip_install = true + +[testenv:twinecheck] +basepython = python3 +deps = + twine==6.2.0 + build==1.4.0 +commands = + python -m build --sdist + twine check dist/*