diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..2ee5ff3
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,9 @@
+[run]
+branch = True
+
+[report]
+exclude_lines =
+ pragma: no cover
+ def __repr__
+ if sys.version_info
+ if __name__ == '__main__':
diff --git a/.editorconfig b/.editorconfig
deleted file mode 100644
index 38558bf..0000000
--- a/.editorconfig
+++ /dev/null
@@ -1,11 +0,0 @@
-root = true
-
-[*]
-charset = utf-8
-indent_style = space
-indent_size = 4
-insert_final_newline = true
-end_of_line = lf
-
-[*.{yml,yaml}]
-indent_size = 2
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
deleted file mode 100644
index bb4f6e1..0000000
--- a/.git-blame-ignore-revs
+++ /dev/null
@@ -1,2 +0,0 @@
-# applying pre-commit hooks to the project
-e91101b37f82558db84a6b8ee9a6dba1fd2ae0bb
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
deleted file mode 100644
index 41ff7e1..0000000
--- a/.github/workflows/checks.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-name: Checks
-on: [push, pull_request]
-
-jobs:
- checks:
- runs-on: ubuntu-latest
- strategy:
- fail-fast: false
- matrix:
- include:
- - python-version: 3.14
- env:
- TOXENV: pylint
- - python-version: 3.14 # Keep in sync with .readthedocs.yml
- env:
- TOXENV: docs
- - python-version: 3.14
- env:
- TOXENV: typing
- - python-version: 3.14
- env:
- TOXENV: twinecheck
-
- steps:
- - uses: actions/checkout@v6
-
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v6
- with:
- python-version: ${{ matrix.python-version }}
-
- - name: Run check
- env: ${{ matrix.env }}
- run: |
- pip install -U pip
- pip install -U tox
- tox
-
- pre-commit:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v6
- - uses: pre-commit/action@v3.0.1
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
deleted file mode 100644
index 526c458..0000000
--- a/.github/workflows/publish.yml
+++ /dev/null
@@ -1,32 +0,0 @@
-name: Publish
-on:
- push:
- tags:
- - 'v[0-9]+.[0-9]+.[0-9]+'
-
-jobs:
- publish:
- runs-on: ubuntu-latest
-
- environment:
- name: pypi
- url: https://pypi.org/p/cssselect
-
- permissions:
- id-token: write
-
- steps:
- - uses: actions/checkout@v6
-
- - name: Set up Python
- uses: actions/setup-python@v6
- with:
- python-version: 3.14
-
- - name: Build
- run: |
- python -m pip install --upgrade build
- python -m build
-
- - name: Publish to PyPI
- uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/tests-macos.yml b/.github/workflows/tests-macos.yml
deleted file mode 100644
index 4947937..0000000
--- a/.github/workflows/tests-macos.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-name: macOS
-on: [push, pull_request]
-
-jobs:
- tests:
- runs-on: macos-latest
- strategy:
- fail-fast: false
- matrix:
- python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
-
- steps:
- - uses: actions/checkout@v6
-
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v6
- with:
- python-version: ${{ matrix.python-version }}
-
- - name: Run tests
- run: |
- pip install -U pip
- pip install -U tox
- tox -e py
-
- - name: Upload coverage report
- uses: codecov/codecov-action@v5
diff --git a/.github/workflows/tests-ubuntu.yml b/.github/workflows/tests-ubuntu.yml
deleted file mode 100644
index 1ef905b..0000000
--- a/.github/workflows/tests-ubuntu.yml
+++ /dev/null
@@ -1,33 +0,0 @@
-name: Ubuntu
-on: [push, pull_request]
-
-jobs:
- tests:
- runs-on: ubuntu-latest
- strategy:
- fail-fast: false
- matrix:
- python-version: ["3.10", "3.11", "3.12", "3.13", "3.14", "pypy3.11"]
-
- steps:
- - uses: actions/checkout@v6
-
- - name: Install system libraries
- if: contains(matrix.python-version, 'pypy')
- run: |
- sudo apt-get update
- sudo apt-get install libxml2-dev libxslt-dev
-
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v6
- with:
- python-version: ${{ matrix.python-version }}
-
- - name: Run tests
- run: |
- pip install -U pip
- pip install -U tox
- tox -e py
-
- - name: Upload coverage report
- uses: codecov/codecov-action@v5
diff --git a/.github/workflows/tests-windows.yml b/.github/workflows/tests-windows.yml
deleted file mode 100644
index 24d7ee8..0000000
--- a/.github/workflows/tests-windows.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-name: Windows
-on: [push, pull_request]
-
-jobs:
- tests:
- runs-on: windows-latest
- strategy:
- fail-fast: false
- matrix:
- python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
-
- steps:
- - uses: actions/checkout@v6
-
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v6
- with:
- python-version: ${{ matrix.python-version }}
-
- - name: Run tests
- run: |
- pip install -U pip
- pip install -U tox
- tox -e py
-
- - name: Upload coverage report
- uses: codecov/codecov-action@v5
diff --git a/.gitignore b/.gitignore
index c276bd1..627d1c7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,5 @@
+.DS_Store
+.pydevproject
+.project
*.pyc
-*.egg-info
-/.tox
-/MANIFEST
-/dist
-/docs/_build
-/.coverage
-.idea
-htmlcov/
-coverage.xml
+.settings/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
deleted file mode 100644
index 81ca890..0000000
--- a/.pre-commit-config.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-repos:
-- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.14.4
- hooks:
- - id: ruff-check
- args: [ --fix ]
- - id: ruff-format
-- repo: https://github.com/adamchainz/blacken-docs
- rev: 1.20.0
- hooks:
- - id: blacken-docs
- additional_dependencies:
- - black==26.1.0
-- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v6.0.0
- hooks:
- - id: end-of-file-fixer
- - id: trailing-whitespace
-- repo: https://github.com/sphinx-contrib/sphinx-lint
- rev: v1.0.0
- hooks:
- - id: sphinx-lint
-- repo: https://github.com/rhysd/actionlint
- rev: v1.7.10
- hooks:
- - id: actionlint
diff --git a/.readthedocs.yml b/.readthedocs.yml
deleted file mode 100644
index b91642a..0000000
--- a/.readthedocs.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-version: 2
-formats: all
-sphinx:
- configuration: docs/conf.py
- fail_on_warning: true
-build:
- os: ubuntu-24.04
- tools:
- # For available versions, see:
- # https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python
- python: "3.14" # Keep in sync with .github/workflows/checks.yml
-python:
- install:
- - requirements: docs/requirements.txt
- - path: .
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..93ad08a
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,12 @@
+language: python
+
+python:
+ - "2.6"
+ - "2.7"
+ - "3.2"
+ - "3.3"
+
+install:
+ - pip install --use-mirrors lxml -e .
+
+script: py.test
diff --git a/AUTHORS b/AUTHORS
index 66dcc22..bf826b9 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,13 +1,9 @@
Daniel Graña
Ian Bicking
-James Salter
Laurence Rowe
Mikhail Korobov
-Nik Nyby
Paul Tremberth
Simon Potter
Simon Sapin
Stefan Behnel
-Thomas Grainger
Varialus
-Arthur Darcet
diff --git a/CHANGES b/CHANGES
index 5ca2959..edbbaca 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,133 +1,6 @@
Changelog
=========
-Version 1.4.0
--------------
-
-Released on 2026-01-29.
-
-* Dropped support for Python 3.9 and PyPy 3.10.
-
-* Added support for Python 3.14 and PyPy 3.11.
-
-* Switched the build system to ``hatchling``.
-
-* CI fixes and improvements.
-
-Version 1.3.0
--------------
-
-Released on 2025-03-10.
-
-* Dropped support for Python 3.7-3.8, added support for Python 3.12-3.13 and
- PyPy 3.10.
-
-* Removed ``_unicode_safe_getattr()``, deprecated in 1.2.0.
-
-* Added ``pre-commit`` and formatted the code with ``ruff``.
-
-* Many CI additions and improvements.
-
-
-Version 1.2.0
--------------
-
-Released on 2022-10-27.
-
-* Drop support for Python 2.7, 3.4-3.6, add support for Python 3.7-3.11.
-
-* Add type annotations (PEP 484 and PEP 561).
-
-* More features from the CSS Selectors Level 4:
-
- * The ``:is()`` pseudo-class.
-
- * The ``:where()`` pseudo-class.
-
- * The ``:has()`` pseudo-class, with some limitations.
-
-* Fix parsing ``:scope`` after a comma.
-
-* Add parentheses to fix condition precedence in some cases.
-
-* Private API changes related to the removal of the Python 2 support:
-
- * Remove ``_unicode`` and ``_unichr`` aliases from ``csselect.parser``.
-
- * Remove ``_basestring`` and ``_unicode`` aliases from ``csselect.xpath``.
-
- * Deprecate ``csselect.xpath._unicode_safe_getattr()`` and change it to just
- call ``getattr()``.
-
-* Include tests in the PyPI tarball.
-
-* Many CI additions and improvements.
-
-* Improve the test coverage.
-
-
-Version 1.1.0
--------------
-
-Released on 2019-08-09.
-
-* Support for the ``:scope`` selector, which allows to access immediate
- children of a selector.
-
-* Support for the ``|E`` syntax for type selectors without a namespace.
-
-* A new selector method, ``canonical``, returns the CSS expression of the
- selector, as a string.
-
-
-Version 1.0.3
--------------
-
-Released on 2017-12-27.
-
-* Fix artifact uploads to pypi
-
-
-Version 1.0.2
--------------
-
-Released on 2017-12-26.
-
-* Drop support for Python 2.6 and Python 3.3.
-* Fix deprecation warning in Python 3.6.
-* Minor cleanups.
-
-
-Version 1.0.1
--------------
-
-Released on 2017-01-10.
-
-* Add support for Python 3.6.
-* Documentation hosted `on Read the Docs `_
-
-
-Version 1.0.0
--------------
-
-Released on 2016-10-21.
-
-* Add code coverage reports.
-* Fix ``:nth-*(an+b)`` pseudo-classes selectors.
- (except ``*:nth-child()`` which looks untranslatable to XPath 1.0.)
-
-
-Version 0.9.2
--------------
-
-Released on 2016-06-15.
-
-* Distribute as universal wheel.
-* Add support for Python 3.3, 3.4 and 3.5.
-* Drop support for Python 2.5 as testing is getting difficult.
-* Improve tests on pseudo-elements.
-
-
Version 0.9.1
-------------
@@ -264,14 +137,14 @@ Version 0.3
Released on 2012-04-17.
* Fix many parsing bugs.
-* Rename the ``Translator`` class to :class:`GenericTranslator`
+* Rename the :class:`Translator` class to :class:`GenericTranslator`
* There, implement ``:target``, ``:hover``, ``:focus``, ``:active``
``:checked``, ``:enabled``, ``:disabled``, ``:link`` and ``:visited``
as never matching.
* Make a new HTML-specific ``HTMLTranslator`` subclass. There, implement
``:checked``, ``:enabled``, ``:disabled``, ``:link`` and ``:visited``
as appropriate for HTML, with all links "not visited".
-* Remove the ``css_to_xpath`` function. The translator classes
+* Remove the :func:`css_to_xpath` function. The translator classes
are the new API.
* Add support for ``:contains()`` back, but case-sensitive. lxml will
override it to be case-insensitive for backward-compatibility.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..e98d213
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include AUTHORS CHANGES LICENSE README.rst tox.ini .coveragerc
+recursive-include docs *
+prune docs/_build
diff --git a/README.rst b/README.rst
index c055295..f523c7f 100644
--- a/README.rst
+++ b/README.rst
@@ -1,40 +1,25 @@
-
===================================
cssselect: CSS Selectors for Python
===================================
-.. image:: https://img.shields.io/pypi/v/cssselect.svg
- :target: https://pypi.python.org/pypi/cssselect
- :alt: PyPI Version
-
-.. image:: https://img.shields.io/pypi/pyversions/cssselect.svg
- :target: https://pypi.python.org/pypi/cssselect
- :alt: Supported Python Versions
-
-.. image:: https://github.com/scrapy/cssselect/actions/workflows/tests-ubuntu.yml/badge.svg
- :target: https://github.com/scrapy/cssselect/actions/workflows/tests-ubuntu.yml
- :alt: Tests
+*cssselect* parses `CSS3 Selectors`_ and translates them to `XPath 1.0`_
+expressions. Such expressions can be used in lxml_ or another XPath engine
+to find the matching elements in an XML or HTML document.
-.. image:: https://img.shields.io/codecov/c/github/scrapy/cssselect/master.svg
- :target: https://codecov.io/github/scrapy/cssselect?branch=master
- :alt: Coverage report
+This module used to live inside of lxml as ``lxml.cssselect`` before it was
+extracted as a stand-alone project.
-**cssselect** is a BSD-licensed Python library to parse `CSS3 selectors`_ and
-translate them to `XPath 1.0`_ expressions.
+.. _CSS3 Selectors: http://www.w3.org/TR/2011/REC-css3-selectors-20110929/
+.. _XPath 1.0: http://www.w3.org/TR/xpath/
+.. _lxml: http://lxml.de/
-`XPath 1.0`_ expressions can be used in lxml_ or another XPath engine to find
-the matching elements in an XML or HTML document.
-
-Find the cssselect online documentation at https://cssselect.readthedocs.io.
Quick facts:
-* Source, issues and pull requests `on GitHub
- `_
-* Releases `on PyPI `_
+* Free software: BSD licensed
+* Compatible with Python 2.5+ and 3.2+
+* Latest documentation `on python.org `_
+* Source, issues and pull requests `on GitHub
+ `_
+* Releases `on PyPI `_
* Install with ``pip install cssselect``
-
-
-.. _CSS3 selectors: https://www.w3.org/TR/selectors-3/
-.. _XPath 1.0: https://www.w3.org/TR/xpath/all/
-.. _lxml: https://lxml.de/
diff --git a/cssselect/__init__.py b/cssselect/__init__.py
index 59d62df..871f1b2 100644
--- a/cssselect/__init__.py
+++ b/cssselect/__init__.py
@@ -1,36 +1,22 @@
+# coding: utf8
"""
-CSS Selectors based on XPath
-============================
+ CSS Selectors based on XPath
+ ============================
-This module supports selecting XML/HTML elements based on CSS selectors.
-See the `CSSSelector` class for details.
+ This module supports selecting XML/HTML elements based on CSS selectors.
+ See the `CSSSelector` class for details.
-:copyright: (c) 2007-2012 Ian Bicking and contributors.
-See AUTHORS for more details.
-:license: BSD, see LICENSE for more details.
+ :copyright: (c) 2007-2012 Ian Bicking and contributors.
+ See AUTHORS for more details.
+ :license: BSD, see LICENSE for more details.
"""
-from cssselect.parser import (
- FunctionalPseudoElement,
- Selector,
- SelectorError,
- SelectorSyntaxError,
- parse,
-)
-from cssselect.xpath import ExpressionError, GenericTranslator, HTMLTranslator
+from cssselect.parser import (parse, Selector, FunctionalPseudoElement,
+ SelectorError, SelectorSyntaxError)
+from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
-__all__ = (
- "ExpressionError",
- "FunctionalPseudoElement",
- "GenericTranslator",
- "HTMLTranslator",
- "Selector",
- "SelectorError",
- "SelectorSyntaxError",
- "parse",
-)
-VERSION = "1.4.0"
+VERSION = '0.9.1'
__version__ = VERSION
diff --git a/cssselect/parser.py b/cssselect/parser.py
index f969769..d71fdda 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -1,33 +1,33 @@
+# coding: utf8
"""
-cssselect.parser
-================
+ cssselect.parser
+ ================
-Tokenizer, parser and parsed objects for CSS selectors.
+ Tokenizer, parser and parsed objects for CSS selectors.
-:copyright: (c) 2007-2012 Ian Bicking and contributors.
-See AUTHORS for more details.
-:license: BSD, see LICENSE for more details.
+ :copyright: (c) 2007-2012 Ian Bicking and contributors.
+ See AUTHORS for more details.
+ :license: BSD, see LICENSE for more details.
"""
-from __future__ import annotations
-
-import operator
-import re
import sys
-from typing import TYPE_CHECKING, Literal, Protocol, TypeAlias, Union, cast, overload
+import re
+import operator
-if TYPE_CHECKING:
- from collections.abc import Iterable, Iterator, Sequence
- # typing.Self requires Python 3.11
- from typing_extensions import Self
+if sys.version_info[0] < 3:
+ _unicode = unicode
+ _unichr = unichr
+else:
+ _unicode = str
+ _unichr = chr
-def ascii_lower(string: str) -> str:
+def ascii_lower(string):
"""Lower-case, but only in the ASCII range."""
- return string.encode("utf8").lower().decode("utf8")
+ return string.encode('utf8').lower().decode('utf8')
class SelectorError(Exception):
@@ -39,30 +39,13 @@ class SelectorError(Exception):
"""
-
class SelectorSyntaxError(SelectorError, SyntaxError):
"""Parsing a selector that does not match the grammar."""
#### Parsed objects
-Tree: TypeAlias = Union[
- "Element",
- "Hash",
- "Class",
- "Function",
- "Pseudo",
- "Attrib",
- "Negation",
- "Relation",
- "Matching",
- "SpecificityAdjustment",
- "CombinedSelector",
-]
-PseudoElement: TypeAlias = Union["FunctionalPseudoElement", str]
-
-
-class Selector:
+class Selector(object):
"""
Represents a parsed selector.
@@ -72,12 +55,10 @@ class Selector:
or unsupported pseudo-elements.
"""
-
- def __init__(self, tree: Tree, pseudo_element: PseudoElement | None = None) -> None:
+ def __init__(self, tree, pseudo_element=None):
self.parsed_tree = tree
if pseudo_element is not None and not isinstance(
- pseudo_element, FunctionalPseudoElement
- ):
+ pseudo_element, FunctionalPseudoElement):
pseudo_element = ascii_lower(pseudo_element)
#: A :class:`FunctionalPseudoElement`,
#: or the identifier for the pseudo-element as a string,
@@ -95,35 +76,23 @@ def __init__(self, tree: Tree, pseudo_element: PseudoElement | None = None) -> N
#: +-------------------------+----------------+--------------------------------+
#: | Invalid pseudo-class | ``li:marker`` | ``None`` |
#: +-------------------------+----------------+--------------------------------+
- #: | Functional | ``a::foo(2)`` | ``FunctionalPseudoElement(…)`` |
+ #: | Functional | ``a::foo(2)`` | ``FunctionalPseudoElement(…)`` |
#: +-------------------------+----------------+--------------------------------+
#:
#: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement
self.pseudo_element = pseudo_element
- def __repr__(self) -> str:
+ def __repr__(self):
if isinstance(self.pseudo_element, FunctionalPseudoElement):
pseudo_element = repr(self.pseudo_element)
elif self.pseudo_element:
- pseudo_element = f"::{self.pseudo_element}"
+ pseudo_element = '::%s' % self.pseudo_element
else:
- pseudo_element = ""
- return f"{self.__class__.__name__}[{self.parsed_tree!r}{pseudo_element}]"
+ pseudo_element = ''
+ return '%s[%r%s]' % (
+ self.__class__.__name__, self.parsed_tree, pseudo_element)
- def canonical(self) -> str:
- """Return a CSS representation for this selector (a string)"""
- if isinstance(self.pseudo_element, FunctionalPseudoElement):
- pseudo_element = f"::{self.pseudo_element.canonical()}"
- elif self.pseudo_element:
- pseudo_element = f"::{self.pseudo_element}"
- else:
- pseudo_element = ""
- res = f"{self.parsed_tree.canonical()}{pseudo_element}"
- if len(res) > 1:
- res = res.lstrip("*")
- return res
-
- def specificity(self) -> tuple[int, int, int]:
+ def specificity(self):
"""Return the specificity_ of this selector as a tuple of 3 integers.
.. _specificity: http://www.w3.org/TR/selectors/#specificity
@@ -135,28 +104,25 @@ def specificity(self) -> tuple[int, int, int]:
return a, b, c
-class Class:
+class Class(object):
"""
Represents selector.class_name
"""
-
- def __init__(self, selector: Tree, class_name: str) -> None:
+ def __init__(self, selector, class_name):
self.selector = selector
self.class_name = class_name
- def __repr__(self) -> str:
- return f"{self.__class__.__name__}[{self.selector!r}.{self.class_name}]"
+ def __repr__(self):
+ return '%s[%r.%s]' % (
+ self.__class__.__name__, self.selector, self.class_name)
- def canonical(self) -> str:
- return f"{self.selector.canonical()}.{self.class_name}"
-
- def specificity(self) -> tuple[int, int, int]:
+ def specificity(self):
a, b, c = self.selector.specificity()
b += 1
return a, b, c
-class FunctionalPseudoElement:
+class FunctionalPseudoElement(object):
"""
Represents selector::name(arguments)
@@ -173,310 +139,171 @@ class FunctionalPseudoElement:
Use at your own risks.
"""
-
- def __init__(self, name: str, arguments: Sequence[Token]):
+ def __init__(self, name, arguments):
self.name = ascii_lower(name)
self.arguments = arguments
- def __repr__(self) -> str:
- token_values = [token.value for token in self.arguments]
- return f"{self.__class__.__name__}[::{self.name}({token_values!r})]"
+ def __repr__(self):
+ return '%s[::%s(%r)]' % (
+ self.__class__.__name__, self.name,
+ [token.value for token in self.arguments])
- def argument_types(self) -> list[str]:
+ def argument_types(self):
return [token.type for token in self.arguments]
- def canonical(self) -> str:
- args = "".join(token.css() for token in self.arguments)
- return f"{self.name}({args})"
+ def specificity(self):
+ a, b, c = self.selector.specificity()
+ b += 1
+ return a, b, c
-class Function:
+class Function(object):
"""
Represents selector:name(expr)
"""
-
- def __init__(self, selector: Tree, name: str, arguments: Sequence[Token]) -> None:
+ def __init__(self, selector, name, arguments):
self.selector = selector
self.name = ascii_lower(name)
self.arguments = arguments
- def __repr__(self) -> str:
- token_values = [token.value for token in self.arguments]
- return f"{self.__class__.__name__}[{self.selector!r}:{self.name}({token_values!r})]"
+ def __repr__(self):
+ return '%s[%r:%s(%r)]' % (
+ self.__class__.__name__, self.selector, self.name,
+ [token.value for token in self.arguments])
- def argument_types(self) -> list[str]:
+ def argument_types(self):
return [token.type for token in self.arguments]
- def canonical(self) -> str:
- args = "".join(token.css() for token in self.arguments)
- return f"{self.selector.canonical()}:{self.name}({args})"
-
- def specificity(self) -> tuple[int, int, int]:
+ def specificity(self):
a, b, c = self.selector.specificity()
b += 1
return a, b, c
-class Pseudo:
+class Pseudo(object):
"""
Represents selector:ident
"""
-
- def __init__(self, selector: Tree, ident: str) -> None:
+ def __init__(self, selector, ident):
self.selector = selector
self.ident = ascii_lower(ident)
- def __repr__(self) -> str:
- return f"{self.__class__.__name__}[{self.selector!r}:{self.ident}]"
-
- def canonical(self) -> str:
- return f"{self.selector.canonical()}:{self.ident}"
+ def __repr__(self):
+ return '%s[%r:%s]' % (
+ self.__class__.__name__, self.selector, self.ident)
- def specificity(self) -> tuple[int, int, int]:
+ def specificity(self):
a, b, c = self.selector.specificity()
b += 1
return a, b, c
-class Negation:
+class Negation(object):
"""
Represents selector:not(subselector)
"""
-
- def __init__(self, selector: Tree, subselector: Tree) -> None:
+ def __init__(self, selector, subselector):
self.selector = selector
self.subselector = subselector
- def __repr__(self) -> str:
- return f"{self.__class__.__name__}[{self.selector!r}:not({self.subselector!r})]"
+ def __repr__(self):
+ return '%s[%r:not(%r)]' % (
+ self.__class__.__name__, self.selector, self.subselector)
- def canonical(self) -> str:
- subsel = self.subselector.canonical()
- if len(subsel) > 1:
- subsel = subsel.lstrip("*")
- return f"{self.selector.canonical()}:not({subsel})"
-
- def specificity(self) -> tuple[int, int, int]:
+ def specificity(self):
a1, b1, c1 = self.selector.specificity()
a2, b2, c2 = self.subselector.specificity()
return a1 + a2, b1 + b2, c1 + c2
-class Relation:
- """
- Represents selector:has(subselector)
- """
-
- def __init__(self, selector: Tree, combinator: Token, subselector: Selector):
- self.selector = selector
- self.combinator = combinator
- self.subselector = subselector
-
- def __repr__(self) -> str:
- return f"{self.__class__.__name__}[{self.selector!r}:has({self.subselector!r})]"
-
- def canonical(self) -> str:
- try:
- subsel = self.subselector[0].canonical() # type: ignore[index]
- except TypeError:
- subsel = self.subselector.canonical()
- if len(subsel) > 1:
- subsel = subsel.lstrip("*")
- return f"{self.selector.canonical()}:has({subsel})"
-
- def specificity(self) -> tuple[int, int, int]:
- a1, b1, c1 = self.selector.specificity()
- try:
- a2, b2, c2 = self.subselector[-1].specificity() # type: ignore[index]
- except TypeError:
- a2, b2, c2 = self.subselector.specificity()
- return a1 + a2, b1 + b2, c1 + c2
-
-
-class Matching:
- """
- Represents selector:is(selector_list)
- """
-
- def __init__(self, selector: Tree, selector_list: Iterable[Tree]):
- self.selector = selector
- self.selector_list = selector_list
-
- def __repr__(self) -> str:
- args_str = ", ".join(repr(s) for s in self.selector_list)
- return f"{self.__class__.__name__}[{self.selector!r}:is({args_str})]"
-
- def canonical(self) -> str:
- selector_arguments = []
- for s in self.selector_list:
- selarg = s.canonical()
- selector_arguments.append(selarg.lstrip("*"))
- args_str = ", ".join(str(s) for s in selector_arguments)
- return f"{self.selector.canonical()}:is({args_str})"
-
- def specificity(self) -> tuple[int, int, int]:
- return max(x.specificity() for x in self.selector_list)
-
-
-class SpecificityAdjustment:
- """
- Represents selector:where(selector_list)
- Same as selector:is(selector_list), but its specificity is always 0
- """
-
- def __init__(self, selector: Tree, selector_list: list[Tree]):
- self.selector = selector
- self.selector_list = selector_list
-
- def __repr__(self) -> str:
- args_str = ", ".join(repr(s) for s in self.selector_list)
- return f"{self.__class__.__name__}[{self.selector!r}:where({args_str})]"
-
- def canonical(self) -> str:
- selector_arguments = []
- for s in self.selector_list:
- selarg = s.canonical()
- selector_arguments.append(selarg.lstrip("*"))
- args_str = ", ".join(str(s) for s in selector_arguments)
- return f"{self.selector.canonical()}:where({args_str})"
-
- def specificity(self) -> tuple[int, int, int]:
- return 0, 0, 0
-
-
-class Attrib:
+class Attrib(object):
"""
Represents selector[namespace|attrib operator value]
"""
-
- @overload
- def __init__(
- self,
- selector: Tree,
- namespace: str | None,
- attrib: str,
- operator: Literal["exists"],
- value: None,
- ) -> None: ...
-
- @overload
- def __init__(
- self,
- selector: Tree,
- namespace: str | None,
- attrib: str,
- operator: str,
- value: Token,
- ) -> None: ...
-
- def __init__(
- self,
- selector: Tree,
- namespace: str | None,
- attrib: str,
- operator: str,
- value: Token | None,
- ) -> None:
+ def __init__(self, selector, namespace, attrib, operator, value):
self.selector = selector
self.namespace = namespace
self.attrib = attrib
self.operator = operator
self.value = value
- def __repr__(self) -> str:
- attrib = f"{self.namespace}|{self.attrib}" if self.namespace else self.attrib
- if self.operator == "exists":
- return f"{self.__class__.__name__}[{self.selector!r}[{attrib}]]"
- assert self.value is not None
- return f"{self.__class__.__name__}[{self.selector!r}[{attrib} {self.operator} {self.value.value!r}]]"
-
- def canonical(self) -> str:
- attrib = f"{self.namespace}|{self.attrib}" if self.namespace else self.attrib
-
- if self.operator == "exists":
- op = attrib
+ def __repr__(self):
+ if self.namespace:
+ attrib = '%s|%s' % (self.namespace, self.attrib)
else:
- assert self.value is not None
- op = f"{attrib}{self.operator}{self.value.css()}"
-
- return f"{self.selector.canonical()}[{op}]"
+ attrib = self.attrib
+ if self.operator == 'exists':
+ return '%s[%r[%s]]' % (
+ self.__class__.__name__, self.selector, attrib)
+ else:
+ return '%s[%r[%s %s %r]]' % (
+ self.__class__.__name__, self.selector, attrib,
+ self.operator, self.value)
- def specificity(self) -> tuple[int, int, int]:
+ def specificity(self):
a, b, c = self.selector.specificity()
b += 1
return a, b, c
-class Element:
+class Element(object):
"""
Represents namespace|element
`None` is for the universal selector '*'
"""
-
- def __init__(
- self, namespace: str | None = None, element: str | None = None
- ) -> None:
+ def __init__(self, namespace=None, element=None):
self.namespace = namespace
self.element = element
- def __repr__(self) -> str:
- return f"{self.__class__.__name__}[{self.canonical()}]"
-
- def canonical(self) -> str:
- element = self.element or "*"
+ def __repr__(self):
+ element = self.element or '*'
if self.namespace:
- element = f"{self.namespace}|{element}"
- return element
+ element = '%s|%s' % (self.namespace, element)
+ return '%s[%s]' % (self.__class__.__name__, element)
- def specificity(self) -> tuple[int, int, int]:
+ def specificity(self):
if self.element:
return 0, 0, 1
- return 0, 0, 0
+ else:
+ return 0, 0, 0
-class Hash:
+class Hash(object):
"""
Represents selector#id
"""
-
- def __init__(self, selector: Tree, id: str) -> None: # noqa: A002
+ def __init__(self, selector, id):
self.selector = selector
self.id = id
- def __repr__(self) -> str:
- return f"{self.__class__.__name__}[{self.selector!r}#{self.id}]"
-
- def canonical(self) -> str:
- return f"{self.selector.canonical()}#{self.id}"
+ def __repr__(self):
+ return '%s[%r#%s]' % (
+ self.__class__.__name__, self.selector, self.id)
- def specificity(self) -> tuple[int, int, int]:
+ def specificity(self):
a, b, c = self.selector.specificity()
a += 1
return a, b, c
-class CombinedSelector:
- def __init__(self, selector: Tree, combinator: str, subselector: Tree) -> None:
+class CombinedSelector(object):
+ def __init__(self, selector, combinator, subselector):
assert selector is not None
self.selector = selector
self.combinator = combinator
self.subselector = subselector
- def __repr__(self) -> str:
- comb = "" if self.combinator == " " else self.combinator
- return (
- f"{self.__class__.__name__}[{self.selector!r} {comb} {self.subselector!r}]"
- )
-
- def canonical(self) -> str:
- subsel = self.subselector.canonical()
- if len(subsel) > 1:
- subsel = subsel.lstrip("*")
- return f"{self.selector.canonical()} {self.combinator} {subsel}"
+ def __repr__(self):
+ if self.combinator == ' ':
+ comb = ''
+ else:
+ comb = self.combinator
+ return '%s[%r %s %r]' % (
+ self.__class__.__name__, self.selector, comb, self.subselector)
- def specificity(self) -> tuple[int, int, int]:
+ def specificity(self):
a1, b1, c1 = self.selector.specificity()
a2, b2, c2 = self.subselector.specificity()
return a1 + a2, b1 + b2, c1 + c2
@@ -485,25 +312,24 @@ def specificity(self) -> tuple[int, int, int]:
#### Parser
# foo
-_el_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$")
+_el_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$')
# foo#bar or #bar
-_id_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$")
+_id_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$')
# foo.bar or .bar
_class_re = re.compile(
- r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$"
-)
+ r'^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$')
-def parse(css: str) -> list[Selector]:
+def parse(css):
"""Parse a CSS *group of selectors*.
If you don't care about pseudo-elements or selector specificity,
you can skip this and use :meth:`~GenericTranslator.css_to_xpath`.
:param css:
- A *group of selectors* as a string.
+ A *group of selectors* as a Unicode string.
:raises:
:class:`SelectorSyntaxError` on invalid selectors.
:returns:
@@ -517,75 +343,72 @@ def parse(css: str) -> list[Selector]:
return [Selector(Element(element=match.group(1)))]
match = _id_re.match(css)
if match is not None:
- return [Selector(Hash(Element(element=match.group(1) or None), match.group(2)))]
+ return [Selector(Hash(Element(element=match.group(1) or None),
+ match.group(2)))]
match = _class_re.match(css)
if match is not None:
- return [
- Selector(Class(Element(element=match.group(1) or None), match.group(2)))
- ]
+ return [Selector(Class(Element(element=match.group(1) or None),
+ match.group(2)))]
stream = TokenStream(tokenize(css))
stream.source = css
return list(parse_selector_group(stream))
-
-
# except SelectorSyntaxError:
# e = sys.exc_info()[1]
# message = "%s at %s -> %r" % (
# e, stream.used, stream.peek())
# e.msg = message
+# if sys.version_info < (2,6):
+# e.message = message
# e.args = tuple([message])
# raise
-def parse_selector_group(stream: TokenStream) -> Iterator[Selector]:
+def parse_selector_group(stream):
stream.skip_whitespace()
while 1:
yield Selector(*parse_selector(stream))
- if stream.peek() == ("DELIM", ","):
+ if stream.peek() == ('DELIM', ','):
stream.next()
stream.skip_whitespace()
else:
break
-
-def parse_selector(stream: TokenStream) -> tuple[Tree, PseudoElement | None]:
+def parse_selector(stream):
result, pseudo_element = parse_simple_selector(stream)
while 1:
stream.skip_whitespace()
peek = stream.peek()
- if peek in (("EOF", None), ("DELIM", ",")):
+ if peek in (('EOF', None), ('DELIM', ',')):
break
if pseudo_element:
raise SelectorSyntaxError(
- f"Got pseudo-element ::{pseudo_element} not at the end of a selector"
- )
- if peek.is_delim("+", ">", "~"):
+ 'Got pseudo-element ::%s not at the end of a selector'
+ % pseudo_element)
+ if peek.is_delim('+', '>', '~'):
# A combinator
- combinator = cast("str", stream.next().value)
+ combinator = stream.next().value
stream.skip_whitespace()
else:
# By exclusion, the last parse_simple_selector() ended
# at peek == ' '
- combinator = " "
+ combinator = ' '
next_selector, pseudo_element = parse_simple_selector(stream)
result = CombinedSelector(result, combinator, next_selector)
return result, pseudo_element
-def parse_simple_selector(
- stream: TokenStream, inside_negation: bool = False
-) -> tuple[Tree, PseudoElement | None]:
+def parse_simple_selector(stream, inside_negation=False):
stream.skip_whitespace()
selector_start = len(stream.used)
peek = stream.peek()
- if peek.type == "IDENT" or peek == ("DELIM", "*"):
- if peek.type == "IDENT":
+ if peek.type == 'IDENT' or peek == ('DELIM', '*'):
+ if peek.type == 'IDENT':
namespace = stream.next().value
else:
stream.next()
namespace = None
- if stream.peek() == ("DELIM", "|"):
+ if stream.peek() == ('DELIM', '|'):
stream.next()
element = stream.next_ident_or_star()
else:
@@ -593,177 +416,98 @@ def parse_simple_selector(
namespace = None
else:
element = namespace = None
- result: Tree = Element(namespace, element)
- pseudo_element: PseudoElement | None = None
+ result = Element(namespace, element)
+ pseudo_element = None
while 1:
peek = stream.peek()
- if (
- peek.type in ("S", "EOF")
- or peek.is_delim(",", "+", ">", "~")
- or (inside_negation and peek == ("DELIM", ")"))
- ):
+ if peek.type in ('S', 'EOF') or peek.is_delim(',', '+', '>', '~') or (
+ inside_negation and peek == ('DELIM', ')')):
break
if pseudo_element:
raise SelectorSyntaxError(
- f"Got pseudo-element ::{pseudo_element} not at the end of a selector"
- )
- if peek.type == "HASH":
- result = Hash(result, cast("str", stream.next().value))
- elif peek == ("DELIM", "."):
+ 'Got pseudo-element ::%s not at the end of a selector'
+ % pseudo_element)
+ if peek.type == 'HASH':
+ result = Hash(result, stream.next().value)
+ elif peek == ('DELIM', '.'):
stream.next()
result = Class(result, stream.next_ident())
- elif peek == ("DELIM", "|"):
- stream.next()
- result = Element(None, stream.next_ident())
- elif peek == ("DELIM", "["):
+ elif peek == ('DELIM', '['):
stream.next()
result = parse_attrib(result, stream)
- elif peek == ("DELIM", ":"):
+ elif peek == ('DELIM', ':'):
stream.next()
- if stream.peek() == ("DELIM", ":"):
+ if stream.peek() == ('DELIM', ':'):
stream.next()
pseudo_element = stream.next_ident()
- if stream.peek() == ("DELIM", "("):
+ if stream.peek() == ('DELIM', '('):
stream.next()
pseudo_element = FunctionalPseudoElement(
- pseudo_element, parse_arguments(stream)
- )
+ pseudo_element, parse_arguments(stream))
continue
ident = stream.next_ident()
- if ident.lower() in ("first-line", "first-letter", "before", "after"):
+ if ident.lower() in ('first-line', 'first-letter',
+ 'before', 'after'):
# Special case: CSS 2.1 pseudo-elements can have a single ':'
# Any new pseudo-element must have two.
- pseudo_element = str(ident)
+ pseudo_element = _unicode(ident)
continue
- if stream.peek() != ("DELIM", "("):
+ if stream.peek() != ('DELIM', '('):
result = Pseudo(result, ident)
- if repr(result) == "Pseudo[Element[*]:scope]" and not (
- len(stream.used) == 2
- or (len(stream.used) == 3 and stream.used[0].type == "S")
- or (len(stream.used) >= 3 and stream.used[-3].is_delim(","))
- or (
- len(stream.used) >= 4
- and stream.used[-3].type == "S"
- and stream.used[-4].is_delim(",")
- )
- ):
- raise SelectorSyntaxError(
- 'Got immediate child pseudo-element ":scope" '
- "not at the start of a selector"
- )
continue
stream.next()
stream.skip_whitespace()
- if ident.lower() == "not":
+ if ident.lower() == 'not':
if inside_negation:
- raise SelectorSyntaxError("Got nested :not()")
+ raise SelectorSyntaxError('Got nested :not()')
argument, argument_pseudo_element = parse_simple_selector(
- stream, inside_negation=True
- )
- next_ = stream.next()
+ stream, inside_negation=True)
+ next = stream.next()
if argument_pseudo_element:
raise SelectorSyntaxError(
- f"Got pseudo-element ::{argument_pseudo_element} inside :not() at {next_.pos}"
- )
- if next_ != ("DELIM", ")"):
- raise SelectorSyntaxError(f"Expected ')', got {next_}")
+ 'Got pseudo-element ::%s inside :not() at %s'
+ % (argument_pseudo_element, next.pos))
+ if next != ('DELIM', ')'):
+ raise SelectorSyntaxError("Expected ')', got %s" % (next,))
result = Negation(result, argument)
- elif ident.lower() == "has":
- combinator, arguments = parse_relative_selector(stream)
- result = Relation(result, combinator, arguments)
-
- elif ident.lower() in ("matches", "is"):
- selectors = parse_simple_selector_arguments(stream)
- result = Matching(result, selectors)
- elif ident.lower() == "where":
- selectors = parse_simple_selector_arguments(stream)
- result = SpecificityAdjustment(result, selectors)
else:
result = Function(result, ident, parse_arguments(stream))
else:
- raise SelectorSyntaxError(f"Expected selector, got {peek}")
+ raise SelectorSyntaxError(
+ "Expected selector, got %s" % (peek,))
if len(stream.used) == selector_start:
- raise SelectorSyntaxError(f"Expected selector, got {stream.peek()}")
+ raise SelectorSyntaxError(
+ "Expected selector, got %s" % (stream.peek(),))
return result, pseudo_element
-def parse_arguments(stream: TokenStream) -> list[Token]: # noqa: RET503
- arguments: list[Token] = []
+def parse_arguments(stream):
+ arguments = []
while 1:
stream.skip_whitespace()
- next_ = stream.next()
- if next_.type in ("IDENT", "STRING", "NUMBER") or next_ in [
- ("DELIM", "+"),
- ("DELIM", "-"),
- ]:
- arguments.append(next_)
- elif next_ == ("DELIM", ")"):
+ next = stream.next()
+ if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [
+ ('DELIM', '+'), ('DELIM', '-')]:
+ arguments.append(next)
+ elif next == ('DELIM', ')'):
return arguments
else:
- raise SelectorSyntaxError(f"Expected an argument, got {next_}")
-
-
-def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]: # noqa: RET503
- stream.skip_whitespace()
- subselector = ""
- next_ = stream.next()
-
- if next_ in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]:
- combinator = next_
- stream.skip_whitespace()
- next_ = stream.next()
- else:
- combinator = Token("DELIM", " ", pos=0)
-
- while 1:
- if next_.type in ("IDENT", "STRING", "NUMBER") or next_ in [
- ("DELIM", "."),
- ("DELIM", "*"),
- ]:
- subselector += cast("str", next_.value)
- elif next_ == ("DELIM", ")"):
- result = parse(subselector)
- return combinator, result[0]
- else:
- raise SelectorSyntaxError(f"Expected an argument, got {next_}")
- next_ = stream.next()
-
-
-def parse_simple_selector_arguments(stream: TokenStream) -> list[Tree]:
- arguments = []
- while 1:
- result, pseudo_element = parse_simple_selector(stream, True)
- if pseudo_element:
raise SelectorSyntaxError(
- f"Got pseudo-element ::{pseudo_element} inside function"
- )
- stream.skip_whitespace()
- next_ = stream.next()
- if next_ in (("EOF", None), ("DELIM", ",")):
- stream.next()
- stream.skip_whitespace()
- arguments.append(result)
- elif next_ == ("DELIM", ")"):
- arguments.append(result)
- break
- else:
- raise SelectorSyntaxError(f"Expected an argument, got {next_}")
- return arguments
+ "Expected an argument, got %s" % (next,))
-def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib:
+def parse_attrib(selector, stream):
stream.skip_whitespace()
attrib = stream.next_ident_or_star()
- if attrib is None and stream.peek() != ("DELIM", "|"):
- raise SelectorSyntaxError(f"Expected '|', got {stream.peek()}")
- namespace: str | None
- op: str | None
- if stream.peek() == ("DELIM", "|"):
+ if attrib is None and stream.peek() != ('DELIM', '|'):
+ raise SelectorSyntaxError(
+ "Expected '|', got %s" % (stream.peek(),))
+ if stream.peek() == ('DELIM', '|'):
stream.next()
- if stream.peek() == ("DELIM", "="):
+ if stream.peek() == ('DELIM', '='):
namespace = None
stream.next()
- op = "|="
+ op = '|='
else:
namespace = attrib
attrib = stream.next_ident()
@@ -772,30 +516,32 @@ def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib:
namespace = op = None
if op is None:
stream.skip_whitespace()
- next_ = stream.next()
- if next_ == ("DELIM", "]"):
- return Attrib(selector, namespace, cast("str", attrib), "exists", None)
- if next_ == ("DELIM", "="):
- op = "="
- elif next_.is_delim("^", "$", "*", "~", "|", "!") and (
- stream.peek() == ("DELIM", "=")
- ):
- op = cast("str", next_.value) + "="
+ next = stream.next()
+ if next == ('DELIM', ']'):
+ return Attrib(selector, namespace, attrib, 'exists', None)
+ elif next == ('DELIM', '='):
+ op = '='
+ elif next.is_delim('^', '$', '*', '~', '|', '!') and (
+ stream.peek() == ('DELIM', '=')):
+ op = next.value + '='
stream.next()
else:
- raise SelectorSyntaxError(f"Operator expected, got {next_}")
+ raise SelectorSyntaxError(
+ "Operator expected, got %s" % (next,))
stream.skip_whitespace()
value = stream.next()
- if value.type not in ("IDENT", "STRING"):
- raise SelectorSyntaxError(f"Expected string or ident, got {value}")
+ if value.type not in ('IDENT', 'STRING'):
+ raise SelectorSyntaxError(
+ "Expected string or ident, got %s" % (value,))
stream.skip_whitespace()
- next_ = stream.next()
- if next_ != ("DELIM", "]"):
- raise SelectorSyntaxError(f"Expected ']', got {next_}")
- return Attrib(selector, namespace, cast("str", attrib), op, value)
+ next = stream.next()
+ if next != ('DELIM', ']'):
+ raise SelectorSyntaxError(
+ "Expected ']', got %s" % (next,))
+ return Attrib(selector, namespace, attrib, op, value.value)
-def parse_series(tokens: Iterable[Token]) -> tuple[int, int]:
+def parse_series(tokens):
"""
Parses the arguments for :nth-child() and friends.
@@ -804,243 +550,217 @@ def parse_series(tokens: Iterable[Token]) -> tuple[int, int]:
"""
for token in tokens:
- if token.type == "STRING":
- raise ValueError("String tokens not allowed in series.")
- s = "".join(cast("str", token.value) for token in tokens).strip()
- if s == "odd":
- return 2, 1
- if s == "even":
- return 2, 0
- if s == "n":
- return 1, 0
- if "n" not in s:
+ if token.type == 'STRING':
+ raise ValueError('String tokens not allowed in series.')
+ s = ''.join(token.value for token in tokens).strip()
+ if s == 'odd':
+ return (2, 1)
+ elif s == 'even':
+ return (2, 0)
+ elif s == 'n':
+ return (1, 0)
+ if 'n' not in s:
# Just b
- return 0, int(s)
- a, b = s.split("n", 1)
- a_as_int: int
+ return (0, int(s))
+ a, b = s.split('n', 1)
if not a:
- a_as_int = 1
- elif a in {"-", "+"}:
- a_as_int = int(a + "1")
+ a = 1
+ elif a == '-' or a == '+':
+ a = int(a+'1')
+ else:
+ a = int(a)
+ if not b:
+ b = 0
else:
- a_as_int = int(a)
- b_as_int = int(b) if b else 0
- return a_as_int, b_as_int
+ b = int(b)
+ return (a, b)
#### Token objects
-
-class Token(tuple[str, str | None]): # noqa: SLOT001
- @overload
- def __new__(
- cls,
- type_: Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"],
- value: str,
- pos: int,
- ) -> Self: ...
-
- @overload
- def __new__(cls, type_: Literal["EOF"], value: None, pos: int) -> Self: ...
-
- def __new__(cls, type_: str, value: str | None, pos: int) -> Self:
+class Token(tuple):
+ def __new__(cls, type_, value, pos):
obj = tuple.__new__(cls, (type_, value))
obj.pos = pos
return obj
- def __repr__(self) -> str:
- return f"<{self.type} '{self.value}' at {self.pos}>"
+ def __repr__(self):
+ return "<%s '%s' at %i>" % (self.type, self.value, self.pos)
- def is_delim(self, *values: str) -> bool:
- return self.type == "DELIM" and self.value in values
+ def is_delim(self, *values):
+ return self.type == 'DELIM' and self.value in values
- pos: int
-
- @property
- def type(self) -> str:
- return self[0]
-
- @property
- def value(self) -> str | None:
- return self[1]
-
- def css(self) -> str:
- if self.type == "STRING":
- return repr(self.value)
- return cast("str", self.value)
+ type = property(operator.itemgetter(0))
+ value = property(operator.itemgetter(1))
class EOFToken(Token):
- def __new__(cls, pos: int) -> Self:
- return Token.__new__(cls, "EOF", None, pos)
+ def __new__(cls, pos):
+ return Token.__new__(cls, 'EOF', None, pos)
- def __repr__(self) -> str:
- return f"<{self.type} at {self.pos}>"
+ def __repr__(self):
+ return '<%s at %i>' % (self.type, self.pos)
#### Tokenizer
class TokenMacros:
- unicode_escape = r"\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?"
- escape = unicode_escape + r"|\\[^\n\r\f0-9a-f]"
- string_escape = r"\\(?:\n|\r\n|\r|\f)|" + escape
- nonascii = r"[^\0-\177]"
- nmchar = f"[_a-z0-9-]|{escape}|{nonascii}"
- nmstart = f"[_a-z]|{escape}|{nonascii}"
-
-
-class MatchFunc(Protocol):
- def __call__(
- self, string: str, pos: int = ..., endpos: int = ...
- ) -> re.Match[str] | None: ...
-
-
-def _compile(pattern: str) -> MatchFunc:
+ unicode_escape = r'\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?'
+ escape = unicode_escape + r'|\\[^\n\r\f0-9a-f]'
+ string_escape = r'\\(?:\n|\r\n|\r|\f)|' + escape
+ nonascii = r'[^\0-\177]'
+ nmchar = '[_a-z0-9-]|%s|%s' % (escape, nonascii)
+ nmstart = '[_a-z]|%s|%s' % (escape, nonascii)
+
+def _compile(pattern):
return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match
-
-_match_whitespace = _compile(r"[ \t\r\n\f]+")
-_match_number = _compile(r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)")
-_match_hash = _compile("#(?:%(nmchar)s)+")
-_match_ident = _compile("-?(?:%(nmstart)s)(?:%(nmchar)s)*")
+_match_whitespace = _compile(r'[ \t\r\n\f]+')
+_match_number = _compile('[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
+_match_hash = _compile('#(?:%(nmchar)s)+')
+_match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*')
_match_string_by_quote = {
"'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"),
'"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'),
}
-_sub_simple_escape = re.compile(r"\\(.)").sub
-_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.IGNORECASE).sub
-_sub_newline_escape = re.compile(r"\\(?:\n|\r\n|\r|\f)").sub
+_sub_simple_escape = re.compile(r'\\(.)').sub
+_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub
+_sub_newline_escape =re.compile(r'\\(?:\n|\r\n|\r|\f)').sub
# Same as r'\1', but faster on CPython
-_replace_simple = operator.methodcaller("group", 1)
-
-
-def _replace_unicode(match: re.Match[str]) -> str:
+if hasattr(operator, 'methodcaller'):
+ # Python 2.6+
+ _replace_simple = operator.methodcaller('group', 1)
+else:
+ def _replace_simple(match):
+ return match.group(1)
+
+def _replace_unicode(match):
codepoint = int(match.group(1), 16)
if codepoint > sys.maxunicode:
codepoint = 0xFFFD
- return chr(codepoint)
+ return _unichr(codepoint)
-def unescape_ident(value: str) -> str:
+def unescape_ident(value):
value = _sub_unicode_escape(_replace_unicode, value)
- return _sub_simple_escape(_replace_simple, value)
+ value = _sub_simple_escape(_replace_simple, value)
+ return value
-def tokenize(s: str) -> Iterator[Token]:
+def tokenize(s):
pos = 0
len_s = len(s)
while pos < len_s:
match = _match_whitespace(s, pos=pos)
if match:
- yield Token("S", " ", pos)
+ yield Token('S', ' ', pos)
pos = match.end()
continue
match = _match_ident(s, pos=pos)
if match:
- value = _sub_simple_escape(
- _replace_simple, _sub_unicode_escape(_replace_unicode, match.group())
- )
- yield Token("IDENT", value, pos)
+ value = _sub_simple_escape(_replace_simple,
+ _sub_unicode_escape(_replace_unicode, match.group()))
+ yield Token('IDENT', value, pos)
pos = match.end()
continue
match = _match_hash(s, pos=pos)
if match:
- value = _sub_simple_escape(
- _replace_simple,
- _sub_unicode_escape(_replace_unicode, match.group()[1:]),
- )
- yield Token("HASH", value, pos)
+ value = _sub_simple_escape(_replace_simple,
+ _sub_unicode_escape(_replace_unicode, match.group()[1:]))
+ yield Token('HASH', value, pos)
pos = match.end()
continue
quote = s[pos]
if quote in _match_string_by_quote:
match = _match_string_by_quote[quote](s, pos=pos + 1)
- assert match, "Should have found at least an empty match"
+ assert match, 'Should have found at least an empty match'
end_pos = match.end()
if end_pos == len_s:
- raise SelectorSyntaxError(f"Unclosed string at {pos}")
+ raise SelectorSyntaxError('Unclosed string at %s' % pos)
if s[end_pos] != quote:
- raise SelectorSyntaxError(f"Invalid string at {pos}")
- value = _sub_simple_escape(
- _replace_simple,
- _sub_unicode_escape(
- _replace_unicode, _sub_newline_escape("", match.group())
- ),
- )
- yield Token("STRING", value, pos)
+ raise SelectorSyntaxError('Invalid string at %s' % pos)
+ value = _sub_simple_escape(_replace_simple,
+ _sub_unicode_escape(_replace_unicode,
+ _sub_newline_escape('', match.group())))
+ yield Token('STRING', value, pos)
pos = end_pos + 1
continue
match = _match_number(s, pos=pos)
if match:
value = match.group()
- yield Token("NUMBER", value, pos)
+ yield Token('NUMBER', value, pos)
pos = match.end()
continue
pos2 = pos + 2
- if s[pos:pos2] == "/*":
- pos = s.find("*/", pos2)
+ if s[pos:pos2] == '/*':
+ pos = s.find('*/', pos2)
if pos == -1:
pos = len_s
else:
pos += 2
continue
- yield Token("DELIM", s[pos], pos)
+ yield Token('DELIM', s[pos], pos)
pos += 1
assert pos == len_s
yield EOFToken(pos)
-class TokenStream:
- def __init__(self, tokens: Iterable[Token], source: str | None = None) -> None:
- self.used: list[Token] = []
+class TokenStream(object):
+ def __init__(self, tokens, source=None):
+ self.used = []
self.tokens = iter(tokens)
self.source = source
- self.peeked: Token | None = None
+ self.peeked = None
self._peeking = False
- self.next_token = self.tokens.__next__
+ try:
+ self.next_token = self.tokens.next
+ except AttributeError:
+ # Python 3
+ self.next_token = self.tokens.__next__
- def next(self) -> Token:
+ def next(self):
if self._peeking:
self._peeking = False
- assert self.peeked is not None
self.used.append(self.peeked)
return self.peeked
- next_ = self.next_token()
- self.used.append(next_)
- return next_
+ else:
+ next = self.next_token()
+ self.used.append(next)
+ return next
- def peek(self) -> Token:
+ def peek(self):
if not self._peeking:
self.peeked = self.next_token()
self._peeking = True
- assert self.peeked is not None
return self.peeked
- def next_ident(self) -> str:
- next_ = self.next()
- if next_.type != "IDENT":
- raise SelectorSyntaxError(f"Expected ident, got {next_}")
- return cast("str", next_.value)
-
- def next_ident_or_star(self) -> str | None:
- next_ = self.next()
- if next_.type == "IDENT":
- return next_.value
- if next_ == ("DELIM", "*"):
+ def next_ident(self):
+ next = self.next()
+ if next.type != 'IDENT':
+ raise SelectorSyntaxError('Expected ident, got %s' % (next,))
+ return next.value
+
+ def next_ident_or_star(self):
+ next = self.next()
+ if next.type == 'IDENT':
+ return next.value
+ elif next == ('DELIM', '*'):
return None
- raise SelectorSyntaxError(f"Expected ident or '*', got {next_}")
+ else:
+ raise SelectorSyntaxError(
+ "Expected ident or '*', got %s" % (next,))
- def skip_whitespace(self) -> None:
+ def skip_whitespace(self):
peek = self.peek()
- if peek.type == "S":
+ if peek.type == 'S':
self.next()
diff --git a/cssselect/py.typed b/cssselect/py.typed
deleted file mode 100644
index e69de29..0000000
diff --git a/cssselect/tests.py b/cssselect/tests.py
new file mode 100755
index 0000000..a1fdc9e
--- /dev/null
+++ b/cssselect/tests.py
@@ -0,0 +1,1166 @@
+#!/usr/bin/env python
+# coding: utf8
+"""
+ Tests for cssselect
+ ===================
+
+ These tests can be run either by py.test or by the standard library's
+ unittest. They use plain ``assert`` statements and do little reporting
+ themselves in case of failure.
+
+ Use py.test to get fancy error reporting and assert introspection.
+
+
+ :copyright: (c) 2007-2012 Ian Bicking and contributors.
+ See AUTHORS for more details.
+ :license: BSD, see LICENSE for more details.
+
+"""
+
+import sys
+import unittest
+
+from lxml import etree, html
+from cssselect import (parse, GenericTranslator, HTMLTranslator,
+ SelectorSyntaxError, ExpressionError)
+from cssselect.parser import (tokenize, parse_series, _unicode,
+ FunctionalPseudoElement)
+from cssselect.xpath import _unicode_safe_getattr, XPathExpr
+
+
+if sys.version_info[0] < 3:
+ # Python 2
+ def u(text):
+ return text.decode('utf8')
+else:
+ # Python 3
+ def u(text):
+ return text
+
+
+class TestCssselect(unittest.TestCase):
+ def test_tokenizer(self):
+ tokens = [
+ _unicode(item) for item in tokenize(
+ u(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)'))]
+ assert tokens == [
+ u(""),
+ "",
+ "' at 5>",
+ "",
+ # the no-break space is not whitespace in CSS
+ u(""), # f\xa0
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ ]
+
+ def test_parser(self):
+ def repr_parse(css):
+ selectors = parse(css)
+ for selector in selectors:
+ assert selector.pseudo_element is None
+ return [repr(selector.parsed_tree).replace("(u'", "('")
+ for selector in selectors]
+
+ def parse_many(first, *others):
+ result = repr_parse(first)
+ for other in others:
+ assert repr_parse(other) == result
+ return result
+
+ assert parse_many('*') == ['Element[*]']
+ assert parse_many('*|*') == ['Element[*]']
+ assert parse_many('*|foo') == ['Element[foo]']
+ assert parse_many('foo|*') == ['Element[foo|*]']
+ assert parse_many('foo|bar') == ['Element[foo|bar]']
+ # This will never match, but it is valid:
+ assert parse_many('#foo#bar') == ['Hash[Hash[Element[*]#foo]#bar]']
+ assert parse_many(
+ 'div>.foo',
+ 'div> .foo',
+ 'div >.foo',
+ 'div > .foo',
+ 'div \n> \t \t .foo', 'div\r>\n\n\n.foo', 'div\f>\f.foo'
+ ) == ['CombinedSelector[Element[div] > Class[Element[*].foo]]']
+ assert parse_many('td.foo,.bar',
+ 'td.foo, .bar',
+ 'td.foo\t\r\n\f ,\t\r\n\f .bar'
+ ) == [
+ 'Class[Element[td].foo]',
+ 'Class[Element[*].bar]'
+ ]
+ assert parse_many('div, td.foo, div.bar span') == [
+ 'Element[div]',
+ 'Class[Element[td].foo]',
+ 'CombinedSelector[Class[Element[div].bar] '
+ ' Element[span]]']
+ assert parse_many('div > p') == [
+ 'CombinedSelector[Element[div] > Element[p]]']
+ assert parse_many('td:first') == [
+ 'Pseudo[Element[td]:first]']
+ assert parse_many('td:first') == [
+ 'Pseudo[Element[td]:first]']
+ assert parse_many('td :first') == [
+ 'CombinedSelector[Element[td] '
+ ' Pseudo[Element[*]:first]]']
+ assert parse_many('td :first') == [
+ 'CombinedSelector[Element[td] '
+ ' Pseudo[Element[*]:first]]']
+ assert parse_many('a[name]', 'a[ name\t]') == [
+ 'Attrib[Element[a][name]]']
+ assert parse_many('a [name]') == [
+ 'CombinedSelector[Element[a] Attrib[Element[*][name]]]']
+ assert parse_many('a[rel="include"]', 'a[rel = include]') == [
+ "Attrib[Element[a][rel = 'include']]"]
+ assert parse_many("a[hreflang |= 'en']", "a[hreflang|=en]") == [
+ "Attrib[Element[a][hreflang |= 'en']]"]
+ assert parse_many('div:nth-child(10)') == [
+ "Function[Element[div]:nth-child(['10'])]"]
+ assert parse_many(':nth-child(2n+2)') == [
+ "Function[Element[*]:nth-child(['2', 'n', '+2'])]"]
+ assert parse_many('div:nth-of-type(10)') == [
+ "Function[Element[div]:nth-of-type(['10'])]"]
+ assert parse_many('div div:nth-of-type(10) .aclass') == [
+ 'CombinedSelector[CombinedSelector[Element[div] '
+ "Function[Element[div]:nth-of-type(['10'])]] "
+ ' Class[Element[*].aclass]]']
+ assert parse_many('label:only') == [
+ 'Pseudo[Element[label]:only]']
+ assert parse_many('a:lang(fr)') == [
+ "Function[Element[a]:lang(['fr'])]"]
+ assert parse_many('div:contains("foo")') == [
+ "Function[Element[div]:contains(['foo'])]"]
+ assert parse_many('div#foobar') == [
+ 'Hash[Element[div]#foobar]']
+ assert parse_many('div:not(div.foo)') == [
+ 'Negation[Element[div]:not(Class[Element[div].foo])]']
+ assert parse_many('td ~ th') == [
+ 'CombinedSelector[Element[td] ~ Element[th]]']
+
+ def test_pseudo_elements(self):
+ def parse_pseudo(css):
+ result = []
+ for selector in parse(css):
+ pseudo = selector.pseudo_element
+ pseudo = _unicode(pseudo) if pseudo else pseudo
+ # No Symbol here
+ assert pseudo is None or type(pseudo) is _unicode
+ selector = repr(selector.parsed_tree).replace("(u'", "('")
+ result.append((selector, pseudo))
+ return result
+
+ def parse_one(css):
+ result = parse_pseudo(css)
+ assert len(result) == 1
+ return result[0]
+
+ assert parse_one('foo') == ('Element[foo]', None)
+ assert parse_one('*') == ('Element[*]', None)
+ assert parse_one(':empty') == ('Pseudo[Element[*]:empty]', None)
+
+ # Special cases for CSS 2.1 pseudo-elements
+ assert parse_one(':BEfore') == ('Element[*]', 'before')
+ assert parse_one(':aftER') == ('Element[*]', 'after')
+ assert parse_one(':First-Line') == ('Element[*]', 'first-line')
+ assert parse_one(':First-Letter') == ('Element[*]', 'first-letter')
+
+ assert parse_one('::befoRE') == ('Element[*]', 'before')
+ assert parse_one('::AFter') == ('Element[*]', 'after')
+ assert parse_one('::firsT-linE') == ('Element[*]', 'first-line')
+ assert parse_one('::firsT-letteR') == ('Element[*]', 'first-letter')
+
+ assert parse_one('::text-content') == ('Element[*]', 'text-content')
+ assert parse_one('::attr(name)') == (
+ "Element[*]", "FunctionalPseudoElement[::attr(['name'])]")
+
+ assert parse_one('::Selection') == ('Element[*]', 'selection')
+ assert parse_one('foo:after') == ('Element[foo]', 'after')
+ assert parse_one('foo::selection') == ('Element[foo]', 'selection')
+ assert parse_one('lorem#ipsum ~ a#b.c[href]:empty::selection') == (
+ 'CombinedSelector[Hash[Element[lorem]#ipsum] ~ '
+ 'Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]',
+ 'selection')
+
+ parse_pseudo('foo:before, bar, baz:after') == [
+ ('Element[foo]', 'before'),
+ ('Element[bar]', None),
+ ('Element[baz]', 'after')]
+
+ # Special cases for CSS 2.1 pseudo-elements are ignored by default
+ for pseudo in ('after', 'before', 'first-line', 'first-letter'):
+ selector, = parse('e:%s' % pseudo)
+ assert selector.pseudo_element == pseudo
+ assert GenericTranslator().selector_to_xpath(selector, prefix='') == "e"
+
+ # Pseudo Elements are ignored by default, but if allowed they are not
+ # supported by GenericTranslator
+ tr = GenericTranslator()
+ selector, = parse('e::foo')
+ assert selector.pseudo_element == 'foo'
+ assert tr.selector_to_xpath(selector, prefix='') == "e"
+ self.assertRaises(ExpressionError, tr.selector_to_xpath, selector,
+ translate_pseudo_elements=True)
+
+ def test_specificity(self):
+ def specificity(css):
+ selectors = parse(css)
+ assert len(selectors) == 1
+ return selectors[0].specificity()
+
+ assert specificity('*') == (0, 0, 0)
+ assert specificity(' foo') == (0, 0, 1)
+ assert specificity(':empty ') == (0, 1, 0)
+ assert specificity(':before') == (0, 0, 1)
+ assert specificity('*:before') == (0, 0, 1)
+ assert specificity(':nth-child(2)') == (0, 1, 0)
+ assert specificity('.bar') == (0, 1, 0)
+ assert specificity('[baz]') == (0, 1, 0)
+ assert specificity('[baz="4"]') == (0, 1, 0)
+ assert specificity('[baz^="4"]') == (0, 1, 0)
+ assert specificity('#lipsum') == (1, 0, 0)
+
+ assert specificity(':not(*)') == (0, 0, 0)
+ assert specificity(':not(foo)') == (0, 0, 1)
+ assert specificity(':not(.foo)') == (0, 1, 0)
+ assert specificity(':not([foo])') == (0, 1, 0)
+ assert specificity(':not(:empty)') == (0, 1, 0)
+ assert specificity(':not(#foo)') == (1, 0, 0)
+
+ assert specificity('foo:empty') == (0, 1, 1)
+ assert specificity('foo:before') == (0, 0, 2)
+ assert specificity('foo::before') == (0, 0, 2)
+ assert specificity('foo:empty::before') == (0, 1, 2)
+
+ assert specificity('#lorem + foo#ipsum:first-child > bar:first-line'
+ ) == (2, 1, 3)
+
+ def test_parse_errors(self):
+ def get_error(css):
+ try:
+ parse(css)
+ except SelectorSyntaxError:
+ # Py2, Py3, ...
+ return str(sys.exc_info()[1]).replace("(u'", "('")
+
+ assert get_error('attributes(href)/html/body/a') == (
+ "Expected selector, got ")
+ assert get_error('attributes(href)') == (
+ "Expected selector, got ")
+ assert get_error('html/body/a') == (
+ "Expected selector, got ")
+ assert get_error(' ') == (
+ "Expected selector, got ")
+ assert get_error('div, ') == (
+ "Expected selector, got ")
+ assert get_error(' , div') == (
+ "Expected selector, got ")
+ assert get_error('p, , div') == (
+ "Expected selector, got ")
+ assert get_error('div > ') == (
+ "Expected selector, got ")
+ assert get_error(' > div') == (
+ "Expected selector, got ' at 2>")
+ assert get_error('foo|#bar') == (
+ "Expected ident or '*', got ")
+ assert get_error('#.foo') == (
+ "Expected selector, got ")
+ assert get_error('.#foo') == (
+ "Expected ident, got ")
+ assert get_error(':#foo') == (
+ "Expected ident, got ")
+ assert get_error('[*]') == (
+ "Expected '|', got ")
+ assert get_error('[foo|]') == (
+ "Expected ident, got ")
+ assert get_error('[#]') == (
+ "Expected ident or '*', got ")
+ assert get_error('[foo=#]') == (
+ "Expected string or ident, got ")
+ assert get_error('[href]a') == (
+ "Expected selector, got ")
+ assert get_error('[rel=stylesheet]') == None
+ assert get_error('[rel:stylesheet]') == (
+ "Operator expected, got ")
+ assert get_error('[rel=stylesheet') == (
+ "Expected ']', got ")
+ assert get_error(':lang(fr)') == None
+ assert get_error(':lang(fr') == (
+ "Expected an argument, got ")
+ assert get_error(':contains("foo') == (
+ "Unclosed string at 10")
+ assert get_error('foo!') == (
+ "Expected selector, got ")
+
+ # Mis-placed pseudo-elements
+ assert get_error('a:before:empty') == (
+ "Got pseudo-element ::before not at the end of a selector")
+ assert get_error('li:before a') == (
+ "Got pseudo-element ::before not at the end of a selector")
+ assert get_error(':not(:before)') == (
+ "Got pseudo-element ::before inside :not() at 12")
+ assert get_error(':not(:not(a))') == (
+ "Got nested :not()")
+
+ def test_translation(self):
+ def xpath(css):
+ return _unicode(GenericTranslator().css_to_xpath(css, prefix=''))
+
+ assert xpath('*') == "*"
+ assert xpath('e') == "e"
+ assert xpath('*|e') == "e"
+ assert xpath('e|f') == "e:f"
+ assert xpath('e[foo]') == "e[@foo]"
+ assert xpath('e[foo|bar]') == "e[@foo:bar]"
+ assert xpath('e[foo="bar"]') == "e[@foo = 'bar']"
+ assert xpath('e[foo~="bar"]') == (
+ "e[@foo and contains("
+ "concat(' ', normalize-space(@foo), ' '), ' bar ')]")
+ assert xpath('e[foo^="bar"]') == (
+ "e[@foo and starts-with(@foo, 'bar')]")
+ assert xpath('e[foo$="bar"]') == (
+ "e[@foo and substring(@foo, string-length(@foo)-2) = 'bar']")
+ assert xpath('e[foo*="bar"]') == (
+ "e[@foo and contains(@foo, 'bar')]")
+ assert xpath('e[hreflang|="en"]') == (
+ "e[@hreflang and ("
+ "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]")
+ assert xpath('e:nth-child(1)') == (
+ "*/*[name() = 'e' and (position() = 1)]")
+ assert xpath('e:nth-last-child(1)') == (
+ "*/*[name() = 'e' and (position() = last() - 1)]")
+ assert xpath('e:nth-last-child(2n+2)') == (
+ "*/*[name() = 'e' and ("
+ "(position() +2) mod -2 = 0 and position() < (last() -2))]")
+ assert xpath('e:nth-of-type(1)') == (
+ "*/e[position() = 1]")
+ assert xpath('e:nth-last-of-type(1)') == (
+ "*/e[position() = last() - 1]")
+ assert xpath('e:nth-last-of-type(1)') == (
+ "*/e[position() = last() - 1]")
+ assert xpath('div e:nth-last-of-type(1) .aclass') == (
+ "div/descendant-or-self::*/e[position() = last() - 1]"
+ "/descendant-or-self::*/*[@class and contains("
+ "concat(' ', normalize-space(@class), ' '), ' aclass ')]")
+ assert xpath('e:first-child') == (
+ "*/*[name() = 'e' and (position() = 1)]")
+ assert xpath('e:last-child') == (
+ "*/*[name() = 'e' and (position() = last())]")
+ assert xpath('e:first-of-type') == (
+ "*/e[position() = 1]")
+ assert xpath('e:last-of-type') == (
+ "*/e[position() = last()]")
+ assert xpath('e:only-child') == (
+ "*/*[name() = 'e' and (last() = 1)]")
+ assert xpath('e:only-of-type') == (
+ "e[last() = 1]")
+ assert xpath('e:empty') == (
+ "e[not(*) and not(string-length())]")
+ assert xpath('e:EmPTY') == (
+ "e[not(*) and not(string-length())]")
+ assert xpath('e:root') == (
+ "e[not(parent::*)]")
+ assert xpath('e:hover') == (
+ "e[0]") # never matches
+ assert xpath('e:contains("foo")') == (
+ "e[contains(., 'foo')]")
+ assert xpath('e:ConTains(foo)') == (
+ "e[contains(., 'foo')]")
+ assert xpath('e.warning') == (
+ "e[@class and contains("
+ "concat(' ', normalize-space(@class), ' '), ' warning ')]")
+ assert xpath('e#myid') == (
+ "e[@id = 'myid']")
+ assert xpath('e:not(:nth-child(odd))') == (
+ "e[not((position() -1) mod 2 = 0 and position() >= 1)]")
+ assert xpath('e:nOT(*)') == (
+ "e[0]") # never matches
+ assert xpath('e f') == (
+ "e/descendant-or-self::*/f")
+ assert xpath('e > f') == (
+ "e/f")
+ assert xpath('e + f') == (
+ "e/following-sibling::*[name() = 'f' and (position() = 1)]")
+ assert xpath('e ~ f') == (
+ "e/following-sibling::f")
+ assert xpath('div#container p') == (
+ "div[@id = 'container']/descendant-or-self::*/p")
+
+ # Invalid characters in XPath element names
+ assert xpath(r'di\a0 v') == (
+ u("*[name() = 'di v']")) # di\xa0v
+ assert xpath(r'di\[v') == (
+ "*[name() = 'di[v']")
+ assert xpath(r'[h\a0 ref]') == (
+ u("*[attribute::*[name() = 'h ref']]")) # h\xa0ref
+ assert xpath(r'[h\]ref]') == (
+ "*[attribute::*[name() = 'h]ref']]")
+
+ self.assertRaises(ExpressionError, xpath, u(':fİrst-child'))
+ self.assertRaises(ExpressionError, xpath, ':first-of-type')
+ self.assertRaises(ExpressionError, xpath, ':only-of-type')
+ self.assertRaises(ExpressionError, xpath, ':last-of-type')
+ self.assertRaises(ExpressionError, xpath, ':nth-of-type(1)')
+ self.assertRaises(ExpressionError, xpath, ':nth-last-of-type(1)')
+ self.assertRaises(ExpressionError, xpath, ':nth-child(n-)')
+ self.assertRaises(ExpressionError, xpath, ':after')
+ self.assertRaises(ExpressionError, xpath, ':lorem-ipsum')
+ self.assertRaises(ExpressionError, xpath, ':lorem(ipsum)')
+ self.assertRaises(ExpressionError, xpath, '::lorem-ipsum')
+ self.assertRaises(TypeError, GenericTranslator().css_to_xpath, 4)
+ self.assertRaises(TypeError, GenericTranslator().selector_to_xpath,
+ 'foo')
+
+ def test_unicode(self):
+ if sys.version_info[0] < 3:
+ css = '.a\xc1b'.decode('ISO-8859-1')
+ else:
+ css = '.a\xc1b'
+
+ xpath = GenericTranslator().css_to_xpath(css)
+ assert css[1:] in xpath
+ xpath = xpath.encode('ascii', 'xmlcharrefreplace').decode('ASCII')
+ assert xpath == (
+ "descendant-or-self::*[@class and contains("
+ "concat(' ', normalize-space(@class), ' '), ' aÁb ')]")
+
+ def test_quoting(self):
+ css_to_xpath = GenericTranslator().css_to_xpath
+ assert css_to_xpath('*[aval="\'"]') == (
+ '''descendant-or-self::*[@aval = "'"]''')
+ assert css_to_xpath('*[aval="\'\'\'"]') == (
+ """descendant-or-self::*[@aval = "'''"]""")
+ assert css_to_xpath('*[aval=\'"\']') == (
+ '''descendant-or-self::*[@aval = '"']''')
+ assert css_to_xpath('*[aval=\'"""\']') == (
+ '''descendant-or-self::*[@aval = '"""']''')
+
+ def test_unicode_escapes(self):
+ # \22 == '"' \20 == ' '
+ css_to_xpath = GenericTranslator().css_to_xpath
+ assert css_to_xpath(r'*[aval="\'\22\'"]') == (
+ '''descendant-or-self::*[@aval = concat("'",'"',"'")]''')
+ assert css_to_xpath(r'*[aval="\'\22 2\'"]') == (
+ '''descendant-or-self::*[@aval = concat("'",'"2',"'")]''')
+ assert css_to_xpath(r'*[aval="\'\20 \'"]') == (
+ '''descendant-or-self::*[@aval = "' '"]''')
+ assert css_to_xpath('*[aval="\'\\20\r\n \'"]') == (
+ '''descendant-or-self::*[@aval = "' '"]''')
+
+ def test_xpath_pseudo_elements(self):  # translator subclass hooks for custom pseudo-classes/-elements
+ class CustomTranslator(GenericTranslator):
+ def xpath_pseudo_element(self, xpath, pseudo_element):  # dispatch to a handler method chosen by name
+ if isinstance(pseudo_element, FunctionalPseudoElement):
+ method = 'xpath_%s_functional_pseudo_element' % (
+ pseudo_element.name.replace('-', '_'))
+ method = _unicode_safe_getattr(self, method, None)  # None when no such handler is defined
+ if not method:
+ raise ExpressionError(
+ "The functional pseudo-element ::%s() is unknown"
+ % pseudo_element.name)
+ xpath = method(xpath, pseudo_element.arguments)
+ else:
+ method = 'xpath_%s_simple_pseudo_element' % (
+ pseudo_element.replace('-', '_'))
+ method = _unicode_safe_getattr(self, method, None)  # None when no such handler is defined
+ if not method:
+ raise ExpressionError(
+ "The pseudo-element ::%s is unknown"
+ % pseudo_element)
+ xpath = method(xpath)
+ return xpath
+
+ # functional pseudo-class :nb-attr(N):
+ # elements that have exactly N attributes
+ def xpath_nb_attr_function(self, xpath, function):
+ nb_attributes = int(function.arguments[0].value)
+ return xpath.add_condition(
+ "count(@*)=%d" % nb_attributes)
+
+ # pseudo-class :five-attributes:
+ # elements that have 5 attributes
+ def xpath_five_attributes_pseudo(self, xpath):
+ return xpath.add_condition("count(@*)=5")
+
+ # functional pseudo-element ::attr(name):
+ # element's attribute by name
+ def xpath_attr_functional_pseudo_element(self, xpath, arguments):
+ attribute_name = arguments[0].value
+ other = XPathExpr('@%s' % attribute_name, '', )
+ return xpath.join('/', other)
+
+ # pseudo-element ::text-node:
+ # element's text() nodes
+ def xpath_text_node_simple_pseudo_element(self, xpath):
+ other = XPathExpr('text()', '', )
+ return xpath.join('/', other)
+
+ # pseudo-element ::attr-href:
+ # element's href attribute
+ def xpath_attr_href_simple_pseudo_element(self, xpath):
+ other = XPathExpr('@href', '', )
+ return xpath.join('/', other)
+
+ def xpath(css):
+ return _unicode(CustomTranslator().css_to_xpath(css))
+
+ assert xpath(':five-attributes') == "descendant-or-self::*[count(@*)=5]"
+ assert xpath(':nb-attr(3)') == "descendant-or-self::*[count(@*)=3]"
+ assert xpath('::attr(href)') == "descendant-or-self::*/@href"
+ assert xpath('::text-node') == "descendant-or-self::*/text()"
+ assert xpath('::attr-href') == "descendant-or-self::*/@href"
+ assert xpath('p img::attr(src)') == (
+ "descendant-or-self::p/descendant-or-self::*/img/@src")
+
+ def test_series(self):
+ def series(css):
+ selector, = parse(':nth-child(%s)' % css)
+ args = selector.parsed_tree.arguments
+ try:
+ return parse_series(args)
+ except ValueError:
+ return None
+
+ assert series('1n+3') == (1, 3)
+ assert series('1n +3') == (1, 3)
+ assert series('1n + 3') == (1, 3)
+ assert series('1n+ 3') == (1, 3)
+ assert series('1n-3') == (1, -3)
+ assert series('1n -3') == (1, -3)
+ assert series('1n - 3') == (1, -3)
+ assert series('1n- 3') == (1, -3)
+ assert series('n-5') == (1, -5)
+ assert series('odd') == (2, 1)
+ assert series('even') == (2, 0)
+ assert series('3n') == (3, 0)
+ assert series('n') == (1, 0)
+ assert series('+n') == (1, 0)
+ assert series('-n') == (-1, 0)
+ assert series('5') == (0, 5)
+ assert series('foo') == None
+ assert series('n+') == None
+
+ def test_lang(self):
+ document = etree.fromstring(XMLLANG_IDS)
+ sort_key = dict(
+ (el, count) for count, el in enumerate(document.getiterator())
+ ).__getitem__
+ css_to_xpath = GenericTranslator().css_to_xpath
+
+ def langid(selector):
+ xpath = css_to_xpath(selector)
+ items = document.xpath(xpath)
+ items.sort(key=sort_key)
+ return [element.get('id', 'nil') for element in items]
+
+ assert langid(':lang("EN")') == ['first', 'second', 'third', 'fourth']
+ assert langid(':lang("en-us")') == ['second', 'fourth']
+ assert langid(':lang(en-nz)') == ['third']
+ assert langid(':lang(fr)') == ['fifth']
+ assert langid(':lang(ru)') == ['sixth']
+ assert langid(":lang('ZH')") == ['eighth']
+ assert langid(':lang(de) :lang(zh)') == ['eighth']
+ assert langid(':lang(en), :lang(zh)') == [
+ 'first', 'second', 'third', 'fourth', 'eighth']
+ assert langid(':lang(es)') == []
+
+ def test_select(self):
+ document = etree.fromstring(HTML_IDS)
+ sort_key = dict(
+ (el, count) for count, el in enumerate(document.getiterator())
+ ).__getitem__
+ css_to_xpath = GenericTranslator().css_to_xpath
+ html_css_to_xpath = HTMLTranslator().css_to_xpath
+
+ def select_ids(selector, html_only):
+ xpath = css_to_xpath(selector)
+ items = document.xpath(xpath)
+ if html_only:
+ assert items == []
+ xpath = html_css_to_xpath(selector)
+ items = document.xpath(xpath)
+ items.sort(key=sort_key)
+ return [element.get('id', 'nil') for element in items]
+
+ def pcss(main, *selectors, **kwargs):
+ html_only = kwargs.pop('html_only', False)
+ result = select_ids(main, html_only)
+ for selector in selectors:
+ assert select_ids(selector, html_only) == result
+ return result
+
+ all_ids = pcss('*')
+ assert all_ids[:6] == [
+ 'html', 'nil', 'link-href', 'link-nohref', 'nil', 'outer-div']
+ assert all_ids[-1:] == ['foobar-span']
+ assert pcss('div') == ['outer-div', 'li-div', 'foobar-div']
+ assert pcss('DIV', html_only=True) == [
+ 'outer-div', 'li-div', 'foobar-div'] # case-insensitive in HTML
+ assert pcss('div div') == ['li-div']
+ assert pcss('div, div div') == ['outer-div', 'li-div', 'foobar-div']
+ assert pcss('a[name]') == ['name-anchor']
+ assert pcss('a[NAme]', html_only=True) == [
+ 'name-anchor'] # case-insensitive in HTML:
+ assert pcss('a[rel]') == ['tag-anchor', 'nofollow-anchor']
+ assert pcss('a[rel="tag"]') == ['tag-anchor']
+ assert pcss('a[href*="localhost"]') == ['tag-anchor']
+ assert pcss('a[href*=""]') == []
+ assert pcss('a[href^="http"]') == ['tag-anchor', 'nofollow-anchor']
+ assert pcss('a[href^="http:"]') == ['tag-anchor']
+ assert pcss('a[href^=""]') == []
+ assert pcss('a[href$="org"]') == ['nofollow-anchor']
+ assert pcss('a[href$=""]') == []
+ assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == [
+ 'foobar-div']
+ assert pcss('[foobar~="ab bc"]',
+ '[foobar~=""]', '[foobar~=" \t"]') == []
+ assert pcss('div[foobar~="cd"]') == []
+ assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ['second-li']
+ # Attribute values are case sensitive
+ assert pcss('*[lang|="en"]', '[lang|="en-US"]') == []
+ assert pcss('*[lang|="e"]') == []
+ # ... :lang() is not.
+ assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [
+ 'second-li', 'li-div']
+ assert pcss(':lang("e")', html_only=True) == []
+ assert pcss('li:nth-child(3)') == ['third-li']
+ assert pcss('li:nth-child(10)') == []
+ assert pcss('li:nth-child(2n)', 'li:nth-child(even)',
+ 'li:nth-child(2n+0)') == [
+ 'second-li', 'fourth-li', 'sixth-li']
+ assert pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)') == [
+ 'first-li', 'third-li', 'fifth-li', 'seventh-li']
+ assert pcss('li:nth-child(2n+4)') == ['fourth-li', 'sixth-li']
+ # FIXME: I'm not 100% sure this is right:
+ assert pcss('li:nth-child(3n+1)') == [
+ 'first-li', 'fourth-li', 'seventh-li']
+ assert pcss('li:nth-last-child(0)') == [
+ 'seventh-li']
+ assert pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)') == [
+ 'second-li', 'fourth-li', 'sixth-li']
+ assert pcss('li:nth-last-child(2n+2)') == ['second-li', 'fourth-li']
+ assert pcss('ol:first-of-type') == ['first-ol']
+ assert pcss('ol:nth-child(1)') == []
+ assert pcss('ol:nth-of-type(2)') == ['second-ol']
+ # FIXME: like above', '(1) or (2)?
+ assert pcss('ol:nth-last-of-type(1)') == ['first-ol']
+ assert pcss('span:only-child') == ['foobar-span']
+ assert pcss('li div:only-child') == ['li-div']
+ assert pcss('div *:only-child') == ['li-div', 'foobar-span']
+ self.assertRaises(ExpressionError, pcss, 'p *:only-of-type')
+ assert pcss('p:only-of-type') == ['paragraph']
+ assert pcss('a:empty', 'a:EMpty') == ['name-anchor']
+ assert pcss('li:empty') == [
+ 'third-li', 'fourth-li', 'fifth-li', 'sixth-li']
+ assert pcss(':root', 'html:root') == ['html']
+ assert pcss('li:root', '* :root') == []
+ assert pcss('*:contains("link")', ':CONtains("link")') == [
+ 'html', 'nil', 'outer-div', 'tag-anchor', 'nofollow-anchor']
+ assert pcss('*:contains("LInk")') == [] # case sensitive
+ assert pcss('*:contains("e")') == [
+ 'html', 'nil', 'outer-div', 'first-ol', 'first-li',
+ 'paragraph', 'p-em']
+ assert pcss('*:contains("E")') == [] # case-sensitive
+ assert pcss('.a', '.b', '*.a', 'ol.a') == ['first-ol']
+ assert pcss('.c', '*.c') == ['first-ol', 'third-li', 'fourth-li']
+ assert pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c') == [
+ 'third-li', 'fourth-li']
+ assert pcss('#first-li', 'li#first-li', '*#first-li') == ['first-li']
+ assert pcss('li div', 'li > div', 'div div') == ['li-div']
+ assert pcss('div > div') == []
+ assert pcss('div>.c', 'div > .c') == ['first-ol']
+ assert pcss('div + div') == ['foobar-div']
+ assert pcss('a ~ a') == ['tag-anchor', 'nofollow-anchor']
+ assert pcss('a[rel="tag"] ~ a') == ['nofollow-anchor']
+ assert pcss('ol#first-ol li:last-child') == ['seventh-li']
+ assert pcss('ol#first-ol *:last-child') == ['li-div', 'seventh-li']
+ assert pcss('#outer-div:first-child') == ['outer-div']
+ assert pcss('#outer-div :first-child') == [
+ 'name-anchor', 'first-li', 'li-div', 'p-b',
+ 'checkbox-fieldset-disabled', 'area-href']
+ assert pcss('a[href]') == ['tag-anchor', 'nofollow-anchor']
+ assert pcss(':not(*)') == []
+ assert pcss('a:not([href])') == ['name-anchor']
+ assert pcss('ol :Not(li[class])') == [
+ 'first-li', 'second-li', 'li-div',
+ 'fifth-li', 'sixth-li', 'seventh-li']
+ # Invalid characters in XPath element names, should not crash
+ assert pcss(r'di\a0 v', r'div\[') == []
+ assert pcss(r'[h\a0 ref]', r'[h\]ref]') == []
+
+ # HTML-specific
+ assert pcss(':link', html_only=True) == [
+ 'link-href', 'tag-anchor', 'nofollow-anchor', 'area-href']
+ assert pcss(':visited', html_only=True) == []
+ assert pcss(':enabled', html_only=True) == [
+ 'link-href', 'tag-anchor', 'nofollow-anchor',
+ 'checkbox-unchecked', 'text-checked', 'checkbox-checked',
+ 'area-href']
+ assert pcss(':disabled', html_only=True) == [
+ 'checkbox-disabled', 'checkbox-disabled-checked', 'fieldset',
+ 'checkbox-fieldset-disabled']
+ assert pcss(':checked', html_only=True) == [
+ 'checkbox-checked', 'checkbox-disabled-checked']
+
+ def test_select_shakespeare(self):
+ document = html.document_fromstring(HTML_SHAKESPEARE)
+ body = document.xpath('//body')[0]
+ css_to_xpath = GenericTranslator().css_to_xpath
+
+ try:
+ basestring_ = basestring
+ except NameError:
+ basestring_ = (str, bytes)
+
+ def count(selector):
+ xpath = css_to_xpath(selector)
+ results = body.xpath(xpath)
+ assert not isinstance(results, basestring_)
+ found = set()
+ for item in results:
+ assert item not in found
+ found.add(item)
+ assert not isinstance(item, basestring_)
+ return len(results)
+
+ # Data borrowed from http://mootools.net/slickspeed/
+
+ ## Changed from original; probably because I'm only
+ ## searching the body.
+ #assert count('*') == 252
+ assert count('*') == 246
+ assert count('div:contains(CELIA)') == 26
+ assert count('div:only-child') == 22 # ?
+ assert count('div:nth-child(even)') == 106
+ assert count('div:nth-child(2n)') == 106
+ assert count('div:nth-child(odd)') == 137
+ assert count('div:nth-child(2n+1)') == 137
+ assert count('div:nth-child(n)') == 243
+ assert count('div:last-child') == 53
+ assert count('div:first-child') == 51
+ assert count('div > div') == 242
+ assert count('div + div') == 190
+ assert count('div ~ div') == 190
+ assert count('body') == 1
+ assert count('body div') == 243
+ assert count('div') == 243
+ assert count('div div') == 242
+ assert count('div div div') == 241
+ assert count('div, div, div') == 243
+ assert count('div, a, span') == 243
+ assert count('.dialog') == 51
+ assert count('div.dialog') == 51
+ assert count('div .dialog') == 51
+ assert count('div.character, div.dialog') == 99
+ assert count('div.direction.dialog') == 0
+ assert count('div.dialog.direction') == 0
+ assert count('div.dialog.scene') == 1
+ assert count('div.scene.scene') == 1
+ assert count('div.scene .scene') == 0
+ assert count('div.direction .dialog ') == 0
+ assert count('div .dialog .direction') == 4
+ assert count('div.dialog .dialog .direction') == 4
+ assert count('#speech5') == 1
+ assert count('div#speech5') == 1
+ assert count('div #speech5') == 1
+ assert count('div.scene div.dialog') == 49
+ assert count('div#scene1 div.dialog div') == 142
+ assert count('#scene1 #speech1') == 1
+ assert count('div[class]') == 103
+ assert count('div[class=dialog]') == 50
+ assert count('div[class^=dia]') == 51
+ assert count('div[class$=log]') == 50
+ assert count('div[class*=sce]') == 1
+ assert count('div[class|=dialog]') == 50 # ? Seems right
+ assert count('div[class!=madeup]') == 243 # ? Seems right
+ assert count('div[class~=dialog]') == 51 # ? Seems right
+
+XMLLANG_IDS = '''
+
+ a
+ b
+ c
+ d
+ e
+ f
+
+
+
+
+'''
+
+HTML_IDS = '''
+
+
+
+
+
+
+
+'''
+
+
+HTML_SHAKESPEARE = '''
+
+
+
+
+
+
+
+
+
As You Like It
+
+ by William Shakespeare
+
+
+
ACT I, SCENE III. A room in the palace.
+
+
Enter CELIA and ROSALIND
+
+
CELIA
+
+
Why, cousin! why, Rosalind! Cupid have mercy! not a word?
+
+
ROSALIND
+
+
Not one to throw at a dog.
+
+
CELIA
+
+
No, thy words are too precious to be cast away upon
+
curs; throw some of them at me; come, lame me with reasons.
+
+
ROSALIND
+
CELIA
+
+
But is all this for your father?
+
+
+
Then there were two cousins laid up; when the one
+
should be lamed with reasons and the other mad
+
without any.
+
+
ROSALIND
+
+
No, some of it is for my child's father. O, how
+
full of briers is this working-day world!
+
+
CELIA
+
+
They are but burs, cousin, thrown upon thee in
+
holiday foolery: if we walk not in the trodden
+
paths our very petticoats will catch them.
+
+
ROSALIND
+
+
I could shake them off my coat: these burs are in my heart.
+
+
CELIA
+
+
ROSALIND
+
+
I would try, if I could cry 'hem' and have him.
+
+
CELIA
+
+
Come, come, wrestle with thy affections.
+
+
ROSALIND
+
+
O, they take the part of a better wrestler than myself!
+
+
CELIA
+
+
O, a good wish upon you! you will try in time, in
+
despite of a fall. But, turning these jests out of
+
service, let us talk in good earnest: is it
+
possible, on such a sudden, you should fall into so
+
strong a liking with old Sir Rowland's youngest son?
+
+
ROSALIND
+
+
The duke my father loved his father dearly.
+
+
CELIA
+
+
Doth it therefore ensue that you should love his son
+
dearly? By this kind of chase, I should hate him,
+
for my father hated his father dearly; yet I hate
+
not Orlando.
+
+
ROSALIND
+
+
No, faith, hate him not, for my sake.
+
+
CELIA
+
+
Why should I not? doth he not deserve well?
+
+
ROSALIND
+
+
Let me love him for that, and do you love him
+
because I do. Look, here comes the duke.
+
+
CELIA
+
+
With his eyes full of anger.
+
Enter DUKE FREDERICK, with Lords
+
+
DUKE FREDERICK
+
+
Mistress, dispatch you with your safest haste
+
And get you from our court.
+
+
ROSALIND
+
+
DUKE FREDERICK
+
+
You, cousin
+
Within these ten days if that thou be'st found
+
So near our public court as twenty miles,
+
Thou diest for it.
+
+
ROSALIND
+
+
I do beseech your grace,
+
Let me the knowledge of my fault bear with me:
+
If with myself I hold intelligence
+
Or have acquaintance with mine own desires,
+
If that I do not dream or be not frantic,--
+
As I do trust I am not--then, dear uncle,
+
Never so much as in a thought unborn
+
Did I offend your highness.
+
+
DUKE FREDERICK
+
+
Thus do all traitors:
+
If their purgation did consist in words,
+
They are as innocent as grace itself:
+
Let it suffice thee that I trust thee not.
+
+
ROSALIND
+
+
Yet your mistrust cannot make me a traitor:
+
Tell me whereon the likelihood depends.
+
+
DUKE FREDERICK
+
+
Thou art thy father's daughter; there's enough.
+
+
ROSALIND
+
+
So was I when your highness took his dukedom;
+
So was I when your highness banish'd him:
+
Treason is not inherited, my lord;
+
Or, if we did derive it from our friends,
+
What's that to me? my father was no traitor:
+
Then, good my liege, mistake me not so much
+
To think my poverty is treacherous.
+
+
CELIA
+
+
Dear sovereign, hear me speak.
+
+
DUKE FREDERICK
+
+
Ay, Celia; we stay'd her for your sake,
+
Else had she with her father ranged along.
+
+
CELIA
+
+
I did not then entreat to have her stay;
+
It was your pleasure and your own remorse:
+
I was too young that time to value her;
+
But now I know her: if she be a traitor,
+
Why so am I; we still have slept together,
+
Rose at an instant, learn'd, play'd, eat together,
+
And wheresoever we went, like Juno's swans,
+
Still we went coupled and inseparable.
+
+
DUKE FREDERICK
+
+
She is too subtle for thee; and her smoothness,
+
Her very silence and her patience
+
Speak to the people, and they pity her.
+
Thou art a fool: she robs thee of thy name;
+
And thou wilt show more bright and seem more virtuous
+
When she is gone. Then open not thy lips:
+
Firm and irrevocable is my doom
+
Which I have pass'd upon her; she is banish'd.
+
+
CELIA
+
+
Pronounce that sentence then on me, my liege:
+
I cannot live out of her company.
+
+
DUKE FREDERICK
+
+
You are a fool. You, niece, provide yourself:
+
If you outstay the time, upon mine honour,
+
And in the greatness of my word, you die.
+
Exeunt DUKE FREDERICK and Lords
+
+
CELIA
+
+
O my poor Rosalind, whither wilt thou go?
+
Wilt thou change fathers? I will give thee mine.
+
I charge thee, be not thou more grieved than I am.
+
+
ROSALIND
+
+
CELIA
+
+
Thou hast not, cousin;
+
Prithee be cheerful: know'st thou not, the duke
+
Hath banish'd me, his daughter?
+
+
ROSALIND
+
+
CELIA
+
+
No, hath not? Rosalind lacks then the love
+
Which teacheth thee that thou and I am one:
+
Shall we be sunder'd? shall we part, sweet girl?
+
No: let my father seek another heir.
+
Therefore devise with me how we may fly,
+
Whither to go and what to bear with us;
+
And do not seek to take your change upon you,
+
To bear your griefs yourself and leave me out;
+
For, by this heaven, now at our sorrows pale,
+
Say what thou canst, I'll go along with thee.
+
+
ROSALIND
+
+
Why, whither shall we go?
+
+
CELIA
+
+
To seek my uncle in the forest of Arden.
+
+
ROSALIND
+
+
Alas, what danger will it be to us,
+
Maids as we are, to travel forth so far!
+
Beauty provoketh thieves sooner than gold.
+
+
CELIA
+
+
I'll put myself in poor and mean attire
+
And with a kind of umber smirch my face;
+
The like do you: so shall we pass along
+
And never stir assailants.
+
+
ROSALIND
+
+
Were it not better,
+
Because that I am more than common tall,
+
That I did suit me all points like a man?
+
A gallant curtle-axe upon my thigh,
+
A boar-spear in my hand; and--in my heart
+
Lie there what hidden woman's fear there will--
+
We'll have a swashing and a martial outside,
+
As many other mannish cowards have
+
That do outface it with their semblances.
+
+
CELIA
+
+
What shall I call thee when thou art a man?
+
+
ROSALIND
+
+
I'll have no worse a name than Jove's own page;
+
And therefore look you call me Ganymede.
+
But what will you be call'd?
+
+
CELIA
+
+
Something that hath a reference to my state
+
No longer Celia, but Aliena.
+
+
ROSALIND
+
+
But, cousin, what if we assay'd to steal
+
The clownish fool out of your father's court?
+
Would he not be a comfort to our travel?
+
+
CELIA
+
+
He'll go along o'er the wide world with me;
+
Leave me alone to woo him. Let's away,
+
And get our jewels and our wealth together,
+
Devise the fittest time and safest way
+
To hide us from pursuit that will be made
+
After my flight. Now go we in content
+
To liberty and not to banishment.
+
Exeunt
+
+
+
+
+
+
+'''
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index 96eac3f..e5e74b9 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -1,46 +1,35 @@
+# coding: utf8
"""
-cssselect.xpath
-===============
+ cssselect.xpath
+ ===============
-Translation of parsed CSS selectors to XPath expressions.
+ Translation of parsed CSS selectors to XPath expressions.
-:copyright: (c) 2007-2012 Ian Bicking and contributors.
-See AUTHORS for more details.
-:license: BSD, see LICENSE for more details.
+ :copyright: (c) 2007-2012 Ian Bicking and contributors.
+ See AUTHORS for more details.
+ :license: BSD, see LICENSE for more details.
"""
-from __future__ import annotations
-
+import sys
import re
-from typing import TYPE_CHECKING, cast
-
-from cssselect.parser import (
- Attrib,
- Class,
- CombinedSelector,
- Element,
- Function,
- Hash,
- Matching,
- Negation,
- Pseudo,
- PseudoElement,
- Relation,
- Selector,
- SelectorError,
- SpecificityAdjustment,
- Tree,
- parse,
- parse_series,
-)
-
-if TYPE_CHECKING:
- from collections.abc import Callable
-
- # typing.Self requires Python 3.11
- from typing_extensions import Self
+
+from cssselect.parser import parse, parse_series, SelectorError
+
+
+if sys.version_info[0] < 3:
+ _basestring = basestring
+ _unicode = unicode
+else:
+ _basestring = str
+ _unicode = str
+
+
+def _unicode_safe_getattr(obj, name, default=None):
+ # getattr() with a non-ASCII name fails on Python 2.x
+ name = name.encode('ascii', 'replace').decode('ascii')
+ return getattr(obj, name, default)
class ExpressionError(SelectorError, RuntimeError):
@@ -49,72 +38,52 @@ class ExpressionError(SelectorError, RuntimeError):
#### XPath Helpers
+class XPathExpr(object):
-class XPathExpr:
- def __init__(
- self,
- path: str = "",
- element: str = "*",
- condition: str = "",
- star_prefix: bool = False,
- ) -> None:
+ def __init__(self, path='', element='*', condition='', star_prefix=False):
self.path = path
self.element = element
self.condition = condition
- def __str__(self) -> str:
- path = str(self.path) + str(self.element)
+ def __str__(self):
+ path = _unicode(self.path) + _unicode(self.element)
if self.condition:
- path += f"[{self.condition}]"
+ path += '[%s]' % self.condition
return path
- def __repr__(self) -> str:
- return f"{self.__class__.__name__}[{self}]"
+ def __repr__(self):
+ return '%s[%s]' % (self.__class__.__name__, self)
- def add_condition(self, condition: str, conjuction: str = "and") -> Self:
+ def add_condition(self, condition):
if self.condition:
- self.condition = f"({self.condition}) {conjuction} ({condition})"
+ self.condition = '%s and (%s)' % (self.condition, condition)
else:
self.condition = condition
return self
- def add_name_test(self) -> None:
- if self.element == "*":
+ def add_name_test(self):
+ if self.element == '*':
# We weren't doing a test anyway
return
- self.add_condition(f"name() = {GenericTranslator.xpath_literal(self.element)}")
- self.element = "*"
+ self.add_condition(
+ "name() = %s" % GenericTranslator.xpath_literal(self.element))
+ self.element = '*'
- def add_star_prefix(self) -> None:
+ def add_star_prefix(self):
"""
Append '*/' to the path to keep the context constrained
to a single parent.
"""
- self.path += "*/"
-
- def join(
- self,
- combiner: str,
- other: XPathExpr,
- closing_combiner: str | None = None,
- has_inner_condition: bool = False,
- ) -> Self:
- path = str(self) + combiner
+ self.path += '*/'
+
+ def join(self, combiner, other):
+ path = _unicode(self) + combiner
# Any "star prefix" is redundant when joining.
- if other.path != "*/":
+ if other.path != '*/':
path += other.path
self.path = path
- if not has_inner_condition:
- self.element = (
- other.element + closing_combiner if closing_combiner else other.element
- )
- self.condition = other.condition
- else:
- self.element = other.element
- if other.condition:
- self.element += "[" + other.condition + "]"
- if closing_combiner:
- self.element += closing_combiner
+ self.element = other.element
+ self.condition = other.condition
return self
@@ -123,16 +92,15 @@ def join(
# The spec is actually more permissive than that, but don’t bother.
# This is just for the fast path.
# http://www.w3.org/TR/REC-xml/#NT-NameStartChar
-is_safe_name = re.compile("^[a-zA-Z_][a-zA-Z0-9_.-]*$").match
+is_safe_name = re.compile('^[a-zA-Z_][a-zA-Z0-9_.-]*$').match
# Test that the string is not empty and does not contain whitespace
-is_non_whitespace = re.compile(r"^[^ \t\r\n\f]+$").match
+is_non_whitespace = re.compile(r'^[^ \t\r\n\f]+$').match
#### Translation
-
-class GenericTranslator:
+class GenericTranslator(object):
"""
Translator for "generic" XML documents.
@@ -140,44 +108,44 @@ class GenericTranslator:
of element names and attribute names.
"""
-
+
####
#### HERE BE DRAGONS
####
#### You are welcome to hook into this to change some behavior,
#### but do so at your own risks.
- #### Until it has received a lot more work and review,
+ #### Until it has received a lot more work and review,
#### I reserve the right to change this API in backward-incompatible ways
#### with any minor version of cssselect.
- #### See https://github.com/scrapy/cssselect/pull/22
+ #### See https://github.com/SimonSapin/cssselect/pull/22
#### -- Simon Sapin.
####
combinator_mapping = {
- " ": "descendant",
- ">": "child",
- "+": "direct_adjacent",
- "~": "indirect_adjacent",
+ ' ': 'descendant',
+ '>': 'child',
+ '+': 'direct_adjacent',
+ '~': 'indirect_adjacent',
}
attribute_operator_mapping = {
- "exists": "exists",
- "=": "equals",
- "~=": "includes",
- "|=": "dashmatch",
- "^=": "prefixmatch",
- "$=": "suffixmatch",
- "*=": "substringmatch",
- "!=": "different", # XXX Not in Level 3 but meh
+ 'exists': 'exists',
+ '=': 'equals',
+ '~=': 'includes',
+ '|=': 'dashmatch',
+ '^=': 'prefixmatch',
+ '$=': 'suffixmatch',
+ '*=': 'substringmatch',
+ '!=': 'different', # XXX Not in Level 3 but meh
}
#: The attribute used for ID selectors depends on the document language:
#: http://www.w3.org/TR/selectors/#id-selectors
- id_attribute = "id"
+ id_attribute = 'id'
#: The attribute used for ``:lang()`` depends on the document language:
#: http://www.w3.org/TR/selectors/#lang-pseudo
- lang_attribute = "xml:lang"
+ lang_attribute = 'xml:lang'
#: The case sensitivity of document language element names,
#: attribute names, and attribute values in selectors depends
@@ -200,36 +168,31 @@ class GenericTranslator:
# class used to represent and xpath expression
xpathexpr_cls = XPathExpr
- def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
+ def css_to_xpath(self, css, prefix='descendant-or-self::'):
"""Translate a *group of selectors* to XPath.
Pseudo-elements are not supported here since XPath only knows
about "real" elements.
:param css:
- A *group of selectors* as a string.
+ A *group of selectors* as a Unicode string.
:param prefix:
This string is prepended to the XPath expression for each selector.
The default makes selectors scoped to the context node’s subtree.
:raises:
- :class:`~cssselect.SelectorSyntaxError` on invalid selectors,
+ :class:`SelectorSyntaxError` on invalid selectors,
:class:`ExpressionError` on unknown/unsupported selectors,
including pseudo-elements.
:returns:
- The equivalent XPath 1.0 expression as a string.
+ The equivalent XPath 1.0 expression as a Unicode string.
"""
- return " | ".join(
- self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
- for selector in parse(css)
- )
+ return ' | '.join(self.selector_to_xpath(selector, prefix,
+ translate_pseudo_elements=True)
+ for selector in parse(css))
- def selector_to_xpath(
- self,
- selector: Selector,
- prefix: str = "descendant-or-self::",
- translate_pseudo_elements: bool = False,
- ) -> str:
+ def selector_to_xpath(self, selector, prefix='descendant-or-self::',
+ translate_pseudo_elements=False):
"""Translate a parsed selector to XPath.
@@ -247,458 +210,289 @@ def selector_to_xpath(
:raises:
:class:`ExpressionError` on unknown/unsupported selectors.
:returns:
- The equivalent XPath 1.0 expression as a string.
+ The equivalent XPath 1.0 expression as a Unicode string.
"""
- tree = getattr(selector, "parsed_tree", None)
+ tree = getattr(selector, 'parsed_tree', None)
if not tree:
- raise TypeError(f"Expected a parsed selector, got {selector!r}")
+ raise TypeError('Expected a parsed selector, got %r' % (selector,))
xpath = self.xpath(tree)
assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return'
if translate_pseudo_elements and selector.pseudo_element:
xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
- return (prefix or "") + str(xpath)
+ return (prefix or '') + _unicode(xpath)
- def xpath_pseudo_element(
- self, xpath: XPathExpr, pseudo_element: PseudoElement
- ) -> XPathExpr:
+ def xpath_pseudo_element(self, xpath, pseudo_element):
"""Translate a pseudo-element.
Defaults to not supporting pseudo-elements at all,
but can be overridden by sub-classes.
"""
- raise ExpressionError("Pseudo-elements are not supported.")
+ raise ExpressionError('Pseudo-elements are not supported.')
@staticmethod
- def xpath_literal(s: str) -> str:
- s = str(s)
+ def xpath_literal(s):
+ s = _unicode(s)
if "'" not in s:
- s = f"'{s}'"
+ s = "'%s'" % s
elif '"' not in s:
- s = f'"{s}"'
+ s = '"%s"' % s
else:
- parts_quoted = [
- f'"{part}"' if "'" in part else f"'{part}'"
- for part in split_at_single_quotes(s)
- if part
- ]
- s = "concat({})".format(",".join(parts_quoted))
+ s = "concat(%s)" % ','.join([
+ (("'" in part) and '"%s"' or "'%s'") % part
+ for part in split_at_single_quotes(s) if part
+ ])
return s
- def xpath(self, parsed_selector: Tree) -> XPathExpr:
+ def xpath(self, parsed_selector):
"""Translate any parsed selector object."""
type_name = type(parsed_selector).__name__
- method = cast(
- "Callable[[Tree], XPathExpr] | None",
- getattr(self, f"xpath_{type_name.lower()}", None),
- )
+ method = getattr(self, 'xpath_%s' % type_name.lower(), None)
if method is None:
- raise ExpressionError(f"{type_name} is not supported.")
+ raise ExpressionError('%s is not supported.' % type_name)
return method(parsed_selector)
+
# Dispatched by parsed object type
- def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr:
+ def xpath_combinedselector(self, combined):
"""Translate a combined selector."""
combinator = self.combinator_mapping[combined.combinator]
- method = cast(
- "Callable[[XPathExpr, XPathExpr], XPathExpr]",
- getattr(self, f"xpath_{combinator}_combinator"),
- )
- return method(self.xpath(combined.selector), self.xpath(combined.subselector))
+ method = getattr(self, 'xpath_%s_combinator' % combinator)
+ return method(self.xpath(combined.selector),
+ self.xpath(combined.subselector))
- def xpath_negation(self, negation: Negation) -> XPathExpr:
+ def xpath_negation(self, negation):
xpath = self.xpath(negation.selector)
sub_xpath = self.xpath(negation.subselector)
sub_xpath.add_name_test()
if sub_xpath.condition:
- return xpath.add_condition(f"not({sub_xpath.condition})")
- return xpath.add_condition("0")
-
- def xpath_relation(self, relation: Relation) -> XPathExpr:
- xpath = self.xpath(relation.selector)
- combinator = relation.combinator
- subselector = relation.subselector
- right = self.xpath(subselector.parsed_tree)
- method = cast(
- "Callable[[XPathExpr, XPathExpr], XPathExpr]",
- getattr(
- self,
- f"xpath_relation_{self.combinator_mapping[cast('str', combinator.value)]}_combinator",
- ),
- )
- return method(xpath, right)
-
- def xpath_matching(self, matching: Matching) -> XPathExpr:
- xpath = self.xpath(matching.selector)
- exprs = [self.xpath(selector) for selector in matching.selector_list]
- for e in exprs:
- e.add_name_test()
- if e.condition:
- xpath.add_condition(e.condition, "or")
- return xpath
-
- def xpath_specificityadjustment(self, matching: SpecificityAdjustment) -> XPathExpr:
- xpath = self.xpath(matching.selector)
- exprs = [self.xpath(selector) for selector in matching.selector_list]
- for e in exprs:
- e.add_name_test()
- if e.condition:
- xpath.add_condition(e.condition, "or")
- return xpath
+ return xpath.add_condition('not(%s)' % sub_xpath.condition)
+ else:
+ return xpath.add_condition('0')
- def xpath_function(self, function: Function) -> XPathExpr:
+ def xpath_function(self, function):
"""Translate a functional pseudo-class."""
- method_name = "xpath_{}_function".format(function.name.replace("-", "_"))
- method = cast(
- "Callable[[XPathExpr, Function], XPathExpr] | None",
- getattr(self, method_name, None),
- )
+ method = 'xpath_%s_function' % function.name.replace('-', '_')
+ method = _unicode_safe_getattr(self, method, None)
if not method:
- raise ExpressionError(f"The pseudo-class :{function.name}() is unknown")
+ raise ExpressionError(
+ "The pseudo-class :%s() is unknown" % function.name)
return method(self.xpath(function.selector), function)
- def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr:
+ def xpath_pseudo(self, pseudo):
"""Translate a pseudo-class."""
- method_name = "xpath_{}_pseudo".format(pseudo.ident.replace("-", "_"))
- method = cast(
- "Callable[[XPathExpr], XPathExpr] | None",
- getattr(self, method_name, None),
- )
+ method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_')
+ method = _unicode_safe_getattr(self, method, None)
if not method:
# TODO: better error message for pseudo-elements?
- raise ExpressionError(f"The pseudo-class :{pseudo.ident} is unknown")
+ raise ExpressionError(
+ "The pseudo-class :%s is unknown" % pseudo.ident)
return method(self.xpath(pseudo.selector))
- def xpath_attrib(self, selector: Attrib) -> XPathExpr:
+
+ def xpath_attrib(self, selector):
"""Translate an attribute selector."""
operator = self.attribute_operator_mapping[selector.operator]
- method = cast(
- "Callable[[XPathExpr, str, str | None], XPathExpr]",
- getattr(self, f"xpath_attrib_{operator}"),
- )
+ method = getattr(self, 'xpath_attrib_%s' % operator)
if self.lower_case_attribute_names:
name = selector.attrib.lower()
else:
name = selector.attrib
safe = is_safe_name(name)
if selector.namespace:
- name = f"{selector.namespace}:{name}"
+ name = '%s:%s' % (selector.namespace, name)
safe = safe and is_safe_name(selector.namespace)
if safe:
- attrib = "@" + name
+ attrib = '@' + name
else:
- attrib = f"attribute::*[name() = {self.xpath_literal(name)}]"
- if selector.value is None:
- value = None
- elif self.lower_case_attribute_values:
- value = cast("str", selector.value.value).lower()
+ attrib = 'attribute::*[name() = %s]' % self.xpath_literal(name)
+ if self.lower_case_attribute_values:
+ value = selector.value.lower()
else:
- value = selector.value.value
+ value = selector.value
return method(self.xpath(selector.selector), attrib, value)
- def xpath_class(self, class_selector: Class) -> XPathExpr:
+ def xpath_class(self, class_selector):
"""Translate a class selector."""
# .foo is defined as [class~=foo] in the spec.
xpath = self.xpath(class_selector.selector)
- return self.xpath_attrib_includes(xpath, "@class", class_selector.class_name)
+ return self.xpath_attrib_includes(
+ xpath, '@class', class_selector.class_name)
- def xpath_hash(self, id_selector: Hash) -> XPathExpr:
+ def xpath_hash(self, id_selector):
"""Translate an ID selector."""
xpath = self.xpath(id_selector.selector)
- return self.xpath_attrib_equals(xpath, "@id", id_selector.id)
+ return self.xpath_attrib_equals(xpath, '@id', id_selector.id)
- def xpath_element(self, selector: Element) -> XPathExpr:
+ def xpath_element(self, selector):
"""Translate a type or universal selector."""
element = selector.element
if not element:
- element = "*"
+ element = '*'
safe = True
else:
- safe = bool(is_safe_name(element))
+ safe = is_safe_name(element)
if self.lower_case_element_names:
element = element.lower()
if selector.namespace:
# Namespace prefixes are case-sensitive.
# http://www.w3.org/TR/css3-namespace/#prefixes
- element = f"{selector.namespace}:{element}"
- safe = safe and bool(is_safe_name(selector.namespace))
+ element = '%s:%s' % (selector.namespace, element)
+ safe = safe and is_safe_name(selector.namespace)
xpath = self.xpathexpr_cls(element=element)
if not safe:
xpath.add_name_test()
return xpath
+
# CombinedSelector: dispatch by combinator
- def xpath_descendant_combinator(
- self, left: XPathExpr, right: XPathExpr
- ) -> XPathExpr:
+ def xpath_descendant_combinator(self, left, right):
"""right is a child, grand-child or further descendant of left"""
- return left.join("/descendant-or-self::*/", right)
+ return left.join('/descendant-or-self::*/', right)
- def xpath_child_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
+ def xpath_child_combinator(self, left, right):
"""right is an immediate child of left"""
- return left.join("/", right)
+ return left.join('/', right)
- def xpath_direct_adjacent_combinator(
- self, left: XPathExpr, right: XPathExpr
- ) -> XPathExpr:
+ def xpath_direct_adjacent_combinator(self, left, right):
"""right is a sibling immediately after left"""
- xpath = left.join("/following-sibling::", right)
+ xpath = left.join('/following-sibling::', right)
xpath.add_name_test()
- return xpath.add_condition("position() = 1")
+ return xpath.add_condition('position() = 1')
- def xpath_indirect_adjacent_combinator(
- self, left: XPathExpr, right: XPathExpr
- ) -> XPathExpr:
+ def xpath_indirect_adjacent_combinator(self, left, right):
"""right is a sibling after left, immediately or not"""
- return left.join("/following-sibling::", right)
-
- def xpath_relation_descendant_combinator(
- self, left: XPathExpr, right: XPathExpr
- ) -> XPathExpr:
- """right is a child, grand-child or further descendant of left; select left"""
- return left.join(
- "[descendant::", right, closing_combiner="]", has_inner_condition=True
- )
-
- def xpath_relation_child_combinator(
- self, left: XPathExpr, right: XPathExpr
- ) -> XPathExpr:
- """right is an immediate child of left; select left"""
- return left.join("[./", right, closing_combiner="]")
-
- def xpath_relation_direct_adjacent_combinator(
- self, left: XPathExpr, right: XPathExpr
- ) -> XPathExpr:
- """right is a sibling immediately after left; select left"""
- return left.add_condition(
- f"following-sibling::*[(name() = '{right.element}') and (position() = 1)]"
- )
+ return left.join('/following-sibling::', right)
- def xpath_relation_indirect_adjacent_combinator(
- self, left: XPathExpr, right: XPathExpr
- ) -> XPathExpr:
- """right is a sibling after left, immediately or not; select left"""
- return left.join("[following-sibling::", right, closing_combiner="]")
# Function: dispatch by function/pseudo-class name
- def xpath_nth_child_function(
- self,
- xpath: XPathExpr,
- function: Function,
- last: bool = False,
- add_name_test: bool = True,
- ) -> XPathExpr:
+ def xpath_nth_child_function(self, xpath, function, last=False,
+ add_name_test=True):
try:
a, b = parse_series(function.arguments)
- except ValueError as ex:
- raise ExpressionError(f"Invalid series: '{function.arguments!r}'") from ex
-
- # From https://www.w3.org/TR/css3-selectors/#structural-pseudos:
- #
- # :nth-child(an+b)
- # an+b-1 siblings before
- #
- # :nth-last-child(an+b)
- # an+b-1 siblings after
- #
- # :nth-of-type(an+b)
- # an+b-1 siblings with the same expanded element name before
- #
- # :nth-last-of-type(an+b)
- # an+b-1 siblings with the same expanded element name after
- #
- # So,
- # for :nth-child and :nth-of-type
- #
- # count(preceding-sibling::) = an+b-1
- #
- # for :nth-last-child and :nth-last-of-type
- #
- # count(following-sibling::) = an+b-1
- #
- # therefore,
- # count(...) - (b-1) ≡ 0 (mod a)
- #
- # if a == 0:
- # ~~~~~~~~~~
- # count(...) = b-1
- #
- # if a < 0:
- # ~~~~~~~~~
- # count(...) - b +1 <= 0
- # -> count(...) <= b-1
- #
- # if a > 0:
- # ~~~~~~~~~
- # count(...) - b +1 >= 0
- # -> count(...) >= b-1
-
- # work with b-1 instead
- b_min_1 = b - 1
-
- # early-exit condition 1:
- # ~~~~~~~~~~~~~~~~~~~~~~~
- # for a == 1, nth-*(an+b) means n+b-1 siblings before/after,
- # and since n ∈ {0, 1, 2, ...}, if b-1<=0,
- # there is always an "n" matching any number of siblings (maybe none)
- if a == 1 and b_min_1 <= 0:
- return xpath
-
- # early-exit condition 2:
- # ~~~~~~~~~~~~~~~~~~~~~~~
- # an+b-1 siblings with a<0 and (b-1)<0 is not possible
- if a < 0 and b_min_1 < 0:
- return xpath.add_condition("0")
-
- # `add_name_test` boolean is inverted and somewhat counter-intuitive:
- #
- # nth_of_type() calls nth_child(add_name_test=False)
- nodetest = "*" if add_name_test else f"{xpath.element}"
-
- # count siblings before or after the element
- if not last:
- siblings_count = f"count(preceding-sibling::{nodetest})"
- else:
- siblings_count = f"count(following-sibling::{nodetest})"
-
- # special case of fixed position: nth-*(0n+b)
- # if a == 0:
- # ~~~~~~~~~~
- # count(***-sibling::***) = b-1
+ except ValueError:
+ raise ExpressionError("Invalid series: '%r'" % function.arguments)
+ if add_name_test:
+ xpath.add_name_test()
+ xpath.add_star_prefix()
if a == 0:
- return xpath.add_condition(f"{siblings_count} = {b_min_1}")
-
- expressions = []
-
- if a > 0:
- # siblings count, an+b-1, is always >= 0,
- # so if a>0, and (b-1)<=0, an "n" exists to satisfy this,
- # therefore, the predicate is only interesting if (b-1)>0
- if b_min_1 > 0:
- expressions.append(f"{siblings_count} >= {b_min_1}")
+ if last:
+ b = 'last() - %s' % b
+ return xpath.add_condition('position() = %s' % b)
+ if last:
+ # FIXME: I'm not sure if this is right
+ a = -a
+ b = -b
+ if b > 0:
+ b_neg = str(-b)
else:
- # if a<0, and (b-1)<0, no "n" satisfies this,
- # this is tested above as an early exist condition
- # otherwise,
- expressions.append(f"{siblings_count} <= {b_min_1}")
-
- # operations modulo 1 or -1 are simpler, one only needs to verify:
- #
- # - either:
- # count(***-sibling::***) - (b-1) = n = 0, 1, 2, 3, etc.,
- # i.e. count(***-sibling::***) >= (b-1)
- #
- # - or:
- # count(***-sibling::***) - (b-1) = -n = 0, -1, -2, -3, etc.,
- # i.e. count(***-sibling::***) <= (b-1)
- # we we just did above.
- #
- if abs(a) != 1:
- # count(***-sibling::***) - (b-1) ≡ 0 (mod a)
- left = siblings_count
-
- # apply "modulo a" on 2nd term, -(b-1),
- # to simplify things like "(... +6) % -3",
- # and also make it positive with |a|
- b_neg = (-b_min_1) % abs(a)
-
- if b_neg != 0:
- left = f"({left} +{b_neg})"
-
- expressions.append(f"{left} mod {a} = 0")
-
- template = "(%s)" if len(expressions) > 1 else "%s"
- xpath.add_condition(
- " and ".join(template % expression for expression in expressions)
- )
+ b_neg = '+%s' % (-b)
+ if a != 1:
+ expr = ['(position() %s) mod %s = 0' % (b_neg, a)]
+ else:
+ expr = []
+ if b >= 0:
+ expr.append('position() >= %s' % b)
+ elif b < 0 and last:
+ expr.append('position() < (last() %s)' % b)
+ expr = ' and '.join(expr)
+ if expr:
+ xpath.add_condition(expr)
return xpath
-
- def xpath_nth_last_child_function(
- self, xpath: XPathExpr, function: Function
- ) -> XPathExpr:
+ # FIXME: handle an+b, odd, even
+ # an+b means every-a, plus b, e.g., 2n+1 means odd
+ # 0n+b means b
+ # n+0 means a=1, i.e., all elements
+ # an means every a elements, i.e., 2n means even
+ # -n means -1n
+ # -1n+6 means elements 6 and previous
+
+ def xpath_nth_last_child_function(self, xpath, function):
return self.xpath_nth_child_function(xpath, function, last=True)
- def xpath_nth_of_type_function(
- self, xpath: XPathExpr, function: Function
- ) -> XPathExpr:
- if xpath.element == "*":
- raise ExpressionError("*:nth-of-type() is not implemented")
- return self.xpath_nth_child_function(xpath, function, add_name_test=False)
-
- def xpath_nth_last_of_type_function(
- self, xpath: XPathExpr, function: Function
- ) -> XPathExpr:
- if xpath.element == "*":
- raise ExpressionError("*:nth-of-type() is not implemented")
- return self.xpath_nth_child_function(
- xpath, function, last=True, add_name_test=False
- )
+ def xpath_nth_of_type_function(self, xpath, function):
+ if xpath.element == '*':
+ raise ExpressionError(
+ "*:nth-of-type() is not implemented")
+ return self.xpath_nth_child_function(xpath, function,
+ add_name_test=False)
+
+ def xpath_nth_last_of_type_function(self, xpath, function):
+ if xpath.element == '*':
+ raise ExpressionError(
+ "*:nth-of-type() is not implemented")
+ return self.xpath_nth_child_function(xpath, function, last=True,
+ add_name_test=False)
- def xpath_contains_function(
- self, xpath: XPathExpr, function: Function
- ) -> XPathExpr:
+ def xpath_contains_function(self, xpath, function):
# Defined there, removed in later drafts:
# http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
- if function.argument_types() not in (["STRING"], ["IDENT"]):
+ if function.argument_types() not in (['STRING'], ['IDENT']):
raise ExpressionError(
- f"Expected a single string or ident for :contains(), got {function.arguments!r}"
- )
- value = cast("str", function.arguments[0].value)
- return xpath.add_condition(f"contains(., {self.xpath_literal(value)})")
+ "Expected a single string or ident for :contains(), got %r"
+ % function.arguments)
+ value = function.arguments[0].value
+ return xpath.add_condition(
+ 'contains(., %s)' % self.xpath_literal(value))
- def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
- if function.argument_types() not in (["STRING"], ["IDENT"]):
+ def xpath_lang_function(self, xpath, function):
+ if function.argument_types() not in (['STRING'], ['IDENT']):
raise ExpressionError(
- f"Expected a single string or ident for :lang(), got {function.arguments!r}"
- )
- value = cast("str", function.arguments[0].value)
- return xpath.add_condition(f"lang({self.xpath_literal(value)})")
+ "Expected a single string or ident for :lang(), got %r"
+ % function.arguments)
+ value = function.arguments[0].value
+ return xpath.add_condition(
+ "lang(%s)" % (self.xpath_literal(value)))
+
# Pseudo: dispatch by pseudo-class name
- def xpath_root_pseudo(self, xpath: XPathExpr) -> XPathExpr:
+ def xpath_root_pseudo(self, xpath):
return xpath.add_condition("not(parent::*)")
- # CSS immediate children (CSS ":scope > div" to XPath "child::div" or "./div")
- # Works only at the start of a selector
- # Needed to get immediate children of a processed selector in Scrapy
- # for product in response.css('.product'):
- # description = product.css(':scope > div::text').get()
- def xpath_scope_pseudo(self, xpath: XPathExpr) -> XPathExpr:
- return xpath.add_condition("1")
-
- def xpath_first_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
- return xpath.add_condition("count(preceding-sibling::*) = 0")
+ def xpath_first_child_pseudo(self, xpath):
+ xpath.add_star_prefix()
+ xpath.add_name_test()
+ return xpath.add_condition('position() = 1')
- def xpath_last_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
- return xpath.add_condition("count(following-sibling::*) = 0")
+ def xpath_last_child_pseudo(self, xpath):
+ xpath.add_star_prefix()
+ xpath.add_name_test()
+ return xpath.add_condition('position() = last()')
- def xpath_first_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
- if xpath.element == "*":
- raise ExpressionError("*:first-of-type is not implemented")
- return xpath.add_condition(f"count(preceding-sibling::{xpath.element}) = 0")
+ def xpath_first_of_type_pseudo(self, xpath):
+ if xpath.element == '*':
+ raise ExpressionError(
+ "*:first-of-type is not implemented")
+ xpath.add_star_prefix()
+ return xpath.add_condition('position() = 1')
- def xpath_last_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
- if xpath.element == "*":
- raise ExpressionError("*:last-of-type is not implemented")
- return xpath.add_condition(f"count(following-sibling::{xpath.element}) = 0")
+ def xpath_last_of_type_pseudo(self, xpath):
+ if xpath.element == '*':
+ raise ExpressionError(
+ "*:last-of-type is not implemented")
+ xpath.add_star_prefix()
+ return xpath.add_condition('position() = last()')
- def xpath_only_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
- return xpath.add_condition("count(parent::*/child::*) = 1")
+ def xpath_only_child_pseudo(self, xpath):
+ xpath.add_name_test()
+ xpath.add_star_prefix()
+ return xpath.add_condition('last() = 1')
- def xpath_only_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
- if xpath.element == "*":
- raise ExpressionError("*:only-of-type is not implemented")
- return xpath.add_condition(f"count(parent::*/child::{xpath.element}) = 1")
+ def xpath_only_of_type_pseudo(self, xpath):
+ if xpath.element == '*':
+ raise ExpressionError(
+ "*:only-of-type is not implemented")
+ return xpath.add_condition('last() = 1')
- def xpath_empty_pseudo(self, xpath: XPathExpr) -> XPathExpr:
+ def xpath_empty_pseudo(self, xpath):
return xpath.add_condition("not(*) and not(string-length())")
- def pseudo_never_matches(self, xpath: XPathExpr) -> XPathExpr:
+ def pseudo_never_matches(self, xpath):
"""Common implementation for pseudo-classes that never match."""
return xpath.add_condition("0")
@@ -714,88 +508,67 @@ def pseudo_never_matches(self, xpath: XPathExpr) -> XPathExpr:
# Attrib: dispatch by attribute operator
- def xpath_attrib_exists(
- self, xpath: XPathExpr, name: str, value: str | None
- ) -> XPathExpr:
+ def xpath_attrib_exists(self, xpath, name, value):
assert not value
xpath.add_condition(name)
return xpath
- def xpath_attrib_equals(
- self, xpath: XPathExpr, name: str, value: str | None
- ) -> XPathExpr:
- assert value is not None
- xpath.add_condition(f"{name} = {self.xpath_literal(value)}")
+ def xpath_attrib_equals(self, xpath, name, value):
+ xpath.add_condition('%s = %s' % (name, self.xpath_literal(value)))
return xpath
- def xpath_attrib_different(
- self, xpath: XPathExpr, name: str, value: str | None
- ) -> XPathExpr:
- assert value is not None
+ def xpath_attrib_different(self, xpath, name, value):
# FIXME: this seems like a weird hack...
if value:
- xpath.add_condition(f"not({name}) or {name} != {self.xpath_literal(value)}")
+ xpath.add_condition('not(%s) or %s != %s'
+ % (name, name, self.xpath_literal(value)))
else:
- xpath.add_condition(f"{name} != {self.xpath_literal(value)}")
+ xpath.add_condition('%s != %s'
+ % (name, self.xpath_literal(value)))
return xpath
- def xpath_attrib_includes(
- self, xpath: XPathExpr, name: str, value: str | None
- ) -> XPathExpr:
- if value and is_non_whitespace(value):
- arg = self.xpath_literal(" " + value + " ")
+ def xpath_attrib_includes(self, xpath, name, value):
+ if is_non_whitespace(value):
xpath.add_condition(
- f"{name} and contains(concat(' ', normalize-space({name}), ' '), {arg})"
- )
+ "%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
+ % (name, name, self.xpath_literal(' '+value+' ')))
else:
- xpath.add_condition("0")
+ xpath.add_condition('0')
return xpath
- def xpath_attrib_dashmatch(
- self, xpath: XPathExpr, name: str, value: str | None
- ) -> XPathExpr:
- assert value is not None
- arg = self.xpath_literal(value)
- arg_dash = self.xpath_literal(value + "-")
+ def xpath_attrib_dashmatch(self, xpath, name, value):
# Weird, but true...
- xpath.add_condition(
- f"{name} and ({name} = {arg} or starts-with({name}, {arg_dash}))"
- )
+ xpath.add_condition('%s and (%s = %s or starts-with(%s, %s))' % (
+ name,
+ name, self.xpath_literal(value),
+ name, self.xpath_literal(value + '-')))
return xpath
- def xpath_attrib_prefixmatch(
- self, xpath: XPathExpr, name: str, value: str | None
- ) -> XPathExpr:
+ def xpath_attrib_prefixmatch(self, xpath, name, value):
if value:
- xpath.add_condition(
- f"{name} and starts-with({name}, {self.xpath_literal(value)})"
- )
+ xpath.add_condition('%s and starts-with(%s, %s)' % (
+ name, name, self.xpath_literal(value)))
else:
- xpath.add_condition("0")
+ xpath.add_condition('0')
return xpath
- def xpath_attrib_suffixmatch(
- self, xpath: XPathExpr, name: str, value: str | None
- ) -> XPathExpr:
+ def xpath_attrib_suffixmatch(self, xpath, name, value):
if value:
# Oddly there is a starts-with in XPath 1.0, but not ends-with
xpath.add_condition(
- f"{name} and substring({name}, string-length({name})-{len(value) - 1}) = {self.xpath_literal(value)}"
- )
+ '%s and substring(%s, string-length(%s)-%s) = %s'
+ % (name, name, name, len(value)-1, self.xpath_literal(value)))
else:
- xpath.add_condition("0")
+ xpath.add_condition('0')
return xpath
- def xpath_attrib_substringmatch(
- self, xpath: XPathExpr, name: str, value: str | None
- ) -> XPathExpr:
+ def xpath_attrib_substringmatch(self, xpath, name, value):
if value:
# Attribute selectors are case sensitive
- xpath.add_condition(
- f"{name} and contains({name}, {self.xpath_literal(value)})"
- )
+ xpath.add_condition('%s and contains(%s, %s)' % (
+ name, name, self.xpath_literal(value)))
else:
- xpath.add_condition("0")
+ xpath.add_condition('0')
return xpath
@@ -816,52 +589,47 @@ class HTMLTranslator(GenericTranslator):
"""
- lang_attribute = "lang"
+ lang_attribute = 'lang'
- def __init__(self, xhtml: bool = False) -> None:
+ def __init__(self, xhtml=False):
self.xhtml = xhtml # Might be useful for sub-classes?
if not xhtml:
# See their definition in GenericTranslator.
self.lower_case_element_names = True
self.lower_case_attribute_names = True
- def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr:
+ def xpath_checked_pseudo(self, xpath):
# FIXME: is this really all the elements?
return xpath.add_condition(
"(@selected and name(.) = 'option') or "
"(@checked "
- "and (name(.) = 'input' or name(.) = 'command')"
- "and (@type = 'checkbox' or @type = 'radio'))"
- )
+ "and (name(.) = 'input' or name(.) = 'command')"
+ "and (@type = 'checkbox' or @type = 'radio'))")
- def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
- if function.argument_types() not in (["STRING"], ["IDENT"]):
+ def xpath_lang_function(self, xpath, function):
+ if function.argument_types() not in (['STRING'], ['IDENT']):
raise ExpressionError(
- f"Expected a single string or ident for :lang(), got {function.arguments!r}"
- )
+ "Expected a single string or ident for :lang(), got %r"
+ % function.arguments)
value = function.arguments[0].value
- assert value
- arg = self.xpath_literal(value.lower() + "-")
return xpath.add_condition(
"ancestor-or-self::*[@lang][1][starts-with(concat("
- # XPath 1.0 has no lower-case function...
- f"translate(@{self.lang_attribute}, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
- "'abcdefghijklmnopqrstuvwxyz'), "
- f"'-'), {arg})]"
- )
+ # XPath 1.0 has no lower-case function...
+ "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
+ "'abcdefghijklmnopqrstuvwxyz'), "
+ "'-'), %s)]"
+ % (self.lang_attribute, self.xpath_literal(value.lower() + '-')))
- def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr:
- return xpath.add_condition(
- "@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area')"
- )
+ def xpath_link_pseudo(self, xpath):
+ return xpath.add_condition("@href and "
+ "(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')")
# Links are never visited, the implementation for :visited is the same
# as in GenericTranslator
- def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:
+ def xpath_disabled_pseudo(self, xpath):
# http://www.w3.org/TR/html5/section-index.html#attributes-1
- return xpath.add_condition(
- """
+ return xpath.add_condition('''
(
@disabled and
(
@@ -883,15 +651,13 @@ def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:
)
and ancestor::fieldset[@disabled]
)
- """
- )
+ ''')
# FIXME: in the second half, add "and is not a descendant of that
# fieldset element's first legend element child, if any."
- def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:
+ def xpath_enabled_pseudo(self, xpath):
# http://www.w3.org/TR/html5/section-index.html#attributes-1
- return xpath.add_condition(
- """
+ return xpath.add_condition('''
(
@href and (
name(.) = 'a' or
@@ -919,8 +685,7 @@ def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:
@disabled or ancestor::optgroup[@disabled]
)
)
- """
- )
+ ''')
# FIXME: ... or "li elements that are children of menu elements,
# and that have a child element that defines a command, if the first
# such element's Disabled State facet is false (not disabled)".
diff --git a/docs/conf.py b/docs/conf.py
index da3f023..22e6032 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
#
# cssselect documentation build configuration file, created by
# sphinx-quickstart on Tue Mar 27 14:20:34 2012.
@@ -11,210 +12,217 @@
# All configuration values have a default; values that are commented out
# serve to show the default.
-import re
-from pathlib import Path
+import sys, os, re
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
-# sys.path.insert(0, os.path.abspath('.'))
+#sys.path.insert(0, os.path.abspath('.'))
# -- General configuration -----------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
-# needs_sphinx = '1.0'
+#needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ["sphinx.ext.autodoc", "sphinx.ext.intersphinx", "sphinx.ext.doctest"]
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx',
+ 'sphinx.ext.doctest']
# Add any paths that contain templates here, relative to this directory.
-templates_path = ["_templates"]
+templates_path = ['_templates']
# The suffix of source filenames.
-source_suffix = {".rst": "restructuredtext"}
+source_suffix = '.rst'
# The encoding of source files.
-# source_encoding = 'utf-8-sig'
+#source_encoding = 'utf-8-sig'
# The master toctree document.
-master_doc = "index"
+master_doc = 'index'
# General information about the project.
-project = "cssselect"
-project_copyright = "2012-2017, Simon Sapin, Scrapy developers"
+project = 'cssselect'
+copyright = '2012, Simon Sapin'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The full version, including alpha/beta/rc tags.
-init_py = (Path(__file__).parent.parent / "cssselect" / "__init__.py").read_text()
-release = re.search('VERSION = "([^"]+)"', init_py).group(1)
+init_py = open(os.path.join(os.path.dirname(__file__),
+ '..', 'cssselect', '__init__.py')).read()
+release = re.search("VERSION = '([^']+)'", init_py).group(1)
# The short X.Y version.
-version = release.rstrip("dev")
+version = release.rstrip('dev')
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
-# language = None
+#language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
-# today = ''
+#today = ''
# Else, today_fmt is used as the format for a strftime call.
-# today_fmt = '%B %d, %Y'
+#today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
-exclude_patterns = ["_build"]
+exclude_patterns = ['_build']
# The reST default role (used for this markup: `text`) to use for all documents.
-# default_role = None
+#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
-# add_function_parentheses = True
+#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
-# add_module_names = True
+#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
-# show_authors = False
+#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = "sphinx"
+pygments_style = 'sphinx'
# A list of ignored prefixes for module index sorting.
-# modindex_common_prefix = []
+#modindex_common_prefix = []
# -- Options for HTML output ---------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
-html_theme = "sphinx_rtd_theme"
+#html_theme = 'agogo'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
-# html_theme_options = {}
+#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
-# html_theme_path = []
+#html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# " v documentation".
-# html_title = None
+#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
-# html_short_title = None
+#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
-# html_logo = None
+#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
-# html_favicon = None
+#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-# html_static_path = ['_static']
+#html_static_path = ['_static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
-# html_last_updated_fmt = '%b %d, %Y'
+#html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
-# html_use_smartypants = True
+#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
-# html_sidebars = {}
+#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
-# html_additional_pages = {}
+#html_additional_pages = {}
# If false, no module index is generated.
-# html_domain_indices = True
+#html_domain_indices = True
# If false, no index is generated.
-# html_use_index = True
+#html_use_index = True
# If true, the index is split into individual pages for each letter.
-# html_split_index = False
+#html_split_index = False
# If true, links to the reST sources are added to the pages.
-# html_show_sourcelink = True
+#html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-# html_show_sphinx = True
+#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-# html_show_copyright = True
+#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
-# html_use_opensearch = ''
+#html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
-# html_file_suffix = None
+#html_file_suffix = None
# Output file base name for HTML help builder.
-htmlhelp_basename = "cssselectdoc"
+htmlhelp_basename = 'cssselectdoc'
# -- Options for LaTeX output --------------------------------------------------
latex_elements = {
- # The paper size ('letterpaper' or 'a4paper').
- #'papersize': 'letterpaper',
- # The font size ('10pt', '11pt' or '12pt').
- #'pointsize': '10pt',
- # Additional stuff for the LaTeX preamble.
- #'preamble': '',
+# The paper size ('letterpaper' or 'a4paper').
+#'papersize': 'letterpaper',
+
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
- ("index", "cssselect.tex", "cssselect Documentation", "Simon Sapin", "manual"),
+ ('index', 'cssselect.tex', 'cssselect Documentation',
+ 'Simon Sapin', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
-# latex_logo = None
+#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
-# latex_use_parts = False
+#latex_use_parts = False
# If true, show page references after internal links.
-# latex_show_pagerefs = False
+#latex_show_pagerefs = False
# If true, show URL addresses after external links.
-# latex_show_urls = False
+#latex_show_urls = False
# Documents to append as an appendix to all manuals.
-# latex_appendices = []
+#latex_appendices = []
# If false, no module index is generated.
-# latex_domain_indices = True
+#latex_domain_indices = True
# -- Options for manual page output --------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
-man_pages = [("index", "cssselect", "cssselect Documentation", ["Simon Sapin"], 1)]
+man_pages = [
+ ('index', 'cssselect', 'cssselect Documentation',
+ ['Simon Sapin'], 1)
+]
# If true, show URL addresses after external links.
-# man_show_urls = False
+#man_show_urls = False
# -- Options for Texinfo output ------------------------------------------------
@@ -223,35 +231,20 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
- (
- "index",
- "cssselect",
- "cssselect Documentation",
- "Simon Sapin",
- "cssselect",
- "One line description of project.",
- "Miscellaneous",
- ),
+ ('index', 'cssselect', 'cssselect Documentation',
+ 'Simon Sapin', 'cssselect', 'One line description of project.',
+ 'Miscellaneous'),
]
# Documents to append as an appendix to all manuals.
-# texinfo_appendices = []
+#texinfo_appendices = []
# If false, no module index is generated.
-# texinfo_domain_indices = True
+#texinfo_domain_indices = True
# How to display URL addresses: 'footnote', 'no', or 'inline'.
-# texinfo_show_urls = 'footnote'
+#texinfo_show_urls = 'footnote'
# Example configuration for intersphinx: refer to the Python standard library.
-intersphinx_mapping = {"python": ("https://docs.python.org/3", None)}
-
-
-# --- Nitpicking options ------------------------------------------------------
-
-nitpicky = True
-nitpick_ignore = [
- # explicitly not a part of the public API
- ("py:class", "Token"),
-]
+intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}
diff --git a/docs/conftest.py b/docs/conftest.py
deleted file mode 100644
index a71d108..0000000
--- a/docs/conftest.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from doctest import ELLIPSIS, NORMALIZE_WHITESPACE
-
-from sybil import Sybil
-from sybil.parsers.doctest import DocTestParser
-from sybil.parsers.skip import skip
-
-try:
- # sybil 3.0.0+
- from sybil.parsers.codeblock import PythonCodeBlockParser
-except ImportError:
- from sybil.parsers.codeblock import CodeBlockParser as PythonCodeBlockParser
-
-
-pytest_collect_file = Sybil(
- parsers=[
- DocTestParser(optionflags=ELLIPSIS | NORMALIZE_WHITESPACE),
- PythonCodeBlockParser(future_imports=["print_function"]),
- skip,
- ],
- pattern="*.rst",
-).pytest()
diff --git a/docs/index.rst b/docs/index.rst
index a024f20..4ac7401 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -99,24 +99,12 @@ These applicable pseudo-classes are not yet implemented:
you specify an element type, but not with ``*``
On the other hand, *cssselect* supports some selectors that are not
-in the Level 3 specification.
-
-These parts of the Level 4 specification are supported (note that a large part
-of the Level 4 additions is not applicable to cssselect similarly to ``:hover``
-or not representable in XPath 1.0 so the complete specification is unlikely to
-be implemented):
-
-* The ``:scope`` pseudo-class. Limitation: it can only be used at a start of a
- selector.
-* The ``:is()``, ``:where()`` and ``:has()`` pseudo-classes. Limitation:
- ``:has()`` cannot contain nested ``:has()`` or ``:not()``.
-
-These are non-standard extensions:
+in the Level 3 specification:
* The ``:contains(text)`` pseudo-class that existed in `an early draft`_
but was then removed.
* The ``!=`` attribute operator. ``[foo!=bar]`` is the same as
- ``:not([foo=bar])``.
+ ``:not([foo=bar])``
* ``:not()`` accepts a *sequence of simple selectors*, not just single
*simple selector*. For example, ``:not(a.important[rel])`` is allowed,
even though the negation contains 3 *simple selectors*.
@@ -151,7 +139,7 @@ and their signature. You can look at the `source code`_ to see how it works.
However, be aware that this API is not very stable yet. It might change
and break your sub-class.
-.. _source code: https://github.com/scrapy/cssselect/blob/master/cssselect/xpath.py
+.. _source code: https://github.com/SimonSapin/cssselect/blob/master/cssselect/xpath.py
Namespaces
diff --git a/docs/requirements.txt b/docs/requirements.txt
deleted file mode 100644
index 21cb2eb..0000000
--- a/docs/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-sphinx==8.2.3
-sphinx-rtd-theme==3.0.2
diff --git a/pyproject.toml b/pyproject.toml
deleted file mode 100644
index c7c54a0..0000000
--- a/pyproject.toml
+++ /dev/null
@@ -1,239 +0,0 @@
-[build-system]
-build-backend = "hatchling.build"
-requires = ["hatchling>=1.27.0"]
-
-[project]
-name = "cssselect"
-license = "BSD-3-Clause"
-license-files = ["LICENSE", "AUTHORS"]
-description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0"
-readme = "README.rst"
-authors = [{ name = "Ian Bicking", email = "ianb@colorstudy.com" }]
-maintainers = [{ name = "Paul Tremberth", email = "paul.tremberth@gmail.com" }]
-requires-python = ">=3.10"
-classifiers = [
- "Development Status :: 4 - Beta",
- "Intended Audience :: Developers",
- "Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.10",
- "Programming Language :: Python :: 3.11",
- "Programming Language :: Python :: 3.12",
- "Programming Language :: Python :: 3.13",
- "Programming Language :: Python :: 3.14",
- "Programming Language :: Python :: Implementation :: CPython",
- "Programming Language :: Python :: Implementation :: PyPy",
-]
-dynamic = ["version"]
-
-[project.urls]
-"Homepage" = "https://github.com/scrapy/cssselect"
-
-[tool.hatch.version]
-path = "cssselect/__init__.py"
-
-[tool.hatch.build.targets.sdist]
-include = [
- "/cssselect",
- "/docs",
- "/tests",
- "/CHANGES",
- "/README.rst",
- "/tox.ini",
-]
-exclude = [
- "/docs/_build",
-]
-
-[tool.hatch.build.targets.wheel]
-packages = ["cssselect"]
-
-[tool.bumpversion]
-current_version = "1.4.0"
-commit = true
-tag = true
-
-[[tool.bumpversion.files]]
-filename = "cssselect/__init__.py"
-
-[[tool.bumpversion.files]]
-filename = "CHANGES"
-search = "^Unreleased\\.$"
-replace = "Released on {now:%Y-%m-%d}."
-regex = true
-
-[tool.coverage.run]
-branch = true
-source = ["cssselect"]
-
-[tool.coverage.report]
-exclude_also = [
- "def __repr__",
- "if sys.version_info",
- "if __name__ == '__main__':",
-]
-
-[tool.mypy]
-strict = true
-
-[tool.pylint.MASTER]
-persistent = "no"
-extension-pkg-allow-list = ["lxml"]
-
-[tool.pylint."MESSAGES CONTROL"]
-enable = [
- "useless-suppression",
-]
-disable = [
- "consider-using-f-string",
- "fixme",
- "invalid-name",
- "line-too-long",
- "missing-class-docstring",
- "missing-function-docstring",
- "missing-module-docstring",
- "no-member",
- "not-callable",
- "redefined-builtin",
- "redefined-outer-name",
- "too-few-public-methods",
- "too-many-arguments",
- "too-many-branches",
- "too-many-function-args",
- "too-many-lines",
- "too-many-locals",
- "too-many-positional-arguments",
- "too-many-public-methods",
- "too-many-statements",
- "unused-argument",
-]
-
-[tool.pytest.ini_options]
-testpaths = ["tests"]
-
-[tool.ruff.lint]
-extend-select = [
- # flake8-builtins
- "A",
- # flake8-async
- "ASYNC",
- # flake8-bugbear
- "B",
- # flake8-comprehensions
- "C4",
- # flake8-commas
- "COM",
- # pydocstyle
- "D",
- # flake8-future-annotations
- "FA",
- # flynt
- "FLY",
- # refurb
- "FURB",
- # isort
- "I",
- # flake8-implicit-str-concat
- "ISC",
- # flake8-logging
- "LOG",
- # Perflint
- "PERF",
- # pygrep-hooks
- "PGH",
- # flake8-pie
- "PIE",
- # pylint
- "PL",
- # flake8-pytest-style
- "PT",
- # flake8-use-pathlib
- "PTH",
- # flake8-pyi
- "PYI",
- # flake8-quotes
- "Q",
- # flake8-return
- "RET",
- # flake8-raise
- "RSE",
- # Ruff-specific rules
- "RUF",
- # flake8-bandit
- "S",
- # flake8-simplify
- "SIM",
- # flake8-slots
- "SLOT",
- # flake8-debugger
- "T10",
- # flake8-type-checking
- "TC",
- # pyupgrade
- "UP",
- # pycodestyle warnings
- "W",
- # flake8-2020
- "YTT",
-]
-ignore = [
- # Trailing comma missing
- "COM812",
- # Missing docstring in public module
- "D100",
- # Missing docstring in public class
- "D101",
- # Missing docstring in public method
- "D102",
- # Missing docstring in public function
- "D103",
- # Missing docstring in public package
- "D104",
- # Missing docstring in magic method
- "D105",
- # Missing docstring in public nested class
- "D106",
- # Missing docstring in __init__
- "D107",
- # One-line docstring should fit on one line with quotes
- "D200",
- # No blank lines allowed after function docstring
- "D202",
- # 1 blank line required between summary line and description
- "D205",
- # Multi-line docstring closing quotes should be on a separate line
- "D209",
- # First line should end with a period
- "D400",
- # First line should be in imperative mood; try rephrasing
- "D401",
- # First line should not be the function's "signature"
- "D402",
- # First word of the first line should be properly capitalized
- "D403",
- # Too many return statements
- "PLR0911",
- # Too many branches
- "PLR0912",
- # Too many arguments in function definition
- "PLR0913",
- # Too many statements
- "PLR0915",
- # Magic value used in comparison
- "PLR2004",
- # String contains ambiguous {}.
- "RUF001",
- # Docstring contains ambiguous {}.
- "RUF002",
- # Comment contains ambiguous {}.
- "RUF003",
- # Mutable class attributes should be annotated with `typing.ClassVar`
- "RUF012",
- # Use of `assert` detected
- "S101",
-]
-
-[tool.ruff.lint.isort]
-split-on-trailing-comma = false
-
-[tool.ruff.lint.pydocstyle]
-convention = "pep257"
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..ccddf11
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,14 @@
+# Options for the build_sphinx command.
+[build_sphinx]
+source-dir = docs
+build-dir = docs/_build
+#all_files = 1
+
+# Options for the upload_sphinx command (Sphinx-PyPI-upload).
+[upload_sphinx]
+upload-dir = docs/_build/html
+
+# pytest only reads setup.cfg from a [tool:pytest] section; a bare [pytest]
+# section in this file is rejected by pytest 4.0 and later.
+[tool:pytest]
+python_files = tests.py
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..bd1e385
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,74 @@
+# coding: utf8
+
+# Packaging script: reads the long description and the version number from
+# the source tree and passes them to setup().
+
+import re
+import os.path
+try:
+    from setuptools import setup
+    extra_kwargs = {'test_suite': 'cssselect.tests'}
+except ImportError:
+    # test_suite is only passed when setuptools is available.
+    from distutils.core import setup
+    extra_kwargs = {}
+
+
+ROOT = os.path.dirname(__file__)
+
+
+def read_file(*path_parts):
+    """Return the contents of the file at ROOT/path_parts, closing it."""
+    # try/finally rather than ``with``: the classifiers below still list
+    # Python 2.5, where ``with`` needs a __future__ import.
+    handle = open(os.path.join(ROOT, *path_parts))
+    try:
+        return handle.read()
+    finally:
+        handle.close()
+
+
+def find_version(init_source):
+    """Return the value of the ``VERSION = '...'`` assignment in init_source.
+
+    Raises RuntimeError when no such assignment is present, instead of the
+    opaque AttributeError produced by calling .group() on None.
+    """
+    match = re.search("VERSION = '([^']+)'", init_source)
+    if match is None:
+        raise RuntimeError(
+            "No VERSION = '...' line found in cssselect/__init__.py")
+    return match.group(1)
+
+
+README = read_file('README.rst')
+INIT_PY = read_file('cssselect', '__init__.py')
+VERSION = find_version(INIT_PY)
+
+
+setup(
+    name='cssselect',
+    version=VERSION,
+    author='Ian Bicking',
+    author_email='ianb@colorstudy.com',
+    maintainer='Simon Sapin',
+    maintainer_email='simon.sapin@exyr.org',
+    description=
+        'cssselect parses CSS3 Selectors and translates them to XPath 1.0',
+    long_description=README,
+    url='http://packages.python.org/cssselect/',
+    license='BSD',
+    packages=['cssselect'],
+    classifiers=[
+        'Development Status :: 4 - Beta',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: BSD License',
+        'Programming Language :: Python :: 2',
+        'Programming Language :: Python :: 2.5',
+        'Programming Language :: Python :: 2.6',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.2',
+    ],
+    **extra_kwargs
+)
diff --git a/tests/__init__.py b/tests/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
deleted file mode 100644
index dc67bb7..0000000
--- a/tests/test_cssselect.py
+++ /dev/null
@@ -1,1540 +0,0 @@
-#!/usr/bin/env python
-"""
-Tests for cssselect
-===================
-
-These tests can be run either by py.test or by the standard library's
-unittest. They use plain ``assert`` statements and do little reporting
-themselves in case of failure.
-
-Use py.test to get fancy error reporting and assert introspection.
-
-
-:copyright: (c) 2007-2012 Ian Bicking and contributors.
-See AUTHORS for more details.
-:license: BSD, see LICENSE for more details.
-
-"""
-
-from __future__ import annotations
-
-import sys
-import typing
-import unittest
-from typing import TYPE_CHECKING
-
-import pytest
-from lxml import etree, html
-
-from cssselect import (
- ExpressionError,
- GenericTranslator,
- HTMLTranslator,
- SelectorSyntaxError,
- parse,
-)
-from cssselect.parser import (
- Function,
- FunctionalPseudoElement,
- PseudoElement,
- Token,
- parse_series,
- tokenize,
-)
-from cssselect.xpath import XPathExpr
-
-if TYPE_CHECKING:
- from collections.abc import Sequence
-
-
-class TestCssselect(unittest.TestCase):
- def test_tokenizer(self) -> None:
- tokens = [
- str(item)
- for item in tokenize(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)')
- ]
- assert tokens == [
- "",
- "",
- "' at 5>",
- "",
- # the no-break space is not whitespace in CSS
- "", # f\xa0
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- ]
-
- def test_parser(self) -> None:
- def repr_parse(css: str) -> list[str]:
- selectors = parse(css)
- for selector in selectors:
- assert selector.pseudo_element is None
- return [repr(selector.parsed_tree) for selector in selectors]
-
- def parse_many(first: str, *others: str) -> list[str]:
- result = repr_parse(first)
- for other in others:
- assert repr_parse(other) == result
- return result
-
- assert parse_many("*") == ["Element[*]"]
- assert parse_many("*|*") == ["Element[*]"]
- assert parse_many("*|foo") == ["Element[foo]"]
- assert parse_many("|foo") == ["Element[foo]"]
- assert parse_many("foo|*") == ["Element[foo|*]"]
- assert parse_many("foo|bar") == ["Element[foo|bar]"]
- # This will never match, but it is valid:
- assert parse_many("#foo#bar") == ["Hash[Hash[Element[*]#foo]#bar]"]
- assert parse_many(
- "div>.foo",
- "div> .foo",
- "div >.foo",
- "div > .foo",
- "div \n> \t \t .foo",
- "div\r>\n\n\n.foo",
- "div\f>\f.foo",
- ) == ["CombinedSelector[Element[div] > Class[Element[*].foo]]"]
- assert parse_many(
- "td.foo,.bar", "td.foo, .bar", "td.foo\t\r\n\f ,\t\r\n\f .bar"
- ) == [
- "Class[Element[td].foo]",
- "Class[Element[*].bar]",
- ]
- assert parse_many("div, td.foo, div.bar span") == [
- "Element[div]",
- "Class[Element[td].foo]",
- "CombinedSelector[Class[Element[div].bar] Element[span]]",
- ]
- assert parse_many("div > p") == ["CombinedSelector[Element[div] > Element[p]]"]
- assert parse_many("td:first") == ["Pseudo[Element[td]:first]"]
- assert parse_many("td:first") == ["Pseudo[Element[td]:first]"]
- assert parse_many("td :first") == [
- "CombinedSelector[Element[td] Pseudo[Element[*]:first]]"
- ]
- assert parse_many("td :first") == [
- "CombinedSelector[Element[td] Pseudo[Element[*]:first]]"
- ]
- assert parse_many("a[name]", "a[ name\t]") == ["Attrib[Element[a][name]]"]
- assert parse_many("a [name]") == [
- "CombinedSelector[Element[a] Attrib[Element[*][name]]]"
- ]
- assert parse_many('a[rel="include"]', "a[rel = include]") == [
- "Attrib[Element[a][rel = 'include']]"
- ]
- assert parse_many("a[hreflang |= 'en']", "a[hreflang|=en]") == [
- "Attrib[Element[a][hreflang |= 'en']]"
- ]
- assert parse_many("div:nth-child(10)") == [
- "Function[Element[div]:nth-child(['10'])]"
- ]
- assert parse_many(":nth-child(2n+2)") == [
- "Function[Element[*]:nth-child(['2', 'n', '+2'])]"
- ]
- assert parse_many("div:nth-of-type(10)") == [
- "Function[Element[div]:nth-of-type(['10'])]"
- ]
- assert parse_many("div div:nth-of-type(10) .aclass") == [
- "CombinedSelector[CombinedSelector[Element[div] "
- "Function[Element[div]:nth-of-type(['10'])]] "
- " Class[Element[*].aclass]]"
- ]
- assert parse_many("label:only") == ["Pseudo[Element[label]:only]"]
- assert parse_many("a:lang(fr)") == ["Function[Element[a]:lang(['fr'])]"]
- assert parse_many('div:contains("foo")') == [
- "Function[Element[div]:contains(['foo'])]"
- ]
- assert parse_many("div#foobar") == ["Hash[Element[div]#foobar]"]
- assert parse_many("div:not(div.foo)") == [
- "Negation[Element[div]:not(Class[Element[div].foo])]"
- ]
- assert parse_many("div:has(div.foo)") == [
- "Relation[Element[div]:has(Selector[Class[Element[div].foo]])]"
- ]
- assert parse_many("div:is(.foo, #bar)") == [
- "Matching[Element[div]:is(Class[Element[*].foo], Hash[Element[*]#bar])]"
- ]
- assert parse_many(":is(:hover, :visited)") == [
- "Matching[Element[*]:is(Pseudo[Element[*]:hover], Pseudo[Element[*]:visited])]"
- ]
- assert parse_many(":where(:hover, :visited)") == [
- "SpecificityAdjustment[Element[*]:where(Pseudo[Element[*]:hover],"
- " Pseudo[Element[*]:visited])]"
- ]
- assert parse_many("td ~ th") == ["CombinedSelector[Element[td] ~ Element[th]]"]
- assert parse_many(":scope > foo") == [
- "CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]"
- ]
- assert parse_many(" :scope > foo") == [
- "CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]"
- ]
- assert parse_many(":scope > foo bar > div") == [
- "CombinedSelector[CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > "
- "Element[foo]] Element[bar]] > Element[div]]"
- ]
- assert parse_many(":scope > #foo #bar") == [
- "CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > "
- "Hash[Element[*]#foo]] Hash[Element[*]#bar]]"
- ]
-
- def test_pseudo_elements(self) -> None:
- def parse_pseudo(css: str) -> list[tuple[str, str | None]]:
- result: list[tuple[str, str | None]] = []
- for selector in parse(css):
- pseudo = selector.pseudo_element
- pseudo = str(pseudo) if pseudo else pseudo
- # No Symbol here
- assert pseudo is None or isinstance(pseudo, str)
- selector_as_str = repr(selector.parsed_tree)
- result.append((selector_as_str, pseudo))
- return result
-
- def parse_one(css: str) -> tuple[str, str | None]:
- result = parse_pseudo(css)
- assert len(result) == 1
- return result[0]
-
- def test_pseudo_repr(css: str) -> str:
- result = parse(css)
- assert len(result) == 1
- selector = result[0]
- return repr(selector.parsed_tree)
-
- assert parse_one("foo") == ("Element[foo]", None)
- assert parse_one("*") == ("Element[*]", None)
- assert parse_one(":empty") == ("Pseudo[Element[*]:empty]", None)
- assert parse_one(":scope") == ("Pseudo[Element[*]:scope]", None)
-
- # Special cases for CSS 2.1 pseudo-elements
- assert parse_one(":BEfore") == ("Element[*]", "before")
- assert parse_one(":aftER") == ("Element[*]", "after")
- assert parse_one(":First-Line") == ("Element[*]", "first-line")
- assert parse_one(":First-Letter") == ("Element[*]", "first-letter")
-
- assert parse_one("::befoRE") == ("Element[*]", "before")
- assert parse_one("::AFter") == ("Element[*]", "after")
- assert parse_one("::firsT-linE") == ("Element[*]", "first-line")
- assert parse_one("::firsT-letteR") == ("Element[*]", "first-letter")
-
- assert parse_one("::text-content") == ("Element[*]", "text-content")
- assert parse_one("::attr(name)") == (
- "Element[*]",
- "FunctionalPseudoElement[::attr(['name'])]",
- )
-
- assert parse_one("::Selection") == ("Element[*]", "selection")
- assert parse_one("foo:after") == ("Element[foo]", "after")
- assert parse_one("foo::selection") == ("Element[foo]", "selection")
- assert parse_one("lorem#ipsum ~ a#b.c[href]:empty::selection") == (
- "CombinedSelector[Hash[Element[lorem]#ipsum] ~ "
- "Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]",
- "selection",
- )
- assert parse_pseudo(":scope > div, foo bar") == [
- ("CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]", None),
- ("CombinedSelector[Element[foo] Element[bar]]", None),
- ]
- assert parse_pseudo("foo bar, :scope > div") == [
- ("CombinedSelector[Element[foo] Element[bar]]", None),
- ("CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]", None),
- ]
- assert parse_pseudo("foo bar,:scope > div") == [
- ("CombinedSelector[Element[foo] Element[bar]]", None),
- ("CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]", None),
- ]
- assert parse_pseudo("foo:before, bar, baz:after") == [
- ("Element[foo]", "before"),
- ("Element[bar]", None),
- ("Element[baz]", "after"),
- ]
-
- # Special cases for CSS 2.1 pseudo-elements are ignored by default
- for pseudo in ("after", "before", "first-line", "first-letter"):
- (selector,) = parse(f"e:{pseudo}")
- assert selector.pseudo_element == pseudo
- assert GenericTranslator().selector_to_xpath(selector, prefix="") == "e"
-
- # Pseudo Elements are ignored by default, but if allowed they are not
- # supported by GenericTranslator
- tr = GenericTranslator()
- (selector,) = parse("e::foo")
- assert selector.pseudo_element == "foo"
- assert tr.selector_to_xpath(selector, prefix="") == "e"
- with pytest.raises(ExpressionError):
- tr.selector_to_xpath(selector, translate_pseudo_elements=True)
-
- # Special test for the unicode symbols and ':scope' element if check
- # Errors if use repr() instead of __repr__()
- assert test_pseudo_repr(":fİrst-child") == "Pseudo[Element[*]:fİrst-child]"
- assert test_pseudo_repr(":scope") == "Pseudo[Element[*]:scope]"
-
- def test_specificity(self) -> None:
- def specificity(css: str) -> tuple[int, int, int]:
- selectors = parse(css)
- assert len(selectors) == 1
- return selectors[0].specificity()
-
- assert specificity("*") == (0, 0, 0)
- assert specificity(" foo") == (0, 0, 1)
- assert specificity(":empty ") == (0, 1, 0)
- assert specificity(":before") == (0, 0, 1)
- assert specificity("*:before") == (0, 0, 1)
- assert specificity(":nth-child(2)") == (0, 1, 0)
- assert specificity(".bar") == (0, 1, 0)
- assert specificity("[baz]") == (0, 1, 0)
- assert specificity('[baz="4"]') == (0, 1, 0)
- assert specificity('[baz^="4"]') == (0, 1, 0)
- assert specificity("#lipsum") == (1, 0, 0)
- assert specificity("::attr(name)") == (0, 0, 1)
-
- assert specificity(":not(*)") == (0, 0, 0)
- assert specificity(":not(foo)") == (0, 0, 1)
- assert specificity(":not(.foo)") == (0, 1, 0)
- assert specificity(":not([foo])") == (0, 1, 0)
- assert specificity(":not(:empty)") == (0, 1, 0)
- assert specificity(":not(#foo)") == (1, 0, 0)
-
- assert specificity(":has(*)") == (0, 0, 0)
- assert specificity(":has(foo)") == (0, 0, 1)
- assert specificity(":has(.foo)") == (0, 1, 0)
- assert specificity(":has(> foo)") == (0, 0, 1)
-
- assert specificity(":is(.foo, #bar)") == (1, 0, 0)
- assert specificity(":is(:hover, :visited)") == (0, 1, 0)
- assert specificity(":where(:hover, :visited)") == (0, 0, 0)
-
- assert specificity("foo:empty") == (0, 1, 1)
- assert specificity("foo:before") == (0, 0, 2)
- assert specificity("foo::before") == (0, 0, 2)
- assert specificity("foo:empty::before") == (0, 1, 2)
-
- assert specificity("#lorem + foo#ipsum:first-child > bar:first-line") == (
- 2,
- 1,
- 3,
- )
-
- def test_css_export(self) -> None:
- def css2css(css: str, res: str | None = None) -> None:
- selectors = parse(css)
- assert len(selectors) == 1
- assert selectors[0].canonical() == (res or css)
-
- css2css("*")
- css2css(" foo", "foo")
- css2css("Foo", "Foo")
- css2css(":empty ", ":empty")
- css2css(":before", "::before")
- css2css(":beFOre", "::before")
- css2css("*:before", "::before")
- css2css(":nth-child(2)")
- css2css(".bar")
- css2css("[baz]")
- css2css('[baz="4"]', "[baz='4']")
- css2css('[baz^="4"]', "[baz^='4']")
- css2css("[ns|attr='4']")
- css2css("#lipsum")
- css2css(":not(*)")
- css2css(":not(foo)")
- css2css(":not(*.foo)", ":not(.foo)")
- css2css(":not(*[foo])", ":not([foo])")
- css2css(":not(:empty)")
- css2css(":not(#foo)")
- css2css(":has(*)")
- css2css(":has(foo)")
- css2css(":has(*.foo)", ":has(.foo)")
- css2css(":is(#bar, .foo)")
- css2css(":is(:focused, :visited)")
- css2css(":where(:focused, :visited)")
- css2css("foo:empty")
- css2css("foo::before")
- css2css("foo:empty::before")
- css2css('::name(arg + "val" - 3)', "::name(arg+'val'-3)")
- css2css("#lorem + foo#ipsum:first-child > bar::first-line")
- css2css("foo > *")
-
- def test_parse_errors(self) -> None:
- def get_error(css: str) -> str | None:
- try:
- parse(css)
- except SelectorSyntaxError:
- return str(sys.exc_info()[1])
- return None
-
- assert get_error("attributes(href)/html/body/a") == (
- "Expected selector, got "
- )
- assert get_error("attributes(href)") == (
- "Expected selector, got "
- )
- assert get_error("html/body/a") == ("Expected selector, got ")
- assert get_error(" ") == ("Expected selector, got ")
- assert get_error("div, ") == ("Expected selector, got ")
- assert get_error(" , div") == ("Expected selector, got ")
- assert get_error("p, , div") == ("Expected selector, got ")
- assert get_error("div > ") == ("Expected selector, got ")
- assert get_error(" > div") == ("Expected selector, got ' at 2>")
- assert get_error("foo|#bar") == ("Expected ident or '*', got ")
- assert get_error("#.foo") == ("Expected selector, got ")
- assert get_error(".#foo") == ("Expected ident, got ")
- assert get_error(":#foo") == ("Expected ident, got ")
- assert get_error("[*]") == ("Expected '|', got ")
- assert get_error("[foo|]") == ("Expected ident, got ")
- assert get_error("[#]") == ("Expected ident or '*', got ")
- assert get_error("[foo=#]") == (
- "Expected string or ident, got "
- )
- assert get_error("[href]a") == ("Expected selector, got ")
- assert get_error("[rel=stylesheet]") is None
- assert get_error("[rel:stylesheet]") == (
- "Operator expected, got "
- )
- assert get_error("[rel=stylesheet") == ("Expected ']', got ")
- assert get_error(":lang(fr)") is None
- assert get_error(":lang(fr") == ("Expected an argument, got ")
- assert get_error(':contains("foo') == ("Unclosed string at 10")
- assert get_error("foo!") == ("Expected selector, got ")
-
- # Mis-placed pseudo-elements
- assert get_error("a:before:empty") == (
- "Got pseudo-element ::before not at the end of a selector"
- )
- assert get_error("li:before a") == (
- "Got pseudo-element ::before not at the end of a selector"
- )
- assert get_error(":not(:before)") == (
- "Got pseudo-element ::before inside :not() at 12"
- )
- assert get_error(":not(:not(a))") == ("Got nested :not()")
- assert get_error(":is(:before)") == (
- "Got pseudo-element ::before inside function"
- )
- assert get_error(":is(a b)") == ("Expected an argument, got ")
- assert get_error(":where(:before)") == (
- "Got pseudo-element ::before inside function"
- )
- assert get_error(":where(a b)") == (
- "Expected an argument, got "
- )
- assert get_error(":scope > div :scope header") == (
- 'Got immediate child pseudo-element ":scope" not at the start of a selector'
- )
- assert get_error("div :scope header") == (
- 'Got immediate child pseudo-element ":scope" not at the start of a selector'
- )
- assert get_error("> div p") == ("Expected selector, got ' at 0>")
-
- # Unsupported :has() with several arguments
- assert get_error(":has(a, b)") == ("Expected an argument, got ")
- assert get_error(":has()") == ("Expected selector, got ")
-
- def test_translation(self) -> None:
- def xpath(css: str) -> str:
- return str(GenericTranslator().css_to_xpath(css, prefix=""))
-
- assert xpath("*") == "*"
- assert xpath("e") == "e"
- assert xpath("*|e") == "e"
- assert xpath("e|f") == "e:f"
- assert xpath("e[foo]") == "e[@foo]"
- assert xpath("e[foo|bar]") == "e[@foo:bar]"
- assert xpath('e[foo="bar"]') == "e[@foo = 'bar']"
- assert xpath('e[foo~="bar"]') == (
- "e[@foo and contains(concat(' ', normalize-space(@foo), ' '), ' bar ')]"
- )
- assert xpath('e[foo^="bar"]') == ("e[@foo and starts-with(@foo, 'bar')]")
- assert xpath('e[foo$="bar"]') == (
- "e[@foo and substring(@foo, string-length(@foo)-2) = 'bar']"
- )
- assert xpath('e[foo*="bar"]') == ("e[@foo and contains(@foo, 'bar')]")
- assert xpath('e[hreflang|="en"]') == (
- "e[@hreflang and (@hreflang = 'en' or starts-with(@hreflang, 'en-'))]"
- )
-
- # --- nth-* and nth-last-* -------------------------------------
- assert xpath("e:nth-child(1)") == ("e[count(preceding-sibling::*) = 0]")
-
- # always true
- assert xpath("e:nth-child(n)") == ("e")
- assert xpath("e:nth-child(n+1)") == ("e")
- # always true too
- assert xpath("e:nth-child(n-10)") == ("e")
- # b=2 is the limit...
- assert xpath("e:nth-child(n+2)") == ("e[count(preceding-sibling::*) >= 1]")
- # always false
- assert xpath("e:nth-child(-n)") == ("e[0]")
- # equivalent to first child
- assert xpath("e:nth-child(-n+1)") == ("e[count(preceding-sibling::*) <= 0]")
-
- assert xpath("e:nth-child(3n+2)") == (
- "e[(count(preceding-sibling::*) >= 1) and "
- "((count(preceding-sibling::*) +2) mod 3 = 0)]"
- )
- assert xpath("e:nth-child(3n-2)") == (
- "e[count(preceding-sibling::*) mod 3 = 0]"
- )
- assert xpath("e:nth-child(-n+6)") == ("e[count(preceding-sibling::*) <= 5]")
-
- assert xpath("e:nth-last-child(1)") == ("e[count(following-sibling::*) = 0]")
- assert xpath("e:nth-last-child(2n)") == (
- "e[(count(following-sibling::*) +1) mod 2 = 0]"
- )
- assert xpath("e:nth-last-child(2n+1)") == (
- "e[count(following-sibling::*) mod 2 = 0]"
- )
- assert xpath("e:nth-last-child(2n+2)") == (
- "e[(count(following-sibling::*) >= 1) and "
- "((count(following-sibling::*) +1) mod 2 = 0)]"
- )
- assert xpath("e:nth-last-child(3n+1)") == (
- "e[count(following-sibling::*) mod 3 = 0]"
- )
- # represents the two last e elements
- assert xpath("e:nth-last-child(-n+2)") == (
- "e[count(following-sibling::*) <= 1]"
- )
-
- assert xpath("e:nth-of-type(1)") == ("e[count(preceding-sibling::e) = 0]")
- assert xpath("e:nth-last-of-type(1)") == ("e[count(following-sibling::e) = 0]")
- assert xpath("div e:nth-last-of-type(1) .aclass") == (
- "div/descendant-or-self::*/e[count(following-sibling::e) = 0]"
- "/descendant-or-self::*/*[@class and contains("
- "concat(' ', normalize-space(@class), ' '), ' aclass ')]"
- )
-
- assert xpath("e:first-child") == ("e[count(preceding-sibling::*) = 0]")
- assert xpath("e:last-child") == ("e[count(following-sibling::*) = 0]")
- assert xpath("e:first-of-type") == ("e[count(preceding-sibling::e) = 0]")
- assert xpath("e:last-of-type") == ("e[count(following-sibling::e) = 0]")
- assert xpath("e:only-child") == ("e[count(parent::*/child::*) = 1]")
- assert xpath("e:only-of-type") == ("e[count(parent::*/child::e) = 1]")
- assert xpath("e:empty") == ("e[not(*) and not(string-length())]")
- assert xpath("e:EmPTY") == ("e[not(*) and not(string-length())]")
- assert xpath("e:root") == ("e[not(parent::*)]")
- assert xpath("e:hover") == ("e[0]") # never matches
- assert (
- xpath("div:has(bar.foo)") == "div[descendant::bar"
- "[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]]"
- )
- assert xpath("e:has(> f)") == "e[./f]"
- assert xpath("e:has(f)") == "e[descendant::f]"
- assert xpath("e:has(~ f)") == "e[following-sibling::f]"
- assert (
- xpath("e:has(+ f)")
- == "e[following-sibling::*[(name() = 'f') and (position() = 1)]]"
- )
- assert xpath('e:contains("foo")') == ("e[contains(., 'foo')]")
- assert xpath("e:ConTains(foo)") == ("e[contains(., 'foo')]")
- assert xpath("e.warning") == (
- "e[@class and contains("
- "concat(' ', normalize-space(@class), ' '), ' warning ')]"
- )
- assert xpath("e#myid") == ("e[@id = 'myid']")
- assert xpath("e:not(:nth-child(odd))") == (
- "e[not(count(preceding-sibling::*) mod 2 = 0)]"
- )
- assert xpath("e:nOT(*)") == ("e[0]") # never matches
- assert xpath("e f") == ("e/descendant-or-self::*/f")
- assert xpath("e > f") == ("e/f")
- assert xpath("e + f") == (
- "e/following-sibling::*[(name() = 'f') and (position() = 1)]"
- )
- assert xpath("e ~ f") == ("e/following-sibling::f")
- assert xpath("e ~ f:nth-child(3)") == (
- "e/following-sibling::f[count(preceding-sibling::*) = 2]"
- )
- assert xpath("div#container p") == (
- "div[@id = 'container']/descendant-or-self::*/p"
- )
- assert xpath("e:where(foo)") == "e[name() = 'foo']"
- assert xpath("e:where(foo, bar)") == "e[(name() = 'foo') or (name() = 'bar')]"
-
- # Invalid characters in XPath element names
- assert xpath(r"di\a0 v") == ("*[name() = 'di v']") # di\xa0v
- assert xpath(r"di\[v") == ("*[name() = 'di[v']")
- assert xpath(r"[h\a0 ref]") == ("*[attribute::*[name() = 'h ref']]") # h\xa0ref
- assert xpath(r"[h\]ref]") == ("*[attribute::*[name() = 'h]ref']]")
-
- with pytest.raises(ExpressionError):
- xpath(":fİrst-child")
- with pytest.raises(ExpressionError):
- xpath(":first-of-type")
- with pytest.raises(ExpressionError):
- xpath(":only-of-type")
- with pytest.raises(ExpressionError):
- xpath(":last-of-type")
- with pytest.raises(ExpressionError):
- xpath(":nth-of-type(1)")
- with pytest.raises(ExpressionError):
- xpath(":nth-last-of-type(1)")
- with pytest.raises(ExpressionError):
- xpath(":nth-child(n-)")
- with pytest.raises(ExpressionError):
- xpath(":after")
- with pytest.raises(ExpressionError):
- xpath(":lorem-ipsum")
- with pytest.raises(ExpressionError):
- xpath(":lorem(ipsum)")
- with pytest.raises(ExpressionError):
- xpath("::lorem-ipsum")
- with pytest.raises(TypeError):
- GenericTranslator().css_to_xpath(4) # type: ignore[arg-type]
- with pytest.raises(TypeError):
- GenericTranslator().selector_to_xpath("foo") # type: ignore[arg-type]
-
- def test_unicode(self) -> None:
- css = ".a\xc1b"
- xpath = GenericTranslator().css_to_xpath(css)
- assert css[1:] in xpath
- xpath = xpath.encode("ascii", "xmlcharrefreplace").decode("ASCII")
- assert xpath == (
- "descendant-or-self::*[@class and contains("
- "concat(' ', normalize-space(@class), ' '), ' aÁb ')]"
- )
-
- def test_quoting(self) -> None:
- css_to_xpath = GenericTranslator().css_to_xpath
- assert css_to_xpath('*[aval="\'"]') == (
- """descendant-or-self::*[@aval = "'"]"""
- )
- assert css_to_xpath("*[aval=\"'''\"]") == (
- """descendant-or-self::*[@aval = "'''"]"""
- )
- assert css_to_xpath("*[aval='\"']") == (
- """descendant-or-self::*[@aval = '"']"""
- )
- assert css_to_xpath('*[aval=\'"""\']') == (
- '''descendant-or-self::*[@aval = '"""']'''
- )
- assert css_to_xpath(':scope > div[dataimg=""]') == (
- "descendant-or-self::*[1]/div[@dataimg = '']"
- )
-
- def test_unicode_escapes(self) -> None:
- # \22 == '"' \20 == ' '
- css_to_xpath = GenericTranslator().css_to_xpath
- assert css_to_xpath(r'*[aval="\'\22\'"]') == (
- """descendant-or-self::*[@aval = concat("'",'"',"'")]"""
- )
- assert css_to_xpath(r'*[aval="\'\22 2\'"]') == (
- """descendant-or-self::*[@aval = concat("'",'"2',"'")]"""
- )
- assert css_to_xpath(r'*[aval="\'\20 \'"]') == (
- """descendant-or-self::*[@aval = "' '"]"""
- )
- assert css_to_xpath("*[aval=\"'\\20\r\n '\"]") == (
- """descendant-or-self::*[@aval = "' '"]"""
- )
-
- def test_xpath_pseudo_elements(self) -> None:
- class CustomTranslator(GenericTranslator):
- def xpath_pseudo_element(
- self, xpath: XPathExpr, pseudo_element: PseudoElement
- ) -> XPathExpr:
- if isinstance(pseudo_element, FunctionalPseudoElement):
- method_name = "xpath_{}_functional_pseudo_element".format(
- pseudo_element.name.replace("-", "_")
- )
- method = getattr(self, method_name, None)
- if not method:
- raise ExpressionError(
- f"The functional pseudo-element ::{pseudo_element.name}() is unknown"
- )
- xpath = method(xpath, pseudo_element.arguments)
- else:
- method_name = "xpath_{}_simple_pseudo_element".format(
- pseudo_element.replace("-", "_")
- )
- method = getattr(self, method_name, None)
- if not method:
- raise ExpressionError(
- f"The pseudo-element ::{pseudo_element} is unknown"
- )
- xpath = method(xpath)
- return xpath
-
- # functional pseudo-class:
- # elements that have a certain number of attributes
- def xpath_nb_attr_function(
- self, xpath: XPathExpr, function: Function
- ) -> XPathExpr:
- assert function.arguments[0].value
- nb_attributes = int(function.arguments[0].value)
- return xpath.add_condition(f"count(@*)={nb_attributes}")
-
- # pseudo-class:
- # elements that have 5 attributes
- def xpath_five_attributes_pseudo(self, xpath: XPathExpr) -> XPathExpr:
- return xpath.add_condition("count(@*)=5")
-
- # functional pseudo-element:
- # element's attribute by name
- def xpath_attr_functional_pseudo_element(
- self, xpath: XPathExpr, arguments: Sequence[Token]
- ) -> XPathExpr:
- attribute_name = arguments[0].value
- other = XPathExpr(
- f"@{attribute_name}",
- "",
- )
- return xpath.join("/", other)
-
- # pseudo-element:
- # element's text() nodes
- def xpath_text_node_simple_pseudo_element(
- self, xpath: XPathExpr
- ) -> XPathExpr:
- other = XPathExpr(
- "text()",
- "",
- )
- return xpath.join("/", other)
-
- # pseudo-element:
- # element's href attribute
- def xpath_attr_href_simple_pseudo_element(
- self, xpath: XPathExpr
- ) -> XPathExpr:
- other = XPathExpr(
- "@href",
- "",
- )
- return xpath.join("/", other)
-
- # pseudo-element:
- # used to demonstrate operator precedence
- def xpath_first_or_second_pseudo(self, xpath: XPathExpr) -> XPathExpr:
- return xpath.add_condition("@id = 'first' or @id = 'second'")
-
- def xpath(css: str) -> str:
- return str(CustomTranslator().css_to_xpath(css))
-
- assert xpath(":five-attributes") == "descendant-or-self::*[count(@*)=5]"
- assert xpath(":nb-attr(3)") == "descendant-or-self::*[count(@*)=3]"
- assert xpath("::attr(href)") == "descendant-or-self::*/@href"
- assert xpath("::text-node") == "descendant-or-self::*/text()"
- assert xpath("::attr-href") == "descendant-or-self::*/@href"
- assert xpath("p img::attr(src)") == (
- "descendant-or-self::p/descendant-or-self::*/img/@src"
- )
- assert xpath(":scope") == "descendant-or-self::*[1]"
- assert xpath(":first-or-second[href]") == (
- "descendant-or-self::*[(@id = 'first' or @id = 'second') and (@href)]"
- )
-
- assert str(XPathExpr("", "", condition="@href")) == "[@href]"
-
- document = etree.fromstring(OPERATOR_PRECEDENCE_IDS)
- sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__
-
- def operator_id(selector: str) -> list[str]:
- xpath = CustomTranslator().css_to_xpath(selector)
- items = typing.cast("list[etree._Element]", document.xpath(xpath))
- items.sort(key=sort_key)
- return [element.get("id", "nil") for element in items]
-
- assert operator_id(":first-or-second") == ["first", "second"]
- assert operator_id(":first-or-second[href]") == ["second"]
- assert operator_id("[href]:first-or-second") == ["second"]
-
- def test_series(self) -> None:
- def series(css: str) -> tuple[int, int] | None:
- (selector,) = parse(f":nth-child({css})")
- args = typing.cast(
- "FunctionalPseudoElement", selector.parsed_tree
- ).arguments
- try:
- return parse_series(args)
- except ValueError:
- return None
-
- assert series("1n+3") == (1, 3)
- assert series("1n +3") == (1, 3)
- assert series("1n + 3") == (1, 3)
- assert series("1n+ 3") == (1, 3)
- assert series("1n-3") == (1, -3)
- assert series("1n -3") == (1, -3)
- assert series("1n - 3") == (1, -3)
- assert series("1n- 3") == (1, -3)
- assert series("n-5") == (1, -5)
- assert series("odd") == (2, 1)
- assert series("even") == (2, 0)
- assert series("3n") == (3, 0)
- assert series("n") == (1, 0)
- assert series("+n") == (1, 0)
- assert series("-n") == (-1, 0)
- assert series("5") == (0, 5)
- assert series("foo") is None
- assert series("n+") is None
-
- def test_lang(self) -> None:
- document = etree.fromstring(XMLLANG_IDS)
- sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__
- css_to_xpath = GenericTranslator().css_to_xpath
-
- def langid(selector: str) -> list[str]:
- xpath = css_to_xpath(selector)
- items = typing.cast("list[etree._Element]", document.xpath(xpath))
- items.sort(key=sort_key)
- return [element.get("id", "nil") for element in items]
-
- assert langid(':lang("EN")') == ["first", "second", "third", "fourth"]
- assert langid(':lang("en-us")') == ["second", "fourth"]
- assert langid(":lang(en-nz)") == ["third"]
- assert langid(":lang(fr)") == ["fifth"]
- assert langid(":lang(ru)") == ["sixth"]
- assert langid(":lang('ZH')") == ["eighth"]
- assert langid(":lang(de) :lang(zh)") == ["eighth"]
- assert langid(":lang(en), :lang(zh)") == [
- "first",
- "second",
- "third",
- "fourth",
- "eighth",
- ]
- assert langid(":lang(es)") == []
-
- def test_argument_types(self) -> None:
- class CustomTranslator(GenericTranslator):
- def __init__(self) -> None:
- self.argument_types: list[str] = []
-
- def xpath_pseudo_element(
- self, xpath: XPathExpr, pseudo_element: PseudoElement
- ) -> XPathExpr:
- self.argument_types += typing.cast(
- "FunctionalPseudoElement", pseudo_element
- ).argument_types()
- return xpath
-
- def argument_types(css: str) -> list[str]:
- translator = CustomTranslator()
- translator.css_to_xpath(css)
- return translator.argument_types
-
- mappings: list[tuple[str, list[str]]] = [
- ("", []),
- ("ident", ["IDENT"]),
- ('"string"', ["STRING"]),
- ("1", ["NUMBER"]),
- ]
- for argument_string, argument_list in mappings:
- css = f"::pseudo_element({argument_string})"
- assert argument_types(css) == argument_list
-
- def test_select(self) -> None:
- document = etree.fromstring(HTML_IDS)
- sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__
- css_to_xpath = GenericTranslator().css_to_xpath
- html_css_to_xpath = HTMLTranslator().css_to_xpath
-
- def select_ids(selector: str, html_only: bool) -> list[str]:
- xpath = css_to_xpath(selector)
- items = typing.cast("list[etree._Element]", document.xpath(xpath))
- if html_only:
- assert items == []
- xpath = html_css_to_xpath(selector)
- items = typing.cast("list[etree._Element]", document.xpath(xpath))
- items.sort(key=sort_key)
- return [element.get("id", "nil") for element in items]
-
- def pcss(main: str, *selectors: str, **kwargs: bool) -> list[str]:
- html_only = kwargs.pop("html_only", False)
- result = select_ids(main, html_only)
- for selector in selectors:
- assert select_ids(selector, html_only) == result
- return result
-
- all_ids = pcss("*")
- assert all_ids[:6] == [
- "html",
- "nil",
- "link-href",
- "link-nohref",
- "nil",
- "outer-div",
- ]
- assert all_ids[-1:] == ["foobar-span"]
- assert pcss("div") == ["outer-div", "li-div", "foobar-div"]
- assert pcss("DIV", html_only=True) == [
- "outer-div",
- "li-div",
- "foobar-div",
- ] # case-insensitive in HTML
- assert pcss("div div") == ["li-div"]
- assert pcss("div, div div") == ["outer-div", "li-div", "foobar-div"]
- assert pcss("a[name]") == ["name-anchor"]
- assert pcss("a[NAme]", html_only=True) == [
- "name-anchor"
- ] # case-insensitive in HTML:
- assert pcss("a[rel]") == ["tag-anchor", "nofollow-anchor"]
- assert pcss('a[rel="tag"]') == ["tag-anchor"]
- assert pcss('a[href*="localhost"]') == ["tag-anchor"]
- assert pcss('a[href*=""]') == []
- assert pcss('a[href^="http"]') == ["tag-anchor", "nofollow-anchor"]
- assert pcss('a[href^="http:"]') == ["tag-anchor"]
- assert pcss('a[href^=""]') == []
- assert pcss('a[href$="org"]') == ["nofollow-anchor"]
- assert pcss('a[href$=""]') == []
- assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == ["foobar-div"]
- assert pcss('[foobar~="ab bc"]', '[foobar~=""]', '[foobar~=" \t"]') == []
- assert pcss('div[foobar~="cd"]') == []
- assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ["second-li"]
- # Attribute values are case sensitive
- assert pcss('*[lang|="en"]', '[lang|="en-US"]') == []
- assert pcss('*[lang|="e"]') == []
- # ... :lang() is not.
- assert pcss(':lang("EN")', "*:lang(en-US)", html_only=True) == [
- "second-li",
- "li-div",
- ]
- assert pcss(':lang("e")', html_only=True) == []
- assert pcss(":scope > div") == []
- assert pcss(":scope body") == ["nil"]
- assert pcss(":scope body > div") == ["outer-div", "foobar-div"]
- assert pcss(":scope head") == ["nil"]
- assert pcss(":scope html") == []
-
- # --- nth-* and nth-last-* -------------------------------------
-
- # select nothing
- assert pcss("li:nth-child(-n)") == []
- # select all children
- assert pcss("li:nth-child(n)") == [
- "first-li",
- "second-li",
- "third-li",
- "fourth-li",
- "fifth-li",
- "sixth-li",
- "seventh-li",
- ]
-
- assert pcss("li:nth-child(3)", "#first-li ~ :nth-child(3)") == ["third-li"]
- assert pcss("li:nth-child(10)") == []
- assert pcss("li:nth-child(2n)", "li:nth-child(even)", "li:nth-child(2n+0)") == [
- "second-li",
- "fourth-li",
- "sixth-li",
- ]
- assert pcss("li:nth-child(+2n+1)", "li:nth-child(odd)") == [
- "first-li",
- "third-li",
- "fifth-li",
- "seventh-li",
- ]
- assert pcss("li:nth-child(2n+4)") == ["fourth-li", "sixth-li"]
- assert pcss("li:nth-child(3n+1)") == ["first-li", "fourth-li", "seventh-li"]
- assert pcss("li:nth-child(-n+3)") == ["first-li", "second-li", "third-li"]
- assert pcss("li:nth-child(-2n+4)") == ["second-li", "fourth-li"]
- assert pcss("li:nth-last-child(0)") == []
- assert pcss("li:nth-last-child(1)") == ["seventh-li"]
- assert pcss("li:nth-last-child(2n)", "li:nth-last-child(even)") == [
- "second-li",
- "fourth-li",
- "sixth-li",
- ]
- assert pcss("li:nth-last-child(2n+1)") == [
- "first-li",
- "third-li",
- "fifth-li",
- "seventh-li",
- ]
- assert pcss("li:nth-last-child(2n+2)") == ["second-li", "fourth-li", "sixth-li"]
- assert pcss("li:nth-last-child(3n+1)") == [
- "first-li",
- "fourth-li",
- "seventh-li",
- ]
- assert pcss("ol:first-of-type") == ["first-ol"]
- assert pcss("ol:nth-child(1)") == []
- assert pcss("ol:nth-of-type(2)") == ["second-ol"]
- assert pcss("ol:nth-last-of-type(1)") == ["second-ol"]
-
- # "+" and "~" tests
- assert pcss("ol#first-ol li + li:nth-child(4)") == ["fourth-li"]
- assert pcss("li + li:nth-child(1)") == []
- assert pcss("li ~ li:nth-child(2n+1)") == [
- "third-li",
- "fifth-li",
- "seventh-li",
- ] # all but the first
- assert pcss("li ~ li:nth-last-child(2n+1)") == [
- "third-li",
- "fifth-li",
- "seventh-li",
- ] # all but the first
-
- assert pcss("span:only-child") == ["foobar-span"]
- assert pcss("li div:only-child") == ["li-div"]
- assert pcss("div *:only-child") == ["li-div", "foobar-span"]
- with pytest.raises(ExpressionError):
- pcss("p *:only-of-type")
- assert pcss("p:only-of-type") == ["paragraph"]
- assert pcss("a:empty", "a:EMpty") == ["name-anchor"]
- assert pcss("li:empty") == ["third-li", "fourth-li", "fifth-li", "sixth-li"]
- assert pcss(":root", "html:root") == ["html"]
- assert pcss("li:root", "* :root") == []
- assert pcss('*:contains("link")', ':CONtains("link")') == [
- "html",
- "nil",
- "outer-div",
- "tag-anchor",
- "nofollow-anchor",
- ]
- assert pcss('*:contains("LInk")') == [] # case sensitive
- assert pcss('*:contains("e")') == [
- "html",
- "nil",
- "outer-div",
- "first-ol",
- "first-li",
- "paragraph",
- "p-em",
- ]
- assert pcss('*:contains("E")') == [] # case-sensitive
- assert pcss(".a", ".b", "*.a", "ol.a") == ["first-ol"]
- assert pcss(".c", "*.c") == ["first-ol", "third-li", "fourth-li"]
- assert pcss("ol *.c", "ol li.c", "li ~ li.c", "ol > li.c") == [
- "third-li",
- "fourth-li",
- ]
- assert pcss("#first-li", "li#first-li", "*#first-li") == ["first-li"]
- assert pcss("li div", "li > div", "div div") == ["li-div"]
- assert pcss("div > div") == []
- assert pcss("div>.c", "div > .c") == ["first-ol"]
- assert pcss("div + div") == ["foobar-div"]
- assert pcss("a ~ a") == ["tag-anchor", "nofollow-anchor"]
- assert pcss('a[rel="tag"] ~ a') == ["nofollow-anchor"]
- assert pcss("ol#first-ol li:last-child") == ["seventh-li"]
- assert pcss("ol#first-ol *:last-child") == ["li-div", "seventh-li"]
- assert pcss("#outer-div:first-child") == ["outer-div"]
- assert pcss("#outer-div :first-child") == [
- "name-anchor",
- "first-li",
- "li-div",
- "p-b",
- "checkbox-fieldset-disabled",
- "area-href",
- ]
- assert pcss("a[href]") == ["tag-anchor", "nofollow-anchor"]
- assert pcss(":not(*)") == []
- assert pcss("a:not([href])") == ["name-anchor"]
- assert pcss("ol :Not(li[class])") == [
- "first-li",
- "second-li",
- "li-div",
- "fifth-li",
- "sixth-li",
- "seventh-li",
- ]
- assert pcss("link:has(*)") == []
- assert pcss("ol:has(div)") == ["first-ol"]
- assert pcss(":is(#first-li, #second-li)") == ["first-li", "second-li"]
- assert pcss("a:is(#name-anchor, #tag-anchor)") == ["name-anchor", "tag-anchor"]
- assert pcss(":is(.c)") == ["first-ol", "third-li", "fourth-li"]
- assert pcss("ol.a.b.c > li.c:nth-child(3)") == ["third-li"]
-
- # Invalid characters in XPath element names, should not crash
- assert pcss(r"di\a0 v", r"div\[") == []
- assert pcss(r"[h\a0 ref]", r"[h\]ref]") == []
-
- # HTML-specific
- assert pcss(":link", html_only=True) == [
- "link-href",
- "tag-anchor",
- "nofollow-anchor",
- "area-href",
- ]
- assert pcss(":visited", html_only=True) == []
- assert pcss(":enabled", html_only=True) == [
- "link-href",
- "tag-anchor",
- "nofollow-anchor",
- "checkbox-unchecked",
- "text-checked",
- "checkbox-checked",
- "area-href",
- ]
- assert pcss(":disabled", html_only=True) == [
- "checkbox-disabled",
- "checkbox-disabled-checked",
- "fieldset",
- "checkbox-fieldset-disabled",
- ]
- assert pcss(":checked", html_only=True) == [
- "checkbox-checked",
- "checkbox-disabled-checked",
- ]
-
- def test_select_shakespeare(self) -> None:
- document = html.document_fromstring(HTML_SHAKESPEARE)
- body = typing.cast("list[etree._Element]", document.xpath("//body"))[0]
- css_to_xpath = GenericTranslator().css_to_xpath
-
- basestring_ = (str, bytes)
-
- def count(selector: str) -> int:
- xpath = css_to_xpath(selector)
- results = typing.cast("list[etree._Element]", body.xpath(xpath))
- assert not isinstance(results, basestring_)
- found = set()
- for item in results:
- assert item not in found
- found.add(item)
- assert not isinstance(item, basestring_)
- return len(results)
-
- # Data borrowed from http://mootools.net/slickspeed/
-
- ## Changed from original; probably because I'm only
- ## searching the body.
- # assert count('*') == 252
- assert count("*") == 246
- assert count("div:contains(CELIA)") == 26
- assert count("div:only-child") == 22 # ?
- assert count("div:nth-child(even)") == 106
- assert count("div:nth-child(2n)") == 106
- assert count("div:nth-child(odd)") == 137
- assert count("div:nth-child(2n+1)") == 137
- assert count("div:nth-child(n)") == 243
- assert count("div:last-child") == 53
- assert count("div:first-child") == 51
- assert count("div > div") == 242
- assert count("div + div") == 190
- assert count("div ~ div") == 190
- assert count("body") == 1
- assert count("body div") == 243
- assert count("div") == 243
- assert count("div div") == 242
- assert count("div div div") == 241
- assert count("div, div, div") == 243
- assert count("div, a, span") == 243
- assert count(".dialog") == 51
- assert count("div.dialog") == 51
- assert count("div .dialog") == 51
- assert count("div.character, div.dialog") == 99
- assert count("div.direction.dialog") == 0
- assert count("div.dialog.direction") == 0
- assert count("div.dialog.scene") == 1
- assert count("div.scene.scene") == 1
- assert count("div.scene .scene") == 0
- assert count("div.direction .dialog ") == 0
- assert count("div .dialog .direction") == 4
- assert count("div.dialog .dialog .direction") == 4
- assert count("#speech5") == 1
- assert count("div#speech5") == 1
- assert count("div #speech5") == 1
- assert count("div.scene div.dialog") == 49
- assert count("div#scene1 div.dialog div") == 142
- assert count("#scene1 #speech1") == 1
- assert count("div[class]") == 103
- assert count("div[class=dialog]") == 50
- assert count("div[class^=dia]") == 51
- assert count("div[class$=log]") == 50
- assert count("div[class*=sce]") == 1
- assert count("div[class|=dialog]") == 50 # ? Seems right
- assert count("div[class!=madeup]") == 243 # ? Seems right
- assert count("div[class~=dialog]") == 51 # ? Seems right
- assert count(":scope > div") == 1
- assert count(":scope > div > div[class=dialog]") == 1
- assert count(":scope > div div") == 242
-
-
-OPERATOR_PRECEDENCE_IDS = """
-
-
-
-
-
-"""
-
-XMLLANG_IDS = """
-
- a
- b
- c
- d
- e
- f
-
-
-
-
-"""
-
-HTML_IDS = """
-
-
-
-
-
-
-
-"""
-
-
-HTML_SHAKESPEARE = """
-
-
-
-
-
-
-
-
-
As You Like It
-
- by William Shakespeare
-
-
-
ACT I, SCENE III. A room in the palace.
-
-
Enter CELIA and ROSALIND
-
-
CELIA
-
-
Why, cousin! why, Rosalind! Cupid have mercy! not a word?
-
-
ROSALIND
-
-
Not one to throw at a dog.
-
-
CELIA
-
-
No, thy words are too precious to be cast away upon
-
curs; throw some of them at me; come, lame me with reasons.
-
-
ROSALIND
-
CELIA
-
-
But is all this for your father?
-
-
-
Then there were two cousins laid up; when the one
-
should be lamed with reasons and the other mad
-
without any.
-
-
ROSALIND
-
-
No, some of it is for my child's father. O, how
-
full of briers is this working-day world!
-
-
CELIA
-
-
They are but burs, cousin, thrown upon thee in
-
holiday foolery: if we walk not in the trodden
-
paths our very petticoats will catch them.
-
-
ROSALIND
-
-
I could shake them off my coat: these burs are in my heart.
-
-
CELIA
-
-
ROSALIND
-
-
I would try, if I could cry 'hem' and have him.
-
-
CELIA
-
-
Come, come, wrestle with thy affections.
-
-
ROSALIND
-
-
O, they take the part of a better wrestler than myself!
-
-
CELIA
-
-
O, a good wish upon you! you will try in time, in
-
despite of a fall. But, turning these jests out of
-
service, let us talk in good earnest: is it
-
possible, on such a sudden, you should fall into so
-
strong a liking with old Sir Rowland's youngest son?
-
-
ROSALIND
-
-
The duke my father loved his father dearly.
-
-
CELIA
-
-
Doth it therefore ensue that you should love his son
-
dearly? By this kind of chase, I should hate him,
-
for my father hated his father dearly; yet I hate
-
not Orlando.
-
-
ROSALIND
-
-
No, faith, hate him not, for my sake.
-
-
CELIA
-
-
Why should I not? doth he not deserve well?
-
-
ROSALIND
-
-
Let me love him for that, and do you love him
-
because I do. Look, here comes the duke.
-
-
CELIA
-
-
With his eyes full of anger.
-
Enter DUKE FREDERICK, with Lords
-
-
DUKE FREDERICK
-
-
Mistress, dispatch you with your safest haste
-
And get you from our court.
-
-
ROSALIND
-
-
DUKE FREDERICK
-
-
You, cousin
-
Within these ten days if that thou be'st found
-
So near our public court as twenty miles,
-
Thou diest for it.
-
-
ROSALIND
-
-
I do beseech your grace,
-
Let me the knowledge of my fault bear with me:
-
If with myself I hold intelligence
-
Or have acquaintance with mine own desires,
-
If that I do not dream or be not frantic,--
-
As I do trust I am not--then, dear uncle,
-
Never so much as in a thought unborn
-
Did I offend your highness.
-
-
DUKE FREDERICK
-
-
Thus do all traitors:
-
If their purgation did consist in words,
-
They are as innocent as grace itself:
-
Let it suffice thee that I trust thee not.
-
-
ROSALIND
-
-
Yet your mistrust cannot make me a traitor:
-
Tell me whereon the likelihood depends.
-
-
DUKE FREDERICK
-
-
Thou art thy father's daughter; there's enough.
-
-
ROSALIND
-
-
So was I when your highness took his dukedom;
-
So was I when your highness banish'd him:
-
Treason is not inherited, my lord;
-
Or, if we did derive it from our friends,
-
What's that to me? my father was no traitor:
-
Then, good my liege, mistake me not so much
-
To think my poverty is treacherous.
-
-
CELIA
-
-
Dear sovereign, hear me speak.
-
-
DUKE FREDERICK
-
-
Ay, Celia; we stay'd her for your sake,
-
Else had she with her father ranged along.
-
-
CELIA
-
-
I did not then entreat to have her stay;
-
It was your pleasure and your own remorse:
-
I was too young that time to value her;
-
But now I know her: if she be a traitor,
-
Why so am I; we still have slept together,
-
Rose at an instant, learn'd, play'd, eat together,
-
And wheresoever we went, like Juno's swans,
-
Still we went coupled and inseparable.
-
-
DUKE FREDERICK
-
-
She is too subtle for thee; and her smoothness,
-
Her very silence and her patience
-
Speak to the people, and they pity her.
-
Thou art a fool: she robs thee of thy name;
-
And thou wilt show more bright and seem more virtuous
-
When she is gone. Then open not thy lips:
-
Firm and irrevocable is my doom
-
Which I have pass'd upon her; she is banish'd.
-
-
CELIA
-
-
Pronounce that sentence then on me, my liege:
-
I cannot live out of her company.
-
-
DUKE FREDERICK
-
-
You are a fool. You, niece, provide yourself:
-
If you outstay the time, upon mine honour,
-
And in the greatness of my word, you die.
-
Exeunt DUKE FREDERICK and Lords
-
-
CELIA
-
-
O my poor Rosalind, whither wilt thou go?
-
Wilt thou change fathers? I will give thee mine.
-
I charge thee, be not thou more grieved than I am.
-
-
ROSALIND
-
-
CELIA
-
-
Thou hast not, cousin;
-
Prithee be cheerful: know'st thou not, the duke
-
Hath banish'd me, his daughter?
-
-
ROSALIND
-
-
CELIA
-
-
No, hath not? Rosalind lacks then the love
-
Which teacheth thee that thou and I am one:
-
Shall we be sunder'd? shall we part, sweet girl?
-
No: let my father seek another heir.
-
Therefore devise with me how we may fly,
-
Whither to go and what to bear with us;
-
And do not seek to take your change upon you,
-
To bear your griefs yourself and leave me out;
-
For, by this heaven, now at our sorrows pale,
-
Say what thou canst, I'll go along with thee.
-
-
ROSALIND
-
-
Why, whither shall we go?
-
-
CELIA
-
-
To seek my uncle in the forest of Arden.
-
-
ROSALIND
-
-
Alas, what danger will it be to us,
-
Maids as we are, to travel forth so far!
-
Beauty provoketh thieves sooner than gold.
-
-
CELIA
-
-
I'll put myself in poor and mean attire
-
And with a kind of umber smirch my face;
-
The like do you: so shall we pass along
-
And never stir assailants.
-
-
ROSALIND
-
-
Were it not better,
-
Because that I am more than common tall,
-
That I did suit me all points like a man?
-
A gallant curtle-axe upon my thigh,
-
A boar-spear in my hand; and--in my heart
-
Lie there what hidden woman's fear there will--
-
We'll have a swashing and a martial outside,
-
As many other mannish cowards have
-
That do outface it with their semblances.
-
-
CELIA
-
-
What shall I call thee when thou art a man?
-
-
ROSALIND
-
-
I'll have no worse a name than Jove's own page;
-
And therefore look you call me Ganymede.
-
But what will you be call'd?
-
-
CELIA
-
-
Something that hath a reference to my state
-
No longer Celia, but Aliena.
-
-
ROSALIND
-
-
But, cousin, what if we assay'd to steal
-
The clownish fool out of your father's court?
-
Would he not be a comfort to our travel?
-
-
CELIA
-
-
He'll go along o'er the wide world with me;
-
Leave me alone to woo him. Let's away,
-
And get our jewels and our wealth together,
-
Devise the fittest time and safest way
-
To hide us from pursuit that will be made
-
After my flight. Now go we in content
-
To liberty and not to banishment.
-
Exeunt
-
-
-
-
-
-
-"""
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/tox.ini b/tox.ini
index 9ff54cf..ca053d8 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,49 +1,10 @@
[tox]
-envlist = pre-commit,pylint,py,docs,typing
+envlist = py25,py26,py27,py32,py33
[testenv]
-deps =
- lxml>=4.4
- pytest-cov>=7.0.0
- pytest>=5.4
- sybil
-commands =
- pytest --cov=cssselect \
- --cov-report=term-missing --cov-report=html --cov-report=xml \
- {posargs: cssselect tests docs}
+deps=lxml
+commands = python cssselect/tests.py
-[testenv:pylint]
-deps =
- {[testenv]deps}
- pylint==4.0.4
-commands =
- pylint {posargs: cssselect tests docs}
-
-[testenv:docs]
-changedir = docs
-deps =
- -r docs/requirements.txt
-commands =
- sphinx-build -W -b html . {envtmpdir}/html
-
-[testenv:typing]
-deps =
- {[testenv]deps}
- mypy==1.19.1
- types-lxml==2026.1.1
-commands =
- mypy {posargs: cssselect tests}
-
-[testenv:pre-commit]
-deps = pre-commit
-commands = pre-commit run --all-files --show-diff-on-failure
-skip_install = true
-
-[testenv:twinecheck]
-basepython = python3
-deps =
- twine==6.2.0
- build==1.4.0
-commands =
- python -m build --sdist
- twine check dist/*
+[testenv:py25]
+setenv =
+ PIP_INSECURE = 1