update Soupsieve

This commit is contained in:
AdeHub
2024-08-24 16:36:55 +12:00
parent b3edfa0d87
commit 38d8e13e4e
15 changed files with 4282 additions and 383 deletions

View File

@@ -25,13 +25,14 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. SOFTWARE.
""" """
from __future__ import annotations
from .__meta__ import __version__, __version_info__ # noqa: F401 from .__meta__ import __version__, __version_info__ # noqa: F401
from . import css_parser as cp from . import css_parser as cp
from . import css_match as cm from . import css_match as cm
from . import css_types as ct from . import css_types as ct
from .util import DEBUG, SelectorSyntaxError # noqa: F401 from .util import DEBUG, SelectorSyntaxError # noqa: F401
import bs4 # type: ignore[import] import bs4 # type: ignore[import-untyped]
from typing import Dict, Optional, Any, List, Iterator, Iterable from typing import Any, Iterator, Iterable
__all__ = ( __all__ = (
'DEBUG', 'SelectorSyntaxError', 'SoupSieve', 'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
@@ -44,17 +45,14 @@ SoupSieve = cm.SoupSieve
def compile( # noqa: A001 def compile( # noqa: A001
pattern: str, pattern: str,
namespaces: Optional[Dict[str, str]] = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: Optional[Dict[str, str]] = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> cm.SoupSieve: ) -> cm.SoupSieve:
"""Compile CSS pattern.""" """Compile CSS pattern."""
ns = ct.Namespaces(namespaces) if namespaces is not None else namespaces # type: Optional[ct.Namespaces]
cs = ct.CustomSelectors(custom) if custom is not None else custom # type: Optional[ct.CustomSelectors]
if isinstance(pattern, SoupSieve): if isinstance(pattern, SoupSieve):
if flags: if flags:
raise ValueError("Cannot process 'flags' argument on a compiled selector list") raise ValueError("Cannot process 'flags' argument on a compiled selector list")
@@ -64,7 +62,12 @@ def compile( # noqa: A001
raise ValueError("Cannot process 'custom' argument on a compiled selector list") raise ValueError("Cannot process 'custom' argument on a compiled selector list")
return pattern return pattern
return cp._cached_css_compile(pattern, ns, cs, flags) return cp._cached_css_compile(
pattern,
ct.Namespaces(namespaces) if namespaces is not None else namespaces,
ct.CustomSelectors(custom) if custom is not None else custom,
flags
)
def purge() -> None: def purge() -> None:
@@ -75,13 +78,13 @@ def purge() -> None:
def closest( def closest(
select: str, select: str,
tag: 'bs4.Tag', tag: bs4.Tag,
namespaces: Optional[Dict[str, str]] = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: Optional[Dict[str, str]] = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> 'bs4.Tag': ) -> bs4.Tag:
"""Match closest ancestor.""" """Match closest ancestor."""
return compile(select, namespaces, flags, **kwargs).closest(tag) return compile(select, namespaces, flags, **kwargs).closest(tag)
@@ -89,11 +92,11 @@ def closest(
def match( def match(
select: str, select: str,
tag: 'bs4.Tag', tag: bs4.Tag,
namespaces: Optional[Dict[str, str]] = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: Optional[Dict[str, str]] = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> bool: ) -> bool:
"""Match node.""" """Match node."""
@@ -103,13 +106,13 @@ def match(
def filter( # noqa: A001 def filter( # noqa: A001
select: str, select: str,
iterable: Iterable['bs4.Tag'], iterable: Iterable[bs4.Tag],
namespaces: Optional[Dict[str, str]] = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: Optional[Dict[str, str]] = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> List['bs4.Tag']: ) -> list[bs4.Tag]:
"""Filter list of nodes.""" """Filter list of nodes."""
return compile(select, namespaces, flags, **kwargs).filter(iterable) return compile(select, namespaces, flags, **kwargs).filter(iterable)
@@ -117,13 +120,13 @@ def filter( # noqa: A001
def select_one( def select_one(
select: str, select: str,
tag: 'bs4.Tag', tag: bs4.Tag,
namespaces: Optional[Dict[str, str]] = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: Optional[Dict[str, str]] = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> 'bs4.Tag': ) -> bs4.Tag:
"""Select a single tag.""" """Select a single tag."""
return compile(select, namespaces, flags, **kwargs).select_one(tag) return compile(select, namespaces, flags, **kwargs).select_one(tag)
@@ -131,14 +134,14 @@ def select_one(
def select( def select(
select: str, select: str,
tag: 'bs4.Tag', tag: bs4.Tag,
namespaces: Optional[Dict[str, str]] = None, namespaces: dict[str, str] | None = None,
limit: int = 0, limit: int = 0,
flags: int = 0, flags: int = 0,
*, *,
custom: Optional[Dict[str, str]] = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> List['bs4.Tag']: ) -> list[bs4.Tag]:
"""Select the specified tags.""" """Select the specified tags."""
return compile(select, namespaces, flags, **kwargs).select(tag, limit) return compile(select, namespaces, flags, **kwargs).select(tag, limit)
@@ -146,18 +149,17 @@ def select(
def iselect( def iselect(
select: str, select: str,
tag: 'bs4.Tag', tag: bs4.Tag,
namespaces: Optional[Dict[str, str]] = None, namespaces: dict[str, str] | None = None,
limit: int = 0, limit: int = 0,
flags: int = 0, flags: int = 0,
*, *,
custom: Optional[Dict[str, str]] = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> Iterator['bs4.Tag']: ) -> Iterator[bs4.Tag]:
"""Iterate the specified tags.""" """Iterate the specified tags."""
for el in compile(select, namespaces, flags, **kwargs).iselect(tag, limit): yield from compile(select, namespaces, flags, **kwargs).iselect(tag, limit)
yield el
def escape(ident: str) -> str: def escape(ident: str) -> str:

View File

@@ -1,4 +1,5 @@
"""Meta related things.""" """Meta related things."""
from __future__ import annotations
from collections import namedtuple from collections import namedtuple
import re import re
@@ -83,7 +84,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
cls, cls,
major: int, minor: int, micro: int, release: str = "final", major: int, minor: int, micro: int, release: str = "final",
pre: int = 0, post: int = 0, dev: int = 0 pre: int = 0, post: int = 0, dev: int = 0
) -> "Version": ) -> Version:
"""Validate version info.""" """Validate version info."""
# Ensure all parts are positive integers. # Ensure all parts are positive integers.
@@ -92,7 +93,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
raise ValueError("All version parts except 'release' should be integers.") raise ValueError("All version parts except 'release' should be integers.")
if release not in REL_MAP: if release not in REL_MAP:
raise ValueError("'{}' is not a valid release type.".format(release)) raise ValueError(f"'{release}' is not a valid release type.")
# Ensure valid pre-release (we do not allow implicit pre-releases). # Ensure valid pre-release (we do not allow implicit pre-releases).
if ".dev-candidate" < release < "final": if ".dev-candidate" < release < "final":
@@ -117,7 +118,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
elif dev: elif dev:
raise ValueError("Version is not a development release.") raise ValueError("Version is not a development release.")
return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev) return super().__new__(cls, major, minor, micro, release, pre, post, dev)
def _is_pre(self) -> bool: def _is_pre(self) -> bool:
"""Is prerelease.""" """Is prerelease."""
@@ -144,15 +145,15 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
# Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed.. # Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed..
if self.micro == 0: if self.micro == 0:
ver = "{}.{}".format(self.major, self.minor) ver = f"{self.major}.{self.minor}"
else: else:
ver = "{}.{}.{}".format(self.major, self.minor, self.micro) ver = f"{self.major}.{self.minor}.{self.micro}"
if self._is_pre(): if self._is_pre():
ver += '{}{}'.format(REL_MAP[self.release], self.pre) ver += f'{REL_MAP[self.release]}{self.pre}'
if self._is_post(): if self._is_post():
ver += ".post{}".format(self.post) ver += f".post{self.post}"
if self._is_dev(): if self._is_dev():
ver += ".dev{}".format(self.dev) ver += f".dev{self.dev}"
return ver return ver
@@ -163,7 +164,7 @@ def parse_version(ver: str) -> Version:
m = RE_VER.match(ver) m = RE_VER.match(ver)
if m is None: if m is None:
raise ValueError("'{}' is not a valid version".format(ver)) raise ValueError(f"'{ver}' is not a valid version")
# Handle major, minor, micro # Handle major, minor, micro
major = int(m.group('major')) major = int(m.group('major'))
@@ -192,5 +193,5 @@ def parse_version(ver: str) -> Version:
return Version(major, minor, micro, release, pre, post, dev) return Version(major, minor, micro, release, pre, post, dev)
__version_info__ = Version(2, 3, 1, "final") __version_info__ = Version(2, 6, 0, "final")
__version__ = __version_info__._get_canonical() __version__ = __version_info__._get_canonical()

View File

@@ -1,11 +1,12 @@
"""CSS matcher.""" """CSS matcher."""
from __future__ import annotations
from datetime import datetime from datetime import datetime
from . import util from . import util
import re import re
from . import css_types as ct from . import css_types as ct
import unicodedata import unicodedata
import bs4 # type: ignore[import] import bs4 # type: ignore[import-untyped]
from typing import Iterator, Iterable, List, Any, Optional, Tuple, Union, Dict, Callable, Sequence, cast from typing import Iterator, Iterable, Any, Callable, Sequence, cast # noqa: F401
# Empty tag pattern (whitespace okay) # Empty tag pattern (whitespace okay)
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]') RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
@@ -64,12 +65,12 @@ class _FakeParent:
fake parent so we can traverse the root element as a child. fake parent so we can traverse the root element as a child.
""" """
def __init__(self, element: 'bs4.Tag') -> None: def __init__(self, element: bs4.Tag) -> None:
"""Initialize.""" """Initialize."""
self.contents = [element] self.contents = [element]
def __len__(self) -> 'bs4.PageElement': def __len__(self) -> bs4.PageElement:
"""Length.""" """Length."""
return len(self.contents) return len(self.contents)
@@ -84,62 +85,62 @@ class _DocumentNav:
# Fail on unexpected types. # Fail on unexpected types.
if not cls.is_tag(tag): if not cls.is_tag(tag):
raise TypeError("Expected a BeautifulSoup 'Tag', but instead recieved type {}".format(type(tag))) raise TypeError(f"Expected a BeautifulSoup 'Tag', but instead received type {type(tag)}")
@staticmethod @staticmethod
def is_doc(obj: 'bs4.Tag') -> bool: def is_doc(obj: bs4.Tag) -> bool:
"""Is `BeautifulSoup` object.""" """Is `BeautifulSoup` object."""
return isinstance(obj, bs4.BeautifulSoup) return isinstance(obj, bs4.BeautifulSoup)
@staticmethod @staticmethod
def is_tag(obj: 'bs4.PageElement') -> bool: def is_tag(obj: bs4.PageElement) -> bool:
"""Is tag.""" """Is tag."""
return isinstance(obj, bs4.Tag) return isinstance(obj, bs4.Tag)
@staticmethod @staticmethod
def is_declaration(obj: 'bs4.PageElement') -> bool: # pragma: no cover def is_declaration(obj: bs4.PageElement) -> bool: # pragma: no cover
"""Is declaration.""" """Is declaration."""
return isinstance(obj, bs4.Declaration) return isinstance(obj, bs4.Declaration)
@staticmethod @staticmethod
def is_cdata(obj: 'bs4.PageElement') -> bool: def is_cdata(obj: bs4.PageElement) -> bool:
"""Is CDATA.""" """Is CDATA."""
return isinstance(obj, bs4.CData) return isinstance(obj, bs4.CData)
@staticmethod @staticmethod
def is_processing_instruction(obj: 'bs4.PageElement') -> bool: # pragma: no cover def is_processing_instruction(obj: bs4.PageElement) -> bool: # pragma: no cover
"""Is processing instruction.""" """Is processing instruction."""
return isinstance(obj, bs4.ProcessingInstruction) return isinstance(obj, bs4.ProcessingInstruction)
@staticmethod @staticmethod
def is_navigable_string(obj: 'bs4.PageElement') -> bool: def is_navigable_string(obj: bs4.PageElement) -> bool:
"""Is navigable string.""" """Is navigable string."""
return isinstance(obj, bs4.NavigableString) return isinstance(obj, bs4.NavigableString)
@staticmethod @staticmethod
def is_special_string(obj: 'bs4.PageElement') -> bool: def is_special_string(obj: bs4.PageElement) -> bool:
"""Is special string.""" """Is special string."""
return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype)) return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
@classmethod @classmethod
def is_content_string(cls, obj: 'bs4.PageElement') -> bool: def is_content_string(cls, obj: bs4.PageElement) -> bool:
"""Check if node is content string.""" """Check if node is content string."""
return cls.is_navigable_string(obj) and not cls.is_special_string(obj) return cls.is_navigable_string(obj) and not cls.is_special_string(obj)
@staticmethod @staticmethod
def create_fake_parent(el: 'bs4.Tag') -> _FakeParent: def create_fake_parent(el: bs4.Tag) -> _FakeParent:
"""Create fake parent for a given element.""" """Create fake parent for a given element."""
return _FakeParent(el) return _FakeParent(el)
@staticmethod @staticmethod
def is_xml_tree(el: 'bs4.Tag') -> bool: def is_xml_tree(el: bs4.Tag) -> bool:
"""Check if element (or document) is from a XML tree.""" """Check if element (or document) is from a XML tree."""
return bool(el._is_xml) return bool(el._is_xml)
def is_iframe(self, el: 'bs4.Tag') -> bool: def is_iframe(self, el: bs4.Tag) -> bool:
"""Check if element is an `iframe`.""" """Check if element is an `iframe`."""
return bool( return bool(
@@ -147,7 +148,7 @@ class _DocumentNav:
self.is_html_tag(el) # type: ignore[attr-defined] self.is_html_tag(el) # type: ignore[attr-defined]
) )
def is_root(self, el: 'bs4.Tag') -> bool: def is_root(self, el: bs4.Tag) -> bool:
""" """
Return whether element is a root element. Return whether element is a root element.
@@ -161,20 +162,19 @@ class _DocumentNav:
root = parent is not None and self.is_html and self.is_iframe(parent) # type: ignore[attr-defined] root = parent is not None and self.is_html and self.is_iframe(parent) # type: ignore[attr-defined]
return root return root
def get_contents(self, el: 'bs4.Tag', no_iframe: bool = False) -> Iterator['bs4.PageElement']: def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]:
"""Get contents or contents in reverse.""" """Get contents or contents in reverse."""
if not no_iframe or not self.is_iframe(el): if not no_iframe or not self.is_iframe(el):
for content in el.contents: yield from el.contents
yield content
def get_children( def get_children(
self, self,
el: 'bs4.Tag', el: bs4.Tag,
start: Optional[int] = None, start: int | None = None,
reverse: bool = False, reverse: bool = False,
tags: bool = True, tags: bool = True,
no_iframe: bool = False no_iframe: bool = False
) -> Iterator['bs4.PageElement']: ) -> Iterator[bs4.PageElement]:
"""Get children.""" """Get children."""
if not no_iframe or not self.is_iframe(el): if not no_iframe or not self.is_iframe(el):
@@ -195,10 +195,10 @@ class _DocumentNav:
def get_descendants( def get_descendants(
self, self,
el: 'bs4.Tag', el: bs4.Tag,
tags: bool = True, tags: bool = True,
no_iframe: bool = False no_iframe: bool = False
) -> Iterator['bs4.PageElement']: ) -> Iterator[bs4.PageElement]:
"""Get descendants.""" """Get descendants."""
if not no_iframe or not self.is_iframe(el): if not no_iframe or not self.is_iframe(el):
@@ -229,7 +229,7 @@ class _DocumentNav:
if not tags or is_tag: if not tags or is_tag:
yield child yield child
def get_parent(self, el: 'bs4.Tag', no_iframe: bool = False) -> 'bs4.Tag': def get_parent(self, el: bs4.Tag, no_iframe: bool = False) -> bs4.Tag:
"""Get parent.""" """Get parent."""
parent = el.parent parent = el.parent
@@ -238,25 +238,25 @@ class _DocumentNav:
return parent return parent
@staticmethod @staticmethod
def get_tag_name(el: 'bs4.Tag') -> Optional[str]: def get_tag_name(el: bs4.Tag) -> str | None:
"""Get tag.""" """Get tag."""
return cast(Optional[str], el.name) return cast('str | None', el.name)
@staticmethod @staticmethod
def get_prefix_name(el: 'bs4.Tag') -> Optional[str]: def get_prefix_name(el: bs4.Tag) -> str | None:
"""Get prefix.""" """Get prefix."""
return cast(Optional[str], el.prefix) return cast('str | None', el.prefix)
@staticmethod @staticmethod
def get_uri(el: 'bs4.Tag') -> Optional[str]: def get_uri(el: bs4.Tag) -> str | None:
"""Get namespace `URI`.""" """Get namespace `URI`."""
return cast(Optional[str], el.namespace) return cast('str | None', el.namespace)
@classmethod @classmethod
def get_next(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement': def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
"""Get next sibling tag.""" """Get next sibling tag."""
sibling = el.next_sibling sibling = el.next_sibling
@@ -265,7 +265,7 @@ class _DocumentNav:
return sibling return sibling
@classmethod @classmethod
def get_previous(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement': def get_previous(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
"""Get previous sibling tag.""" """Get previous sibling tag."""
sibling = el.previous_sibling sibling = el.previous_sibling
@@ -274,7 +274,7 @@ class _DocumentNav:
return sibling return sibling
@staticmethod @staticmethod
def has_html_ns(el: 'bs4.Tag') -> bool: def has_html_ns(el: bs4.Tag) -> bool:
""" """
Check if element has an HTML namespace. Check if element has an HTML namespace.
@@ -282,17 +282,17 @@ class _DocumentNav:
like we do in the case of `is_html_tag`. like we do in the case of `is_html_tag`.
""" """
ns = getattr(el, 'namespace') if el else None ns = getattr(el, 'namespace') if el else None # noqa: B009
return bool(ns and ns == NS_XHTML) return bool(ns and ns == NS_XHTML)
@staticmethod @staticmethod
def split_namespace(el: 'bs4.Tag', attr_name: str) -> Tuple[Optional[str], Optional[str]]: def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[str | None, str | None]:
"""Return namespace and attribute name without the prefix.""" """Return namespace and attribute name without the prefix."""
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None) return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
@classmethod @classmethod
def normalize_value(cls, value: Any) -> Union[str, Sequence[str]]: def normalize_value(cls, value: Any) -> str | Sequence[str]:
"""Normalize the value to be a string or list of strings.""" """Normalize the value to be a string or list of strings."""
# Treat `None` as empty string. # Treat `None` as empty string.
@@ -327,10 +327,10 @@ class _DocumentNav:
@classmethod @classmethod
def get_attribute_by_name( def get_attribute_by_name(
cls, cls,
el: 'bs4.Tag', el: bs4.Tag,
name: str, name: str,
default: Optional[Union[str, Sequence[str]]] = None default: str | Sequence[str] | None = None
) -> Optional[Union[str, Sequence[str]]]: ) -> str | Sequence[str] | None:
"""Get attribute by name.""" """Get attribute by name."""
value = default value = default
@@ -347,14 +347,14 @@ class _DocumentNav:
return value return value
@classmethod @classmethod
def iter_attributes(cls, el: 'bs4.Tag') -> Iterator[Tuple[str, Optional[Union[str, Sequence[str]]]]]: def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, str | Sequence[str] | None]]:
"""Iterate attributes.""" """Iterate attributes."""
for k, v in el.attrs.items(): for k, v in el.attrs.items():
yield k, cls.normalize_value(v) yield k, cls.normalize_value(v)
@classmethod @classmethod
def get_classes(cls, el: 'bs4.Tag') -> Sequence[str]: def get_classes(cls, el: bs4.Tag) -> Sequence[str]:
"""Get classes.""" """Get classes."""
classes = cls.get_attribute_by_name(el, 'class', []) classes = cls.get_attribute_by_name(el, 'class', [])
@@ -362,14 +362,14 @@ class _DocumentNav:
classes = RE_NOT_WS.findall(classes) classes = RE_NOT_WS.findall(classes)
return cast(Sequence[str], classes) return cast(Sequence[str], classes)
def get_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> str: def get_text(self, el: bs4.Tag, no_iframe: bool = False) -> str:
"""Get text.""" """Get text."""
return ''.join( return ''.join(
[node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)] [node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
) )
def get_own_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> List[str]: def get_own_text(self, el: bs4.Tag, no_iframe: bool = False) -> list[str]:
"""Get Own Text.""" """Get Own Text."""
return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)] return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
@@ -393,7 +393,7 @@ class Inputs:
def validate_week(year: int, week: int) -> bool: def validate_week(year: int, week: int) -> bool:
"""Validate week.""" """Validate week."""
max_week = datetime.strptime("{}-{}-{}".format(12, 31, year), "%m-%d-%Y").isocalendar()[1] max_week = datetime.strptime(f"{12}-{31}-{year}", "%m-%d-%Y").isocalendar()[1]
if max_week == 1: if max_week == 1:
max_week = 53 max_week = 53
return 1 <= week <= max_week return 1 <= week <= max_week
@@ -423,10 +423,10 @@ class Inputs:
return 0 <= minutes <= 59 return 0 <= minutes <= 59
@classmethod @classmethod
def parse_value(cls, itype: str, value: Optional[str]) -> Optional[Tuple[float, ...]]: def parse_value(cls, itype: str, value: str | None) -> tuple[float, ...] | None:
"""Parse the input value.""" """Parse the input value."""
parsed = None # type: Optional[Tuple[float, ...]] parsed = None # type: tuple[float, ...] | None
if value is None: if value is None:
return value return value
if itype == "date": if itype == "date":
@@ -484,19 +484,19 @@ class CSSMatch(_DocumentNav):
def __init__( def __init__(
self, self,
selectors: ct.SelectorList, selectors: ct.SelectorList,
scope: 'bs4.Tag', scope: bs4.Tag,
namespaces: Optional[ct.Namespaces], namespaces: ct.Namespaces | None,
flags: int flags: int
) -> None: ) -> None:
"""Initialize.""" """Initialize."""
self.assert_valid_input(scope) self.assert_valid_input(scope)
self.tag = scope self.tag = scope
self.cached_meta_lang = [] # type: List[Tuple[str, str]] self.cached_meta_lang = [] # type: list[tuple[str, str]]
self.cached_default_forms = [] # type: List[Tuple['bs4.Tag', 'bs4.Tag']] self.cached_default_forms = [] # type: list[tuple[bs4.Tag, bs4.Tag]]
self.cached_indeterminate_forms = [] # type: List[Tuple['bs4.Tag', str, bool]] self.cached_indeterminate_forms = [] # type: list[tuple[bs4.Tag, str, bool]]
self.selectors = selectors self.selectors = selectors
self.namespaces = {} if namespaces is None else namespaces # type: Union[ct.Namespaces, Dict[str, str]] self.namespaces = {} if namespaces is None else namespaces # type: ct.Namespaces | dict[str, str]
self.flags = flags self.flags = flags
self.iframe_restrict = False self.iframe_restrict = False
@@ -527,7 +527,7 @@ class CSSMatch(_DocumentNav):
return self.is_xml or self.has_html_namespace return self.is_xml or self.has_html_namespace
def get_tag_ns(self, el: 'bs4.Tag') -> str: def get_tag_ns(self, el: bs4.Tag) -> str:
"""Get tag namespace.""" """Get tag namespace."""
if self.supports_namespaces(): if self.supports_namespaces():
@@ -539,24 +539,24 @@ class CSSMatch(_DocumentNav):
namespace = NS_XHTML namespace = NS_XHTML
return namespace return namespace
def is_html_tag(self, el: 'bs4.Tag') -> bool: def is_html_tag(self, el: bs4.Tag) -> bool:
"""Check if tag is in HTML namespace.""" """Check if tag is in HTML namespace."""
return self.get_tag_ns(el) == NS_XHTML return self.get_tag_ns(el) == NS_XHTML
def get_tag(self, el: 'bs4.Tag') -> Optional[str]: def get_tag(self, el: bs4.Tag) -> str | None:
"""Get tag.""" """Get tag."""
name = self.get_tag_name(el) name = self.get_tag_name(el)
return util.lower(name) if name is not None and not self.is_xml else name return util.lower(name) if name is not None and not self.is_xml else name
def get_prefix(self, el: 'bs4.Tag') -> Optional[str]: def get_prefix(self, el: bs4.Tag) -> str | None:
"""Get prefix.""" """Get prefix."""
prefix = self.get_prefix_name(el) prefix = self.get_prefix_name(el)
return util.lower(prefix) if prefix is not None and not self.is_xml else prefix return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
def find_bidi(self, el: 'bs4.Tag') -> Optional[int]: def find_bidi(self, el: bs4.Tag) -> int | None:
"""Get directionality from element text.""" """Get directionality from element text."""
for node in self.get_children(el, tags=False): for node in self.get_children(el, tags=False):
@@ -600,13 +600,18 @@ class CSSMatch(_DocumentNav):
ranges = lang_range.split('-') ranges = lang_range.split('-')
subtags = lang_tag.lower().split('-') subtags = lang_tag.lower().split('-')
length = len(ranges) length = len(ranges)
slength = len(subtags)
rindex = 0 rindex = 0
sindex = 0 sindex = 0
r = ranges[rindex] r = ranges[rindex]
s = subtags[sindex] s = subtags[sindex]
# Empty specified language should match unspecified language attributes
if length == 1 and slength == 1 and not r and r == s:
return True
# Primary tag needs to match # Primary tag needs to match
if r != '*' and r != s: if (r != '*' and r != s) or (r == '*' and slength == 1 and not s):
match = False match = False
rindex += 1 rindex += 1
@@ -645,10 +650,10 @@ class CSSMatch(_DocumentNav):
def match_attribute_name( def match_attribute_name(
self, self,
el: 'bs4.Tag', el: bs4.Tag,
attr: str, attr: str,
prefix: Optional[str] prefix: str | None
) -> Optional[Union[str, Sequence[str]]]: ) -> str | Sequence[str] | None:
"""Match attribute name and return value if it exists.""" """Match attribute name and return value if it exists."""
value = None value = None
@@ -696,7 +701,7 @@ class CSSMatch(_DocumentNav):
break break
return value return value
def match_namespace(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool: def match_namespace(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
"""Match the namespace of the element.""" """Match the namespace of the element."""
match = True match = True
@@ -717,7 +722,7 @@ class CSSMatch(_DocumentNav):
match = False match = False
return match return match
def match_attributes(self, el: 'bs4.Tag', attributes: Tuple[ct.SelectorAttribute, ...]) -> bool: def match_attributes(self, el: bs4.Tag, attributes: tuple[ct.SelectorAttribute, ...]) -> bool:
"""Match attributes.""" """Match attributes."""
match = True match = True
@@ -736,7 +741,7 @@ class CSSMatch(_DocumentNav):
break break
return match return match
def match_tagname(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool: def match_tagname(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
"""Match tag name.""" """Match tag name."""
name = (util.lower(tag.name) if not self.is_xml and tag.name is not None else tag.name) name = (util.lower(tag.name) if not self.is_xml and tag.name is not None else tag.name)
@@ -745,7 +750,7 @@ class CSSMatch(_DocumentNav):
name not in (self.get_tag(el), '*') name not in (self.get_tag(el), '*')
) )
def match_tag(self, el: 'bs4.Tag', tag: Optional[ct.SelectorTag]) -> bool: def match_tag(self, el: bs4.Tag, tag: ct.SelectorTag | None) -> bool:
"""Match the tag.""" """Match the tag."""
match = True match = True
@@ -757,7 +762,7 @@ class CSSMatch(_DocumentNav):
match = False match = False
return match return match
def match_past_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool: def match_past_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
"""Match past relationship.""" """Match past relationship."""
found = False found = False
@@ -785,12 +790,12 @@ class CSSMatch(_DocumentNav):
found = self.match_selectors(sibling, relation) found = self.match_selectors(sibling, relation)
return found return found
def match_future_child(self, parent: 'bs4.Tag', relation: ct.SelectorList, recursive: bool = False) -> bool: def match_future_child(self, parent: bs4.Tag, relation: ct.SelectorList, recursive: bool = False) -> bool:
"""Match future child.""" """Match future child."""
match = False match = False
if recursive: if recursive:
children = self.get_descendants # type: Callable[..., Iterator['bs4.Tag']] children = self.get_descendants # type: Callable[..., Iterator[bs4.Tag]]
else: else:
children = self.get_children children = self.get_children
for child in children(parent, no_iframe=self.iframe_restrict): for child in children(parent, no_iframe=self.iframe_restrict):
@@ -799,7 +804,7 @@ class CSSMatch(_DocumentNav):
break break
return match return match
def match_future_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool: def match_future_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
"""Match future relationship.""" """Match future relationship."""
found = False found = False
@@ -822,7 +827,7 @@ class CSSMatch(_DocumentNav):
found = self.match_selectors(sibling, relation) found = self.match_selectors(sibling, relation)
return found return found
def match_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool: def match_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
"""Match relationship to other elements.""" """Match relationship to other elements."""
found = False found = False
@@ -837,7 +842,7 @@ class CSSMatch(_DocumentNav):
return found return found
def match_id(self, el: 'bs4.Tag', ids: Tuple[str, ...]) -> bool: def match_id(self, el: bs4.Tag, ids: tuple[str, ...]) -> bool:
"""Match element's ID.""" """Match element's ID."""
found = True found = True
@@ -847,7 +852,7 @@ class CSSMatch(_DocumentNav):
break break
return found return found
def match_classes(self, el: 'bs4.Tag', classes: Tuple[str, ...]) -> bool: def match_classes(self, el: bs4.Tag, classes: tuple[str, ...]) -> bool:
"""Match element's classes.""" """Match element's classes."""
current_classes = self.get_classes(el) current_classes = self.get_classes(el)
@@ -858,7 +863,7 @@ class CSSMatch(_DocumentNav):
break break
return found return found
def match_root(self, el: 'bs4.Tag') -> bool: def match_root(self, el: bs4.Tag) -> bool:
"""Match element as root.""" """Match element as root."""
is_root = self.is_root(el) is_root = self.is_root(el)
@@ -884,20 +889,20 @@ class CSSMatch(_DocumentNav):
sibling = self.get_next(sibling, tags=False) sibling = self.get_next(sibling, tags=False)
return is_root return is_root
def match_scope(self, el: 'bs4.Tag') -> bool: def match_scope(self, el: bs4.Tag) -> bool:
"""Match element as scope.""" """Match element as scope."""
return self.scope is el return self.scope is el
def match_nth_tag_type(self, el: 'bs4.Tag', child: 'bs4.Tag') -> bool: def match_nth_tag_type(self, el: bs4.Tag, child: bs4.Tag) -> bool:
"""Match tag type for `nth` matches.""" """Match tag type for `nth` matches."""
return( return (
(self.get_tag(child) == self.get_tag(el)) and (self.get_tag(child) == self.get_tag(el)) and
(self.get_tag_ns(child) == self.get_tag_ns(el)) (self.get_tag_ns(child) == self.get_tag_ns(el))
) )
def match_nth(self, el: 'bs4.Tag', nth: 'bs4.Tag') -> bool: def match_nth(self, el: bs4.Tag, nth: bs4.Tag) -> bool:
"""Match `nth` elements.""" """Match `nth` elements."""
matched = True matched = True
@@ -998,7 +1003,7 @@ class CSSMatch(_DocumentNav):
break break
return matched return matched
def match_empty(self, el: 'bs4.Tag') -> bool: def match_empty(self, el: bs4.Tag) -> bool:
"""Check if element is empty (if requested).""" """Check if element is empty (if requested)."""
is_empty = True is_empty = True
@@ -1011,7 +1016,7 @@ class CSSMatch(_DocumentNav):
break break
return is_empty return is_empty
def match_subselectors(self, el: 'bs4.Tag', selectors: Tuple[ct.SelectorList, ...]) -> bool: def match_subselectors(self, el: bs4.Tag, selectors: tuple[ct.SelectorList, ...]) -> bool:
"""Match selectors.""" """Match selectors."""
match = True match = True
@@ -1020,11 +1025,11 @@ class CSSMatch(_DocumentNav):
match = False match = False
return match return match
def match_contains(self, el: 'bs4.Tag', contains: Tuple[ct.SelectorContains, ...]) -> bool: def match_contains(self, el: bs4.Tag, contains: tuple[ct.SelectorContains, ...]) -> bool:
"""Match element if it contains text.""" """Match element if it contains text."""
match = True match = True
content = None # type: Optional[Union[str, Sequence[str]]] content = None # type: str | Sequence[str] | None
for contain_list in contains: for contain_list in contains:
if content is None: if content is None:
if contain_list.own: if contain_list.own:
@@ -1048,7 +1053,7 @@ class CSSMatch(_DocumentNav):
match = False match = False
return match return match
def match_default(self, el: 'bs4.Tag') -> bool: def match_default(self, el: bs4.Tag) -> bool:
"""Match default.""" """Match default."""
match = False match = False
@@ -1087,13 +1092,13 @@ class CSSMatch(_DocumentNav):
break break
return match return match
def match_indeterminate(self, el: 'bs4.Tag') -> bool: def match_indeterminate(self, el: bs4.Tag) -> bool:
"""Match default.""" """Match default."""
match = False match = False
name = cast(str, self.get_attribute_by_name(el, 'name')) name = cast(str, self.get_attribute_by_name(el, 'name'))
def get_parent_form(el: 'bs4.Tag') -> Optional['bs4.Tag']: def get_parent_form(el: bs4.Tag) -> bs4.Tag | None:
"""Find this input's form.""" """Find this input's form."""
form = None form = None
parent = self.get_parent(el, no_iframe=True) parent = self.get_parent(el, no_iframe=True)
@@ -1148,7 +1153,7 @@ class CSSMatch(_DocumentNav):
return match return match
def match_lang(self, el: 'bs4.Tag', langs: Tuple[ct.SelectorLang, ...]) -> bool: def match_lang(self, el: bs4.Tag, langs: tuple[ct.SelectorLang, ...]) -> bool:
"""Match languages.""" """Match languages."""
match = False match = False
@@ -1183,7 +1188,7 @@ class CSSMatch(_DocumentNav):
break break
# Use cached meta language. # Use cached meta language.
if not found_lang and self.cached_meta_lang: if found_lang is None and self.cached_meta_lang:
for cache in self.cached_meta_lang: for cache in self.cached_meta_lang:
if root is cache[0]: if root is cache[0]:
found_lang = cache[1] found_lang = cache[1]
@@ -1217,13 +1222,13 @@ class CSSMatch(_DocumentNav):
found_lang = content found_lang = content
self.cached_meta_lang.append((cast(str, root), cast(str, found_lang))) self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
break break
if found_lang: if found_lang is not None:
break break
if not found_lang: if found_lang is None:
self.cached_meta_lang.append((cast(str, root), '')) self.cached_meta_lang.append((cast(str, root), ''))
# If we determined a language, compare. # If we determined a language, compare.
if found_lang: if found_lang is not None:
for patterns in langs: for patterns in langs:
match = False match = False
for pattern in patterns: for pattern in patterns:
@@ -1234,7 +1239,7 @@ class CSSMatch(_DocumentNav):
return match return match
def match_dir(self, el: 'bs4.Tag', directionality: int) -> bool: def match_dir(self, el: bs4.Tag, directionality: int) -> bool:
"""Check directionality.""" """Check directionality."""
# If we have to match both left and right, we can't match either. # If we have to match both left and right, we can't match either.
@@ -1266,11 +1271,7 @@ class CSSMatch(_DocumentNav):
# Auto handling for text inputs # Auto handling for text inputs
if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0: if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0:
if is_textarea: if is_textarea:
temp = [] value = ''.join(node for node in self.get_contents(el, no_iframe=True) if self.is_content_string(node))
for node in self.get_contents(el, no_iframe=True):
if self.is_content_string(node):
temp.append(node)
value = ''.join(temp)
else: else:
value = cast(str, self.get_attribute_by_name(el, 'value', '')) value = cast(str, self.get_attribute_by_name(el, 'value', ''))
if value: if value:
@@ -1297,7 +1298,7 @@ class CSSMatch(_DocumentNav):
# Match parents direction # Match parents direction
return self.match_dir(self.get_parent(el, no_iframe=True), directionality) return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
def match_range(self, el: 'bs4.Tag', condition: int) -> bool: def match_range(self, el: bs4.Tag, condition: int) -> bool:
""" """
Match range. Match range.
@@ -1337,7 +1338,7 @@ class CSSMatch(_DocumentNav):
return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range
def match_defined(self, el: 'bs4.Tag') -> bool: def match_defined(self, el: bs4.Tag) -> bool:
""" """
Match defined. Match defined.
@@ -1360,7 +1361,7 @@ class CSSMatch(_DocumentNav):
) )
) )
def match_placeholder_shown(self, el: 'bs4.Tag') -> bool: def match_placeholder_shown(self, el: bs4.Tag) -> bool:
""" """
Match placeholder shown according to HTML spec. Match placeholder shown according to HTML spec.
@@ -1375,7 +1376,7 @@ class CSSMatch(_DocumentNav):
return match return match
def match_selectors(self, el: 'bs4.Tag', selectors: ct.SelectorList) -> bool: def match_selectors(self, el: bs4.Tag, selectors: ct.SelectorList) -> bool:
"""Check if element matches one of the selectors.""" """Check if element matches one of the selectors."""
match = False match = False
@@ -1459,7 +1460,7 @@ class CSSMatch(_DocumentNav):
return match return match
def select(self, limit: int = 0) -> Iterator['bs4.Tag']: def select(self, limit: int = 0) -> Iterator[bs4.Tag]:
"""Match all tags under the targeted tag.""" """Match all tags under the targeted tag."""
lim = None if limit < 1 else limit lim = None if limit < 1 else limit
@@ -1472,7 +1473,7 @@ class CSSMatch(_DocumentNav):
if lim < 1: if lim < 1:
break break
def closest(self) -> Optional['bs4.Tag']: def closest(self) -> bs4.Tag | None:
"""Match closest ancestor.""" """Match closest ancestor."""
current = self.tag current = self.tag
@@ -1484,12 +1485,12 @@ class CSSMatch(_DocumentNav):
current = self.get_parent(current) current = self.get_parent(current)
return closest return closest
def filter(self) -> List['bs4.Tag']: # noqa A001 def filter(self) -> list[bs4.Tag]: # noqa A001
"""Filter tag's children.""" """Filter tag's children."""
return [tag for tag in self.get_contents(self.tag) if not self.is_navigable_string(tag) and self.match(tag)] return [tag for tag in self.get_contents(self.tag) if not self.is_navigable_string(tag) and self.match(tag)]
def match(self, el: 'bs4.Tag') -> bool: def match(self, el: bs4.Tag) -> bool:
"""Match.""" """Match."""
return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors) return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)
@@ -1500,8 +1501,8 @@ class SoupSieve(ct.Immutable):
pattern: str pattern: str
selectors: ct.SelectorList selectors: ct.SelectorList
namespaces: Optional[ct.Namespaces] namespaces: ct.Namespaces | None
custom: Dict[str, str] custom: dict[str, str]
flags: int flags: int
__slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash") __slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")
@@ -1510,8 +1511,8 @@ class SoupSieve(ct.Immutable):
self, self,
pattern: str, pattern: str,
selectors: ct.SelectorList, selectors: ct.SelectorList,
namespaces: Optional[ct.Namespaces], namespaces: ct.Namespaces | None,
custom: Optional[ct.CustomSelectors], custom: ct.CustomSelectors | None,
flags: int flags: int
): ):
"""Initialize.""" """Initialize."""
@@ -1524,17 +1525,17 @@ class SoupSieve(ct.Immutable):
flags=flags flags=flags
) )
def match(self, tag: 'bs4.Tag') -> bool: def match(self, tag: bs4.Tag) -> bool:
"""Match.""" """Match."""
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag) return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag)
def closest(self, tag: 'bs4.Tag') -> 'bs4.Tag': def closest(self, tag: bs4.Tag) -> bs4.Tag:
"""Match closest ancestor.""" """Match closest ancestor."""
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest() return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest()
def filter(self, iterable: Iterable['bs4.Tag']) -> List['bs4.Tag']: # noqa A001 def filter(self, iterable: Iterable[bs4.Tag]) -> list[bs4.Tag]: # noqa A001
""" """
Filter. Filter.
@@ -1551,31 +1552,28 @@ class SoupSieve(ct.Immutable):
else: else:
return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)] return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
def select_one(self, tag: 'bs4.Tag') -> 'bs4.Tag': def select_one(self, tag: bs4.Tag) -> bs4.Tag:
"""Select a single tag.""" """Select a single tag."""
tags = self.select(tag, limit=1) tags = self.select(tag, limit=1)
return tags[0] if tags else None return tags[0] if tags else None
def select(self, tag: 'bs4.Tag', limit: int = 0) -> List['bs4.Tag']: def select(self, tag: bs4.Tag, limit: int = 0) -> list[bs4.Tag]:
"""Select the specified tags.""" """Select the specified tags."""
return list(self.iselect(tag, limit)) return list(self.iselect(tag, limit))
def iselect(self, tag: 'bs4.Tag', limit: int = 0) -> Iterator['bs4.Tag']: def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]:
"""Iterate the specified tags.""" """Iterate the specified tags."""
for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit): yield from CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit)
yield el
def __repr__(self) -> str: # pragma: no cover def __repr__(self) -> str: # pragma: no cover
"""Representation.""" """Representation."""
return "SoupSieve(pattern={!r}, namespaces={!r}, custom={!r}, flags={!r})".format( return (
self.pattern, f"SoupSieve(pattern={self.pattern!r}, namespaces={self.namespaces!r}, "
self.namespaces, f"custom={self.custom!r}, flags={self.flags!r})"
self.custom,
self.flags
) )
__str__ = __repr__ __str__ = __repr__

View File

@@ -1,4 +1,5 @@
"""CSS selector parser.""" """CSS selector parser."""
from __future__ import annotations
import re import re
from functools import lru_cache from functools import lru_cache
from . import util from . import util
@@ -6,7 +7,7 @@ from . import css_match as cm
from . import css_types as ct from . import css_types as ct
from .util import SelectorSyntaxError from .util import SelectorSyntaxError
import warnings import warnings
from typing import Optional, Dict, Match, Tuple, Type, Any, List, Union, Iterator, cast from typing import Match, Any, Iterator, cast
UNICODE_REPLACEMENT_CHAR = 0xFFFD UNICODE_REPLACEMENT_CHAR = 0xFFFD
@@ -91,94 +92,81 @@ PSEUDO_SUPPORTED = PSEUDO_SIMPLE | PSEUDO_SIMPLE_NO_MATCH | PSEUDO_COMPLEX | PSE
# Sub-patterns parts # Sub-patterns parts
# Whitespace # Whitespace
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])' NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
WS = r'(?:[ \t]|{})'.format(NEWLINE) WS = fr'(?:[ \t]|{NEWLINE})'
# Comments # Comments
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)' COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included # Whitespace with comments included
WSC = r'(?:{ws}|{comments})'.format(ws=WS, comments=COMMENTS) WSC = fr'(?:{WS}|{COMMENTS})'
# CSS escapes # CSS escapes
CSS_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$))'.format(ws=WS) CSS_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$))'
CSS_STRING_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$|{nl}))'.format(ws=WS, nl=NEWLINE) CSS_STRING_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$|{NEWLINE}))'
# CSS Identifier # CSS Identifier
IDENTIFIER = r''' IDENTIFIER = fr'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})+|--) (?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})*) (?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})*)
'''.format(esc=CSS_ESCAPES) '''
# `nth` content # `nth` content
NTH = r'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){ws}*(?:[-+]){ws}*(?:[0-9]+))?'.format(ws=WSC) NTH = fr'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){WSC}*(?:[-+]){WSC}*(?:[0-9]+))?'
# Value: quoted string or identifier # Value: quoted string or identifier
VALUE = r''' VALUE = fr'''(?:"(?:\\(?:.|{NEWLINE})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{NEWLINE})|[^\\'\r\n\f]+)*?'|{IDENTIFIER}+)'''
(?:"(?:\\(?:.|{nl})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{nl})|[^\\'\r\n\f]+)*?'|{ident}+)
'''.format(nl=NEWLINE, ident=IDENTIFIER)
# Attribute value comparison. `!=` is handled special as it is non-standard. # Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = r''' ATTR = fr'(?:{WSC}*(?P<cmp>[!~^|*$]?=){WSC}*(?P<value>{VALUE})(?:{WSC}*(?P<case>[is]))?)?{WSC}*\]'
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
'''.format(ws=WSC, value=VALUE)
# Selector patterns # Selector patterns
# IDs (`#id`) # IDs (`#id`)
PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER) PAT_ID = fr'\#{IDENTIFIER}'
# Classes (`.class`) # Classes (`.class`)
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER) PAT_CLASS = fr'\.{IDENTIFIER}'
# Prefix:Tag (`prefix|tag`) # Prefix:Tag (`prefix|tag`)
PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER) PAT_TAG = fr'(?P<tag_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<tag_name>{IDENTIFIER}|\*)'
# Attributes (`[attr]`, `[attr=value]`, etc.) # Attributes (`[attr]`, `[attr=value]`, etc.)
PAT_ATTR = r''' PAT_ATTR = fr'\[{WSC}*(?P<attr_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<attr_name>{IDENTIFIER}){ATTR}'
\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
# Pseudo class (`:pseudo-class`, `:pseudo-class(`) # Pseudo class (`:pseudo-class`, `:pseudo-class(`)
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER) PAT_PSEUDO_CLASS = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)?'
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes. # Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
PAT_PSEUDO_CLASS_SPECIAL = r'(?P<name>:{ident})(?P<open>\({ws}*)'.format(ws=WSC, ident=IDENTIFIER) PAT_PSEUDO_CLASS_SPECIAL = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)'
# Custom pseudo class (`:--custom-pseudo`) # Custom pseudo class (`:--custom-pseudo`)
PAT_PSEUDO_CLASS_CUSTOM = r'(?P<name>:(?=--){ident})'.format(ident=IDENTIFIER) PAT_PSEUDO_CLASS_CUSTOM = fr'(?P<name>:(?=--){IDENTIFIER})'
# Nesting ampersand selector. Matches `&`
PAT_AMP = r'&'
# Closing pseudo group (`)`) # Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC) PAT_PSEUDO_CLOSE = fr'{WSC}*\)'
# Pseudo element (`::pseudo-element`) # Pseudo element (`::pseudo-element`)
PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS) PAT_PSEUDO_ELEMENT = fr':{PAT_PSEUDO_CLASS}'
# At rule (`@page`, etc.) (not supported) # At rule (`@page`, etc.) (not supported)
PAT_AT_RULE = r'@P{ident}'.format(ident=IDENTIFIER) PAT_AT_RULE = fr'@P{IDENTIFIER}'
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.) # Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
PAT_PSEUDO_NTH_CHILD = r''' PAT_PSEUDO_NTH_CHILD = fr'''
(?P<pseudo_nth_child>{name} (?P<pseudo_nth_child>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_child>{nth}|even|odd))(?:{wsc}*\)|(?P<of>{comments}*{ws}{wsc}*of{comments}*{ws}{wsc}*)) (?P<nth_child>{NTH}|even|odd))(?:{WSC}*\)|(?P<of>{COMMENTS}*{WS}{WSC}*of{COMMENTS}*{WS}{WSC}*))
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, wsc=WSC, comments=COMMENTS, ws=WS, nth=NTH) '''
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.) # Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
PAT_PSEUDO_NTH_TYPE = r''' PAT_PSEUDO_NTH_TYPE = fr'''
(?P<pseudo_nth_type>{name} (?P<pseudo_nth_type>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_type>{nth}|even|odd)){ws}*\) (?P<nth_type>{NTH}|even|odd)){WSC}*\)
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, nth=NTH) '''
# Pseudo class language (`:lang("*-de", en)`) # Pseudo class language (`:lang("*-de", en)`)
PAT_PSEUDO_LANG = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format( PAT_PSEUDO_LANG = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
# Pseudo class direction (`:dir(ltr)`) # Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = r'{name}(?P<dir>ltr|rtl){ws}*\)'.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC) PAT_PSEUDO_DIR = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<dir>ltr|rtl){WSC}*\)'
# Combining characters (`>`, `~`, ` `, `+`, `,`) # Combining characters (`>`, `~`, ` `, `+`, `,`)
PAT_COMBINE = r'{wsc}*?(?P<relation>[,+>~]|{ws}(?![,+>~])){wsc}*'.format(ws=WS, wsc=WSC) PAT_COMBINE = fr'{WSC}*?(?P<relation>[,+>~]|{WS}(?![,+>~])){WSC}*'
# Extra: Contains (`:contains(text)`) # Extra: Contains (`:contains(text)`)
PAT_PSEUDO_CONTAINS = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format( PAT_PSEUDO_CONTAINS = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
# Regular expressions # Regular expressions
# CSS escape pattern # CSS escape pattern
RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$))'.format(ws=WSC), re.I) RE_CSS_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WSC}?)|(\\[^\r\n\f])|(\\$))', re.I)
RE_CSS_STR_ESC = re.compile( RE_CSS_STR_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$)|(\\{NEWLINE}))', re.I)
r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$)|(\\{nl}))'.format(ws=WS, nl=NEWLINE), re.I
)
# Pattern to break up `nth` specifiers # Pattern to break up `nth` specifiers
RE_NTH = re.compile( RE_NTH = re.compile(fr'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){WSC}*(?P<s2>[-+]){WSC}*(?P<b>[0-9]+))?', re.I)
r'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){ws}*(?P<s2>[-+]){ws}*(?P<b>[0-9]+))?'.format(ws=WSC),
re.I
)
# Pattern to iterate multiple values. # Pattern to iterate multiple values.
RE_VALUES = re.compile(r'(?:(?P<value>{value})|(?P<split>{ws}*,{ws}*))'.format(ws=WSC, value=VALUE), re.X) RE_VALUES = re.compile(fr'(?:(?P<value>{VALUE})|(?P<split>{WSC}*,{WSC}*))', re.X)
# Whitespace checks # Whitespace checks
RE_WS = re.compile(WS) RE_WS = re.compile(WS)
RE_WS_BEGIN = re.compile('^{}*'.format(WSC)) RE_WS_BEGIN = re.compile(fr'^{WSC}*')
RE_WS_END = re.compile('{}*$'.format(WSC)) RE_WS_END = re.compile(fr'{WSC}*$')
RE_CUSTOM = re.compile(r'^{}$'.format(PAT_PSEUDO_CLASS_CUSTOM), re.X) RE_CUSTOM = re.compile(fr'^{PAT_PSEUDO_CLASS_CUSTOM}$', re.X)
# Constants # Constants
# List split token # List split token
@@ -206,8 +194,8 @@ _MAXCACHE = 500
@lru_cache(maxsize=_MAXCACHE) @lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile( def _cached_css_compile(
pattern: str, pattern: str,
namespaces: Optional[ct.Namespaces], namespaces: ct.Namespaces | None,
custom: Optional[ct.CustomSelectors], custom: ct.CustomSelectors | None,
flags: int flags: int
) -> cm.SoupSieve: ) -> cm.SoupSieve:
"""Cached CSS compile.""" """Cached CSS compile."""
@@ -232,7 +220,7 @@ def _purge_cache() -> None:
_cached_css_compile.cache_clear() _cached_css_compile.cache_clear()
def process_custom(custom: Optional[ct.CustomSelectors]) -> Dict[str, Union[str, ct.SelectorList]]: def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
"""Process custom.""" """Process custom."""
custom_selectors = {} custom_selectors = {}
@@ -240,9 +228,9 @@ def process_custom(custom: Optional[ct.CustomSelectors]) -> Dict[str, Union[str,
for key, value in custom.items(): for key, value in custom.items():
name = util.lower(key) name = util.lower(key)
if RE_CUSTOM.match(name) is None: if RE_CUSTOM.match(name) is None:
raise SelectorSyntaxError("The name '{}' is not a valid custom pseudo-class name".format(name)) raise SelectorSyntaxError(f"The name '{name}' is not a valid custom pseudo-class name")
if name in custom_selectors: if name in custom_selectors:
raise KeyError("The custom selector '{}' has already been registered".format(name)) raise KeyError(f"The custom selector '{name}' has already been registered")
custom_selectors[css_unescape(name)] = value custom_selectors[css_unescape(name)] = value
return custom_selectors return custom_selectors
@@ -282,23 +270,23 @@ def escape(ident: str) -> str:
start_dash = length > 0 and ident[0] == '-' start_dash = length > 0 and ident[0] == '-'
if length == 1 and start_dash: if length == 1 and start_dash:
# Need to escape identifier that is a single `-` with no other characters # Need to escape identifier that is a single `-` with no other characters
string.append('\\{}'.format(ident)) string.append(f'\\{ident}')
else: else:
for index, c in enumerate(ident): for index, c in enumerate(ident):
codepoint = ord(c) codepoint = ord(c)
if codepoint == 0x00: if codepoint == 0x00:
string.append('\ufffd') string.append('\ufffd')
elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F: elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
string.append('\\{:x} '.format(codepoint)) string.append(f'\\{codepoint:x} ')
elif (index == 0 or (start_dash and index == 1)) and (0x30 <= codepoint <= 0x39): elif (index == 0 or (start_dash and index == 1)) and (0x30 <= codepoint <= 0x39):
string.append('\\{:x} '.format(codepoint)) string.append(f'\\{codepoint:x} ')
elif ( elif (
codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or (0x30 <= codepoint <= 0x39) or codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or (0x30 <= codepoint <= 0x39) or
(0x30 <= codepoint <= 0x39) or (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A) (0x30 <= codepoint <= 0x39) or (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A)
): ):
string.append(c) string.append(c)
else: else:
string.append('\\{}'.format(c)) string.append(f'\\{c}')
return ''.join(string) return ''.join(string)
@@ -316,7 +304,7 @@ class SelectorPattern:
return self.name return self.name
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]: def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
"""Match the selector.""" """Match the selector."""
return self.re_pattern.match(selector, index) return self.re_pattern.match(selector, index)
@@ -325,7 +313,7 @@ class SelectorPattern:
class SpecialPseudoPattern(SelectorPattern): class SpecialPseudoPattern(SelectorPattern):
"""Selector pattern.""" """Selector pattern."""
def __init__(self, patterns: Tuple[Tuple[str, Tuple[str, ...], str, Type[SelectorPattern]], ...]) -> None: def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None:
"""Initialize.""" """Initialize."""
self.patterns = {} self.patterns = {}
@@ -335,7 +323,7 @@ class SpecialPseudoPattern(SelectorPattern):
for pseudo in p[1]: for pseudo in p[1]:
self.patterns[pseudo] = pattern self.patterns[pseudo] = pattern
self.matched_name = None # type: Optional[SelectorPattern] self.matched_name = None # type: SelectorPattern | None
self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U) self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)
def get_name(self) -> str: def get_name(self) -> str:
@@ -343,7 +331,7 @@ class SpecialPseudoPattern(SelectorPattern):
return '' if self.matched_name is None else self.matched_name.get_name() return '' if self.matched_name is None else self.matched_name.get_name()
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]: def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
"""Match the selector.""" """Match the selector."""
pseudo = None pseudo = None
@@ -371,20 +359,20 @@ class _Selector:
def __init__(self, **kwargs: Any) -> None: def __init__(self, **kwargs: Any) -> None:
"""Initialize.""" """Initialize."""
self.tag = kwargs.get('tag', None) # type: Optional[ct.SelectorTag] self.tag = kwargs.get('tag', None) # type: ct.SelectorTag | None
self.ids = kwargs.get('ids', []) # type: List[str] self.ids = kwargs.get('ids', []) # type: list[str]
self.classes = kwargs.get('classes', []) # type: List[str] self.classes = kwargs.get('classes', []) # type: list[str]
self.attributes = kwargs.get('attributes', []) # type: List[ct.SelectorAttribute] self.attributes = kwargs.get('attributes', []) # type: list[ct.SelectorAttribute]
self.nth = kwargs.get('nth', []) # type: List[ct.SelectorNth] self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth]
self.selectors = kwargs.get('selectors', []) # type: List[ct.SelectorList] self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList]
self.relations = kwargs.get('relations', []) # type: List[_Selector] self.relations = kwargs.get('relations', []) # type: list[_Selector]
self.rel_type = kwargs.get('rel_type', None) # type: Optional[str] self.rel_type = kwargs.get('rel_type', None) # type: str | None
self.contains = kwargs.get('contains', []) # type: List[ct.SelectorContains] self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains]
self.lang = kwargs.get('lang', []) # type: List[ct.SelectorLang] self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang]
self.flags = kwargs.get('flags', 0) # type: int self.flags = kwargs.get('flags', 0) # type: int
self.no_match = kwargs.get('no_match', False) # type: bool self.no_match = kwargs.get('no_match', False) # type: bool
def _freeze_relations(self, relations: List['_Selector']) -> ct.SelectorList: def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList:
"""Freeze relation.""" """Freeze relation."""
if relations: if relations:
@@ -394,7 +382,7 @@ class _Selector:
else: else:
return ct.SelectorList() return ct.SelectorList()
def freeze(self) -> Union[ct.Selector, ct.SelectorNull]: def freeze(self) -> ct.Selector | ct.SelectorNull:
"""Freeze self.""" """Freeze self."""
if self.no_match: if self.no_match:
@@ -418,11 +406,10 @@ class _Selector:
"""String representation.""" """String representation."""
return ( return (
'_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, ' f'_Selector(tag={self.tag!r}, ids={self.ids!r}, classes={self.classes!r}, attributes={self.attributes!r}, '
'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})' f'nth={self.nth!r}, selectors={self.selectors!r}, relations={self.relations!r}, '
).format( f'rel_type={self.rel_type!r}, contains={self.contains!r}, lang={self.lang!r}, flags={self.flags!r}, '
self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors, f'no_match={self.no_match!r})'
self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match
) )
__repr__ = __str__ __repr__ = __str__
@@ -450,6 +437,7 @@ class CSSParser:
SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM), SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS), SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS),
SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT), SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT),
SelectorPattern("amp", PAT_AMP),
SelectorPattern("at_rule", PAT_AT_RULE), SelectorPattern("at_rule", PAT_AT_RULE),
SelectorPattern("id", PAT_ID), SelectorPattern("id", PAT_ID),
SelectorPattern("class", PAT_CLASS), SelectorPattern("class", PAT_CLASS),
@@ -461,7 +449,7 @@ class CSSParser:
def __init__( def __init__(
self, self,
selector: str, selector: str,
custom: Optional[Dict[str, Union[str, ct.SelectorList]]] = None, custom: dict[str, str | ct.SelectorList] | None = None,
flags: int = 0 flags: int = 0
) -> None: ) -> None:
"""Initialize.""" """Initialize."""
@@ -562,7 +550,7 @@ class CSSParser:
selector = self.custom.get(pseudo) selector = self.custom.get(pseudo)
if selector is None: if selector is None:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Undefined custom selector '{}' found at postion {}".format(pseudo, m.end(0)), f"Undefined custom selector '{pseudo}' found at position {m.end(0)}",
self.pattern, self.pattern,
m.end(0) m.end(0)
) )
@@ -583,9 +571,9 @@ class CSSParser:
sel: _Selector, sel: _Selector,
m: Match[str], m: Match[str],
has_selector: bool, has_selector: bool,
iselector: Iterator[Tuple[str, Match[str]]], iselector: Iterator[tuple[str, Match[str]]],
is_html: bool is_html: bool
) -> Tuple[bool, bool]: ) -> tuple[bool, bool]:
"""Parse pseudo class.""" """Parse pseudo class."""
complex_pseudo = False complex_pseudo = False
@@ -662,13 +650,16 @@ class CSSParser:
has_selector = True has_selector = True
elif pseudo in PSEUDO_SUPPORTED: elif pseudo in PSEUDO_SUPPORTED:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Invalid syntax for pseudo class '{}'".format(pseudo), f"Invalid syntax for pseudo class '{pseudo}'",
self.pattern, self.pattern,
m.start(0) m.start(0)
) )
else: else:
raise NotImplementedError( raise SelectorSyntaxError(
"'{}' pseudo-class is not implemented at this time".format(pseudo) f"'{pseudo}' was detected as a pseudo-class and is either unsupported or invalid. "
"If the syntax was not intended to be recognized as a pseudo-class, please escape the colon.",
self.pattern,
m.start(0)
) )
return has_selector, is_html return has_selector, is_html
@@ -678,7 +669,7 @@ class CSSParser:
sel: _Selector, sel: _Selector,
m: Match[str], m: Match[str],
has_selector: bool, has_selector: bool,
iselector: Iterator[Tuple[str, Match[str]]] iselector: Iterator[tuple[str, Match[str]]]
) -> bool: ) -> bool:
"""Parse `nth` pseudo.""" """Parse `nth` pseudo."""
@@ -743,7 +734,7 @@ class CSSParser:
sel: _Selector, sel: _Selector,
name: str, name: str,
has_selector: bool, has_selector: bool,
iselector: Iterator[Tuple[str, Match[str]]], iselector: Iterator[tuple[str, Match[str]]],
index: int index: int
) -> bool: ) -> bool:
"""Parse pseudo with opening bracket.""" """Parse pseudo with opening bracket."""
@@ -752,7 +743,7 @@ class CSSParser:
if name == ':not': if name == ':not':
flags |= FLG_NOT flags |= FLG_NOT
elif name == ':has': elif name == ':has':
flags |= FLG_RELATIVE | FLG_FORGIVE flags |= FLG_RELATIVE
elif name in (':where', ':is'): elif name in (':where', ':is'):
flags |= FLG_FORGIVE flags |= FLG_FORGIVE
@@ -766,21 +757,16 @@ class CSSParser:
sel: _Selector, sel: _Selector,
m: Match[str], m: Match[str],
has_selector: bool, has_selector: bool,
selectors: List[_Selector], selectors: list[_Selector],
rel_type: str, rel_type: str,
index: int index: int
) -> Tuple[bool, _Selector, str]: ) -> tuple[bool, _Selector, str]:
"""Parse combinator tokens.""" """Parse combinator tokens."""
combinator = m.group('relation').strip() combinator = m.group('relation').strip()
if not combinator: if not combinator:
combinator = WS_COMBINATOR combinator = WS_COMBINATOR
if combinator == COMMA_COMBINATOR: if combinator == COMMA_COMBINATOR:
if not has_selector:
# If we've not captured any selector parts, the comma is either at the beginning of the pattern
# or following another comma, both of which are unexpected. But shouldn't fail the pseudo-class.
sel.no_match = True
sel.rel_type = rel_type sel.rel_type = rel_type
selectors[-1].relations.append(sel) selectors[-1].relations.append(sel)
rel_type = ":" + WS_COMBINATOR rel_type = ":" + WS_COMBINATOR
@@ -797,7 +783,7 @@ class CSSParser:
# multiple non-whitespace combinators. So if the current combinator is not a whitespace, # multiple non-whitespace combinators. So if the current combinator is not a whitespace,
# then we've hit the multiple combinator case, so we should fail. # then we've hit the multiple combinator case, so we should fail.
raise SelectorSyntaxError( raise SelectorSyntaxError(
'The multiple combinators at position {}'.format(index), f'The multiple combinators at position {index}',
self.pattern, self.pattern,
index index
) )
@@ -814,12 +800,12 @@ class CSSParser:
sel: _Selector, sel: _Selector,
m: Match[str], m: Match[str],
has_selector: bool, has_selector: bool,
selectors: List[_Selector], selectors: list[_Selector],
relations: List[_Selector], relations: list[_Selector],
is_pseudo: bool, is_pseudo: bool,
is_forgive: bool, is_forgive: bool,
index: int index: int
) -> Tuple[bool, _Selector]: ) -> tuple[bool, _Selector]:
"""Parse combinator tokens.""" """Parse combinator tokens."""
combinator = m.group('relation').strip() combinator = m.group('relation').strip()
@@ -828,7 +814,7 @@ class CSSParser:
if not has_selector: if not has_selector:
if not is_forgive or combinator != COMMA_COMBINATOR: if not is_forgive or combinator != COMMA_COMBINATOR:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"The combinator '{}' at postion {}, must have a selector before it".format(combinator, index), f"The combinator '{combinator}' at position {index}, must have a selector before it",
self.pattern, self.pattern,
index index
) )
@@ -873,7 +859,7 @@ class CSSParser:
pseudo = util.lower(css_unescape(m.group('name'))) pseudo = util.lower(css_unescape(m.group('name')))
if pseudo == ":contains": if pseudo == ":contains":
warnings.warn( warnings.warn( # noqa: B028
"The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.", "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
FutureWarning FutureWarning
) )
@@ -924,7 +910,7 @@ class CSSParser:
def parse_selectors( def parse_selectors(
self, self,
iselector: Iterator[Tuple[str, Match[str]]], iselector: Iterator[tuple[str, Match[str]]],
index: int = 0, index: int = 0,
flags: int = 0 flags: int = 0
) -> ct.SelectorList: ) -> ct.SelectorList:
@@ -935,7 +921,7 @@ class CSSParser:
selectors = [] selectors = []
has_selector = False has_selector = False
closed = False closed = False
relations = [] # type: List[_Selector] relations = [] # type: list[_Selector]
rel_type = ":" + WS_COMBINATOR rel_type = ":" + WS_COMBINATOR
# Setup various flags # Setup various flags
@@ -986,13 +972,16 @@ class CSSParser:
# Handle parts # Handle parts
if key == "at_rule": if key == "at_rule":
raise NotImplementedError("At-rules found at position {}".format(m.start(0))) raise NotImplementedError(f"At-rules found at position {m.start(0)}")
elif key == "amp":
sel.flags |= ct.SEL_SCOPE
has_selector = True
elif key == 'pseudo_class_custom': elif key == 'pseudo_class_custom':
has_selector = self.parse_pseudo_class_custom(sel, m, has_selector) has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
elif key == 'pseudo_class': elif key == 'pseudo_class':
has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html) has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
elif key == 'pseudo_element': elif key == 'pseudo_element':
raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0))) raise NotImplementedError(f"Pseudo-element found at position {m.start(0)}")
elif key == 'pseudo_contains': elif key == 'pseudo_contains':
has_selector = self.parse_pseudo_contains(sel, m, has_selector) has_selector = self.parse_pseudo_contains(sel, m, has_selector)
elif key in ('pseudo_nth_type', 'pseudo_nth_child'): elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
@@ -1007,7 +996,7 @@ class CSSParser:
if not has_selector: if not has_selector:
if not is_forgive: if not is_forgive:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Expected a selector at postion {}".format(m.start(0)), f"Expected a selector at position {m.start(0)}",
self.pattern, self.pattern,
m.start(0) m.start(0)
) )
@@ -1017,7 +1006,7 @@ class CSSParser:
break break
else: else:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Unmatched pseudo-class close at postion {}".format(m.start(0)), f"Unmatched pseudo-class close at position {m.start(0)}",
self.pattern, self.pattern,
m.start(0) m.start(0)
) )
@@ -1035,7 +1024,7 @@ class CSSParser:
elif key == 'tag': elif key == 'tag':
if has_selector: if has_selector:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Tag name found at position {} instead of at the start".format(m.start(0)), f"Tag name found at position {m.start(0)} instead of at the start",
self.pattern, self.pattern,
m.start(0) m.start(0)
) )
@@ -1050,7 +1039,7 @@ class CSSParser:
# Handle selectors that are not closed # Handle selectors that are not closed
if is_open and not closed: if is_open and not closed:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Unclosed pseudo-class at position {}".format(index), f"Unclosed pseudo-class at position {index}",
self.pattern, self.pattern,
index index
) )
@@ -1069,28 +1058,18 @@ class CSSParser:
selectors.append(sel) selectors.append(sel)
# Forgive empty slots in pseudo-classes that have lists (and are forgiving) # Forgive empty slots in pseudo-classes that have lists (and are forgiving)
elif is_forgive: elif is_forgive and (not selectors or not relations):
if is_relative: # Handle normal pseudo-classes with empty slots like `:is()` etc.
# Handle relative selectors pseudo-classes with empty slots like `:has()` sel.no_match = True
if selectors and selectors[-1].rel_type is None and rel_type == ': ': del relations[:]
sel.rel_type = rel_type selectors.append(sel)
sel.no_match = True has_selector = True
selectors[-1].relations.append(sel)
has_selector = True
else:
# Handle normal pseudo-classes with empty slots
if not selectors or not relations:
# Others like `:is()` etc.
sel.no_match = True
del relations[:]
selectors.append(sel)
has_selector = True
if not has_selector: if not has_selector:
# We will always need to finish a selector when `:has()` is used as it leads with combining. # We will always need to finish a selector when `:has()` is used as it leads with combining.
# May apply to others as well. # May apply to others as well.
raise SelectorSyntaxError( raise SelectorSyntaxError(
'Expected a selector at position {}'.format(index), f'Expected a selector at position {index}',
self.pattern, self.pattern,
index index
) )
@@ -1112,7 +1091,7 @@ class CSSParser:
# Return selector list # Return selector list
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html) return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
def selector_iter(self, pattern: str) -> Iterator[Tuple[str, Match[str]]]: def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]:
"""Iterate selector tokens.""" """Iterate selector tokens."""
# Ignore whitespace and comments at start and end of pattern # Ignore whitespace and comments at start and end of pattern
@@ -1122,7 +1101,7 @@ class CSSParser:
end = (m.start(0) - 1) if m else (len(pattern) - 1) end = (m.start(0) - 1) if m else (len(pattern) - 1)
if self.debug: # pragma: no cover if self.debug: # pragma: no cover
print('## PARSING: {!r}'.format(pattern)) print(f'## PARSING: {pattern!r}')
while index <= end: while index <= end:
m = None m = None
for v in self.css_tokens: for v in self.css_tokens:
@@ -1130,7 +1109,7 @@ class CSSParser:
if m: if m:
name = v.get_name() name = v.get_name()
if self.debug: # pragma: no cover if self.debug: # pragma: no cover
print("TOKEN: '{}' --> {!r} at position {}".format(name, m.group(0), m.start(0))) print(f"TOKEN: '{name}' --> {m.group(0)!r} at position {m.start(0)}")
index = m.end(0) index = m.end(0)
yield name, m yield name, m
break break
@@ -1140,15 +1119,15 @@ class CSSParser:
# throw an exception mentioning that the known selector type is in error; # throw an exception mentioning that the known selector type is in error;
# otherwise, report the invalid character. # otherwise, report the invalid character.
if c == '[': if c == '[':
msg = "Malformed attribute selector at position {}".format(index) msg = f"Malformed attribute selector at position {index}"
elif c == '.': elif c == '.':
msg = "Malformed class selector at position {}".format(index) msg = f"Malformed class selector at position {index}"
elif c == '#': elif c == '#':
msg = "Malformed id selector at position {}".format(index) msg = f"Malformed id selector at position {index}"
elif c == ':': elif c == ':':
msg = "Malformed pseudo-class selector at position {}".format(index) msg = f"Malformed pseudo-class selector at position {index}"
else: else:
msg = "Invalid character {!r} position {}".format(c, index) msg = f"Invalid character {c!r} position {index}"
raise SelectorSyntaxError(msg, self.pattern, index) raise SelectorSyntaxError(msg, self.pattern, index)
if self.debug: # pragma: no cover if self.debug: # pragma: no cover
print('## END PARSING') print('## END PARSING')

View File

@@ -1,7 +1,8 @@
"""CSS selector structure items.""" """CSS selector structure items."""
from __future__ import annotations
import copyreg import copyreg
from .pretty import pretty from .pretty import pretty
from typing import Any, Type, Tuple, Union, Dict, Iterator, Hashable, Optional, Pattern, Iterable, Mapping from typing import Any, Iterator, Hashable, Pattern, Iterable, Mapping
__all__ = ( __all__ = (
'Selector', 'Selector',
@@ -33,7 +34,7 @@ SEL_PLACEHOLDER_SHOWN = 0x400
class Immutable: class Immutable:
"""Immutable.""" """Immutable."""
__slots__: Tuple[str, ...] = ('_hash',) __slots__: tuple[str, ...] = ('_hash',)
_hash: int _hash: int
@@ -44,11 +45,11 @@ class Immutable:
for k, v in kwargs.items(): for k, v in kwargs.items():
temp.append(type(v)) temp.append(type(v))
temp.append(v) temp.append(v)
super(Immutable, self).__setattr__(k, v) super().__setattr__(k, v)
super(Immutable, self).__setattr__('_hash', hash(tuple(temp))) super().__setattr__('_hash', hash(tuple(temp)))
@classmethod @classmethod
def __base__(cls) -> "Type[Immutable]": def __base__(cls) -> type[Immutable]:
"""Get base class.""" """Get base class."""
return cls return cls
@@ -58,7 +59,7 @@ class Immutable:
return ( return (
isinstance(other, self.__base__()) and isinstance(other, self.__base__()) and
all([getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash']) all(getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash')
) )
def __ne__(self, other: Any) -> bool: def __ne__(self, other: Any) -> bool:
@@ -66,7 +67,7 @@ class Immutable:
return ( return (
not isinstance(other, self.__base__()) or not isinstance(other, self.__base__()) or
any([getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash']) any(getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash')
) )
def __hash__(self) -> int: def __hash__(self) -> int:
@@ -77,14 +78,13 @@ class Immutable:
def __setattr__(self, name: str, value: Any) -> None: def __setattr__(self, name: str, value: Any) -> None:
"""Prevent mutability.""" """Prevent mutability."""
raise AttributeError("'{}' is immutable".format(self.__class__.__name__)) raise AttributeError(f"'{self.__class__.__name__}' is immutable")
def __repr__(self) -> str: # pragma: no cover def __repr__(self) -> str: # pragma: no cover
"""Representation.""" """Representation."""
return "{}({})".format( r = ', '.join([f"{k}={getattr(self, k)!r}" for k in self.__slots__[:-1]])
self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]]) return f"{self.__class__.__name__}({r})"
)
__str__ = __repr__ __str__ = __repr__
@@ -99,7 +99,7 @@ class ImmutableDict(Mapping[Any, Any]):
def __init__( def __init__(
self, self,
arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]] arg: dict[Any, Any] | Iterable[tuple[Any, Any]]
) -> None: ) -> None:
"""Initialize.""" """Initialize."""
@@ -107,14 +107,14 @@ class ImmutableDict(Mapping[Any, Any]):
self._d = dict(arg) self._d = dict(arg)
self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())])) self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
def _validate(self, arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]) -> None: def _validate(self, arg: dict[Any, Any] | Iterable[tuple[Any, Any]]) -> None:
"""Validate arguments.""" """Validate arguments."""
if isinstance(arg, dict): if isinstance(arg, dict):
if not all([isinstance(v, Hashable) for v in arg.values()]): if not all(isinstance(v, Hashable) for v in arg.values()):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} values must be hashable')
elif not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg]): elif not all(isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} values must be hashable')
def __iter__(self) -> Iterator[Any]: def __iter__(self) -> Iterator[Any]:
"""Iterator.""" """Iterator."""
@@ -139,7 +139,7 @@ class ImmutableDict(Mapping[Any, Any]):
def __repr__(self) -> str: # pragma: no cover def __repr__(self) -> str: # pragma: no cover
"""Representation.""" """Representation."""
return "{!r}".format(self._d) return f"{self._d!r}"
__str__ = __repr__ __str__ = __repr__
@@ -147,37 +147,37 @@ class ImmutableDict(Mapping[Any, Any]):
class Namespaces(ImmutableDict): class Namespaces(ImmutableDict):
"""Namespaces.""" """Namespaces."""
def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None: def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
"""Initialize.""" """Initialize."""
super().__init__(arg) super().__init__(arg)
def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None: def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
"""Validate arguments.""" """Validate arguments."""
if isinstance(arg, dict): if isinstance(arg, dict):
if not all([isinstance(v, str) for v in arg.values()]): if not all(isinstance(v, str) for v in arg.values()):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} values must be hashable')
elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]): elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
class CustomSelectors(ImmutableDict): class CustomSelectors(ImmutableDict):
"""Custom selectors.""" """Custom selectors."""
def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None: def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
"""Initialize.""" """Initialize."""
super().__init__(arg) super().__init__(arg)
def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None: def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
"""Validate arguments.""" """Validate arguments."""
if isinstance(arg, dict): if isinstance(arg, dict):
if not all([isinstance(v, str) for v in arg.values()]): if not all(isinstance(v, str) for v in arg.values()):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} values must be hashable')
elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]): elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
class Selector(Immutable): class Selector(Immutable):
@@ -188,30 +188,30 @@ class Selector(Immutable):
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash' 'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
) )
tag: Optional['SelectorTag'] tag: SelectorTag | None
ids: Tuple[str, ...] ids: tuple[str, ...]
classes: Tuple[str, ...] classes: tuple[str, ...]
attributes: Tuple['SelectorAttribute', ...] attributes: tuple[SelectorAttribute, ...]
nth: Tuple['SelectorNth', ...] nth: tuple[SelectorNth, ...]
selectors: Tuple['SelectorList', ...] selectors: tuple[SelectorList, ...]
relation: 'SelectorList' relation: SelectorList
rel_type: Optional[str] rel_type: str | None
contains: Tuple['SelectorContains', ...] contains: tuple[SelectorContains, ...]
lang: Tuple['SelectorLang', ...] lang: tuple[SelectorLang, ...]
flags: int flags: int
def __init__( def __init__(
self, self,
tag: Optional['SelectorTag'], tag: SelectorTag | None,
ids: Tuple[str, ...], ids: tuple[str, ...],
classes: Tuple[str, ...], classes: tuple[str, ...],
attributes: Tuple['SelectorAttribute', ...], attributes: tuple[SelectorAttribute, ...],
nth: Tuple['SelectorNth', ...], nth: tuple[SelectorNth, ...],
selectors: Tuple['SelectorList', ...], selectors: tuple[SelectorList, ...],
relation: 'SelectorList', relation: SelectorList,
rel_type: Optional[str], rel_type: str | None,
contains: Tuple['SelectorContains', ...], contains: tuple[SelectorContains, ...],
lang: Tuple['SelectorLang', ...], lang: tuple[SelectorLang, ...],
flags: int flags: int
): ):
"""Initialize.""" """Initialize."""
@@ -246,9 +246,9 @@ class SelectorTag(Immutable):
__slots__ = ("name", "prefix", "_hash") __slots__ = ("name", "prefix", "_hash")
name: str name: str
prefix: Optional[str] prefix: str | None
def __init__(self, name: str, prefix: Optional[str]) -> None: def __init__(self, name: str, prefix: str | None) -> None:
"""Initialize.""" """Initialize."""
super().__init__(name=name, prefix=prefix) super().__init__(name=name, prefix=prefix)
@@ -261,15 +261,15 @@ class SelectorAttribute(Immutable):
attribute: str attribute: str
prefix: str prefix: str
pattern: Optional[Pattern[str]] pattern: Pattern[str] | None
xml_type_pattern: Optional[Pattern[str]] xml_type_pattern: Pattern[str] | None
def __init__( def __init__(
self, self,
attribute: str, attribute: str,
prefix: str, prefix: str,
pattern: Optional[Pattern[str]], pattern: Pattern[str] | None,
xml_type_pattern: Optional[Pattern[str]] xml_type_pattern: Pattern[str] | None
) -> None: ) -> None:
"""Initialize.""" """Initialize."""
@@ -286,7 +286,7 @@ class SelectorContains(Immutable):
__slots__ = ("text", "own", "_hash") __slots__ = ("text", "own", "_hash")
text: Tuple[str, ...] text: tuple[str, ...]
own: bool own: bool
def __init__(self, text: Iterable[str], own: bool) -> None: def __init__(self, text: Iterable[str], own: bool) -> None:
@@ -305,9 +305,9 @@ class SelectorNth(Immutable):
b: int b: int
of_type: bool of_type: bool
last: bool last: bool
selectors: 'SelectorList' selectors: SelectorList
def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: 'SelectorList') -> None: def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: SelectorList) -> None:
"""Initialize.""" """Initialize."""
super().__init__( super().__init__(
@@ -325,7 +325,7 @@ class SelectorLang(Immutable):
__slots__ = ("languages", "_hash",) __slots__ = ("languages", "_hash",)
languages: Tuple[str, ...] languages: tuple[str, ...]
def __init__(self, languages: Iterable[str]): def __init__(self, languages: Iterable[str]):
"""Initialize.""" """Initialize."""
@@ -353,25 +353,25 @@ class SelectorList(Immutable):
__slots__ = ("selectors", "is_not", "is_html", "_hash") __slots__ = ("selectors", "is_not", "is_html", "_hash")
selectors: Tuple[Union['Selector', 'SelectorNull'], ...] selectors: tuple[Selector | SelectorNull, ...]
is_not: bool is_not: bool
is_html: bool is_html: bool
def __init__( def __init__(
self, self,
selectors: Optional[Iterable[Union['Selector', 'SelectorNull']]] = None, selectors: Iterable[Selector | SelectorNull] | None = None,
is_not: bool = False, is_not: bool = False,
is_html: bool = False is_html: bool = False
) -> None: ) -> None:
"""Initialize.""" """Initialize."""
super().__init__( super().__init__(
selectors=tuple(selectors) if selectors is not None else tuple(), selectors=tuple(selectors) if selectors is not None else (),
is_not=is_not, is_not=is_not,
is_html=is_html is_html=is_html
) )
def __iter__(self) -> Iterator[Union['Selector', 'SelectorNull']]: def __iter__(self) -> Iterator[Selector | SelectorNull]:
"""Iterator.""" """Iterator."""
return iter(self.selectors) return iter(self.selectors)
@@ -381,7 +381,7 @@ class SelectorList(Immutable):
return len(self.selectors) return len(self.selectors)
def __getitem__(self, index: int) -> Union['Selector', 'SelectorNull']: def __getitem__(self, index: int) -> Selector | SelectorNull:
"""Get item.""" """Get item."""
return self.selectors[index] return self.selectors[index]

View File

@@ -10,7 +10,7 @@ The format and various output types is fairly known (though it
hasn't been tested extensively to make sure we aren't missing corners). hasn't been tested extensively to make sure we aren't missing corners).
Example: Example:
-------
``` ```
>>> import soupsieve as sv >>> import soupsieve as sv
>>> sv.compile('this > that.class[name=value]').selectors.pretty() >>> sv.compile('this > that.class[name=value]').selectors.pretty()
@@ -64,7 +64,9 @@ SelectorList(
is_not=False, is_not=False,
is_html=False) is_html=False)
``` ```
""" """
from __future__ import annotations
import re import re
from typing import Any from typing import Any
@@ -122,16 +124,16 @@ def pretty(obj: Any) -> str: # pragma: no cover
index = m.end(0) index = m.end(0)
if name in ('class', 'lstrt', 'dstrt', 'tstrt'): if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
indent += 4 indent += 4
output.append('{}\n{}'.format(m.group(0), " " * indent)) output.append(f'{m.group(0)}\n{" " * indent}')
elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'): elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
output.append(m.group(0)) output.append(m.group(0))
elif name in ('lend', 'dend', 'tend'): elif name in ('lend', 'dend', 'tend'):
indent -= 4 indent -= 4
output.append(m.group(0)) output.append(m.group(0))
elif name in ('sep',): elif name in ('sep',):
output.append('{}\n{}'.format(m.group(1), " " * indent)) output.append(f'{m.group(1)}\n{" " * indent}')
elif name in ('dsep',): elif name in ('dsep',):
output.append('{} '.format(m.group(1))) output.append(f'{m.group(1)} ')
break break
return ''.join(output) return ''.join(output)

View File

@@ -1,8 +1,9 @@
"""Utility.""" """Utility."""
from __future__ import annotations
from functools import wraps, lru_cache from functools import wraps, lru_cache
import warnings import warnings
import re import re
from typing import Callable, Any, Optional, Tuple, List from typing import Callable, Any
DEBUG = 0x00001 DEBUG = 0x00001
@@ -26,7 +27,7 @@ def lower(string: str) -> str:
class SelectorSyntaxError(Exception): class SelectorSyntaxError(Exception):
"""Syntax error in a CSS selector.""" """Syntax error in a CSS selector."""
def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None: def __init__(self, msg: str, pattern: str | None = None, index: int | None = None) -> None:
"""Initialize.""" """Initialize."""
self.line = None self.line = None
@@ -36,7 +37,7 @@ class SelectorSyntaxError(Exception):
if pattern is not None and index is not None: if pattern is not None and index is not None:
# Format pattern to show line and column position # Format pattern to show line and column position
self.context, self.line, self.col = get_pattern_context(pattern, index) self.context, self.line, self.col = get_pattern_context(pattern, index)
msg = '{}\n line {}:\n{}'.format(msg, self.line, self.context) msg = f'{msg}\n line {self.line}:\n{self.context}'
super().__init__(msg) super().__init__(msg)
@@ -75,15 +76,15 @@ def warn_deprecated(message: str, stacklevel: int = 2) -> None: # pragma: no co
) )
def get_pattern_context(pattern: str, index: int) -> Tuple[str, int, int]: def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
"""Get the pattern context.""" """Get the pattern context."""
last = 0 last = 0
current_line = 1 current_line = 1
col = 1 col = 1
text = [] # type: List[str] text = [] # type: list[str]
line = 1 line = 1
offset = None # type: Optional[int] offset = None # type: int | None
# Split pattern by newline and handle the text before the newline # Split pattern by newline and handle the text before the newline
for m in RE_PATTERN_LINE_SPLIT.finditer(pattern): for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):
@@ -104,7 +105,7 @@ def get_pattern_context(pattern: str, index: int) -> Tuple[str, int, int]:
# we will render the output with just `\n`. We will still log the column # we will render the output with just `\n`. We will still log the column
# correctly though. # correctly though.
text.append('\n') text.append('\n')
text.append('{}{}'.format(indent, linetext)) text.append(f'{indent}{linetext}')
if offset is not None: if offset is not None:
text.append('\n') text.append('\n')
text.append(' ' * (col + offset) + '^') text.append(' ' * (col + offset) + '^')

View File

@@ -0,0 +1,166 @@
"""
Soup Sieve.
A CSS selector filter for BeautifulSoup4.
MIT License
Copyright (c) 2018 Isaac Muse
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
from .__meta__ import __version__, __version_info__ # noqa: F401
from . import css_parser as cp
from . import css_match as cm
from . import css_types as ct
from .util import DEBUG, SelectorSyntaxError # noqa: F401
import bs4 # type: ignore[import]
from typing import Dict, Optional, Any, List, Iterator, Iterable
__all__ = (
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
'closest', 'compile', 'filter', 'iselect',
'match', 'select', 'select_one'
)
SoupSieve = cm.SoupSieve
def compile(  # noqa: A001
    pattern: str,
    namespaces: Optional[Dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> cm.SoupSieve:
    """
    Compile CSS pattern.

    `pattern` is normally a selector string. An already compiled `SoupSieve`
    object is also accepted and returned unchanged, but only when it is not
    accompanied by `flags`, `namespaces`, or `custom`.

    Parameters:
        pattern: selector string (or a pre-compiled `SoupSieve` object).
        namespaces: optional prefix to namespace mapping.
        flags: integer flags handed to the parser.
        custom: optional mapping of custom selector names to selectors.
        **kwargs: accepted and ignored by this function.

    Raises:
        ValueError: if `flags`, `namespaces`, or `custom` is supplied together
            with a pre-compiled `SoupSieve` object.
    """

    if isinstance(pattern, SoupSieve):
        if flags:
            raise ValueError("Cannot process 'flags' argument on a compiled selector list")
        if namespaces is not None:
            raise ValueError("Cannot process 'namespaces' argument on a compiled selector list")
        if custom is not None:
            raise ValueError("Cannot process 'custom' argument on a compiled selector list")
        return pattern

    # Build the wrapper objects only once we know we are really compiling. Doing it
    # before the check above wasted work on pre-compiled patterns and let a wrapper
    # validation error pre-empt the intended `ValueError`.
    ns = ct.Namespaces(namespaces) if namespaces is not None else None  # type: Optional[ct.Namespaces]
    cs = ct.CustomSelectors(custom) if custom is not None else None  # type: Optional[ct.CustomSelectors]

    return cp._cached_css_compile(pattern, ns, cs, flags)
def purge() -> None:
    """Purge the cached patterns by delegating to the parser module's `_purge_cache`."""

    purge_cache = cp._purge_cache
    purge_cache()
def closest(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[Dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> 'bs4.Tag':
    """
    Match closest ancestor.

    Compiles `select` with the given `namespaces`, `flags`, and `custom`
    selectors, then returns the result of the compiled selector's `closest(tag)`.
    """

    # `custom` is a named keyword-only parameter, so it never lands in `kwargs`;
    # it has to be forwarded explicitly or custom selectors are silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).closest(tag)
def match(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[Dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> bool:
    """
    Match node.

    Compiles `select` with the given `namespaces`, `flags`, and `custom`
    selectors, then returns the result of the compiled selector's `match(tag)`.
    """

    # `custom` is a named keyword-only parameter, so it never lands in `kwargs`;
    # it has to be forwarded explicitly or custom selectors are silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).match(tag)
def filter(  # noqa: A001
    select: str,
    iterable: Iterable['bs4.Tag'],
    namespaces: Optional[Dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> List['bs4.Tag']:
    """
    Filter list of nodes.

    Compiles `select` with the given `namespaces`, `flags`, and `custom`
    selectors, then returns the result of the compiled selector's
    `filter(iterable)`.
    """

    # `custom` is a named keyword-only parameter, so it never lands in `kwargs`;
    # it has to be forwarded explicitly or custom selectors are silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).filter(iterable)
def select_one(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[Dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> 'bs4.Tag':
    """
    Select a single tag.

    Compiles `select` with the given `namespaces`, `flags`, and `custom`
    selectors, then returns the result of the compiled selector's
    `select_one(tag)`.
    """

    # `custom` is a named keyword-only parameter, so it never lands in `kwargs`;
    # it has to be forwarded explicitly or custom selectors are silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).select_one(tag)
def select(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[Dict[str, str]] = None,
    limit: int = 0,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> List['bs4.Tag']:
    """
    Select the specified tags.

    Compiles `select` with the given `namespaces`, `flags`, and `custom`
    selectors, then returns the result of the compiled selector's
    `select(tag, limit)`.
    """

    # `custom` is a named keyword-only parameter, so it never lands in `kwargs`;
    # it has to be forwarded explicitly or custom selectors are silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).select(tag, limit)
def iselect(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[Dict[str, str]] = None,
    limit: int = 0,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> Iterator['bs4.Tag']:
    """
    Iterate the specified tags.

    Generator: compiles `select` with the given `namespaces`, `flags`, and
    `custom` selectors, then yields each item produced by the compiled
    selector's `iselect(tag, limit)`.
    """

    # `custom` is a named keyword-only parameter, so it never lands in `kwargs`;
    # it has to be forwarded explicitly or custom selectors are silently dropped.
    yield from compile(select, namespaces, flags, custom=custom, **kwargs).iselect(tag, limit)
def escape(ident: str) -> str:
    """Escape the identifier `ident` using the parser module's `escape` function."""

    escaped = cp.escape(ident)
    return escaped

View File

@@ -0,0 +1,196 @@
"""Meta related things."""
from collections import namedtuple
import re
# Verbose-mode pattern describing a version string: `major[.minor[.micro]]`,
# an optional pre-release tag (`a`, `b`, or `rc`) followed by its number, then
# optional `.postN` and `.devN` suffixes. Every part is captured in a named group.
RE_VER = re.compile(
r'''(?x)
(?P<major>\d+)(?:\.(?P<minor>\d+))?(?:\.(?P<micro>\d+))?
(?:(?P<type>a|b|rc)(?P<pre>\d+))?
(?:\.post(?P<post>\d+))?
(?:\.dev(?P<dev>\d+))?
'''
)
# Release type name -> pre-release tag placed in the version string (empty when
# the release type contributes no tag). `Version` rejects any release type that
# is not a key of this mapping.
REL_MAP = {
".dev": "",
".dev-alpha": "a",
".dev-beta": "b",
".dev-candidate": "rc",
"alpha": "a",
"beta": "b",
"candidate": "rc",
"final": ""
}
# Release type name -> development status label returned by
# `Version._get_dev_status`.
# NOTE(review): the labels look like PyPI "Development Status" classifier
# values - confirm against the packaging/setup code.
DEV_STATUS = {
".dev": "2 - Pre-Alpha",
".dev-alpha": "2 - Pre-Alpha",
".dev-beta": "2 - Pre-Alpha",
".dev-candidate": "2 - Pre-Alpha",
"alpha": "3 - Alpha",
"beta": "4 - Beta",
"candidate": "4 - Beta",
"final": "5 - Production/Stable"
}
# Pre-release tag -> release type name (the reverse direction of the `alpha`,
# `beta`, and `candidate` entries in REL_MAP).
# NOTE(review): presumably used when parsing a version string back into a
# `Version`; the consumer is not visible in this part of the file - confirm.
PRE_REL_MAP = {"a": 'alpha', "b": 'beta', "rc": 'candidate'}
class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre", "post", "dev"])):
"""
Get the version (PEP 440).
A biased approach to the PEP 440 semantic version.
Provides a tuple structure which is sorted for comparisons `v1 > v2` etc.
(major, minor, micro, release type, pre-release build, post-release build, development release build)
Release types are named in such a way that they are comparable with ease.
Accessors to check if a development, pre-release, or post-release build. Also provides accessor to get
development status for setup files.
How it works (currently):
- You must specify a release type as either `final`, `alpha`, `beta`, or `candidate`.
- To define a development release, you can use either `.dev`, `.dev-alpha`, `.dev-beta`, or `.dev-candidate`.
The dot is used to ensure all development specifiers are sorted before `alpha`.
You can specify a `dev` number for development builds, but do not have to as implicit development releases
are allowed.
- You must specify a `pre` value greater than zero if using a prerelease as this project (not PEP 440) does not
allow implicit prereleases.
- You can optionally set `post` to a value greater than zero to make the build a post release. While post releases
are technically allowed in prereleases, it is strongly discouraged, so we are rejecting them. It should be
noted that we do not allow `post0` even though PEP 440 does not restrict this. This project specifically
does not allow implicit post releases.
- It should be noted that we do not support epochs `1!` or local versions `+some-custom.version-1`.
Acceptable version releases:
```
Version(1, 0, 0, "final") 1.0
Version(1, 2, 0, "final") 1.2
Version(1, 2, 3, "final") 1.2.3
Version(1, 2, 0, ".dev-alpha", pre=4) 1.2a4
Version(1, 2, 0, ".dev-beta", pre=4) 1.2b4
Version(1, 2, 0, ".dev-candidate", pre=4) 1.2rc4
Version(1, 2, 0, "final", post=1) 1.2.post1
Version(1, 2, 3, ".dev") 1.2.3.dev0
Version(1, 2, 3, ".dev", dev=1) 1.2.3.dev1
```
"""
def __new__(
cls,
major: int, minor: int, micro: int, release: str = "final",
pre: int = 0, post: int = 0, dev: int = 0
) -> "Version":
"""Validate version info."""
# Ensure all parts are positive integers.
for value in (major, minor, micro, pre, post):
if not (isinstance(value, int) and value >= 0):
raise ValueError("All version parts except 'release' should be integers.")
if release not in REL_MAP:
raise ValueError("'{}' is not a valid release type.".format(release))
# Ensure valid pre-release (we do not allow implicit pre-releases).
if ".dev-candidate" < release < "final":
if pre == 0:
raise ValueError("Implicit pre-releases not allowed.")
elif dev:
raise ValueError("Version is not a development release.")
elif post:
raise ValueError("Post-releases are not allowed with pre-releases.")
# Ensure valid development or development/pre release
elif release < "alpha":
if release > ".dev" and pre == 0:
raise ValueError("Implicit pre-release not allowed.")
elif post:
raise ValueError("Post-releases are not allowed with pre-releases.")
# Ensure a valid normal release
else:
if pre:
raise ValueError("Version is not a pre-release.")
elif dev:
raise ValueError("Version is not a development release.")
return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev)
def _is_pre(self) -> bool:
"""Is prerelease."""
return bool(self.pre > 0)
def _is_dev(self) -> bool:
"""Is development."""
return bool(self.release < "alpha")
def _is_post(self) -> bool:
"""Is post."""
return bool(self.post > 0)
def _get_dev_status(self) -> str: # pragma: no cover
"""Get development status string."""
return DEV_STATUS[self.release]
def _get_canonical(self) -> str:
"""Get the canonical output string."""
# Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed..
if self.micro == 0:
ver = "{}.{}".format(self.major, self.minor)
else:
ver = "{}.{}.{}".format(self.major, self.minor, self.micro)
if self._is_pre():
ver += '{}{}'.format(REL_MAP[self.release], self.pre)
if self._is_post():
ver += ".post{}".format(self.post)
if self._is_dev():
ver += ".dev{}".format(self.dev)
return ver
def parse_version(ver: str) -> Version:
    """
    Parse version into a comparable Version tuple.

    `ver` is matched against `RE_VER`; a `ValueError` is raised when it does not match.
    Missing `minor`/`micro`/`post` groups default to zero.
    """

    m = RE_VER.match(ver)

    if m is None:
        raise ValueError("'{}' is not a valid version".format(ver))

    # Handle major, minor, micro
    major = int(m.group('major'))
    minor = int(m.group('minor')) if m.group('minor') else 0
    micro = int(m.group('micro')) if m.group('micro') else 0

    # Handle pre releases
    if m.group('type'):
        release = PRE_REL_MAP[m.group('type')]
        pre = int(m.group('pre'))
    else:
        release = "final"
        pre = 0

    # Handle development releases: a `dev` segment turns the release type into
    # `.dev` (no pre-release) or `.dev-<type>` (development build of a pre-release).
    if m.group('dev'):
        dev = int(m.group('dev'))
        release = '.dev-' + release if pre else '.dev'
    else:
        dev = 0

    # Handle post
    post = int(m.group('post')) if m.group('post') else 0

    return Version(major, minor, micro, release, pre, post, dev)
# Package version as a validated `Version` tuple, and its canonical string form.
__version_info__ = Version(2, 3, 1, "final")
__version__ = __version_info__._get_canonical()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,407 @@
"""CSS selector structure items."""
import copyreg
from .pretty import pretty
from typing import Any, Type, Tuple, Union, Dict, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
# Names exported by a star-import of this module.
__all__ = (
    'Selector',
    'SelectorNull',
    'SelectorTag',
    'SelectorAttribute',
    'SelectorContains',
    'SelectorNth',
    'SelectorLang',
    'SelectorList',
    'Namespaces',
    'CustomSelectors'
)
# Selector flags. Every constant is a distinct power of two (one bit each).
SEL_EMPTY = 1 << 0
SEL_ROOT = 1 << 1
SEL_DEFAULT = 1 << 2
SEL_INDETERMINATE = 1 << 3
SEL_SCOPE = 1 << 4
SEL_DIR_LTR = 1 << 5
SEL_DIR_RTL = 1 << 6
SEL_IN_RANGE = 1 << 7
SEL_OUT_OF_RANGE = 1 << 8
SEL_DEFINED = 1 << 9
SEL_PLACEHOLDER_SHOWN = 1 << 10
class Immutable:
    """
    Base class for hashable objects whose attributes are fixed at construction.

    Subclasses list their attribute names in `__slots__` together with `'_hash'`
    and pass the values to `__init__` as keyword arguments. Equality compares
    every slot except `_hash`; assigning to an attribute afterwards raises
    `AttributeError`.
    """

    __slots__: Tuple[str, ...] = ('_hash',)

    _hash: int

    def __init__(self, **kwargs: Any) -> None:
        """Store each keyword argument as an attribute and cache the hash."""

        temp = []
        for k, v in kwargs.items():
            # The value's type is hashed alongside the value itself.
            temp.append(type(v))
            temp.append(v)
            # Go through the parent's `__setattr__`; our own always raises.
            super(Immutable, self).__setattr__(k, v)
        super(Immutable, self).__setattr__('_hash', hash(tuple(temp)))

    @classmethod
    def __base__(cls) -> "Type[Immutable]":
        """Get base class."""

        return cls

    def __eq__(self, other: Any) -> bool:
        """Equal: same base class and every non-hash slot compares equal."""

        return (
            isinstance(other, self.__base__()) and
            all(getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash')
        )

    def __ne__(self, other: Any) -> bool:
        """Not equal: different base class or any non-hash slot differs."""

        return (
            not isinstance(other, self.__base__()) or
            any(getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash')
        )

    def __hash__(self) -> int:
        """Hash (computed once in `__init__`)."""

        return self._hash

    def __setattr__(self, name: str, value: Any) -> None:
        """Prevent mutability."""

        raise AttributeError("'{}' is immutable".format(self.__class__.__name__))

    def __repr__(self) -> str:  # pragma: no cover
        """Representation: `ClassName(slot=value, ...)` without the cached hash."""

        return "{}({})".format(
            self.__class__.__name__,
            ', '.join("{}={!r}".format(k, getattr(self, k)) for k in self.__slots__ if k != '_hash')
        )

    __str__ = __repr__

    def pretty(self) -> None:  # pragma: no cover
        """Pretty print."""

        # Inside the method body `pretty` resolves to the module-level function,
        # not to this method.
        print(pretty(self))
class ImmutableDict(Mapping[Any, Any]):
    """
    Hashable, immutable dictionary.

    Built from a dict or an iterable of `(key, value)` pairs. The content is
    validated by `_validate` (which subclasses override), copied into a private
    dict, and the hash is computed once from the sorted items.
    """

    def __init__(
        self,
        arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]
    ) -> None:
        """
        Initialize.

        Raises `TypeError` when `_validate` rejects the content.
        """

        # `_validate` iterates over `arg`. A one-shot iterator (e.g. a generator) would be
        # exhausted by that pass and `dict(arg)` would then silently build an empty
        # mapping, so materialize it first.
        if isinstance(arg, Iterator):
            arg = list(arg)

        self._validate(arg)
        self._d = dict(arg)
        # Types are hashed together with the values.
        self._hash = hash(tuple((type(x), x, type(y), y) for x, y in sorted(self._d.items())))

    def _validate(self, arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]) -> None:
        """Validate arguments: values (and, for pair iterables, keys) must be hashable."""

        if isinstance(arg, dict):
            if not all(isinstance(v, Hashable) for v in arg.values()):
                raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
        elif not all(isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg):
            raise TypeError('{} values must be hashable'.format(self.__class__.__name__))

    def __iter__(self) -> Iterator[Any]:
        """Iterator."""

        return iter(self._d)

    def __len__(self) -> int:
        """Length."""

        return len(self._d)

    def __getitem__(self, key: Any) -> Any:
        """Get item: `namespace['key']`."""

        return self._d[key]

    def __hash__(self) -> int:
        """Hash."""

        return self._hash

    def __repr__(self) -> str:  # pragma: no cover
        """Representation."""

        return "{!r}".format(self._d)

    __str__ = __repr__
class Namespaces(ImmutableDict):
    """Namespaces: an immutable mapping restricted to string content."""

    def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
        """Initialize."""

        super().__init__(arg)

    def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
        """
        Validate arguments.

        Raises `TypeError` when a value (or, for an iterable of pairs, a key) is not a `str`.
        """

        if isinstance(arg, dict):
            # Only the values are inspected on the dict path; keys are not checked here.
            if not all(isinstance(v, str) for v in arg.values()):
                raise TypeError('{} values must be Unicode strings'.format(self.__class__.__name__))
        elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
            raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
class CustomSelectors(ImmutableDict):
    """Custom selectors: an immutable mapping restricted to string content."""

    def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
        """Initialize."""

        super().__init__(arg)

    def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
        """
        Validate arguments.

        Raises `TypeError` when a value (or, for an iterable of pairs, a key) is not a `str`.
        """

        if isinstance(arg, dict):
            # Only the values are inspected on the dict path; keys are not checked here.
            if not all(isinstance(v, str) for v in arg.values()):
                raise TypeError('{} values must be Unicode strings'.format(self.__class__.__name__))
        elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
            raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
class Selector(Immutable):
    """Immutable container holding the parts of a single selector."""

    __slots__ = (
        'tag', 'ids', 'classes', 'attributes', 'nth', 'selectors',
        'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
    )

    tag: Optional['SelectorTag']
    ids: Tuple[str, ...]
    classes: Tuple[str, ...]
    attributes: Tuple['SelectorAttribute', ...]
    nth: Tuple['SelectorNth', ...]
    selectors: Tuple['SelectorList', ...]
    relation: 'SelectorList'
    rel_type: Optional[str]
    contains: Tuple['SelectorContains', ...]
    lang: Tuple['SelectorLang', ...]
    flags: int

    def __init__(
        self,
        tag: Optional['SelectorTag'],
        ids: Tuple[str, ...],
        classes: Tuple[str, ...],
        attributes: Tuple['SelectorAttribute', ...],
        nth: Tuple['SelectorNth', ...],
        selectors: Tuple['SelectorList', ...],
        relation: 'SelectorList',
        rel_type: Optional[str],
        contains: Tuple['SelectorContains', ...],
        lang: Tuple['SelectorLang', ...],
        flags: int
    ):
        """Store every argument under the attribute of the same name."""

        # Insertion order is kept on purpose: the base class hashes the
        # values in the order the keywords arrive.
        fields = {
            'tag': tag,
            'ids': ids,
            'classes': classes,
            'attributes': attributes,
            'nth': nth,
            'selectors': selectors,
            'relation': relation,
            'rel_type': rel_type,
            'contains': contains,
            'lang': lang,
            'flags': flags,
        }
        super().__init__(**fields)
class SelectorNull(Immutable):
    """Null Selector: an `Immutable` that stores no attributes of its own."""

    def __init__(self) -> None:
        """Initialize with no fields."""

        super().__init__()
class SelectorTag(Immutable):
    """Selector tag: an immutable pair of tag `name` and optional `prefix`."""

    # `_hash` stays last: the base class repr and the pickle helper drop the final slot.
    __slots__ = ("name", "prefix", "_hash")

    name: str
    prefix: Optional[str]

    def __init__(self, name: str, prefix: Optional[str]) -> None:
        """Store `name` and `prefix` (which may be `None`)."""

        super().__init__(name=name, prefix=prefix)
class SelectorAttribute(Immutable):
    """Immutable record for one attribute rule of a selector."""

    __slots__ = ("attribute", "prefix", "pattern", "xml_type_pattern", "_hash")

    attribute: str
    prefix: str
    pattern: Optional[Pattern[str]]
    xml_type_pattern: Optional[Pattern[str]]

    def __init__(
        self,
        attribute: str,
        prefix: str,
        pattern: Optional[Pattern[str]],
        xml_type_pattern: Optional[Pattern[str]]
    ) -> None:
        """Store every argument under the attribute of the same name."""

        # Keep this order; the base class hashes values in keyword order.
        fields = {
            'attribute': attribute,
            'prefix': prefix,
            'pattern': pattern,
            'xml_type_pattern': xml_type_pattern,
        }
        super().__init__(**fields)
class SelectorContains(Immutable):
    """Immutable record for a contains rule: a tuple of text values and an `own` switch."""

    __slots__ = ("text", "own", "_hash")

    text: Tuple[str, ...]
    own: bool

    def __init__(self, text: Iterable[str], own: bool) -> None:
        """Freeze `text` into a tuple and store it together with `own`."""

        frozen_text = tuple(text)
        super().__init__(text=frozen_text, own=own)
class SelectorNth(Immutable):
    """Immutable record for an nth-type selector rule."""

    __slots__ = ("a", "n", "b", "of_type", "last", "selectors", "_hash")

    a: int
    n: bool
    b: int
    of_type: bool
    last: bool
    selectors: 'SelectorList'

    def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: 'SelectorList') -> None:
        """Store every argument under the attribute of the same name."""

        # Keep this order; the base class hashes values in keyword order.
        fields = {
            'a': a,
            'n': n,
            'b': b,
            'of_type': of_type,
            'last': last,
            'selectors': selectors,
        }
        super().__init__(**fields)
class SelectorLang(Immutable):
    """Immutable, sequence-like holder of language strings."""

    __slots__ = ("languages", "_hash",)

    languages: Tuple[str, ...]

    def __init__(self, languages: Iterable[str]):
        """Freeze `languages` into a tuple and store it."""

        frozen = tuple(languages)
        super().__init__(languages=frozen)

    def __iter__(self) -> Iterator[str]:
        """Iterate over the stored languages."""

        stored = self.languages
        return iter(stored)

    def __len__(self) -> int:  # pragma: no cover
        """Number of stored languages."""

        stored = self.languages
        return len(stored)

    def __getitem__(self, index: int) -> str:  # pragma: no cover
        """Language at position `index`."""

        stored = self.languages
        return stored[index]
class SelectorList(Immutable):
    """Immutable, sequence-like list of selectors with `is_not` and `is_html` markers."""

    __slots__ = ("selectors", "is_not", "is_html", "_hash")

    selectors: Tuple[Union['Selector', 'SelectorNull'], ...]
    is_not: bool
    is_html: bool

    def __init__(
        self,
        selectors: Optional[Iterable[Union['Selector', 'SelectorNull']]] = None,
        is_not: bool = False,
        is_html: bool = False
    ) -> None:
        """Freeze `selectors` into a tuple (empty when `None`) and store the markers."""

        if selectors is None:
            frozen = tuple()
        else:
            frozen = tuple(selectors)
        super().__init__(selectors=frozen, is_not=is_not, is_html=is_html)

    def __iter__(self) -> Iterator[Union['Selector', 'SelectorNull']]:
        """Iterate over the stored selectors."""

        stored = self.selectors
        return iter(stored)

    def __len__(self) -> int:
        """Number of stored selectors."""

        stored = self.selectors
        return len(stored)

    def __getitem__(self, index: int) -> Union['Selector', 'SelectorNull']:
        """Selector at position `index`."""

        stored = self.selectors
        return stored[index]
def _pickle(p: Any) -> Any:
    """Reduce `p` to a `(constructor, args)` pair for `copyreg`."""

    # The final slot is skipped; the remaining slot values become the constructor arguments.
    field_names = p.__slots__[:-1]
    ctor_args = tuple(getattr(p, field) for field in field_names)
    return p.__base__(), ctor_args


def pickle_register(obj: Any) -> None:
    """Allow object to be pickled by registering `_pickle` as its reducer."""

    copyreg.pickle(obj, _pickle)
# Register the pickle reducer for every selector type defined in this module.
pickle_register(Selector)
pickle_register(SelectorNull)
pickle_register(SelectorTag)
pickle_register(SelectorAttribute)
pickle_register(SelectorContains)
pickle_register(SelectorNth)
pickle_register(SelectorLang)
pickle_register(SelectorList)

137
lib/soupsieve_old/pretty.py Normal file
View File

@@ -0,0 +1,137 @@
"""
Format a pretty string of a `SoupSieve` object for easy debugging.
This won't necessarily support all types, and it definitely does
not support custom outputs.
It is mainly geared towards our types as the `SelectorList`
object is a beast to look at without some indentation and newlines.
The format and the various output types are fairly well known (though they
haven't been tested extensively to make sure we aren't missing corner cases).
Example:
```
>>> import soupsieve as sv
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
SelectorList(
selectors=(
Selector(
tag=SelectorTag(
name='that',
prefix=None),
ids=(),
classes=(
'class',
),
attributes=(
SelectorAttribute(
attribute='name',
prefix='',
pattern=re.compile(
'^value$'),
xml_type_pattern=None),
),
nth=(),
selectors=(),
relation=SelectorList(
selectors=(
Selector(
tag=SelectorTag(
name='this',
prefix=None),
ids=(),
classes=(),
attributes=(),
nth=(),
selectors=(),
relation=SelectorList(
selectors=(),
is_not=False,
is_html=False),
rel_type='>',
contains=(),
lang=(),
flags=0),
),
is_not=False,
is_html=False),
rel_type=None,
contains=(),
lang=(),
flags=0),
),
is_not=False,
is_html=False)
```
"""
import re
from typing import Any
# Token patterns used by `pretty` to split the `str()` of an object into pieces.
RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]+\(')
RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=')
RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')
RE_LSTRT = re.compile(r'\[')
RE_DSTRT = re.compile(r'\{')
RE_TSTRT = re.compile(r'\(')
RE_LEND = re.compile(r'\]')
RE_DEND = re.compile(r'\}')
RE_TEND = re.compile(r'\)')
RE_INT = re.compile(r'\d+')
RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]+')
RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')
RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")
RE_SEP = re.compile(r'\s*(,)\s*')
RE_DSEP = re.compile(r'\s*(:)\s*')

# Tried in insertion order at every position; the first pattern that matches wins
# (so `empty` is tested before the individual opening brackets).
TOKENS = {
    'class': RE_CLASS,
    'param': RE_PARAM,
    'empty': RE_EMPTY,
    'lstrt': RE_LSTRT,
    'dstrt': RE_DSTRT,
    'tstrt': RE_TSTRT,
    'lend': RE_LEND,
    'dend': RE_DEND,
    'tend': RE_TEND,
    'sqstr': RE_SQSTR,
    'sep': RE_SEP,
    'dsep': RE_DSEP,
    'int': RE_INT,
    'kword': RE_KWORD,
    'dqstr': RE_DQSTR
}


def pretty(obj: Any) -> str:  # pragma: no cover
    """
    Make the object output string pretty.

    Tokenizes `str(obj)` with `TOKENS`, starts a new line after every opening
    bracket or `Name(` and after every comma, and tracks the indentation level.
    Text that no token pattern recognizes is copied to the output unchanged.
    """

    sel = str(obj)
    index = 0
    end = len(sel) - 1
    indent = 0
    output = []

    while index <= end:
        m = None
        for k, v in TOKENS.items():
            m = v.match(sel, index)

            if m:
                name = k
                index = m.end(0)
                if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
                    indent += 4
                    output.append('{}\n{}'.format(m.group(0), " " * indent))
                elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
                    output.append(m.group(0))
                elif name in ('lend', 'dend', 'tend'):
                    indent -= 4
                    output.append(m.group(0))
                elif name in ('sep',):
                    output.append('{}\n{}'.format(m.group(1), " " * indent))
                elif name in ('dsep',):
                    output.append('{} '.format(m.group(1)))
                break

        if m is None:
            # No token matched here (e.g. a one-letter name such as `a=`, a minus sign,
            # or the dot in `re.IGNORECASE`). Without advancing, the loop would never
            # terminate, so pass the character through verbatim.
            output.append(sel[index])
            index += 1

    return ''.join(output)

View File

116
lib/soupsieve_old/util.py Normal file
View File

@@ -0,0 +1,116 @@
"""Utility."""
from functools import wraps, lru_cache
import warnings
import re
from typing import Callable, Any, Optional, Tuple, List
# Bit flag for debug mode; presumably consumed by the parser/matcher, confirm against callers.
DEBUG = 0x00001

# Matches one line break (`\r\n`, `\n`, or `\r`) or the end of the string.
RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$')
# Code points bounding the ASCII uppercase letters.
UC_A = ord('A')
UC_Z = ord('Z')


@lru_cache(maxsize=512)
def lower(string: str) -> str:
    """Lowercase only the ASCII letters `A`-`Z`; every other character is kept as is."""

    return ''.join(
        chr(ord(ch) + 32) if UC_A <= ord(ch) <= UC_Z else ch
        for ch in string
    )
class SelectorSyntaxError(Exception):
    """Syntax error in a CSS selector."""

    def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None:
        """
        Initialize.

        When both `pattern` and `index` are given, the position is resolved with
        `get_pattern_context`, exposed as `context`, `line` and `col`, and appended
        to the message. Otherwise those three attributes are `None`.
        """

        if pattern is not None and index is not None:
            # Format pattern to show line and column position
            context, line, col = get_pattern_context(pattern, index)
            msg = f'{msg}\n line {line}:\n{context}'
        else:
            context = line = col = None

        self.line = line
        self.col = col
        self.context = context

        super().__init__(msg)
def deprecated(message: str, stacklevel: int = 2) -> Callable[..., Any]:  # pragma: no cover
    """
    Raise a `DeprecationWarning` when wrapped function/method is called.

    Usage:

        @deprecated("This method will be removed in version X; use Y instead.")
        def some_method():
            pass
    """

    def decorate(target: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(target)
        def warn_then_call(*args: Any, **kwargs: Any) -> Any:
            # The text is built on every call, then the call is forwarded untouched.
            notice = f"'{target.__name__}' is deprecated. {message}"
            warnings.warn(notice, category=DeprecationWarning, stacklevel=stacklevel)
            return target(*args, **kwargs)

        return warn_then_call

    return decorate
def warn_deprecated(message: str, stacklevel: int = 2) -> None:  # pragma: no cover
    """Emit `message` as a `DeprecationWarning` at the given `stacklevel`."""

    warnings.warn(message, DeprecationWarning, stacklevel)
def get_pattern_context(pattern: str, index: int) -> Tuple[str, int, int]:
    """
    Get the pattern context.

    Splits `pattern` into lines and builds a display string in which the line
    containing character offset `index` is followed by an extra line holding a
    `^` marker.

    Returns `(context_text, line, col)`; `line` and `col` are 1-based.
    """

    last = 0  # offset where the current line starts
    current_line = 1
    col = 1
    text = []  # type: List[str]
    line = 1
    offset = None  # type: Optional[int]

    # Split pattern by newline and handle the text before the newline
    for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):
        linetext = pattern[last:m.start(0)]
        if not len(m.group(0)) and not len(text):
            # Empty match with nothing emitted yet: the pattern is a single
            # line, so no prefix is added.
            indent = ''
            offset = -1
            col = index - last + 1
        elif last <= index < m.end(0):
            # `index` falls inside this line (or its line break): flag the line.
            indent = '--> '
            # One column less when `index` lies past the first character of the line break.
            offset = (-1 if index > m.start(0) else 0) + 3
            col = index - last + 1
        else:
            # Any other line: plain prefix, no marker line.
            indent = ' '
            offset = None
        if len(text):
            # Regardless of whether we are presented with `\r\n`, `\r`, or `\n`,
            # we will render the output with just `\n`. We will still log the column
            # correctly though.
            text.append('\n')
        text.append('{}{}'.format(indent, linetext))
        if offset is not None:
            # Emit the marker line and remember which line it belongs to.
            text.append('\n')
            text.append(' ' * (col + offset) + '^')
            line = current_line

        current_line += 1
        last = m.end(0)

    return ''.join(text), line, col