update Soupsieve

This commit is contained in:
AdeHub
2024-08-24 16:36:55 +12:00
parent b3edfa0d87
commit 38d8e13e4e
15 changed files with 4282 additions and 383 deletions

View File

@@ -25,13 +25,14 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
from __future__ import annotations
from .__meta__ import __version__, __version_info__ # noqa: F401
from . import css_parser as cp
from . import css_match as cm
from . import css_types as ct
from .util import DEBUG, SelectorSyntaxError # noqa: F401
import bs4 # type: ignore[import]
from typing import Dict, Optional, Any, List, Iterator, Iterable
import bs4 # type: ignore[import-untyped]
from typing import Any, Iterator, Iterable
__all__ = (
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
@@ -44,17 +45,14 @@ SoupSieve = cm.SoupSieve
def compile( # noqa: A001
pattern: str,
namespaces: Optional[Dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> cm.SoupSieve:
"""Compile CSS pattern."""
ns = ct.Namespaces(namespaces) if namespaces is not None else namespaces # type: Optional[ct.Namespaces]
cs = ct.CustomSelectors(custom) if custom is not None else custom # type: Optional[ct.CustomSelectors]
if isinstance(pattern, SoupSieve):
if flags:
raise ValueError("Cannot process 'flags' argument on a compiled selector list")
@@ -64,7 +62,12 @@ def compile( # noqa: A001
raise ValueError("Cannot process 'custom' argument on a compiled selector list")
return pattern
return cp._cached_css_compile(pattern, ns, cs, flags)
return cp._cached_css_compile(
pattern,
ct.Namespaces(namespaces) if namespaces is not None else namespaces,
ct.CustomSelectors(custom) if custom is not None else custom,
flags
)
def purge() -> None:
@@ -75,13 +78,13 @@ def purge() -> None:
def closest(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[Dict[str, str]] = None,
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> 'bs4.Tag':
) -> bs4.Tag:
"""Match closest ancestor."""
return compile(select, namespaces, flags, **kwargs).closest(tag)
@@ -89,11 +92,11 @@ def closest(
def match(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[Dict[str, str]] = None,
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> bool:
"""Match node."""
@@ -103,13 +106,13 @@ def match(
def filter( # noqa: A001
select: str,
iterable: Iterable['bs4.Tag'],
namespaces: Optional[Dict[str, str]] = None,
iterable: Iterable[bs4.Tag],
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> List['bs4.Tag']:
) -> list[bs4.Tag]:
"""Filter list of nodes."""
return compile(select, namespaces, flags, **kwargs).filter(iterable)
@@ -117,13 +120,13 @@ def filter( # noqa: A001
def select_one(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[Dict[str, str]] = None,
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> 'bs4.Tag':
) -> bs4.Tag:
"""Select a single tag."""
return compile(select, namespaces, flags, **kwargs).select_one(tag)
@@ -131,14 +134,14 @@ def select_one(
def select(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[Dict[str, str]] = None,
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> List['bs4.Tag']:
) -> list[bs4.Tag]:
"""Select the specified tags."""
return compile(select, namespaces, flags, **kwargs).select(tag, limit)
@@ -146,18 +149,17 @@ def select(
def iselect(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[Dict[str, str]] = None,
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> Iterator['bs4.Tag']:
) -> Iterator[bs4.Tag]:
"""Iterate the specified tags."""
for el in compile(select, namespaces, flags, **kwargs).iselect(tag, limit):
yield el
yield from compile(select, namespaces, flags, **kwargs).iselect(tag, limit)
def escape(ident: str) -> str:

View File

@@ -1,4 +1,5 @@
"""Meta related things."""
from __future__ import annotations
from collections import namedtuple
import re
@@ -83,7 +84,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
cls,
major: int, minor: int, micro: int, release: str = "final",
pre: int = 0, post: int = 0, dev: int = 0
) -> "Version":
) -> Version:
"""Validate version info."""
# Ensure all parts are positive integers.
@@ -92,7 +93,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
raise ValueError("All version parts except 'release' should be integers.")
if release not in REL_MAP:
raise ValueError("'{}' is not a valid release type.".format(release))
raise ValueError(f"'{release}' is not a valid release type.")
# Ensure valid pre-release (we do not allow implicit pre-releases).
if ".dev-candidate" < release < "final":
@@ -117,7 +118,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
elif dev:
raise ValueError("Version is not a development release.")
return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev)
return super().__new__(cls, major, minor, micro, release, pre, post, dev)
def _is_pre(self) -> bool:
"""Is prerelease."""
@@ -144,15 +145,15 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
# Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed..
if self.micro == 0:
ver = "{}.{}".format(self.major, self.minor)
ver = f"{self.major}.{self.minor}"
else:
ver = "{}.{}.{}".format(self.major, self.minor, self.micro)
ver = f"{self.major}.{self.minor}.{self.micro}"
if self._is_pre():
ver += '{}{}'.format(REL_MAP[self.release], self.pre)
ver += f'{REL_MAP[self.release]}{self.pre}'
if self._is_post():
ver += ".post{}".format(self.post)
ver += f".post{self.post}"
if self._is_dev():
ver += ".dev{}".format(self.dev)
ver += f".dev{self.dev}"
return ver
@@ -163,7 +164,7 @@ def parse_version(ver: str) -> Version:
m = RE_VER.match(ver)
if m is None:
raise ValueError("'{}' is not a valid version".format(ver))
raise ValueError(f"'{ver}' is not a valid version")
# Handle major, minor, micro
major = int(m.group('major'))
@@ -192,5 +193,5 @@ def parse_version(ver: str) -> Version:
return Version(major, minor, micro, release, pre, post, dev)
__version_info__ = Version(2, 3, 1, "final")
__version_info__ = Version(2, 6, 0, "final")
__version__ = __version_info__._get_canonical()

View File

@@ -1,11 +1,12 @@
"""CSS matcher."""
from __future__ import annotations
from datetime import datetime
from . import util
import re
from . import css_types as ct
import unicodedata
import bs4 # type: ignore[import]
from typing import Iterator, Iterable, List, Any, Optional, Tuple, Union, Dict, Callable, Sequence, cast
import bs4 # type: ignore[import-untyped]
from typing import Iterator, Iterable, Any, Callable, Sequence, cast # noqa: F401
# Empty tag pattern (whitespace okay)
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
@@ -64,12 +65,12 @@ class _FakeParent:
fake parent so we can traverse the root element as a child.
"""
def __init__(self, element: 'bs4.Tag') -> None:
def __init__(self, element: bs4.Tag) -> None:
"""Initialize."""
self.contents = [element]
def __len__(self) -> 'bs4.PageElement':
def __len__(self) -> bs4.PageElement:
"""Length."""
return len(self.contents)
@@ -84,62 +85,62 @@ class _DocumentNav:
# Fail on unexpected types.
if not cls.is_tag(tag):
raise TypeError("Expected a BeautifulSoup 'Tag', but instead recieved type {}".format(type(tag)))
raise TypeError(f"Expected a BeautifulSoup 'Tag', but instead received type {type(tag)}")
@staticmethod
def is_doc(obj: 'bs4.Tag') -> bool:
def is_doc(obj: bs4.Tag) -> bool:
"""Is `BeautifulSoup` object."""
return isinstance(obj, bs4.BeautifulSoup)
@staticmethod
def is_tag(obj: 'bs4.PageElement') -> bool:
def is_tag(obj: bs4.PageElement) -> bool:
"""Is tag."""
return isinstance(obj, bs4.Tag)
@staticmethod
def is_declaration(obj: 'bs4.PageElement') -> bool: # pragma: no cover
def is_declaration(obj: bs4.PageElement) -> bool: # pragma: no cover
"""Is declaration."""
return isinstance(obj, bs4.Declaration)
@staticmethod
def is_cdata(obj: 'bs4.PageElement') -> bool:
def is_cdata(obj: bs4.PageElement) -> bool:
"""Is CDATA."""
return isinstance(obj, bs4.CData)
@staticmethod
def is_processing_instruction(obj: 'bs4.PageElement') -> bool: # pragma: no cover
def is_processing_instruction(obj: bs4.PageElement) -> bool: # pragma: no cover
"""Is processing instruction."""
return isinstance(obj, bs4.ProcessingInstruction)
@staticmethod
def is_navigable_string(obj: 'bs4.PageElement') -> bool:
def is_navigable_string(obj: bs4.PageElement) -> bool:
"""Is navigable string."""
return isinstance(obj, bs4.NavigableString)
@staticmethod
def is_special_string(obj: 'bs4.PageElement') -> bool:
def is_special_string(obj: bs4.PageElement) -> bool:
"""Is special string."""
return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
@classmethod
def is_content_string(cls, obj: 'bs4.PageElement') -> bool:
def is_content_string(cls, obj: bs4.PageElement) -> bool:
"""Check if node is content string."""
return cls.is_navigable_string(obj) and not cls.is_special_string(obj)
@staticmethod
def create_fake_parent(el: 'bs4.Tag') -> _FakeParent:
def create_fake_parent(el: bs4.Tag) -> _FakeParent:
"""Create fake parent for a given element."""
return _FakeParent(el)
@staticmethod
def is_xml_tree(el: 'bs4.Tag') -> bool:
def is_xml_tree(el: bs4.Tag) -> bool:
"""Check if element (or document) is from a XML tree."""
return bool(el._is_xml)
def is_iframe(self, el: 'bs4.Tag') -> bool:
def is_iframe(self, el: bs4.Tag) -> bool:
"""Check if element is an `iframe`."""
return bool(
@@ -147,7 +148,7 @@ class _DocumentNav:
self.is_html_tag(el) # type: ignore[attr-defined]
)
def is_root(self, el: 'bs4.Tag') -> bool:
def is_root(self, el: bs4.Tag) -> bool:
"""
Return whether element is a root element.
@@ -161,20 +162,19 @@ class _DocumentNav:
root = parent is not None and self.is_html and self.is_iframe(parent) # type: ignore[attr-defined]
return root
def get_contents(self, el: 'bs4.Tag', no_iframe: bool = False) -> Iterator['bs4.PageElement']:
def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]:
"""Get contents or contents in reverse."""
if not no_iframe or not self.is_iframe(el):
for content in el.contents:
yield content
yield from el.contents
def get_children(
self,
el: 'bs4.Tag',
start: Optional[int] = None,
el: bs4.Tag,
start: int | None = None,
reverse: bool = False,
tags: bool = True,
no_iframe: bool = False
) -> Iterator['bs4.PageElement']:
) -> Iterator[bs4.PageElement]:
"""Get children."""
if not no_iframe or not self.is_iframe(el):
@@ -195,10 +195,10 @@ class _DocumentNav:
def get_descendants(
self,
el: 'bs4.Tag',
el: bs4.Tag,
tags: bool = True,
no_iframe: bool = False
) -> Iterator['bs4.PageElement']:
) -> Iterator[bs4.PageElement]:
"""Get descendants."""
if not no_iframe or not self.is_iframe(el):
@@ -229,7 +229,7 @@ class _DocumentNav:
if not tags or is_tag:
yield child
def get_parent(self, el: 'bs4.Tag', no_iframe: bool = False) -> 'bs4.Tag':
def get_parent(self, el: bs4.Tag, no_iframe: bool = False) -> bs4.Tag:
"""Get parent."""
parent = el.parent
@@ -238,25 +238,25 @@ class _DocumentNav:
return parent
@staticmethod
def get_tag_name(el: 'bs4.Tag') -> Optional[str]:
def get_tag_name(el: bs4.Tag) -> str | None:
"""Get tag."""
return cast(Optional[str], el.name)
return cast('str | None', el.name)
@staticmethod
def get_prefix_name(el: 'bs4.Tag') -> Optional[str]:
def get_prefix_name(el: bs4.Tag) -> str | None:
"""Get prefix."""
return cast(Optional[str], el.prefix)
return cast('str | None', el.prefix)
@staticmethod
def get_uri(el: 'bs4.Tag') -> Optional[str]:
def get_uri(el: bs4.Tag) -> str | None:
"""Get namespace `URI`."""
return cast(Optional[str], el.namespace)
return cast('str | None', el.namespace)
@classmethod
def get_next(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement':
def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
"""Get next sibling tag."""
sibling = el.next_sibling
@@ -265,7 +265,7 @@ class _DocumentNav:
return sibling
@classmethod
def get_previous(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement':
def get_previous(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
"""Get previous sibling tag."""
sibling = el.previous_sibling
@@ -274,7 +274,7 @@ class _DocumentNav:
return sibling
@staticmethod
def has_html_ns(el: 'bs4.Tag') -> bool:
def has_html_ns(el: bs4.Tag) -> bool:
"""
Check if element has an HTML namespace.
@@ -282,17 +282,17 @@ class _DocumentNav:
like we do in the case of `is_html_tag`.
"""
ns = getattr(el, 'namespace') if el else None
ns = getattr(el, 'namespace') if el else None # noqa: B009
return bool(ns and ns == NS_XHTML)
@staticmethod
def split_namespace(el: 'bs4.Tag', attr_name: str) -> Tuple[Optional[str], Optional[str]]:
def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[str | None, str | None]:
"""Return namespace and attribute name without the prefix."""
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
@classmethod
def normalize_value(cls, value: Any) -> Union[str, Sequence[str]]:
def normalize_value(cls, value: Any) -> str | Sequence[str]:
"""Normalize the value to be a string or list of strings."""
# Treat `None` as empty string.
@@ -327,10 +327,10 @@ class _DocumentNav:
@classmethod
def get_attribute_by_name(
cls,
el: 'bs4.Tag',
el: bs4.Tag,
name: str,
default: Optional[Union[str, Sequence[str]]] = None
) -> Optional[Union[str, Sequence[str]]]:
default: str | Sequence[str] | None = None
) -> str | Sequence[str] | None:
"""Get attribute by name."""
value = default
@@ -347,14 +347,14 @@ class _DocumentNav:
return value
@classmethod
def iter_attributes(cls, el: 'bs4.Tag') -> Iterator[Tuple[str, Optional[Union[str, Sequence[str]]]]]:
def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, str | Sequence[str] | None]]:
"""Iterate attributes."""
for k, v in el.attrs.items():
yield k, cls.normalize_value(v)
@classmethod
def get_classes(cls, el: 'bs4.Tag') -> Sequence[str]:
def get_classes(cls, el: bs4.Tag) -> Sequence[str]:
"""Get classes."""
classes = cls.get_attribute_by_name(el, 'class', [])
@@ -362,14 +362,14 @@ class _DocumentNav:
classes = RE_NOT_WS.findall(classes)
return cast(Sequence[str], classes)
def get_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> str:
def get_text(self, el: bs4.Tag, no_iframe: bool = False) -> str:
"""Get text."""
return ''.join(
[node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
)
def get_own_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> List[str]:
def get_own_text(self, el: bs4.Tag, no_iframe: bool = False) -> list[str]:
"""Get Own Text."""
return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
@@ -393,7 +393,7 @@ class Inputs:
def validate_week(year: int, week: int) -> bool:
"""Validate week."""
max_week = datetime.strptime("{}-{}-{}".format(12, 31, year), "%m-%d-%Y").isocalendar()[1]
max_week = datetime.strptime(f"{12}-{31}-{year}", "%m-%d-%Y").isocalendar()[1]
if max_week == 1:
max_week = 53
return 1 <= week <= max_week
@@ -423,10 +423,10 @@ class Inputs:
return 0 <= minutes <= 59
@classmethod
def parse_value(cls, itype: str, value: Optional[str]) -> Optional[Tuple[float, ...]]:
def parse_value(cls, itype: str, value: str | None) -> tuple[float, ...] | None:
"""Parse the input value."""
parsed = None # type: Optional[Tuple[float, ...]]
parsed = None # type: tuple[float, ...] | None
if value is None:
return value
if itype == "date":
@@ -484,19 +484,19 @@ class CSSMatch(_DocumentNav):
def __init__(
self,
selectors: ct.SelectorList,
scope: 'bs4.Tag',
namespaces: Optional[ct.Namespaces],
scope: bs4.Tag,
namespaces: ct.Namespaces | None,
flags: int
) -> None:
"""Initialize."""
self.assert_valid_input(scope)
self.tag = scope
self.cached_meta_lang = [] # type: List[Tuple[str, str]]
self.cached_default_forms = [] # type: List[Tuple['bs4.Tag', 'bs4.Tag']]
self.cached_indeterminate_forms = [] # type: List[Tuple['bs4.Tag', str, bool]]
self.cached_meta_lang = [] # type: list[tuple[str, str]]
self.cached_default_forms = [] # type: list[tuple[bs4.Tag, bs4.Tag]]
self.cached_indeterminate_forms = [] # type: list[tuple[bs4.Tag, str, bool]]
self.selectors = selectors
self.namespaces = {} if namespaces is None else namespaces # type: Union[ct.Namespaces, Dict[str, str]]
self.namespaces = {} if namespaces is None else namespaces # type: ct.Namespaces | dict[str, str]
self.flags = flags
self.iframe_restrict = False
@@ -527,7 +527,7 @@ class CSSMatch(_DocumentNav):
return self.is_xml or self.has_html_namespace
def get_tag_ns(self, el: 'bs4.Tag') -> str:
def get_tag_ns(self, el: bs4.Tag) -> str:
"""Get tag namespace."""
if self.supports_namespaces():
@@ -539,24 +539,24 @@ class CSSMatch(_DocumentNav):
namespace = NS_XHTML
return namespace
def is_html_tag(self, el: 'bs4.Tag') -> bool:
def is_html_tag(self, el: bs4.Tag) -> bool:
"""Check if tag is in HTML namespace."""
return self.get_tag_ns(el) == NS_XHTML
def get_tag(self, el: 'bs4.Tag') -> Optional[str]:
def get_tag(self, el: bs4.Tag) -> str | None:
"""Get tag."""
name = self.get_tag_name(el)
return util.lower(name) if name is not None and not self.is_xml else name
def get_prefix(self, el: 'bs4.Tag') -> Optional[str]:
def get_prefix(self, el: bs4.Tag) -> str | None:
"""Get prefix."""
prefix = self.get_prefix_name(el)
return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
def find_bidi(self, el: 'bs4.Tag') -> Optional[int]:
def find_bidi(self, el: bs4.Tag) -> int | None:
"""Get directionality from element text."""
for node in self.get_children(el, tags=False):
@@ -600,13 +600,18 @@ class CSSMatch(_DocumentNav):
ranges = lang_range.split('-')
subtags = lang_tag.lower().split('-')
length = len(ranges)
slength = len(subtags)
rindex = 0
sindex = 0
r = ranges[rindex]
s = subtags[sindex]
# Empty specified language should match unspecified language attributes
if length == 1 and slength == 1 and not r and r == s:
return True
# Primary tag needs to match
if r != '*' and r != s:
if (r != '*' and r != s) or (r == '*' and slength == 1 and not s):
match = False
rindex += 1
@@ -645,10 +650,10 @@ class CSSMatch(_DocumentNav):
def match_attribute_name(
self,
el: 'bs4.Tag',
el: bs4.Tag,
attr: str,
prefix: Optional[str]
) -> Optional[Union[str, Sequence[str]]]:
prefix: str | None
) -> str | Sequence[str] | None:
"""Match attribute name and return value if it exists."""
value = None
@@ -696,7 +701,7 @@ class CSSMatch(_DocumentNav):
break
return value
def match_namespace(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
def match_namespace(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
"""Match the namespace of the element."""
match = True
@@ -717,7 +722,7 @@ class CSSMatch(_DocumentNav):
match = False
return match
def match_attributes(self, el: 'bs4.Tag', attributes: Tuple[ct.SelectorAttribute, ...]) -> bool:
def match_attributes(self, el: bs4.Tag, attributes: tuple[ct.SelectorAttribute, ...]) -> bool:
"""Match attributes."""
match = True
@@ -736,7 +741,7 @@ class CSSMatch(_DocumentNav):
break
return match
def match_tagname(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
def match_tagname(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
"""Match tag name."""
name = (util.lower(tag.name) if not self.is_xml and tag.name is not None else tag.name)
@@ -745,7 +750,7 @@ class CSSMatch(_DocumentNav):
name not in (self.get_tag(el), '*')
)
def match_tag(self, el: 'bs4.Tag', tag: Optional[ct.SelectorTag]) -> bool:
def match_tag(self, el: bs4.Tag, tag: ct.SelectorTag | None) -> bool:
"""Match the tag."""
match = True
@@ -757,7 +762,7 @@ class CSSMatch(_DocumentNav):
match = False
return match
def match_past_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
def match_past_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
"""Match past relationship."""
found = False
@@ -785,12 +790,12 @@ class CSSMatch(_DocumentNav):
found = self.match_selectors(sibling, relation)
return found
def match_future_child(self, parent: 'bs4.Tag', relation: ct.SelectorList, recursive: bool = False) -> bool:
def match_future_child(self, parent: bs4.Tag, relation: ct.SelectorList, recursive: bool = False) -> bool:
"""Match future child."""
match = False
if recursive:
children = self.get_descendants # type: Callable[..., Iterator['bs4.Tag']]
children = self.get_descendants # type: Callable[..., Iterator[bs4.Tag]]
else:
children = self.get_children
for child in children(parent, no_iframe=self.iframe_restrict):
@@ -799,7 +804,7 @@ class CSSMatch(_DocumentNav):
break
return match
def match_future_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
def match_future_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
"""Match future relationship."""
found = False
@@ -822,7 +827,7 @@ class CSSMatch(_DocumentNav):
found = self.match_selectors(sibling, relation)
return found
def match_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
def match_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
"""Match relationship to other elements."""
found = False
@@ -837,7 +842,7 @@ class CSSMatch(_DocumentNav):
return found
def match_id(self, el: 'bs4.Tag', ids: Tuple[str, ...]) -> bool:
def match_id(self, el: bs4.Tag, ids: tuple[str, ...]) -> bool:
"""Match element's ID."""
found = True
@@ -847,7 +852,7 @@ class CSSMatch(_DocumentNav):
break
return found
def match_classes(self, el: 'bs4.Tag', classes: Tuple[str, ...]) -> bool:
def match_classes(self, el: bs4.Tag, classes: tuple[str, ...]) -> bool:
"""Match element's classes."""
current_classes = self.get_classes(el)
@@ -858,7 +863,7 @@ class CSSMatch(_DocumentNav):
break
return found
def match_root(self, el: 'bs4.Tag') -> bool:
def match_root(self, el: bs4.Tag) -> bool:
"""Match element as root."""
is_root = self.is_root(el)
@@ -884,20 +889,20 @@ class CSSMatch(_DocumentNav):
sibling = self.get_next(sibling, tags=False)
return is_root
def match_scope(self, el: 'bs4.Tag') -> bool:
def match_scope(self, el: bs4.Tag) -> bool:
"""Match element as scope."""
return self.scope is el
def match_nth_tag_type(self, el: 'bs4.Tag', child: 'bs4.Tag') -> bool:
def match_nth_tag_type(self, el: bs4.Tag, child: bs4.Tag) -> bool:
"""Match tag type for `nth` matches."""
return(
return (
(self.get_tag(child) == self.get_tag(el)) and
(self.get_tag_ns(child) == self.get_tag_ns(el))
)
def match_nth(self, el: 'bs4.Tag', nth: 'bs4.Tag') -> bool:
def match_nth(self, el: bs4.Tag, nth: bs4.Tag) -> bool:
"""Match `nth` elements."""
matched = True
@@ -998,7 +1003,7 @@ class CSSMatch(_DocumentNav):
break
return matched
def match_empty(self, el: 'bs4.Tag') -> bool:
def match_empty(self, el: bs4.Tag) -> bool:
"""Check if element is empty (if requested)."""
is_empty = True
@@ -1011,7 +1016,7 @@ class CSSMatch(_DocumentNav):
break
return is_empty
def match_subselectors(self, el: 'bs4.Tag', selectors: Tuple[ct.SelectorList, ...]) -> bool:
def match_subselectors(self, el: bs4.Tag, selectors: tuple[ct.SelectorList, ...]) -> bool:
"""Match selectors."""
match = True
@@ -1020,11 +1025,11 @@ class CSSMatch(_DocumentNav):
match = False
return match
def match_contains(self, el: 'bs4.Tag', contains: Tuple[ct.SelectorContains, ...]) -> bool:
def match_contains(self, el: bs4.Tag, contains: tuple[ct.SelectorContains, ...]) -> bool:
"""Match element if it contains text."""
match = True
content = None # type: Optional[Union[str, Sequence[str]]]
content = None # type: str | Sequence[str] | None
for contain_list in contains:
if content is None:
if contain_list.own:
@@ -1048,7 +1053,7 @@ class CSSMatch(_DocumentNav):
match = False
return match
def match_default(self, el: 'bs4.Tag') -> bool:
def match_default(self, el: bs4.Tag) -> bool:
"""Match default."""
match = False
@@ -1087,13 +1092,13 @@ class CSSMatch(_DocumentNav):
break
return match
def match_indeterminate(self, el: 'bs4.Tag') -> bool:
def match_indeterminate(self, el: bs4.Tag) -> bool:
"""Match default."""
match = False
name = cast(str, self.get_attribute_by_name(el, 'name'))
def get_parent_form(el: 'bs4.Tag') -> Optional['bs4.Tag']:
def get_parent_form(el: bs4.Tag) -> bs4.Tag | None:
"""Find this input's form."""
form = None
parent = self.get_parent(el, no_iframe=True)
@@ -1148,7 +1153,7 @@ class CSSMatch(_DocumentNav):
return match
def match_lang(self, el: 'bs4.Tag', langs: Tuple[ct.SelectorLang, ...]) -> bool:
def match_lang(self, el: bs4.Tag, langs: tuple[ct.SelectorLang, ...]) -> bool:
"""Match languages."""
match = False
@@ -1183,7 +1188,7 @@ class CSSMatch(_DocumentNav):
break
# Use cached meta language.
if not found_lang and self.cached_meta_lang:
if found_lang is None and self.cached_meta_lang:
for cache in self.cached_meta_lang:
if root is cache[0]:
found_lang = cache[1]
@@ -1217,13 +1222,13 @@ class CSSMatch(_DocumentNav):
found_lang = content
self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
break
if found_lang:
if found_lang is not None:
break
if not found_lang:
if found_lang is None:
self.cached_meta_lang.append((cast(str, root), ''))
# If we determined a language, compare.
if found_lang:
if found_lang is not None:
for patterns in langs:
match = False
for pattern in patterns:
@@ -1234,7 +1239,7 @@ class CSSMatch(_DocumentNav):
return match
def match_dir(self, el: 'bs4.Tag', directionality: int) -> bool:
def match_dir(self, el: bs4.Tag, directionality: int) -> bool:
"""Check directionality."""
# If we have to match both left and right, we can't match either.
@@ -1266,11 +1271,7 @@ class CSSMatch(_DocumentNav):
# Auto handling for text inputs
if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0:
if is_textarea:
temp = []
for node in self.get_contents(el, no_iframe=True):
if self.is_content_string(node):
temp.append(node)
value = ''.join(temp)
value = ''.join(node for node in self.get_contents(el, no_iframe=True) if self.is_content_string(node))
else:
value = cast(str, self.get_attribute_by_name(el, 'value', ''))
if value:
@@ -1297,7 +1298,7 @@ class CSSMatch(_DocumentNav):
# Match parents direction
return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
def match_range(self, el: 'bs4.Tag', condition: int) -> bool:
def match_range(self, el: bs4.Tag, condition: int) -> bool:
"""
Match range.
@@ -1337,7 +1338,7 @@ class CSSMatch(_DocumentNav):
return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range
def match_defined(self, el: 'bs4.Tag') -> bool:
def match_defined(self, el: bs4.Tag) -> bool:
"""
Match defined.
@@ -1360,7 +1361,7 @@ class CSSMatch(_DocumentNav):
)
)
def match_placeholder_shown(self, el: 'bs4.Tag') -> bool:
def match_placeholder_shown(self, el: bs4.Tag) -> bool:
"""
Match placeholder shown according to HTML spec.
@@ -1375,7 +1376,7 @@ class CSSMatch(_DocumentNav):
return match
def match_selectors(self, el: 'bs4.Tag', selectors: ct.SelectorList) -> bool:
def match_selectors(self, el: bs4.Tag, selectors: ct.SelectorList) -> bool:
"""Check if element matches one of the selectors."""
match = False
@@ -1459,7 +1460,7 @@ class CSSMatch(_DocumentNav):
return match
def select(self, limit: int = 0) -> Iterator['bs4.Tag']:
def select(self, limit: int = 0) -> Iterator[bs4.Tag]:
"""Match all tags under the targeted tag."""
lim = None if limit < 1 else limit
@@ -1472,7 +1473,7 @@ class CSSMatch(_DocumentNav):
if lim < 1:
break
def closest(self) -> Optional['bs4.Tag']:
def closest(self) -> bs4.Tag | None:
"""Match closest ancestor."""
current = self.tag
@@ -1484,12 +1485,12 @@ class CSSMatch(_DocumentNav):
current = self.get_parent(current)
return closest
def filter(self) -> List['bs4.Tag']: # noqa A001
def filter(self) -> list[bs4.Tag]: # noqa A001
"""Filter tag's children."""
return [tag for tag in self.get_contents(self.tag) if not self.is_navigable_string(tag) and self.match(tag)]
def match(self, el: 'bs4.Tag') -> bool:
def match(self, el: bs4.Tag) -> bool:
"""Match."""
return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)
@@ -1500,8 +1501,8 @@ class SoupSieve(ct.Immutable):
pattern: str
selectors: ct.SelectorList
namespaces: Optional[ct.Namespaces]
custom: Dict[str, str]
namespaces: ct.Namespaces | None
custom: dict[str, str]
flags: int
__slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")
@@ -1510,8 +1511,8 @@ class SoupSieve(ct.Immutable):
self,
pattern: str,
selectors: ct.SelectorList,
namespaces: Optional[ct.Namespaces],
custom: Optional[ct.CustomSelectors],
namespaces: ct.Namespaces | None,
custom: ct.CustomSelectors | None,
flags: int
):
"""Initialize."""
@@ -1524,17 +1525,17 @@ class SoupSieve(ct.Immutable):
flags=flags
)
def match(self, tag: 'bs4.Tag') -> bool:
def match(self, tag: bs4.Tag) -> bool:
"""Match."""
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag)
def closest(self, tag: 'bs4.Tag') -> 'bs4.Tag':
def closest(self, tag: bs4.Tag) -> bs4.Tag:
"""Match closest ancestor."""
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest()
def filter(self, iterable: Iterable['bs4.Tag']) -> List['bs4.Tag']: # noqa A001
def filter(self, iterable: Iterable[bs4.Tag]) -> list[bs4.Tag]: # noqa A001
"""
Filter.
@@ -1551,31 +1552,28 @@ class SoupSieve(ct.Immutable):
else:
return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
def select_one(self, tag: 'bs4.Tag') -> 'bs4.Tag':
def select_one(self, tag: bs4.Tag) -> bs4.Tag:
"""Select a single tag."""
tags = self.select(tag, limit=1)
return tags[0] if tags else None
def select(self, tag: 'bs4.Tag', limit: int = 0) -> List['bs4.Tag']:
def select(self, tag: bs4.Tag, limit: int = 0) -> list[bs4.Tag]:
"""Select the specified tags."""
return list(self.iselect(tag, limit))
def iselect(self, tag: 'bs4.Tag', limit: int = 0) -> Iterator['bs4.Tag']:
def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]:
"""Iterate the specified tags."""
for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit):
yield el
yield from CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit)
def __repr__(self) -> str: # pragma: no cover
"""Representation."""
return "SoupSieve(pattern={!r}, namespaces={!r}, custom={!r}, flags={!r})".format(
self.pattern,
self.namespaces,
self.custom,
self.flags
return (
f"SoupSieve(pattern={self.pattern!r}, namespaces={self.namespaces!r}, "
f"custom={self.custom!r}, flags={self.flags!r})"
)
__str__ = __repr__

View File

@@ -1,4 +1,5 @@
"""CSS selector parser."""
from __future__ import annotations
import re
from functools import lru_cache
from . import util
@@ -6,7 +7,7 @@ from . import css_match as cm
from . import css_types as ct
from .util import SelectorSyntaxError
import warnings
from typing import Optional, Dict, Match, Tuple, Type, Any, List, Union, Iterator, cast
from typing import Match, Any, Iterator, cast
UNICODE_REPLACEMENT_CHAR = 0xFFFD
@@ -91,94 +92,81 @@ PSEUDO_SUPPORTED = PSEUDO_SIMPLE | PSEUDO_SIMPLE_NO_MATCH | PSEUDO_COMPLEX | PSE
# Sub-patterns parts
# NOTE: several patterns below embed `IDENTIFIER`, which spans multiple lines,
# so anything built from it must be compiled with `re.X` (verbose mode).

# Whitespace
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
WS = fr'(?:[ \t]|{NEWLINE})'
# Comments
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included
WSC = fr'(?:{WS}|{COMMENTS})'
# CSS escapes
CSS_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$))'
CSS_STRING_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$|{NEWLINE}))'
# CSS Identifier
IDENTIFIER = fr'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})*)
'''
# `nth` content
NTH = fr'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){WSC}*(?:[-+]){WSC}*(?:[0-9]+))?'
# Value: quoted string or identifier
VALUE = fr'''(?:"(?:\\(?:.|{NEWLINE})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{NEWLINE})|[^\\'\r\n\f]+)*?'|{IDENTIFIER}+)'''
# Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = fr'(?:{WSC}*(?P<cmp>[!~^|*$]?=){WSC}*(?P<value>{VALUE})(?:{WSC}*(?P<case>[is]))?)?{WSC}*\]'

# Selector patterns
# IDs (`#id`)
PAT_ID = fr'\#{IDENTIFIER}'
# Classes (`.class`)
PAT_CLASS = fr'\.{IDENTIFIER}'
# Prefix:Tag (`prefix|tag`)
PAT_TAG = fr'(?P<tag_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<tag_name>{IDENTIFIER}|\*)'
# Attributes (`[attr]`, `[attr=value]`, etc.)
PAT_ATTR = fr'\[{WSC}*(?P<attr_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<attr_name>{IDENTIFIER}){ATTR}'
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
PAT_PSEUDO_CLASS = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)?'
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
PAT_PSEUDO_CLASS_SPECIAL = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)'
# Custom pseudo class (`:--custom-pseudo`)
PAT_PSEUDO_CLASS_CUSTOM = fr'(?P<name>:(?=--){IDENTIFIER})'
# Nesting ampersand selector. Matches `&`
PAT_AMP = r'&'
# Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = fr'{WSC}*\)'
# Pseudo element (`::pseudo-element`)
PAT_PSEUDO_ELEMENT = fr':{PAT_PSEUDO_CLASS}'
# At rule (`@page`, etc.) (not supported)
# NOTE(review): the literal `P` after `@` means only at-rules starting with
# that letter are matched by this pattern; kept as-is - confirm it is intended.
PAT_AT_RULE = fr'@P{IDENTIFIER}'
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
PAT_PSEUDO_NTH_CHILD = fr'''
(?P<pseudo_nth_child>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_child>{NTH}|even|odd))(?:{WSC}*\)|(?P<of>{COMMENTS}*{WS}{WSC}*of{COMMENTS}*{WS}{WSC}*))
'''
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
PAT_PSEUDO_NTH_TYPE = fr'''
(?P<pseudo_nth_type>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_type>{NTH}|even|odd)){WSC}*\)
'''
# Pseudo class language (`:lang("*-de", en)`)
PAT_PSEUDO_LANG = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
# Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<dir>ltr|rtl){WSC}*\)'
# Combining characters (`>`, `~`, ` `, `+`, `,`)
PAT_COMBINE = fr'{WSC}*?(?P<relation>[,+>~]|{WS}(?![,+>~])){WSC}*'
# Extra: Contains (`:contains(text)`)
PAT_PSEUDO_CONTAINS = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'

# Regular expressions
# CSS escape pattern
RE_CSS_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WSC}?)|(\\[^\r\n\f])|(\\$))', re.I)
RE_CSS_STR_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$)|(\\{NEWLINE}))', re.I)
# Pattern to break up `nth` specifiers
RE_NTH = re.compile(fr'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){WSC}*(?P<s2>[-+]){WSC}*(?P<b>[0-9]+))?', re.I)
# Pattern to iterate multiple values.
RE_VALUES = re.compile(fr'(?:(?P<value>{VALUE})|(?P<split>{WSC}*,{WSC}*))', re.X)
# Whitespace checks
RE_WS = re.compile(WS)
RE_WS_BEGIN = re.compile(fr'^{WSC}*')
RE_WS_END = re.compile(fr'{WSC}*$')
RE_CUSTOM = re.compile(fr'^{PAT_PSEUDO_CLASS_CUSTOM}$', re.X)
# Constants
# List split token
@@ -206,8 +194,8 @@ _MAXCACHE = 500
@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(
pattern: str,
namespaces: Optional[ct.Namespaces],
custom: Optional[ct.CustomSelectors],
namespaces: ct.Namespaces | None,
custom: ct.CustomSelectors | None,
flags: int
) -> cm.SoupSieve:
"""Cached CSS compile."""
@@ -232,7 +220,7 @@ def _purge_cache() -> None:
_cached_css_compile.cache_clear()
def process_custom(custom: Optional[ct.CustomSelectors]) -> Dict[str, Union[str, ct.SelectorList]]:
def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
"""Process custom."""
custom_selectors = {}
@@ -240,9 +228,9 @@ def process_custom(custom: Optional[ct.CustomSelectors]) -> Dict[str, Union[str,
for key, value in custom.items():
name = util.lower(key)
if RE_CUSTOM.match(name) is None:
raise SelectorSyntaxError("The name '{}' is not a valid custom pseudo-class name".format(name))
raise SelectorSyntaxError(f"The name '{name}' is not a valid custom pseudo-class name")
if name in custom_selectors:
raise KeyError("The custom selector '{}' has already been registered".format(name))
raise KeyError(f"The custom selector '{name}' has already been registered")
custom_selectors[css_unescape(name)] = value
return custom_selectors
@@ -282,23 +270,23 @@ def escape(ident: str) -> str:
start_dash = length > 0 and ident[0] == '-'
if length == 1 and start_dash:
# Need to escape identifier that is a single `-` with no other characters
string.append('\\{}'.format(ident))
string.append(f'\\{ident}')
else:
for index, c in enumerate(ident):
codepoint = ord(c)
if codepoint == 0x00:
string.append('\ufffd')
elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
string.append('\\{:x} '.format(codepoint))
string.append(f'\\{codepoint:x} ')
elif (index == 0 or (start_dash and index == 1)) and (0x30 <= codepoint <= 0x39):
string.append('\\{:x} '.format(codepoint))
string.append(f'\\{codepoint:x} ')
elif (
codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or (0x30 <= codepoint <= 0x39) or
(0x30 <= codepoint <= 0x39) or (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A)
):
string.append(c)
else:
string.append('\\{}'.format(c))
string.append(f'\\{c}')
return ''.join(string)
@@ -316,7 +304,7 @@ class SelectorPattern:
return self.name
def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
    """Match the selector.

    Applies the compiled `re_pattern` to `selector` starting at `index` and
    returns the match object, or `None` when it does not match there.
    `flags` is accepted but not used by this implementation.
    """

    return self.re_pattern.match(selector, index)
@@ -325,7 +313,7 @@ class SelectorPattern:
class SpecialPseudoPattern(SelectorPattern):
"""Selector pattern."""
def __init__(self, patterns: Tuple[Tuple[str, Tuple[str, ...], str, Type[SelectorPattern]], ...]) -> None:
def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None:
"""Initialize."""
self.patterns = {}
@@ -335,7 +323,7 @@ class SpecialPseudoPattern(SelectorPattern):
for pseudo in p[1]:
self.patterns[pseudo] = pattern
self.matched_name = None # type: Optional[SelectorPattern]
self.matched_name = None # type: SelectorPattern | None
self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)
def get_name(self) -> str:
@@ -343,7 +331,7 @@ class SpecialPseudoPattern(SelectorPattern):
return '' if self.matched_name is None else self.matched_name.get_name()
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
"""Match the selector."""
pseudo = None
@@ -371,20 +359,20 @@ class _Selector:
def __init__(self, **kwargs: Any) -> None:
    """Initialize.

    Every attribute is taken from the keyword argument of the same name.
    Missing list-valued attributes default to a fresh empty list, `tag` and
    `rel_type` default to `None`, `flags` to `0` and `no_match` to `False`.
    """

    self.tag = kwargs.get('tag', None)  # type: ct.SelectorTag | None
    self.ids = kwargs.get('ids', [])  # type: list[str]
    self.classes = kwargs.get('classes', [])  # type: list[str]
    self.attributes = kwargs.get('attributes', [])  # type: list[ct.SelectorAttribute]
    self.nth = kwargs.get('nth', [])  # type: list[ct.SelectorNth]
    self.selectors = kwargs.get('selectors', [])  # type: list[ct.SelectorList]
    self.relations = kwargs.get('relations', [])  # type: list[_Selector]
    self.rel_type = kwargs.get('rel_type', None)  # type: str | None
    self.contains = kwargs.get('contains', [])  # type: list[ct.SelectorContains]
    self.lang = kwargs.get('lang', [])  # type: list[ct.SelectorLang]
    self.flags = kwargs.get('flags', 0)  # type: int
    self.no_match = kwargs.get('no_match', False)  # type: bool
def _freeze_relations(self, relations: List['_Selector']) -> ct.SelectorList:
def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList:
"""Freeze relation."""
if relations:
@@ -394,7 +382,7 @@ class _Selector:
else:
return ct.SelectorList()
def freeze(self) -> Union[ct.Selector, ct.SelectorNull]:
def freeze(self) -> ct.Selector | ct.SelectorNull:
"""Freeze self."""
if self.no_match:
@@ -418,11 +406,10 @@ class _Selector:
"""String representation."""
return (
'_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, '
'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})'
).format(
self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors,
self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match
f'_Selector(tag={self.tag!r}, ids={self.ids!r}, classes={self.classes!r}, attributes={self.attributes!r}, '
f'nth={self.nth!r}, selectors={self.selectors!r}, relations={self.relations!r}, '
f'rel_type={self.rel_type!r}, contains={self.contains!r}, lang={self.lang!r}, flags={self.flags!r}, '
f'no_match={self.no_match!r})'
)
__repr__ = __str__
@@ -450,6 +437,7 @@ class CSSParser:
SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS),
SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT),
SelectorPattern("amp", PAT_AMP),
SelectorPattern("at_rule", PAT_AT_RULE),
SelectorPattern("id", PAT_ID),
SelectorPattern("class", PAT_CLASS),
@@ -461,7 +449,7 @@ class CSSParser:
def __init__(
self,
selector: str,
custom: Optional[Dict[str, Union[str, ct.SelectorList]]] = None,
custom: dict[str, str | ct.SelectorList] | None = None,
flags: int = 0
) -> None:
"""Initialize."""
@@ -562,7 +550,7 @@ class CSSParser:
selector = self.custom.get(pseudo)
if selector is None:
raise SelectorSyntaxError(
"Undefined custom selector '{}' found at postion {}".format(pseudo, m.end(0)),
f"Undefined custom selector '{pseudo}' found at position {m.end(0)}",
self.pattern,
m.end(0)
)
@@ -583,9 +571,9 @@ class CSSParser:
sel: _Selector,
m: Match[str],
has_selector: bool,
iselector: Iterator[Tuple[str, Match[str]]],
iselector: Iterator[tuple[str, Match[str]]],
is_html: bool
) -> Tuple[bool, bool]:
) -> tuple[bool, bool]:
"""Parse pseudo class."""
complex_pseudo = False
@@ -662,13 +650,16 @@ class CSSParser:
has_selector = True
elif pseudo in PSEUDO_SUPPORTED:
raise SelectorSyntaxError(
"Invalid syntax for pseudo class '{}'".format(pseudo),
f"Invalid syntax for pseudo class '{pseudo}'",
self.pattern,
m.start(0)
)
else:
raise NotImplementedError(
"'{}' pseudo-class is not implemented at this time".format(pseudo)
raise SelectorSyntaxError(
f"'{pseudo}' was detected as a pseudo-class and is either unsupported or invalid. "
"If the syntax was not intended to be recognized as a pseudo-class, please escape the colon.",
self.pattern,
m.start(0)
)
return has_selector, is_html
@@ -678,7 +669,7 @@ class CSSParser:
sel: _Selector,
m: Match[str],
has_selector: bool,
iselector: Iterator[Tuple[str, Match[str]]]
iselector: Iterator[tuple[str, Match[str]]]
) -> bool:
"""Parse `nth` pseudo."""
@@ -743,7 +734,7 @@ class CSSParser:
sel: _Selector,
name: str,
has_selector: bool,
iselector: Iterator[Tuple[str, Match[str]]],
iselector: Iterator[tuple[str, Match[str]]],
index: int
) -> bool:
"""Parse pseudo with opening bracket."""
@@ -752,7 +743,7 @@ class CSSParser:
if name == ':not':
flags |= FLG_NOT
elif name == ':has':
flags |= FLG_RELATIVE | FLG_FORGIVE
flags |= FLG_RELATIVE
elif name in (':where', ':is'):
flags |= FLG_FORGIVE
@@ -766,21 +757,16 @@ class CSSParser:
sel: _Selector,
m: Match[str],
has_selector: bool,
selectors: List[_Selector],
selectors: list[_Selector],
rel_type: str,
index: int
) -> Tuple[bool, _Selector, str]:
) -> tuple[bool, _Selector, str]:
"""Parse combinator tokens."""
combinator = m.group('relation').strip()
if not combinator:
combinator = WS_COMBINATOR
if combinator == COMMA_COMBINATOR:
if not has_selector:
# If we've not captured any selector parts, the comma is either at the beginning of the pattern
# or following another comma, both of which are unexpected. But shouldn't fail the pseudo-class.
sel.no_match = True
sel.rel_type = rel_type
selectors[-1].relations.append(sel)
rel_type = ":" + WS_COMBINATOR
@@ -797,7 +783,7 @@ class CSSParser:
# multiple non-whitespace combinators. So if the current combinator is not a whitespace,
# then we've hit the multiple combinator case, so we should fail.
raise SelectorSyntaxError(
'The multiple combinators at position {}'.format(index),
f'The multiple combinators at position {index}',
self.pattern,
index
)
@@ -814,12 +800,12 @@ class CSSParser:
sel: _Selector,
m: Match[str],
has_selector: bool,
selectors: List[_Selector],
relations: List[_Selector],
selectors: list[_Selector],
relations: list[_Selector],
is_pseudo: bool,
is_forgive: bool,
index: int
) -> Tuple[bool, _Selector]:
) -> tuple[bool, _Selector]:
"""Parse combinator tokens."""
combinator = m.group('relation').strip()
@@ -828,7 +814,7 @@ class CSSParser:
if not has_selector:
if not is_forgive or combinator != COMMA_COMBINATOR:
raise SelectorSyntaxError(
"The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
f"The combinator '{combinator}' at position {index}, must have a selector before it",
self.pattern,
index
)
@@ -873,7 +859,7 @@ class CSSParser:
pseudo = util.lower(css_unescape(m.group('name')))
if pseudo == ":contains":
warnings.warn(
warnings.warn( # noqa: B028
"The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
FutureWarning
)
@@ -924,7 +910,7 @@ class CSSParser:
def parse_selectors(
self,
iselector: Iterator[Tuple[str, Match[str]]],
iselector: Iterator[tuple[str, Match[str]]],
index: int = 0,
flags: int = 0
) -> ct.SelectorList:
@@ -935,7 +921,7 @@ class CSSParser:
selectors = []
has_selector = False
closed = False
relations = [] # type: List[_Selector]
relations = [] # type: list[_Selector]
rel_type = ":" + WS_COMBINATOR
# Setup various flags
@@ -986,13 +972,16 @@ class CSSParser:
# Handle parts
if key == "at_rule":
raise NotImplementedError("At-rules found at position {}".format(m.start(0)))
raise NotImplementedError(f"At-rules found at position {m.start(0)}")
elif key == "amp":
sel.flags |= ct.SEL_SCOPE
has_selector = True
elif key == 'pseudo_class_custom':
has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
elif key == 'pseudo_class':
has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
elif key == 'pseudo_element':
raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
raise NotImplementedError(f"Pseudo-element found at position {m.start(0)}")
elif key == 'pseudo_contains':
has_selector = self.parse_pseudo_contains(sel, m, has_selector)
elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
@@ -1007,7 +996,7 @@ class CSSParser:
if not has_selector:
if not is_forgive:
raise SelectorSyntaxError(
"Expected a selector at postion {}".format(m.start(0)),
f"Expected a selector at position {m.start(0)}",
self.pattern,
m.start(0)
)
@@ -1017,7 +1006,7 @@ class CSSParser:
break
else:
raise SelectorSyntaxError(
"Unmatched pseudo-class close at postion {}".format(m.start(0)),
f"Unmatched pseudo-class close at position {m.start(0)}",
self.pattern,
m.start(0)
)
@@ -1035,7 +1024,7 @@ class CSSParser:
elif key == 'tag':
if has_selector:
raise SelectorSyntaxError(
"Tag name found at position {} instead of at the start".format(m.start(0)),
f"Tag name found at position {m.start(0)} instead of at the start",
self.pattern,
m.start(0)
)
@@ -1050,7 +1039,7 @@ class CSSParser:
# Handle selectors that are not closed
if is_open and not closed:
raise SelectorSyntaxError(
"Unclosed pseudo-class at position {}".format(index),
f"Unclosed pseudo-class at position {index}",
self.pattern,
index
)
@@ -1069,28 +1058,18 @@ class CSSParser:
selectors.append(sel)
# Forgive empty slots in pseudo-classes that have lists (and are forgiving)
elif is_forgive:
if is_relative:
# Handle relative selectors pseudo-classes with empty slots like `:has()`
if selectors and selectors[-1].rel_type is None and rel_type == ': ':
sel.rel_type = rel_type
sel.no_match = True
selectors[-1].relations.append(sel)
has_selector = True
else:
# Handle normal pseudo-classes with empty slots
if not selectors or not relations:
# Others like `:is()` etc.
sel.no_match = True
del relations[:]
selectors.append(sel)
has_selector = True
elif is_forgive and (not selectors or not relations):
# Handle normal pseudo-classes with empty slots like `:is()` etc.
sel.no_match = True
del relations[:]
selectors.append(sel)
has_selector = True
if not has_selector:
# We will always need to finish a selector when `:has()` is used as it leads with combining.
# May apply to others as well.
raise SelectorSyntaxError(
'Expected a selector at position {}'.format(index),
f'Expected a selector at position {index}',
self.pattern,
index
)
@@ -1112,7 +1091,7 @@ class CSSParser:
# Return selector list
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
def selector_iter(self, pattern: str) -> Iterator[Tuple[str, Match[str]]]:
def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]:
"""Iterate selector tokens."""
# Ignore whitespace and comments at start and end of pattern
@@ -1122,7 +1101,7 @@ class CSSParser:
end = (m.start(0) - 1) if m else (len(pattern) - 1)
if self.debug: # pragma: no cover
print('## PARSING: {!r}'.format(pattern))
print(f'## PARSING: {pattern!r}')
while index <= end:
m = None
for v in self.css_tokens:
@@ -1130,7 +1109,7 @@ class CSSParser:
if m:
name = v.get_name()
if self.debug: # pragma: no cover
print("TOKEN: '{}' --> {!r} at position {}".format(name, m.group(0), m.start(0)))
print(f"TOKEN: '{name}' --> {m.group(0)!r} at position {m.start(0)}")
index = m.end(0)
yield name, m
break
@@ -1140,15 +1119,15 @@ class CSSParser:
# throw an exception mentioning that the known selector type is in error;
# otherwise, report the invalid character.
if c == '[':
msg = "Malformed attribute selector at position {}".format(index)
msg = f"Malformed attribute selector at position {index}"
elif c == '.':
msg = "Malformed class selector at position {}".format(index)
msg = f"Malformed class selector at position {index}"
elif c == '#':
msg = "Malformed id selector at position {}".format(index)
msg = f"Malformed id selector at position {index}"
elif c == ':':
msg = "Malformed pseudo-class selector at position {}".format(index)
msg = f"Malformed pseudo-class selector at position {index}"
else:
msg = "Invalid character {!r} position {}".format(c, index)
msg = f"Invalid character {c!r} position {index}"
raise SelectorSyntaxError(msg, self.pattern, index)
if self.debug: # pragma: no cover
print('## END PARSING')

View File

@@ -1,7 +1,8 @@
"""CSS selector structure items."""
from __future__ import annotations
import copyreg
from .pretty import pretty
from typing import Any, Type, Tuple, Union, Dict, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
from typing import Any, Iterator, Hashable, Pattern, Iterable, Mapping
__all__ = (
'Selector',
@@ -33,7 +34,7 @@ SEL_PLACEHOLDER_SHOWN = 0x400
class Immutable:
"""Immutable."""
__slots__: Tuple[str, ...] = ('_hash',)
__slots__: tuple[str, ...] = ('_hash',)
_hash: int
@@ -44,11 +45,11 @@ class Immutable:
for k, v in kwargs.items():
temp.append(type(v))
temp.append(v)
super(Immutable, self).__setattr__(k, v)
super(Immutable, self).__setattr__('_hash', hash(tuple(temp)))
super().__setattr__(k, v)
super().__setattr__('_hash', hash(tuple(temp)))
@classmethod
def __base__(cls) -> type[Immutable]:
    """Get base class.

    Returns the class the method is called on.
    """

    return cls
@@ -58,7 +59,7 @@ class Immutable:
return (
isinstance(other, self.__base__()) and
all([getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash'])
all(getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash')
)
def __ne__(self, other: Any) -> bool:
@@ -66,7 +67,7 @@ class Immutable:
return (
not isinstance(other, self.__base__()) or
any([getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash'])
any(getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash')
)
def __hash__(self) -> int:
@@ -77,14 +78,13 @@ class Immutable:
def __setattr__(self, name: str, value: Any) -> None:
    """Prevent mutability.

    Always raises `AttributeError` naming the instance's class, regardless
    of which attribute is being assigned.
    """

    raise AttributeError(f"'{self.__class__.__name__}' is immutable")
def __repr__(self) -> str:  # pragma: no cover
    """Representation.

    Renders `ClassName(slot=value, ...)` for every entry of `__slots__`
    except the last one.
    """

    r = ', '.join([f"{k}={getattr(self, k)!r}" for k in self.__slots__[:-1]])
    return f"{self.__class__.__name__}({r})"
__str__ = __repr__
@@ -99,7 +99,7 @@ class ImmutableDict(Mapping[Any, Any]):
def __init__(
self,
arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]
arg: dict[Any, Any] | Iterable[tuple[Any, Any]]
) -> None:
"""Initialize."""
@@ -107,14 +107,14 @@ class ImmutableDict(Mapping[Any, Any]):
self._d = dict(arg)
self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
def _validate(self, arg: dict[Any, Any] | Iterable[tuple[Any, Any]]) -> None:
    """Validate arguments.

    For a `dict`, every value must be hashable. For any other input, it is
    iterated as `(key, value)` pairs and both members of every pair must be
    hashable. Raises `TypeError` otherwise.
    """

    # NOTE(review): the non-dict branch iterates `arg`; a one-shot iterator
    # would be consumed here - verify callers only pass re-iterable inputs.
    if isinstance(arg, dict):
        if not all(isinstance(v, Hashable) for v in arg.values()):
            raise TypeError(f'{self.__class__.__name__} values must be hashable')
    elif not all(isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg):
        raise TypeError(f'{self.__class__.__name__} values must be hashable')
def __iter__(self) -> Iterator[Any]:
"""Iterator."""
@@ -139,7 +139,7 @@ class ImmutableDict(Mapping[Any, Any]):
def __repr__(self) -> str:  # pragma: no cover
    """Representation.

    Returns the `repr` of the wrapped `_d` mapping.
    """

    return f"{self._d!r}"
__str__ = __repr__
@@ -147,37 +147,37 @@ class ImmutableDict(Mapping[Any, Any]):
class Namespaces(ImmutableDict):
    """Namespaces."""

    def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
        """Initialize."""

        super().__init__(arg)

    def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
        """Validate arguments.

        For a `dict`, every value must be a `str`. For any other input, it
        is iterated as `(key, value)` pairs and both members must be `str`.
        Raises `TypeError` otherwise.
        """

        if isinstance(arg, dict):
            if not all(isinstance(v, str) for v in arg.values()):
                # The check is for `str`, so the message says so (it used to
                # claim the values had to be "hashable").
                raise TypeError(f'{self.__class__.__name__} values must be Unicode strings')
        elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
            raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
class CustomSelectors(ImmutableDict):
    """Custom selectors."""

    def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
        """Initialize."""

        super().__init__(arg)

    def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
        """Validate arguments.

        For a `dict`, every value must be a `str`. For any other input, it
        is iterated as `(key, value)` pairs and both members must be `str`.
        Raises `TypeError` otherwise.
        """

        if isinstance(arg, dict):
            if not all(isinstance(v, str) for v in arg.values()):
                # The check is for `str`, so the message says so (it used to
                # claim the values had to be "hashable").
                raise TypeError(f'{self.__class__.__name__} values must be Unicode strings')
        elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
            raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
class Selector(Immutable):
@@ -188,30 +188,30 @@ class Selector(Immutable):
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
)
tag: Optional['SelectorTag']
ids: Tuple[str, ...]
classes: Tuple[str, ...]
attributes: Tuple['SelectorAttribute', ...]
nth: Tuple['SelectorNth', ...]
selectors: Tuple['SelectorList', ...]
relation: 'SelectorList'
rel_type: Optional[str]
contains: Tuple['SelectorContains', ...]
lang: Tuple['SelectorLang', ...]
tag: SelectorTag | None
ids: tuple[str, ...]
classes: tuple[str, ...]
attributes: tuple[SelectorAttribute, ...]
nth: tuple[SelectorNth, ...]
selectors: tuple[SelectorList, ...]
relation: SelectorList
rel_type: str | None
contains: tuple[SelectorContains, ...]
lang: tuple[SelectorLang, ...]
flags: int
def __init__(
self,
tag: Optional['SelectorTag'],
ids: Tuple[str, ...],
classes: Tuple[str, ...],
attributes: Tuple['SelectorAttribute', ...],
nth: Tuple['SelectorNth', ...],
selectors: Tuple['SelectorList', ...],
relation: 'SelectorList',
rel_type: Optional[str],
contains: Tuple['SelectorContains', ...],
lang: Tuple['SelectorLang', ...],
tag: SelectorTag | None,
ids: tuple[str, ...],
classes: tuple[str, ...],
attributes: tuple[SelectorAttribute, ...],
nth: tuple[SelectorNth, ...],
selectors: tuple[SelectorList, ...],
relation: SelectorList,
rel_type: str | None,
contains: tuple[SelectorContains, ...],
lang: tuple[SelectorLang, ...],
flags: int
):
"""Initialize."""
@@ -246,9 +246,9 @@ class SelectorTag(Immutable):
__slots__ = ("name", "prefix", "_hash")
name: str
prefix: Optional[str]
prefix: str | None
def __init__(self, name: str, prefix: str | None) -> None:
    """Initialize.

    Forwards `name` and `prefix` as keyword arguments to the parent
    initializer.
    """

    super().__init__(name=name, prefix=prefix)
@@ -261,15 +261,15 @@ class SelectorAttribute(Immutable):
attribute: str
prefix: str
pattern: Optional[Pattern[str]]
xml_type_pattern: Optional[Pattern[str]]
pattern: Pattern[str] | None
xml_type_pattern: Pattern[str] | None
def __init__(
self,
attribute: str,
prefix: str,
pattern: Optional[Pattern[str]],
xml_type_pattern: Optional[Pattern[str]]
pattern: Pattern[str] | None,
xml_type_pattern: Pattern[str] | None
) -> None:
"""Initialize."""
@@ -286,7 +286,7 @@ class SelectorContains(Immutable):
__slots__ = ("text", "own", "_hash")
text: Tuple[str, ...]
text: tuple[str, ...]
own: bool
def __init__(self, text: Iterable[str], own: bool) -> None:
@@ -305,9 +305,9 @@ class SelectorNth(Immutable):
b: int
of_type: bool
last: bool
selectors: 'SelectorList'
selectors: SelectorList
def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: 'SelectorList') -> None:
def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: SelectorList) -> None:
"""Initialize."""
super().__init__(
@@ -325,7 +325,7 @@ class SelectorLang(Immutable):
__slots__ = ("languages", "_hash",)
languages: Tuple[str, ...]
languages: tuple[str, ...]
def __init__(self, languages: Iterable[str]):
"""Initialize."""
@@ -353,25 +353,25 @@ class SelectorList(Immutable):
__slots__ = ("selectors", "is_not", "is_html", "_hash")
selectors: Tuple[Union['Selector', 'SelectorNull'], ...]
selectors: tuple[Selector | SelectorNull, ...]
is_not: bool
is_html: bool
def __init__(
    self,
    selectors: Iterable[Selector | SelectorNull] | None = None,
    is_not: bool = False,
    is_html: bool = False
) -> None:
    """Initialize.

    `selectors` is frozen into a tuple; `None` becomes the empty tuple.
    All three values are forwarded as keyword arguments to the parent
    initializer.
    """

    super().__init__(
        selectors=tuple(selectors) if selectors is not None else (),
        is_not=is_not,
        is_html=is_html
    )
def __iter__(self) -> Iterator[Union['Selector', 'SelectorNull']]:
def __iter__(self) -> Iterator[Selector | SelectorNull]:
"""Iterator."""
return iter(self.selectors)
@@ -381,7 +381,7 @@ class SelectorList(Immutable):
return len(self.selectors)
def __getitem__(self, index: int) -> Union['Selector', 'SelectorNull']:
def __getitem__(self, index: int) -> Selector | SelectorNull:
"""Get item."""
return self.selectors[index]

View File

@@ -10,7 +10,7 @@ The format and various output types is fairly known (though it
hasn't been tested extensively to make sure we aren't missing corners).
Example:
-------
```
>>> import soupsieve as sv
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
@@ -64,7 +64,9 @@ SelectorList(
is_not=False,
is_html=False)
```
"""
from __future__ import annotations
import re
from typing import Any
@@ -122,16 +124,16 @@ def pretty(obj: Any) -> str: # pragma: no cover
index = m.end(0)
if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
indent += 4
output.append('{}\n{}'.format(m.group(0), " " * indent))
output.append(f'{m.group(0)}\n{" " * indent}')
elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
output.append(m.group(0))
elif name in ('lend', 'dend', 'tend'):
indent -= 4
output.append(m.group(0))
elif name in ('sep',):
output.append('{}\n{}'.format(m.group(1), " " * indent))
output.append(f'{m.group(1)}\n{" " * indent}')
elif name in ('dsep',):
output.append('{} '.format(m.group(1)))
output.append(f'{m.group(1)} ')
break
return ''.join(output)

View File

@@ -1,8 +1,9 @@
"""Utility."""
from __future__ import annotations
from functools import wraps, lru_cache
import warnings
import re
from typing import Callable, Any, Optional, Tuple, List
from typing import Callable, Any
DEBUG = 0x00001
@@ -26,7 +27,7 @@ def lower(string: str) -> str:
class SelectorSyntaxError(Exception):
"""Syntax error in a CSS selector."""
def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None:
def __init__(self, msg: str, pattern: str | None = None, index: int | None = None) -> None:
"""Initialize."""
self.line = None
@@ -36,7 +37,7 @@ class SelectorSyntaxError(Exception):
if pattern is not None and index is not None:
# Format pattern to show line and column position
self.context, self.line, self.col = get_pattern_context(pattern, index)
msg = '{}\n line {}:\n{}'.format(msg, self.line, self.context)
msg = f'{msg}\n line {self.line}:\n{self.context}'
super().__init__(msg)
@@ -75,15 +76,15 @@ def warn_deprecated(message: str, stacklevel: int = 2) -> None: # pragma: no co
)
def get_pattern_context(pattern: str, index: int) -> Tuple[str, int, int]:
def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
"""Get the pattern context."""
last = 0
current_line = 1
col = 1
text = [] # type: List[str]
text = [] # type: list[str]
line = 1
offset = None # type: Optional[int]
offset = None # type: int | None
# Split pattern by newline and handle the text before the newline
for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):
@@ -104,7 +105,7 @@ def get_pattern_context(pattern: str, index: int) -> Tuple[str, int, int]:
# we will render the output with just `\n`. We will still log the column
# correctly though.
text.append('\n')
text.append('{}{}'.format(indent, linetext))
text.append(f'{indent}{linetext}')
if offset is not None:
text.append('\n')
text.append(' ' * (col + offset) + '^')

View File

@@ -0,0 +1,166 @@
"""
Soup Sieve.
A CSS selector filter for BeautifulSoup4.
MIT License
Copyright (c) 2018 Isaac Muse
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
from .__meta__ import __version__, __version_info__ # noqa: F401
from . import css_parser as cp
from . import css_match as cm
from . import css_types as ct
from .util import DEBUG, SelectorSyntaxError # noqa: F401
import bs4 # type: ignore[import]
from typing import Dict, Optional, Any, List, Iterator, Iterable
# Names exported by `from soupsieve import *`.
# NOTE(review): `purge` and `escape` are defined in this module but are not
# listed here — confirm whether that omission is intentional.
__all__ = (
    'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
    'closest', 'compile', 'filter', 'iselect',
    'match', 'select', 'select_one'
)
# Re-export the compiled selector class from `css_match` at package level.
SoupSieve = cm.SoupSieve
def compile(  # noqa: A001
    pattern: str,
    namespaces: Optional[Dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> cm.SoupSieve:
    """
    Compile CSS pattern.

    If `pattern` is already a compiled `SoupSieve` object it is returned as is,
    provided no `flags`, `namespaces`, or `custom` were supplied.

    Parameters:
        pattern: CSS selector string, or an already compiled `SoupSieve`.
        namespaces: Optional mapping wrapped in `ct.Namespaces`.
        flags: Integer flags passed through to the cached compiler.
        custom: Optional mapping wrapped in `ct.CustomSelectors`.
        **kwargs: Accepted but not used by this function.

    Raises:
        ValueError: If `pattern` is already compiled and `flags`, `namespaces`,
            or `custom` is also given.
    """

    if isinstance(pattern, SoupSieve):
        if flags:
            raise ValueError("Cannot process 'flags' argument on a compiled selector list")
        elif namespaces is not None:
            raise ValueError("Cannot process 'namespaces' argument on a compiled selector list")
        elif custom is not None:
            raise ValueError("Cannot process 'custom' argument on a compiled selector list")
        return pattern

    # Wrap the mappings only once we know we really have to compile; doing it
    # before the check above wasted work and could mask the intended
    # `ValueError` with a `TypeError` from the wrappers' validation.
    return cp._cached_css_compile(
        pattern,
        ct.Namespaces(namespaces) if namespaces is not None else namespaces,
        ct.CustomSelectors(custom) if custom is not None else custom,
        flags
    )
def purge() -> None:
    """Purge cached patterns by delegating to `css_parser._purge_cache()`."""

    cp._purge_cache()
def closest(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[Dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> 'bs4.Tag':
    """
    Match closest ancestor.

    Compiles `select` via `compile` and returns the result of calling
    `closest(tag)` on the compiled object.

    Parameters:
        select: CSS selector pattern (or an already compiled `SoupSieve`).
        tag: Tag handed to the compiled selector's `closest` method.
        namespaces: Optional namespace mapping forwarded to `compile`.
        flags: Integer flags forwarded to `compile`.
        custom: Optional custom selector mapping forwarded to `compile`.
        **kwargs: Extra keyword arguments forwarded to `compile`.
    """

    # `custom` is a named keyword-only parameter, so it never shows up in
    # `kwargs`; it has to be forwarded explicitly or it is silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).closest(tag)
def match(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[Dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> bool:
    """
    Match node.

    Compiles `select` via `compile` and returns the result of calling
    `match(tag)` on the compiled object.

    Parameters:
        select: CSS selector pattern (or an already compiled `SoupSieve`).
        tag: Tag handed to the compiled selector's `match` method.
        namespaces: Optional namespace mapping forwarded to `compile`.
        flags: Integer flags forwarded to `compile`.
        custom: Optional custom selector mapping forwarded to `compile`.
        **kwargs: Extra keyword arguments forwarded to `compile`.
    """

    # `custom` is a named keyword-only parameter, so it never shows up in
    # `kwargs`; it has to be forwarded explicitly or it is silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).match(tag)
def filter(  # noqa: A001
    select: str,
    iterable: Iterable['bs4.Tag'],
    namespaces: Optional[Dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> List['bs4.Tag']:
    """
    Filter list of nodes.

    Compiles `select` via `compile` and returns the result of calling
    `filter(iterable)` on the compiled object.

    Parameters:
        select: CSS selector pattern (or an already compiled `SoupSieve`).
        iterable: Nodes handed to the compiled selector's `filter` method.
        namespaces: Optional namespace mapping forwarded to `compile`.
        flags: Integer flags forwarded to `compile`.
        custom: Optional custom selector mapping forwarded to `compile`.
        **kwargs: Extra keyword arguments forwarded to `compile`.
    """

    # `custom` is a named keyword-only parameter, so it never shows up in
    # `kwargs`; it has to be forwarded explicitly or it is silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).filter(iterable)
def select_one(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[Dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> 'bs4.Tag':
    """
    Select a single tag.

    Compiles `select` via `compile` and returns the result of calling
    `select_one(tag)` on the compiled object.

    Parameters:
        select: CSS selector pattern (or an already compiled `SoupSieve`).
        tag: Tag handed to the compiled selector's `select_one` method.
        namespaces: Optional namespace mapping forwarded to `compile`.
        flags: Integer flags forwarded to `compile`.
        custom: Optional custom selector mapping forwarded to `compile`.
        **kwargs: Extra keyword arguments forwarded to `compile`.
    """

    # `custom` is a named keyword-only parameter, so it never shows up in
    # `kwargs`; it has to be forwarded explicitly or it is silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).select_one(tag)
def select(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[Dict[str, str]] = None,
    limit: int = 0,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> List['bs4.Tag']:
    """
    Select the specified tags.

    Compiles `select` via `compile` and returns the result of calling
    `select(tag, limit)` on the compiled object.

    Parameters:
        select: CSS selector pattern (or an already compiled `SoupSieve`).
        tag: Tag handed to the compiled selector's `select` method.
        namespaces: Optional namespace mapping forwarded to `compile`.
        limit: Passed through as the second argument of the compiled
            selector's `select` method.
        flags: Integer flags forwarded to `compile`.
        custom: Optional custom selector mapping forwarded to `compile`.
        **kwargs: Extra keyword arguments forwarded to `compile`.
    """

    # `custom` is a named keyword-only parameter, so it never shows up in
    # `kwargs`; it has to be forwarded explicitly or it is silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).select(tag, limit)
def iselect(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[Dict[str, str]] = None,
    limit: int = 0,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> Iterator['bs4.Tag']:
    """
    Iterate the specified tags.

    Generator that compiles `select` via `compile` and yields every item
    produced by `iselect(tag, limit)` on the compiled object. Because this is
    a generator, compilation happens when iteration starts, not at call time.

    Parameters:
        select: CSS selector pattern (or an already compiled `SoupSieve`).
        tag: Tag handed to the compiled selector's `iselect` method.
        namespaces: Optional namespace mapping forwarded to `compile`.
        limit: Passed through as the second argument of the compiled
            selector's `iselect` method.
        flags: Integer flags forwarded to `compile`.
        custom: Optional custom selector mapping forwarded to `compile`.
        **kwargs: Extra keyword arguments forwarded to `compile`.
    """

    # `custom` is a named keyword-only parameter, so it never shows up in
    # `kwargs`; it has to be forwarded explicitly or it is silently dropped.
    yield from compile(select, namespaces, flags, custom=custom, **kwargs).iselect(tag, limit)
def escape(ident: str) -> str:
    """Escape identifier by delegating to `css_parser.escape`."""

    return cp.escape(ident)

View File

@@ -0,0 +1,196 @@
"""Meta related things."""
from collections import namedtuple
import re
# Verbose-mode pattern for the version strings `parse_version` accepts:
# `major[.minor[.micro]]`, an optional `a`/`b`/`rc` pre-release with its number,
# an optional `.postN`, and an optional `.devN`. There is no end anchor.
RE_VER = re.compile(
    r'''(?x)
(?P<major>\d+)(?:\.(?P<minor>\d+))?(?:\.(?P<micro>\d+))?
(?:(?P<type>a|b|rc)(?P<pre>\d+))?
(?:\.post(?P<post>\d+))?
(?:\.dev(?P<dev>\d+))?
'''
)
# Release type name -> pre-release segment used when building the canonical
# version string (empty when the release type contributes no segment).
REL_MAP = {
    ".dev": "",
    ".dev-alpha": "a",
    ".dev-beta": "b",
    ".dev-candidate": "rc",
    "alpha": "a",
    "beta": "b",
    "candidate": "rc",
    "final": ""
}
# Release type name -> development status string returned by
# `Version._get_dev_status`.
DEV_STATUS = {
    ".dev": "2 - Pre-Alpha",
    ".dev-alpha": "2 - Pre-Alpha",
    ".dev-beta": "2 - Pre-Alpha",
    ".dev-candidate": "2 - Pre-Alpha",
    "alpha": "3 - Alpha",
    "beta": "4 - Beta",
    "candidate": "4 - Beta",
    "final": "5 - Production/Stable"
}
# Pre-release segment parsed from a version string -> release type name
# (the reverse direction of `REL_MAP`, used by `parse_version`).
PRE_REL_MAP = {"a": 'alpha', "b": 'beta', "rc": 'candidate'}
class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre", "post", "dev"])):
    """
    Get the version (PEP 440).

    A biased approach to the PEP 440 semantic version.

    Provides a tuple structure which is sorted for comparisons `v1 > v2` etc.
    (major, minor, micro, release type, pre-release build, post-release build, development release build)
    Release types are named in such a way they are comparable with ease.
    Accessors to check if a development, pre-release, or post-release build. Also provides accessor to get
    development status for setup files.

    How it works (currently):

    - You must specify a release type as either `final`, `alpha`, `beta`, or `candidate`.
    - To define a development release, you can use either `.dev`, `.dev-alpha`, `.dev-beta`, or `.dev-candidate`.
      The dot is used to ensure all development specifiers are sorted before `alpha`.
      You can specify a `dev` number for development builds, but do not have to as implicit development releases
      are allowed.
    - You must specify a `pre` value greater than zero if using a prerelease as this project (not PEP 440) does not
      allow implicit prereleases.
    - You can optionally set `post` to a value greater than zero to make the build a post release. While post releases
      are technically allowed in prereleases, it is strongly discouraged, so we are rejecting them. It should be
      noted that we do not allow `post0` even though PEP 440 does not restrict this. This project specifically
      does not allow implicit post releases.
    - It should be noted that we do not support epochs `1!` or local versions `+some-custom.version-1`.

    Acceptable version releases:

    ```
    Version(1, 0, 0, "final")                    1.0
    Version(1, 2, 0, "final")                    1.2
    Version(1, 2, 3, "final")                    1.2.3
    Version(1, 2, 0, "alpha", pre=4)             1.2a4
    Version(1, 2, 0, "beta", pre=4)              1.2b4
    Version(1, 2, 0, "candidate", pre=4)         1.2rc4
    Version(1, 2, 0, ".dev-alpha", pre=4)        1.2a4.dev0
    Version(1, 2, 0, "final", post=1)            1.2.post1
    Version(1, 2, 3, ".dev")                     1.2.3.dev0
    Version(1, 2, 3, ".dev", dev=1)              1.2.3.dev1
    ```
    """

    def __new__(
        cls,
        major: int, minor: int, micro: int, release: str = "final",
        pre: int = 0, post: int = 0, dev: int = 0
    ) -> "Version":
        """
        Validate version info.

        Raises:
            ValueError: If a numeric part is not a non-negative integer, if
                `release` is not a known release type, or if the combination
                of `release`, `pre`, `post`, and `dev` is not allowed.
        """

        # Ensure all numeric parts (including `dev`) are non-negative integers.
        for value in (major, minor, micro, pre, post, dev):
            if not (isinstance(value, int) and value >= 0):
                raise ValueError("All version parts except 'release' should be integers.")

        if release not in REL_MAP:
            raise ValueError("'{}' is not a valid release type.".format(release))

        # Ensure valid pre-release (we do not allow implicit pre-releases).
        # The string comparison selects exactly `alpha`, `beta`, and `candidate`.
        if ".dev-candidate" < release < "final":
            if pre == 0:
                raise ValueError("Implicit pre-releases not allowed.")
            elif dev:
                raise ValueError("Version is not a development release.")
            elif post:
                raise ValueError("Post-releases are not allowed with pre-releases.")

        # Ensure valid development or development/pre release
        # (every `.dev*` name sorts before `alpha`).
        elif release < "alpha":
            if release > ".dev" and pre == 0:
                raise ValueError("Implicit pre-release not allowed.")
            elif post:
                raise ValueError("Post-releases are not allowed with pre-releases.")

        # Ensure a valid normal release
        else:
            if pre:
                raise ValueError("Version is not a pre-release.")
            elif dev:
                raise ValueError("Version is not a development release.")

        return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev)

    def _is_pre(self) -> bool:
        """Is prerelease."""

        return bool(self.pre > 0)

    def _is_dev(self) -> bool:
        """Is development."""

        return bool(self.release < "alpha")

    def _is_post(self) -> bool:
        """Is post."""

        return bool(self.post > 0)

    def _get_dev_status(self) -> str:  # pragma: no cover
        """Get development status string."""

        return DEV_STATUS[self.release]

    def _get_canonical(self) -> str:
        """Get the canonical output string."""

        # Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed.
        # A zero `micro` is omitted from the output.
        if self.micro == 0:
            ver = "{}.{}".format(self.major, self.minor)
        else:
            ver = "{}.{}.{}".format(self.major, self.minor, self.micro)
        if self._is_pre():
            ver += '{}{}'.format(REL_MAP[self.release], self.pre)
        if self._is_post():
            ver += ".post{}".format(self.post)
        if self._is_dev():
            ver += ".dev{}".format(self.dev)

        return ver
def parse_version(ver: str) -> Version:
    """
    Parse version into a comparable Version tuple.

    Parameters:
        ver: Version string in the form matched by `RE_VER`.

    Returns:
        The `Version` built from the parsed parts.

    Raises:
        ValueError: If `ver` does not match `RE_VER`, or if `Version` rejects
            the parsed combination.
    """

    # NOTE: `match` only anchors at the start and `RE_VER` has no end anchor,
    # so any trailing text after the recognised version is ignored.
    m = RE_VER.match(ver)

    if m is None:
        raise ValueError("'{}' is not a valid version".format(ver))

    # Handle major, minor, micro
    major = int(m.group('major'))
    minor = int(m.group('minor')) if m.group('minor') else 0
    micro = int(m.group('micro')) if m.group('micro') else 0

    # Handle pre releases
    if m.group('type'):
        release = PRE_REL_MAP[m.group('type')]
        pre = int(m.group('pre'))
    else:
        release = "final"
        pre = 0

    # Handle development releases: a `.devN` suffix turns the release type
    # into its `.dev` / `.dev-<type>` counterpart.
    if m.group('dev'):
        dev = int(m.group('dev'))
        release = '.dev-' + release if pre else '.dev'
    else:
        dev = 0

    # Handle post
    post = int(m.group('post')) if m.group('post') else 0

    return Version(major, minor, micro, release, pre, post, dev)
# Version of this package as a validated `Version` tuple.
__version_info__ = Version(2, 3, 1, "final")
# Canonical version string derived from `__version_info__`.
__version__ = __version_info__._get_canonical()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,407 @@
"""CSS selector structure items."""
import copyreg
from .pretty import pretty
from typing import Any, Type, Tuple, Union, Dict, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
# Names exported by `from .css_types import *`.
__all__ = (
    'Selector',
    'SelectorNull',
    'SelectorTag',
    'SelectorAttribute',
    'SelectorContains',
    'SelectorNth',
    'SelectorLang',
    'SelectorList',
    'Namespaces',
    'CustomSelectors'
)
# Integer constants, each a distinct power of two.
# NOTE(review): presumably combined into `Selector.flags` by the parser and
# tested by the matcher — confirm against `css_parser` / `css_match`.
SEL_EMPTY = 0x1
SEL_ROOT = 0x2
SEL_DEFAULT = 0x4
SEL_INDETERMINATE = 0x8
SEL_SCOPE = 0x10
SEL_DIR_LTR = 0x20
SEL_DIR_RTL = 0x40
SEL_IN_RANGE = 0x80
SEL_OUT_OF_RANGE = 0x100
SEL_DEFINED = 0x200
SEL_PLACEHOLDER_SHOWN = 0x400
class Immutable:
    """
    Base class for hashable objects whose attributes are set once.

    Subclasses list their fields in `__slots__` (with `'_hash'` last) and pass
    them as keyword arguments to `__init__`. Assignment after construction
    raises `AttributeError`.
    """

    __slots__: Tuple[str, ...] = ('_hash',)

    _hash: int

    def __init__(self, **kwargs: Any) -> None:
        """
        Store every keyword argument as an attribute and precompute the hash.

        The hash covers each value together with its type, in the order the
        keyword arguments were given.
        """

        temp = []
        for k, v in kwargs.items():
            temp.append(type(v))
            temp.append(v)
            # Bypass our own `__setattr__`, which always raises.
            super(Immutable, self).__setattr__(k, v)
        super(Immutable, self).__setattr__('_hash', hash(tuple(temp)))

    @classmethod
    def __base__(cls) -> "Type[Immutable]":
        """Get base class."""

        return cls

    def __eq__(self, other: Any) -> bool:
        """Equal."""

        return (
            isinstance(other, self.__base__()) and
            all(getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash')
        )

    def __ne__(self, other: Any) -> bool:
        """Not equal."""

        return (
            not isinstance(other, self.__base__()) or
            any(getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash')
        )

    def __hash__(self) -> int:
        """Hash."""

        return self._hash

    def __setattr__(self, name: str, value: Any) -> None:
        """Prevent mutability."""

        raise AttributeError("'{}' is immutable".format(self.__class__.__name__))

    def __repr__(self) -> str:  # pragma: no cover
        """Representation."""

        # `__slots__[:-1]` drops the trailing `'_hash'` entry.
        return "{}({})".format(
            self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
        )

    __str__ = __repr__

    def pretty(self) -> None:  # pragma: no cover
        """Pretty print."""

        print(pretty(self))
class ImmutableDict(Mapping[Any, Any]):
    """Hashable, immutable dictionary."""

    def __init__(
        self,
        arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]
    ) -> None:
        """
        Validate `arg`, copy it into a private dict, and precompute the hash.

        Parameters:
            arg: A dict, or an iterable of `(key, value)` pairs.

        Raises:
            TypeError: If validation rejects a key or value.
        """

        # A one-shot iterator would be exhausted by `_validate`, leaving
        # nothing for `dict()`; materialize non-mapping input so both passes
        # see the same pairs.
        data = arg if isinstance(arg, Mapping) else tuple(arg)

        self._validate(data)
        self._d = dict(data)
        self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))

    def _validate(self, arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]) -> None:
        """Validate arguments."""

        if isinstance(arg, dict):
            # Keys of an existing dict are necessarily hashable already.
            if not all(isinstance(v, Hashable) for v in arg.values()):
                raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
        elif not all(isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg):
            raise TypeError('{} keys and values must be hashable'.format(self.__class__.__name__))

    def __iter__(self) -> Iterator[Any]:
        """Iterator."""

        return iter(self._d)

    def __len__(self) -> int:
        """Length."""

        return len(self._d)

    def __getitem__(self, key: Any) -> Any:
        """Get item: `namespace['key']`."""

        return self._d[key]

    def __hash__(self) -> int:
        """Hash."""

        return self._hash

    def __repr__(self) -> str:  # pragma: no cover
        """Representation."""

        return "{!r}".format(self._d)

    __str__ = __repr__
class Namespaces(ImmutableDict):
    """Namespaces."""

    def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
        """Initialize."""

        super().__init__(arg)

    def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
        """
        Validate arguments.

        For a dict only the values are checked; for any other iterable of
        pairs both keys and values are checked.

        Raises:
            TypeError: If a checked key or value is not a `str`.
        """

        if isinstance(arg, dict):
            if not all(isinstance(v, str) for v in arg.values()):
                # The check is for `str`, so say so (the old text said "hashable").
                raise TypeError('{} values must be Unicode strings'.format(self.__class__.__name__))
        elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
            raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
class CustomSelectors(ImmutableDict):
    """Custom selectors."""

    def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
        """Initialize."""

        super().__init__(arg)

    def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
        """
        Validate arguments.

        For a dict only the values are checked; for any other iterable of
        pairs both keys and values are checked.

        Raises:
            TypeError: If a checked key or value is not a `str`.
        """

        if isinstance(arg, dict):
            if not all(isinstance(v, str) for v in arg.values()):
                # The check is for `str`, so say so (the old text said "hashable").
                raise TypeError('{} values must be Unicode strings'.format(self.__class__.__name__))
        elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
            raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
class Selector(Immutable):
    """
    Selector.

    Immutable record holding the parts of one compound selector. The field
    order in `__slots__`, in the `__init__` signature, and in the keyword list
    passed to `Immutable.__init__` must stay aligned: the hash is computed in
    keyword order and `_pickle` rebuilds instances positionally from
    `__slots__`.
    """

    __slots__ = (
        'tag', 'ids', 'classes', 'attributes', 'nth', 'selectors',
        'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
    )

    tag: Optional['SelectorTag']
    ids: Tuple[str, ...]
    classes: Tuple[str, ...]
    attributes: Tuple['SelectorAttribute', ...]
    nth: Tuple['SelectorNth', ...]
    selectors: Tuple['SelectorList', ...]
    relation: 'SelectorList'
    rel_type: Optional[str]
    contains: Tuple['SelectorContains', ...]
    lang: Tuple['SelectorLang', ...]
    flags: int

    def __init__(
        self,
        tag: Optional['SelectorTag'],
        ids: Tuple[str, ...],
        classes: Tuple[str, ...],
        attributes: Tuple['SelectorAttribute', ...],
        nth: Tuple['SelectorNth', ...],
        selectors: Tuple['SelectorList', ...],
        relation: 'SelectorList',
        rel_type: Optional[str],
        contains: Tuple['SelectorContains', ...],
        lang: Tuple['SelectorLang', ...],
        flags: int
    ) -> None:
        """Initialize by storing every argument unchanged as a read-only attribute."""

        super().__init__(
            tag=tag,
            ids=ids,
            classes=classes,
            attributes=attributes,
            nth=nth,
            selectors=selectors,
            relation=relation,
            rel_type=rel_type,
            contains=contains,
            lang=lang,
            flags=flags
        )
class SelectorNull(Immutable):
    """
    Null Selector.

    Carries no fields of its own; it passes no keyword arguments to
    `Immutable.__init__`.
    """

    def __init__(self) -> None:
        """Initialize."""

        super().__init__()
class SelectorTag(Immutable):
    """
    Selector tag.

    Immutable record of a tag name and an optional prefix.
    """

    __slots__ = ("name", "prefix", "_hash")

    name: str
    prefix: Optional[str]

    def __init__(self, name: str, prefix: Optional[str]) -> None:
        """Initialize by storing `name` and `prefix` unchanged."""

        super().__init__(name=name, prefix=prefix)
class SelectorAttribute(Immutable):
    """
    Selector attribute rule.

    Immutable record of an attribute name, its prefix, and two optional
    compiled regular expressions.
    """

    __slots__ = ("attribute", "prefix", "pattern", "xml_type_pattern", "_hash")

    attribute: str
    prefix: str
    pattern: Optional[Pattern[str]]
    # NOTE(review): presumably an alternative pattern for XML documents —
    # confirm against the matcher.
    xml_type_pattern: Optional[Pattern[str]]

    def __init__(
        self,
        attribute: str,
        prefix: str,
        pattern: Optional[Pattern[str]],
        xml_type_pattern: Optional[Pattern[str]]
    ) -> None:
        """Initialize by storing every argument unchanged."""

        super().__init__(
            attribute=attribute,
            prefix=prefix,
            pattern=pattern,
            xml_type_pattern=xml_type_pattern
        )
class SelectorContains(Immutable):
    """
    Selector contains rule.

    Immutable record of a tuple of text strings and an `own` flag.
    """

    __slots__ = ("text", "own", "_hash")

    text: Tuple[str, ...]
    own: bool

    def __init__(self, text: Iterable[str], own: bool) -> None:
        """Initialize; `text` is converted to a tuple so it is hashable."""

        super().__init__(text=tuple(text), own=own)
class SelectorNth(Immutable):
    """
    Selector nth type.

    Immutable record of the `a`, `n`, `b` components, the `of_type` and `last`
    flags, and an associated `SelectorList`.
    """

    __slots__ = ("a", "n", "b", "of_type", "last", "selectors", "_hash")

    a: int
    n: bool
    b: int
    of_type: bool
    last: bool
    selectors: 'SelectorList'

    def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: 'SelectorList') -> None:
        """Initialize by storing every argument unchanged."""

        super().__init__(
            a=a,
            n=n,
            b=b,
            of_type=of_type,
            last=last,
            selectors=selectors
        )
class SelectorLang(Immutable):
    """
    Selector language rules.

    Immutable, sequence-like wrapper around a tuple of language strings.
    """

    __slots__ = ("languages", "_hash",)

    languages: Tuple[str, ...]

    def __init__(self, languages: Iterable[str]) -> None:
        """Initialize; `languages` is converted to a tuple so it is hashable."""

        super().__init__(languages=tuple(languages))

    def __iter__(self) -> Iterator[str]:
        """Iterator."""

        return iter(self.languages)

    def __len__(self) -> int:  # pragma: no cover
        """Length."""

        return len(self.languages)

    def __getitem__(self, index: int) -> str:  # pragma: no cover
        """Get item."""

        return self.languages[index]
class SelectorList(Immutable):
    """
    Selector list.

    Immutable, sequence-like wrapper around a tuple of `Selector` /
    `SelectorNull` objects plus the `is_not` and `is_html` flags.
    """

    __slots__ = ("selectors", "is_not", "is_html", "_hash")

    selectors: Tuple[Union['Selector', 'SelectorNull'], ...]
    is_not: bool
    is_html: bool

    def __init__(
        self,
        selectors: Optional[Iterable[Union['Selector', 'SelectorNull']]] = None,
        is_not: bool = False,
        is_html: bool = False
    ) -> None:
        """Initialize; `selectors` becomes a tuple, empty when `None` is given."""

        super().__init__(
            selectors=tuple(selectors) if selectors is not None else tuple(),
            is_not=is_not,
            is_html=is_html
        )

    def __iter__(self) -> Iterator[Union['Selector', 'SelectorNull']]:
        """Iterator."""

        return iter(self.selectors)

    def __len__(self) -> int:
        """Length."""

        return len(self.selectors)

    def __getitem__(self, index: int) -> Union['Selector', 'SelectorNull']:
        """Get item."""

        return self.selectors[index]
def _pickle(p: Any) -> Any:
    """
    Reduce `p` for pickling.

    Returns the class from `p.__base__()` together with the values of every
    slot except the last one (`'_hash'`), so the object is rebuilt by calling
    that class with the values as positional arguments.
    """

    return p.__base__(), tuple([getattr(p, s) for s in p.__slots__[:-1]])
def pickle_register(obj: Any) -> None:
    """Allow object to be pickled by registering `_pickle` as its `copyreg` reducer."""

    copyreg.pickle(obj, _pickle)
# Register the pickle reducer for every `Immutable` selector type at import time.
pickle_register(Selector)
pickle_register(SelectorNull)
pickle_register(SelectorTag)
pickle_register(SelectorAttribute)
pickle_register(SelectorContains)
pickle_register(SelectorNth)
pickle_register(SelectorLang)
pickle_register(SelectorList)

137
lib/soupsieve_old/pretty.py Normal file
View File

@@ -0,0 +1,137 @@
"""
Format a pretty string of a `SoupSieve` object for easy debugging.
This won't necessarily support all types and such, and definitely
not support custom outputs.
It is mainly geared towards our types as the `SelectorList`
object is a beast to look at without some indentation and newlines.
The format and the various output types are fairly well known (though this
hasn't been tested extensively to make sure we aren't missing corner cases).
Example:
```
>>> import soupsieve as sv
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
SelectorList(
selectors=(
Selector(
tag=SelectorTag(
name='that',
prefix=None),
ids=(),
classes=(
'class',
),
attributes=(
SelectorAttribute(
attribute='name',
prefix='',
pattern=re.compile(
'^value$'),
xml_type_pattern=None),
),
nth=(),
selectors=(),
relation=SelectorList(
selectors=(
Selector(
tag=SelectorTag(
name='this',
prefix=None),
ids=(),
classes=(),
attributes=(),
nth=(),
selectors=(),
relation=SelectorList(
selectors=(),
is_not=False,
is_html=False),
rel_type='>',
contains=(),
lang=(),
flags=0),
),
is_not=False,
is_html=False),
rel_type=None,
contains=(),
lang=(),
flags=0),
),
is_not=False,
is_html=False)
```
"""
import re
from typing import Any
# Token patterns used by `pretty` to split an object's string form.
# Name (letters, digits, `_`, `.`) immediately followed by `(`.
RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]+\(')
# Keyword name immediately followed by `=`; needs at least two characters.
RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=')
# An empty pair of brackets: `()`, `[]`, or `{}`.
RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')
# Opening brackets.
RE_LSTRT = re.compile(r'\[')
RE_DSTRT = re.compile(r'\{')
RE_TSTRT = re.compile(r'\(')
# Closing brackets.
RE_LEND = re.compile(r'\]')
RE_DEND = re.compile(r'\}')
RE_TEND = re.compile(r'\)')
# Run of digits.
RE_INT = re.compile(r'\d+')
# Bare word of at least two characters.
RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]+')
# Double- and single-quoted strings, allowing backslash escapes.
RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')
RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")
# Comma or colon with any surrounding whitespace.
RE_SEP = re.compile(r'\s*(,)\s*')
RE_DSEP = re.compile(r'\s*(:)\s*')
# `pretty` tries these in insertion order and uses the first pattern that
# matches, so the order is significant (for example `class` and `param` come
# before `kword`, and `empty` before the individual brackets).
TOKENS = {
    'class': RE_CLASS,
    'param': RE_PARAM,
    'empty': RE_EMPTY,
    'lstrt': RE_LSTRT,
    'dstrt': RE_DSTRT,
    'tstrt': RE_TSTRT,
    'lend': RE_LEND,
    'dend': RE_DEND,
    'tend': RE_TEND,
    'sqstr': RE_SQSTR,
    'sep': RE_SEP,
    'dsep': RE_DSEP,
    'int': RE_INT,
    'kword': RE_KWORD,
    'dqstr': RE_DQSTR
}
def pretty(obj: Any) -> str:  # pragma: no cover
    """
    Make the object output string pretty.

    Tokenizes `str(obj)` with the patterns in `TOKENS` and re-emits it with a
    newline and four more spaces of indentation after every opening bracket
    or `name(`, a newline after every comma, and a single space after every
    colon.

    Parameters:
        obj: Any object; only its `str()` form is used.

    Returns:
        The reformatted string.
    """

    sel = str(obj)
    index = 0
    end = len(sel) - 1
    indent = 0
    output = []

    while index <= end:
        for name, regex in TOKENS.items():
            m = regex.match(sel, index)
            if not m:
                continue

            index = m.end(0)
            if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
                indent += 4
                output.append('{}\n{}'.format(m.group(0), " " * indent))
            elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
                output.append(m.group(0))
            elif name in ('lend', 'dend', 'tend'):
                indent -= 4
                output.append(m.group(0))
            elif name in ('sep',):
                output.append('{}\n{}'.format(m.group(1), " " * indent))
            elif name in ('dsep',):
                output.append('{} '.format(m.group(1)))
            break
        else:
            # Nothing matched here (e.g. a one-letter name such as `a=`, which
            # is too short for the `param`/`kword` patterns). Copy the
            # character through and move on; without this the loop would
            # never advance.
            output.append(sel[index])
            index += 1

    return ''.join(output)

View File

116
lib/soupsieve_old/util.py Normal file
View File

@@ -0,0 +1,116 @@
"""Utility."""
from functools import wraps, lru_cache
import warnings
import re
from typing import Callable, Any, Optional, Tuple, List
# Flag value re-exported by the package as `DEBUG`.
# NOTE(review): presumably passed in the `flags` argument of the API — confirm.
DEBUG = 0x00001
# Matches one line break (`\r\n`, or a lone `\n` / `\r`) or the end of the string.
RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$')
# Code points bounding the ASCII uppercase range, used by `lower`.
UC_A = ord('A')
UC_Z = ord('Z')
@lru_cache(maxsize=512)
def lower(string: str) -> str:
    """Lowercase only the ASCII letters `A`-`Z`; every other character is kept as is."""

    # Adding 32 to an ASCII uppercase code point gives its lowercase counterpart.
    return ''.join(
        [chr(ord(char) + 32) if UC_A <= ord(char) <= UC_Z else char for char in string]
    )
class SelectorSyntaxError(Exception):
    """
    Syntax error in a CSS selector.

    When both `pattern` and `index` are supplied, `context`, `line`, and `col`
    describe where the error occurred and the context is appended to the
    message; otherwise all three attributes are `None`.
    """

    def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None:
        """Initialize."""

        has_location = pattern is not None and index is not None

        if has_location:
            # Format pattern to show line and column position
            self.context, self.line, self.col = get_pattern_context(pattern, index)
            msg = '{}\n line {}:\n{}'.format(msg, self.line, self.context)
        else:
            self.line = None
            self.col = None
            self.context = None

        super().__init__(msg)
def deprecated(message: str, stacklevel: int = 2) -> Callable[..., Any]:  # pragma: no cover
    """
    Build a decorator that emits a `DeprecationWarning` on every call.

    The warning text is the wrapped callable's name followed by `message`.

    Usage:

        @deprecated("This method will be removed in version X; use Y instead.")
        def some_method():
            pass
    """

    def _decorate(target: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(target)
        def _warn_then_call(*args: Any, **kwargs: Any) -> Any:
            notice = f"'{target.__name__}' is deprecated. {message}"
            warnings.warn(notice, DeprecationWarning, stacklevel)
            return target(*args, **kwargs)

        return _warn_then_call

    return _decorate
def warn_deprecated(message: str, stacklevel: int = 2) -> None:  # pragma: no cover
    """Emit `message` as a `DeprecationWarning` at the given stack level."""

    warnings.warn(message, DeprecationWarning, stacklevel)
def get_pattern_context(pattern: str, index: int) -> Tuple[str, int, int]:
    """
    Get the pattern context.

    Renders `pattern` line by line and places a `^` marker line beneath the
    line that contains `index`.

    Parameters:
        pattern: The selector pattern text, possibly spanning several lines.
        index: Character offset into `pattern` to point at.

    Returns:
        A tuple `(context, line, col)`: the rendered text, the 1-based number
        of the line containing `index`, and the 1-based column of `index`
        within that line.
    """

    last = 0  # Offset where the current line starts.
    current_line = 1
    col = 1
    text = []  # type: List[str]
    line = 1
    offset = None  # type: Optional[int]

    # Split pattern by newline and handle the text before the newline
    for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):
        linetext = pattern[last:m.start(0)]
        if not len(m.group(0)) and not len(text):
            # End-of-string match with nothing emitted yet: the pattern is a
            # single line, so no line prefix is added.
            indent = ''
            offset = -1
            col = index - last + 1
        elif last <= index < m.end(0):
            # This is the line that contains `index`; flag it with an arrow.
            indent = '--> '
            offset = (-1 if index > m.start(0) else 0) + 3
            col = index - last + 1
        else:
            # Any other line: plain prefix and no marker line.
            indent = ' '
            offset = None
        if len(text):
            # Regardless of whether we are presented with `\r\n`, `\r`, or `\n`,
            # we will render the output with just `\n`. We will still log the column
            # correctly though.
            text.append('\n')
        text.append('{}{}'.format(indent, linetext))
        if offset is not None:
            # Add the marker line and remember which line it belongs to.
            text.append('\n')
            text.append(' ' * (col + offset) + '^')
            line = current_line

        current_line += 1
        last = m.end(0)

    return ''.join(text), line, col