From 38d8e13e4e02efadb39981437fffd346df869c6a Mon Sep 17 00:00:00 2001 From: AdeHub Date: Sat, 24 Aug 2024 16:36:55 +1200 Subject: [PATCH] update Soupsieve --- lib/soupsieve/__init__.py | 68 +- lib/soupsieve/__meta__.py | 21 +- lib/soupsieve/css_match.py | 242 +++-- lib/soupsieve/css_parser.py | 263 +++-- lib/soupsieve/css_types.py | 130 +-- lib/soupsieve/pretty.py | 10 +- lib/soupsieve/util.py | 15 +- lib/soupsieve_old/__init__.py | 166 ++++ lib/soupsieve_old/__meta__.py | 196 ++++ lib/soupsieve_old/css_match.py | 1584 +++++++++++++++++++++++++++++++ lib/soupsieve_old/css_parser.py | 1310 +++++++++++++++++++++++++ lib/soupsieve_old/css_types.py | 407 ++++++++ lib/soupsieve_old/pretty.py | 137 +++ lib/soupsieve_old/py.typed | 0 lib/soupsieve_old/util.py | 116 +++ 15 files changed, 4282 insertions(+), 383 deletions(-) create mode 100644 lib/soupsieve_old/__init__.py create mode 100644 lib/soupsieve_old/__meta__.py create mode 100644 lib/soupsieve_old/css_match.py create mode 100644 lib/soupsieve_old/css_parser.py create mode 100644 lib/soupsieve_old/css_types.py create mode 100644 lib/soupsieve_old/pretty.py create mode 100644 lib/soupsieve_old/py.typed create mode 100644 lib/soupsieve_old/util.py diff --git a/lib/soupsieve/__init__.py b/lib/soupsieve/__init__.py index c89b7002..45730dfa 100644 --- a/lib/soupsieve/__init__.py +++ b/lib/soupsieve/__init__.py @@ -25,13 +25,14 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +from __future__ import annotations from .__meta__ import __version__, __version_info__ # noqa: F401 from . import css_parser as cp from . import css_match as cm from . 
import css_types as ct from .util import DEBUG, SelectorSyntaxError # noqa: F401 -import bs4 # type: ignore[import] -from typing import Dict, Optional, Any, List, Iterator, Iterable +import bs4 # type: ignore[import-untyped] +from typing import Any, Iterator, Iterable __all__ = ( 'DEBUG', 'SelectorSyntaxError', 'SoupSieve', @@ -44,17 +45,14 @@ SoupSieve = cm.SoupSieve def compile( # noqa: A001 pattern: str, - namespaces: Optional[Dict[str, str]] = None, + namespaces: dict[str, str] | None = None, flags: int = 0, *, - custom: Optional[Dict[str, str]] = None, + custom: dict[str, str] | None = None, **kwargs: Any ) -> cm.SoupSieve: """Compile CSS pattern.""" - ns = ct.Namespaces(namespaces) if namespaces is not None else namespaces # type: Optional[ct.Namespaces] - cs = ct.CustomSelectors(custom) if custom is not None else custom # type: Optional[ct.CustomSelectors] - if isinstance(pattern, SoupSieve): if flags: raise ValueError("Cannot process 'flags' argument on a compiled selector list") @@ -64,7 +62,12 @@ def compile( # noqa: A001 raise ValueError("Cannot process 'custom' argument on a compiled selector list") return pattern - return cp._cached_css_compile(pattern, ns, cs, flags) + return cp._cached_css_compile( + pattern, + ct.Namespaces(namespaces) if namespaces is not None else namespaces, + ct.CustomSelectors(custom) if custom is not None else custom, + flags + ) def purge() -> None: @@ -75,13 +78,13 @@ def purge() -> None: def closest( select: str, - tag: 'bs4.Tag', - namespaces: Optional[Dict[str, str]] = None, + tag: bs4.Tag, + namespaces: dict[str, str] | None = None, flags: int = 0, *, - custom: Optional[Dict[str, str]] = None, + custom: dict[str, str] | None = None, **kwargs: Any -) -> 'bs4.Tag': +) -> bs4.Tag: """Match closest ancestor.""" return compile(select, namespaces, flags, **kwargs).closest(tag) @@ -89,11 +92,11 @@ def closest( def match( select: str, - tag: 'bs4.Tag', - namespaces: Optional[Dict[str, str]] = None, + tag: bs4.Tag, + namespaces: 
dict[str, str] | None = None, flags: int = 0, *, - custom: Optional[Dict[str, str]] = None, + custom: dict[str, str] | None = None, **kwargs: Any ) -> bool: """Match node.""" @@ -103,13 +106,13 @@ def match( def filter( # noqa: A001 select: str, - iterable: Iterable['bs4.Tag'], - namespaces: Optional[Dict[str, str]] = None, + iterable: Iterable[bs4.Tag], + namespaces: dict[str, str] | None = None, flags: int = 0, *, - custom: Optional[Dict[str, str]] = None, + custom: dict[str, str] | None = None, **kwargs: Any -) -> List['bs4.Tag']: +) -> list[bs4.Tag]: """Filter list of nodes.""" return compile(select, namespaces, flags, **kwargs).filter(iterable) @@ -117,13 +120,13 @@ def filter( # noqa: A001 def select_one( select: str, - tag: 'bs4.Tag', - namespaces: Optional[Dict[str, str]] = None, + tag: bs4.Tag, + namespaces: dict[str, str] | None = None, flags: int = 0, *, - custom: Optional[Dict[str, str]] = None, + custom: dict[str, str] | None = None, **kwargs: Any -) -> 'bs4.Tag': +) -> bs4.Tag: """Select a single tag.""" return compile(select, namespaces, flags, **kwargs).select_one(tag) @@ -131,14 +134,14 @@ def select_one( def select( select: str, - tag: 'bs4.Tag', - namespaces: Optional[Dict[str, str]] = None, + tag: bs4.Tag, + namespaces: dict[str, str] | None = None, limit: int = 0, flags: int = 0, *, - custom: Optional[Dict[str, str]] = None, + custom: dict[str, str] | None = None, **kwargs: Any -) -> List['bs4.Tag']: +) -> list[bs4.Tag]: """Select the specified tags.""" return compile(select, namespaces, flags, **kwargs).select(tag, limit) @@ -146,18 +149,17 @@ def select( def iselect( select: str, - tag: 'bs4.Tag', - namespaces: Optional[Dict[str, str]] = None, + tag: bs4.Tag, + namespaces: dict[str, str] | None = None, limit: int = 0, flags: int = 0, *, - custom: Optional[Dict[str, str]] = None, + custom: dict[str, str] | None = None, **kwargs: Any -) -> Iterator['bs4.Tag']: +) -> Iterator[bs4.Tag]: """Iterate the specified tags.""" - for el in 
compile(select, namespaces, flags, **kwargs).iselect(tag, limit): - yield el + yield from compile(select, namespaces, flags, **kwargs).iselect(tag, limit) def escape(ident: str) -> str: diff --git a/lib/soupsieve/__meta__.py b/lib/soupsieve/__meta__.py index 2d769fbf..0fbf71b0 100644 --- a/lib/soupsieve/__meta__.py +++ b/lib/soupsieve/__meta__.py @@ -1,4 +1,5 @@ """Meta related things.""" +from __future__ import annotations from collections import namedtuple import re @@ -83,7 +84,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre" cls, major: int, minor: int, micro: int, release: str = "final", pre: int = 0, post: int = 0, dev: int = 0 - ) -> "Version": + ) -> Version: """Validate version info.""" # Ensure all parts are positive integers. @@ -92,7 +93,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre" raise ValueError("All version parts except 'release' should be integers.") if release not in REL_MAP: - raise ValueError("'{}' is not a valid release type.".format(release)) + raise ValueError(f"'{release}' is not a valid release type.") # Ensure valid pre-release (we do not allow implicit pre-releases). if ".dev-candidate" < release < "final": @@ -117,7 +118,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre" elif dev: raise ValueError("Version is not a development release.") - return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev) + return super().__new__(cls, major, minor, micro, release, pre, post, dev) def _is_pre(self) -> bool: """Is prerelease.""" @@ -144,15 +145,15 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre" # Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed.. 
if self.micro == 0: - ver = "{}.{}".format(self.major, self.minor) + ver = f"{self.major}.{self.minor}" else: - ver = "{}.{}.{}".format(self.major, self.minor, self.micro) + ver = f"{self.major}.{self.minor}.{self.micro}" if self._is_pre(): - ver += '{}{}'.format(REL_MAP[self.release], self.pre) + ver += f'{REL_MAP[self.release]}{self.pre}' if self._is_post(): - ver += ".post{}".format(self.post) + ver += f".post{self.post}" if self._is_dev(): - ver += ".dev{}".format(self.dev) + ver += f".dev{self.dev}" return ver @@ -163,7 +164,7 @@ def parse_version(ver: str) -> Version: m = RE_VER.match(ver) if m is None: - raise ValueError("'{}' is not a valid version".format(ver)) + raise ValueError(f"'{ver}' is not a valid version") # Handle major, minor, micro major = int(m.group('major')) @@ -192,5 +193,5 @@ def parse_version(ver: str) -> Version: return Version(major, minor, micro, release, pre, post, dev) -__version_info__ = Version(2, 3, 1, "final") +__version_info__ = Version(2, 6, 0, "final") __version__ = __version_info__._get_canonical() diff --git a/lib/soupsieve/css_match.py b/lib/soupsieve/css_match.py index 79bb8707..e52e42d5 100644 --- a/lib/soupsieve/css_match.py +++ b/lib/soupsieve/css_match.py @@ -1,11 +1,12 @@ """CSS matcher.""" +from __future__ import annotations from datetime import datetime from . import util import re from . import css_types as ct import unicodedata -import bs4 # type: ignore[import] -from typing import Iterator, Iterable, List, Any, Optional, Tuple, Union, Dict, Callable, Sequence, cast +import bs4 # type: ignore[import-untyped] +from typing import Iterator, Iterable, Any, Callable, Sequence, cast # noqa: F401 # Empty tag pattern (whitespace okay) RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]') @@ -64,12 +65,12 @@ class _FakeParent: fake parent so we can traverse the root element as a child. 
""" - def __init__(self, element: 'bs4.Tag') -> None: + def __init__(self, element: bs4.Tag) -> None: """Initialize.""" self.contents = [element] - def __len__(self) -> 'bs4.PageElement': + def __len__(self) -> bs4.PageElement: """Length.""" return len(self.contents) @@ -84,62 +85,62 @@ class _DocumentNav: # Fail on unexpected types. if not cls.is_tag(tag): - raise TypeError("Expected a BeautifulSoup 'Tag', but instead recieved type {}".format(type(tag))) + raise TypeError(f"Expected a BeautifulSoup 'Tag', but instead received type {type(tag)}") @staticmethod - def is_doc(obj: 'bs4.Tag') -> bool: + def is_doc(obj: bs4.Tag) -> bool: """Is `BeautifulSoup` object.""" return isinstance(obj, bs4.BeautifulSoup) @staticmethod - def is_tag(obj: 'bs4.PageElement') -> bool: + def is_tag(obj: bs4.PageElement) -> bool: """Is tag.""" return isinstance(obj, bs4.Tag) @staticmethod - def is_declaration(obj: 'bs4.PageElement') -> bool: # pragma: no cover + def is_declaration(obj: bs4.PageElement) -> bool: # pragma: no cover """Is declaration.""" return isinstance(obj, bs4.Declaration) @staticmethod - def is_cdata(obj: 'bs4.PageElement') -> bool: + def is_cdata(obj: bs4.PageElement) -> bool: """Is CDATA.""" return isinstance(obj, bs4.CData) @staticmethod - def is_processing_instruction(obj: 'bs4.PageElement') -> bool: # pragma: no cover + def is_processing_instruction(obj: bs4.PageElement) -> bool: # pragma: no cover """Is processing instruction.""" return isinstance(obj, bs4.ProcessingInstruction) @staticmethod - def is_navigable_string(obj: 'bs4.PageElement') -> bool: + def is_navigable_string(obj: bs4.PageElement) -> bool: """Is navigable string.""" return isinstance(obj, bs4.NavigableString) @staticmethod - def is_special_string(obj: 'bs4.PageElement') -> bool: + def is_special_string(obj: bs4.PageElement) -> bool: """Is special string.""" return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype)) @classmethod - def 
is_content_string(cls, obj: 'bs4.PageElement') -> bool: + def is_content_string(cls, obj: bs4.PageElement) -> bool: """Check if node is content string.""" return cls.is_navigable_string(obj) and not cls.is_special_string(obj) @staticmethod - def create_fake_parent(el: 'bs4.Tag') -> _FakeParent: + def create_fake_parent(el: bs4.Tag) -> _FakeParent: """Create fake parent for a given element.""" return _FakeParent(el) @staticmethod - def is_xml_tree(el: 'bs4.Tag') -> bool: + def is_xml_tree(el: bs4.Tag) -> bool: """Check if element (or document) is from a XML tree.""" return bool(el._is_xml) - def is_iframe(self, el: 'bs4.Tag') -> bool: + def is_iframe(self, el: bs4.Tag) -> bool: """Check if element is an `iframe`.""" return bool( @@ -147,7 +148,7 @@ class _DocumentNav: self.is_html_tag(el) # type: ignore[attr-defined] ) - def is_root(self, el: 'bs4.Tag') -> bool: + def is_root(self, el: bs4.Tag) -> bool: """ Return whether element is a root element. @@ -161,20 +162,19 @@ class _DocumentNav: root = parent is not None and self.is_html and self.is_iframe(parent) # type: ignore[attr-defined] return root - def get_contents(self, el: 'bs4.Tag', no_iframe: bool = False) -> Iterator['bs4.PageElement']: + def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]: """Get contents or contents in reverse.""" if not no_iframe or not self.is_iframe(el): - for content in el.contents: - yield content + yield from el.contents def get_children( self, - el: 'bs4.Tag', - start: Optional[int] = None, + el: bs4.Tag, + start: int | None = None, reverse: bool = False, tags: bool = True, no_iframe: bool = False - ) -> Iterator['bs4.PageElement']: + ) -> Iterator[bs4.PageElement]: """Get children.""" if not no_iframe or not self.is_iframe(el): @@ -195,10 +195,10 @@ class _DocumentNav: def get_descendants( self, - el: 'bs4.Tag', + el: bs4.Tag, tags: bool = True, no_iframe: bool = False - ) -> Iterator['bs4.PageElement']: + ) -> Iterator[bs4.PageElement]: """Get 
descendants.""" if not no_iframe or not self.is_iframe(el): @@ -229,7 +229,7 @@ class _DocumentNav: if not tags or is_tag: yield child - def get_parent(self, el: 'bs4.Tag', no_iframe: bool = False) -> 'bs4.Tag': + def get_parent(self, el: bs4.Tag, no_iframe: bool = False) -> bs4.Tag: """Get parent.""" parent = el.parent @@ -238,25 +238,25 @@ class _DocumentNav: return parent @staticmethod - def get_tag_name(el: 'bs4.Tag') -> Optional[str]: + def get_tag_name(el: bs4.Tag) -> str | None: """Get tag.""" - return cast(Optional[str], el.name) + return cast('str | None', el.name) @staticmethod - def get_prefix_name(el: 'bs4.Tag') -> Optional[str]: + def get_prefix_name(el: bs4.Tag) -> str | None: """Get prefix.""" - return cast(Optional[str], el.prefix) + return cast('str | None', el.prefix) @staticmethod - def get_uri(el: 'bs4.Tag') -> Optional[str]: + def get_uri(el: bs4.Tag) -> str | None: """Get namespace `URI`.""" - return cast(Optional[str], el.namespace) + return cast('str | None', el.namespace) @classmethod - def get_next(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement': + def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement: """Get next sibling tag.""" sibling = el.next_sibling @@ -265,7 +265,7 @@ class _DocumentNav: return sibling @classmethod - def get_previous(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement': + def get_previous(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement: """Get previous sibling tag.""" sibling = el.previous_sibling @@ -274,7 +274,7 @@ class _DocumentNav: return sibling @staticmethod - def has_html_ns(el: 'bs4.Tag') -> bool: + def has_html_ns(el: bs4.Tag) -> bool: """ Check if element has an HTML namespace. @@ -282,17 +282,17 @@ class _DocumentNav: like we do in the case of `is_html_tag`. 
""" - ns = getattr(el, 'namespace') if el else None + ns = getattr(el, 'namespace') if el else None # noqa: B009 return bool(ns and ns == NS_XHTML) @staticmethod - def split_namespace(el: 'bs4.Tag', attr_name: str) -> Tuple[Optional[str], Optional[str]]: + def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[str | None, str | None]: """Return namespace and attribute name without the prefix.""" return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None) @classmethod - def normalize_value(cls, value: Any) -> Union[str, Sequence[str]]: + def normalize_value(cls, value: Any) -> str | Sequence[str]: """Normalize the value to be a string or list of strings.""" # Treat `None` as empty string. @@ -327,10 +327,10 @@ class _DocumentNav: @classmethod def get_attribute_by_name( cls, - el: 'bs4.Tag', + el: bs4.Tag, name: str, - default: Optional[Union[str, Sequence[str]]] = None - ) -> Optional[Union[str, Sequence[str]]]: + default: str | Sequence[str] | None = None + ) -> str | Sequence[str] | None: """Get attribute by name.""" value = default @@ -347,14 +347,14 @@ class _DocumentNav: return value @classmethod - def iter_attributes(cls, el: 'bs4.Tag') -> Iterator[Tuple[str, Optional[Union[str, Sequence[str]]]]]: + def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, str | Sequence[str] | None]]: """Iterate attributes.""" for k, v in el.attrs.items(): yield k, cls.normalize_value(v) @classmethod - def get_classes(cls, el: 'bs4.Tag') -> Sequence[str]: + def get_classes(cls, el: bs4.Tag) -> Sequence[str]: """Get classes.""" classes = cls.get_attribute_by_name(el, 'class', []) @@ -362,14 +362,14 @@ class _DocumentNav: classes = RE_NOT_WS.findall(classes) return cast(Sequence[str], classes) - def get_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> str: + def get_text(self, el: bs4.Tag, no_iframe: bool = False) -> str: """Get text.""" return ''.join( [node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if 
self.is_content_string(node)] ) - def get_own_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> List[str]: + def get_own_text(self, el: bs4.Tag, no_iframe: bool = False) -> list[str]: """Get Own Text.""" return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)] @@ -393,7 +393,7 @@ class Inputs: def validate_week(year: int, week: int) -> bool: """Validate week.""" - max_week = datetime.strptime("{}-{}-{}".format(12, 31, year), "%m-%d-%Y").isocalendar()[1] + max_week = datetime.strptime(f"{12}-{31}-{year}", "%m-%d-%Y").isocalendar()[1] if max_week == 1: max_week = 53 return 1 <= week <= max_week @@ -423,10 +423,10 @@ class Inputs: return 0 <= minutes <= 59 @classmethod - def parse_value(cls, itype: str, value: Optional[str]) -> Optional[Tuple[float, ...]]: + def parse_value(cls, itype: str, value: str | None) -> tuple[float, ...] | None: """Parse the input value.""" - parsed = None # type: Optional[Tuple[float, ...]] + parsed = None # type: tuple[float, ...] 
| None if value is None: return value if itype == "date": @@ -484,19 +484,19 @@ class CSSMatch(_DocumentNav): def __init__( self, selectors: ct.SelectorList, - scope: 'bs4.Tag', - namespaces: Optional[ct.Namespaces], + scope: bs4.Tag, + namespaces: ct.Namespaces | None, flags: int ) -> None: """Initialize.""" self.assert_valid_input(scope) self.tag = scope - self.cached_meta_lang = [] # type: List[Tuple[str, str]] - self.cached_default_forms = [] # type: List[Tuple['bs4.Tag', 'bs4.Tag']] - self.cached_indeterminate_forms = [] # type: List[Tuple['bs4.Tag', str, bool]] + self.cached_meta_lang = [] # type: list[tuple[str, str]] + self.cached_default_forms = [] # type: list[tuple[bs4.Tag, bs4.Tag]] + self.cached_indeterminate_forms = [] # type: list[tuple[bs4.Tag, str, bool]] self.selectors = selectors - self.namespaces = {} if namespaces is None else namespaces # type: Union[ct.Namespaces, Dict[str, str]] + self.namespaces = {} if namespaces is None else namespaces # type: ct.Namespaces | dict[str, str] self.flags = flags self.iframe_restrict = False @@ -527,7 +527,7 @@ class CSSMatch(_DocumentNav): return self.is_xml or self.has_html_namespace - def get_tag_ns(self, el: 'bs4.Tag') -> str: + def get_tag_ns(self, el: bs4.Tag) -> str: """Get tag namespace.""" if self.supports_namespaces(): @@ -539,24 +539,24 @@ class CSSMatch(_DocumentNav): namespace = NS_XHTML return namespace - def is_html_tag(self, el: 'bs4.Tag') -> bool: + def is_html_tag(self, el: bs4.Tag) -> bool: """Check if tag is in HTML namespace.""" return self.get_tag_ns(el) == NS_XHTML - def get_tag(self, el: 'bs4.Tag') -> Optional[str]: + def get_tag(self, el: bs4.Tag) -> str | None: """Get tag.""" name = self.get_tag_name(el) return util.lower(name) if name is not None and not self.is_xml else name - def get_prefix(self, el: 'bs4.Tag') -> Optional[str]: + def get_prefix(self, el: bs4.Tag) -> str | None: """Get prefix.""" prefix = self.get_prefix_name(el) return util.lower(prefix) if prefix is not None and 
not self.is_xml else prefix - def find_bidi(self, el: 'bs4.Tag') -> Optional[int]: + def find_bidi(self, el: bs4.Tag) -> int | None: """Get directionality from element text.""" for node in self.get_children(el, tags=False): @@ -600,13 +600,18 @@ class CSSMatch(_DocumentNav): ranges = lang_range.split('-') subtags = lang_tag.lower().split('-') length = len(ranges) + slength = len(subtags) rindex = 0 sindex = 0 r = ranges[rindex] s = subtags[sindex] + # Empty specified language should match unspecified language attributes + if length == 1 and slength == 1 and not r and r == s: + return True + # Primary tag needs to match - if r != '*' and r != s: + if (r != '*' and r != s) or (r == '*' and slength == 1 and not s): match = False rindex += 1 @@ -645,10 +650,10 @@ class CSSMatch(_DocumentNav): def match_attribute_name( self, - el: 'bs4.Tag', + el: bs4.Tag, attr: str, - prefix: Optional[str] - ) -> Optional[Union[str, Sequence[str]]]: + prefix: str | None + ) -> str | Sequence[str] | None: """Match attribute name and return value if it exists.""" value = None @@ -696,7 +701,7 @@ class CSSMatch(_DocumentNav): break return value - def match_namespace(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool: + def match_namespace(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool: """Match the namespace of the element.""" match = True @@ -717,7 +722,7 @@ class CSSMatch(_DocumentNav): match = False return match - def match_attributes(self, el: 'bs4.Tag', attributes: Tuple[ct.SelectorAttribute, ...]) -> bool: + def match_attributes(self, el: bs4.Tag, attributes: tuple[ct.SelectorAttribute, ...]) -> bool: """Match attributes.""" match = True @@ -736,7 +741,7 @@ class CSSMatch(_DocumentNav): break return match - def match_tagname(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool: + def match_tagname(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool: """Match tag name.""" name = (util.lower(tag.name) if not self.is_xml and tag.name is not None else tag.name) @@ -745,7 +750,7 @@ class 
CSSMatch(_DocumentNav): name not in (self.get_tag(el), '*') ) - def match_tag(self, el: 'bs4.Tag', tag: Optional[ct.SelectorTag]) -> bool: + def match_tag(self, el: bs4.Tag, tag: ct.SelectorTag | None) -> bool: """Match the tag.""" match = True @@ -757,7 +762,7 @@ class CSSMatch(_DocumentNav): match = False return match - def match_past_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool: + def match_past_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool: """Match past relationship.""" found = False @@ -785,12 +790,12 @@ class CSSMatch(_DocumentNav): found = self.match_selectors(sibling, relation) return found - def match_future_child(self, parent: 'bs4.Tag', relation: ct.SelectorList, recursive: bool = False) -> bool: + def match_future_child(self, parent: bs4.Tag, relation: ct.SelectorList, recursive: bool = False) -> bool: """Match future child.""" match = False if recursive: - children = self.get_descendants # type: Callable[..., Iterator['bs4.Tag']] + children = self.get_descendants # type: Callable[..., Iterator[bs4.Tag]] else: children = self.get_children for child in children(parent, no_iframe=self.iframe_restrict): @@ -799,7 +804,7 @@ class CSSMatch(_DocumentNav): break return match - def match_future_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool: + def match_future_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool: """Match future relationship.""" found = False @@ -822,7 +827,7 @@ class CSSMatch(_DocumentNav): found = self.match_selectors(sibling, relation) return found - def match_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool: + def match_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool: """Match relationship to other elements.""" found = False @@ -837,7 +842,7 @@ class CSSMatch(_DocumentNav): return found - def match_id(self, el: 'bs4.Tag', ids: Tuple[str, ...]) -> bool: + def match_id(self, el: bs4.Tag, ids: tuple[str, ...]) -> bool: """Match element's 
ID.""" found = True @@ -847,7 +852,7 @@ class CSSMatch(_DocumentNav): break return found - def match_classes(self, el: 'bs4.Tag', classes: Tuple[str, ...]) -> bool: + def match_classes(self, el: bs4.Tag, classes: tuple[str, ...]) -> bool: """Match element's classes.""" current_classes = self.get_classes(el) @@ -858,7 +863,7 @@ class CSSMatch(_DocumentNav): break return found - def match_root(self, el: 'bs4.Tag') -> bool: + def match_root(self, el: bs4.Tag) -> bool: """Match element as root.""" is_root = self.is_root(el) @@ -884,20 +889,20 @@ class CSSMatch(_DocumentNav): sibling = self.get_next(sibling, tags=False) return is_root - def match_scope(self, el: 'bs4.Tag') -> bool: + def match_scope(self, el: bs4.Tag) -> bool: """Match element as scope.""" return self.scope is el - def match_nth_tag_type(self, el: 'bs4.Tag', child: 'bs4.Tag') -> bool: + def match_nth_tag_type(self, el: bs4.Tag, child: bs4.Tag) -> bool: """Match tag type for `nth` matches.""" - return( + return ( (self.get_tag(child) == self.get_tag(el)) and (self.get_tag_ns(child) == self.get_tag_ns(el)) ) - def match_nth(self, el: 'bs4.Tag', nth: 'bs4.Tag') -> bool: + def match_nth(self, el: bs4.Tag, nth: bs4.Tag) -> bool: """Match `nth` elements.""" matched = True @@ -998,7 +1003,7 @@ class CSSMatch(_DocumentNav): break return matched - def match_empty(self, el: 'bs4.Tag') -> bool: + def match_empty(self, el: bs4.Tag) -> bool: """Check if element is empty (if requested).""" is_empty = True @@ -1011,7 +1016,7 @@ class CSSMatch(_DocumentNav): break return is_empty - def match_subselectors(self, el: 'bs4.Tag', selectors: Tuple[ct.SelectorList, ...]) -> bool: + def match_subselectors(self, el: bs4.Tag, selectors: tuple[ct.SelectorList, ...]) -> bool: """Match selectors.""" match = True @@ -1020,11 +1025,11 @@ class CSSMatch(_DocumentNav): match = False return match - def match_contains(self, el: 'bs4.Tag', contains: Tuple[ct.SelectorContains, ...]) -> bool: + def match_contains(self, el: bs4.Tag, 
contains: tuple[ct.SelectorContains, ...]) -> bool: """Match element if it contains text.""" match = True - content = None # type: Optional[Union[str, Sequence[str]]] + content = None # type: str | Sequence[str] | None for contain_list in contains: if content is None: if contain_list.own: @@ -1048,7 +1053,7 @@ class CSSMatch(_DocumentNav): match = False return match - def match_default(self, el: 'bs4.Tag') -> bool: + def match_default(self, el: bs4.Tag) -> bool: """Match default.""" match = False @@ -1087,13 +1092,13 @@ class CSSMatch(_DocumentNav): break return match - def match_indeterminate(self, el: 'bs4.Tag') -> bool: + def match_indeterminate(self, el: bs4.Tag) -> bool: """Match default.""" match = False name = cast(str, self.get_attribute_by_name(el, 'name')) - def get_parent_form(el: 'bs4.Tag') -> Optional['bs4.Tag']: + def get_parent_form(el: bs4.Tag) -> bs4.Tag | None: """Find this input's form.""" form = None parent = self.get_parent(el, no_iframe=True) @@ -1148,7 +1153,7 @@ class CSSMatch(_DocumentNav): return match - def match_lang(self, el: 'bs4.Tag', langs: Tuple[ct.SelectorLang, ...]) -> bool: + def match_lang(self, el: bs4.Tag, langs: tuple[ct.SelectorLang, ...]) -> bool: """Match languages.""" match = False @@ -1183,7 +1188,7 @@ class CSSMatch(_DocumentNav): break # Use cached meta language. - if not found_lang and self.cached_meta_lang: + if found_lang is None and self.cached_meta_lang: for cache in self.cached_meta_lang: if root is cache[0]: found_lang = cache[1] @@ -1217,13 +1222,13 @@ class CSSMatch(_DocumentNav): found_lang = content self.cached_meta_lang.append((cast(str, root), cast(str, found_lang))) break - if found_lang: + if found_lang is not None: break - if not found_lang: + if found_lang is None: self.cached_meta_lang.append((cast(str, root), '')) # If we determined a language, compare. 
- if found_lang: + if found_lang is not None: for patterns in langs: match = False for pattern in patterns: @@ -1234,7 +1239,7 @@ class CSSMatch(_DocumentNav): return match - def match_dir(self, el: 'bs4.Tag', directionality: int) -> bool: + def match_dir(self, el: bs4.Tag, directionality: int) -> bool: """Check directionality.""" # If we have to match both left and right, we can't match either. @@ -1266,11 +1271,7 @@ class CSSMatch(_DocumentNav): # Auto handling for text inputs if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0: if is_textarea: - temp = [] - for node in self.get_contents(el, no_iframe=True): - if self.is_content_string(node): - temp.append(node) - value = ''.join(temp) + value = ''.join(node for node in self.get_contents(el, no_iframe=True) if self.is_content_string(node)) else: value = cast(str, self.get_attribute_by_name(el, 'value', '')) if value: @@ -1297,7 +1298,7 @@ class CSSMatch(_DocumentNav): # Match parents direction return self.match_dir(self.get_parent(el, no_iframe=True), directionality) - def match_range(self, el: 'bs4.Tag', condition: int) -> bool: + def match_range(self, el: bs4.Tag, condition: int) -> bool: """ Match range. @@ -1337,7 +1338,7 @@ class CSSMatch(_DocumentNav): return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range - def match_defined(self, el: 'bs4.Tag') -> bool: + def match_defined(self, el: bs4.Tag) -> bool: """ Match defined. @@ -1360,7 +1361,7 @@ class CSSMatch(_DocumentNav): ) ) - def match_placeholder_shown(self, el: 'bs4.Tag') -> bool: + def match_placeholder_shown(self, el: bs4.Tag) -> bool: """ Match placeholder shown according to HTML spec. 
@@ -1375,7 +1376,7 @@ class CSSMatch(_DocumentNav): return match - def match_selectors(self, el: 'bs4.Tag', selectors: ct.SelectorList) -> bool: + def match_selectors(self, el: bs4.Tag, selectors: ct.SelectorList) -> bool: """Check if element matches one of the selectors.""" match = False @@ -1459,7 +1460,7 @@ class CSSMatch(_DocumentNav): return match - def select(self, limit: int = 0) -> Iterator['bs4.Tag']: + def select(self, limit: int = 0) -> Iterator[bs4.Tag]: """Match all tags under the targeted tag.""" lim = None if limit < 1 else limit @@ -1472,7 +1473,7 @@ class CSSMatch(_DocumentNav): if lim < 1: break - def closest(self) -> Optional['bs4.Tag']: + def closest(self) -> bs4.Tag | None: """Match closest ancestor.""" current = self.tag @@ -1484,12 +1485,12 @@ class CSSMatch(_DocumentNav): current = self.get_parent(current) return closest - def filter(self) -> List['bs4.Tag']: # noqa A001 + def filter(self) -> list[bs4.Tag]: # noqa A001 """Filter tag's children.""" return [tag for tag in self.get_contents(self.tag) if not self.is_navigable_string(tag) and self.match(tag)] - def match(self, el: 'bs4.Tag') -> bool: + def match(self, el: bs4.Tag) -> bool: """Match.""" return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors) @@ -1500,8 +1501,8 @@ class SoupSieve(ct.Immutable): pattern: str selectors: ct.SelectorList - namespaces: Optional[ct.Namespaces] - custom: Dict[str, str] + namespaces: ct.Namespaces | None + custom: dict[str, str] flags: int __slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash") @@ -1510,8 +1511,8 @@ class SoupSieve(ct.Immutable): self, pattern: str, selectors: ct.SelectorList, - namespaces: Optional[ct.Namespaces], - custom: Optional[ct.CustomSelectors], + namespaces: ct.Namespaces | None, + custom: ct.CustomSelectors | None, flags: int ): """Initialize.""" @@ -1524,17 +1525,17 @@ class SoupSieve(ct.Immutable): flags=flags ) - def match(self, tag: 'bs4.Tag') -> bool: + def 
match(self, tag: bs4.Tag) -> bool: """Match.""" return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag) - def closest(self, tag: 'bs4.Tag') -> 'bs4.Tag': + def closest(self, tag: bs4.Tag) -> bs4.Tag: """Match closest ancestor.""" return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest() - def filter(self, iterable: Iterable['bs4.Tag']) -> List['bs4.Tag']: # noqa A001 + def filter(self, iterable: Iterable[bs4.Tag]) -> list[bs4.Tag]: # noqa A001 """ Filter. @@ -1551,31 +1552,28 @@ class SoupSieve(ct.Immutable): else: return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)] - def select_one(self, tag: 'bs4.Tag') -> 'bs4.Tag': + def select_one(self, tag: bs4.Tag) -> bs4.Tag: """Select a single tag.""" tags = self.select(tag, limit=1) return tags[0] if tags else None - def select(self, tag: 'bs4.Tag', limit: int = 0) -> List['bs4.Tag']: + def select(self, tag: bs4.Tag, limit: int = 0) -> list[bs4.Tag]: """Select the specified tags.""" return list(self.iselect(tag, limit)) - def iselect(self, tag: 'bs4.Tag', limit: int = 0) -> Iterator['bs4.Tag']: + def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]: """Iterate the specified tags.""" - for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit): - yield el + yield from CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit) def __repr__(self) -> str: # pragma: no cover """Representation.""" - return "SoupSieve(pattern={!r}, namespaces={!r}, custom={!r}, flags={!r})".format( - self.pattern, - self.namespaces, - self.custom, - self.flags + return ( + f"SoupSieve(pattern={self.pattern!r}, namespaces={self.namespaces!r}, " + f"custom={self.custom!r}, flags={self.flags!r})" ) __str__ = __repr__ diff --git a/lib/soupsieve/css_parser.py b/lib/soupsieve/css_parser.py index 0536b80f..bedae694 100644 --- a/lib/soupsieve/css_parser.py +++ b/lib/soupsieve/css_parser.py @@ -1,4 +1,5 @@ """CSS 
selector parser.""" +from __future__ import annotations import re from functools import lru_cache from . import util @@ -6,7 +7,7 @@ from . import css_match as cm from . import css_types as ct from .util import SelectorSyntaxError import warnings -from typing import Optional, Dict, Match, Tuple, Type, Any, List, Union, Iterator, cast +from typing import Match, Any, Iterator, cast UNICODE_REPLACEMENT_CHAR = 0xFFFD @@ -91,94 +92,81 @@ PSEUDO_SUPPORTED = PSEUDO_SIMPLE | PSEUDO_SIMPLE_NO_MATCH | PSEUDO_COMPLEX | PSE # Sub-patterns parts # Whitespace NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])' -WS = r'(?:[ \t]|{})'.format(NEWLINE) +WS = fr'(?:[ \t]|{NEWLINE})' # Comments COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)' # Whitespace with comments included -WSC = r'(?:{ws}|{comments})'.format(ws=WS, comments=COMMENTS) +WSC = fr'(?:{WS}|{COMMENTS})' # CSS escapes -CSS_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$))'.format(ws=WS) -CSS_STRING_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$|{nl}))'.format(ws=WS, nl=NEWLINE) +CSS_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$))' +CSS_STRING_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$|{NEWLINE}))' # CSS Identifier -IDENTIFIER = r''' -(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})+|--) -(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})*) -'''.format(esc=CSS_ESCAPES) +IDENTIFIER = fr''' +(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})+|--) +(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})*) +''' # `nth` content -NTH = r'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){ws}*(?:[-+]){ws}*(?:[0-9]+))?'.format(ws=WSC) +NTH = fr'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){WSC}*(?:[-+]){WSC}*(?:[0-9]+))?' 
# Value: quoted string or identifier -VALUE = r''' -(?:"(?:\\(?:.|{nl})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{nl})|[^\\'\r\n\f]+)*?'|{ident}+) -'''.format(nl=NEWLINE, ident=IDENTIFIER) +VALUE = fr'''(?:"(?:\\(?:.|{NEWLINE})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{NEWLINE})|[^\\'\r\n\f]+)*?'|{IDENTIFIER}+)''' # Attribute value comparison. `!=` is handled special as it is non-standard. -ATTR = r''' -(?:{ws}*(?P[!~^|*$]?=){ws}*(?P{value})(?:{ws}+(?P[is]))?)?{ws}*\] -'''.format(ws=WSC, value=VALUE) +ATTR = fr'(?:{WSC}*(?P[!~^|*$]?=){WSC}*(?P{VALUE})(?:{WSC}*(?P[is]))?)?{WSC}*\]' # Selector patterns # IDs (`#id`) -PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER) +PAT_ID = fr'\#{IDENTIFIER}' # Classes (`.class`) -PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER) +PAT_CLASS = fr'\.{IDENTIFIER}' # Prefix:Tag (`prefix|tag`) -PAT_TAG = r'(?P(?:{ident}|\*)?\|)?(?P{ident}|\*)'.format(ident=IDENTIFIER) +PAT_TAG = fr'(?P(?:{IDENTIFIER}|\*)?\|)?(?P{IDENTIFIER}|\*)' # Attributes (`[attr]`, `[attr=value]`, etc.) -PAT_ATTR = r''' -\[{ws}*(?P(?:{ident}|\*)?\|)?(?P{ident}){attr} -'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR) +PAT_ATTR = fr'\[{WSC}*(?P(?:{IDENTIFIER}|\*)?\|)?(?P{IDENTIFIER}){ATTR}' # Pseudo class (`:pseudo-class`, `:pseudo-class(`) -PAT_PSEUDO_CLASS = r'(?P:{ident})(?P\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER) +PAT_PSEUDO_CLASS = fr'(?P:{IDENTIFIER})(?P\({WSC}*)?' # Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes. -PAT_PSEUDO_CLASS_SPECIAL = r'(?P:{ident})(?P\({ws}*)'.format(ws=WSC, ident=IDENTIFIER) +PAT_PSEUDO_CLASS_SPECIAL = fr'(?P:{IDENTIFIER})(?P\({WSC}*)' # Custom pseudo class (`:--custom-pseudo`) -PAT_PSEUDO_CLASS_CUSTOM = r'(?P:(?=--){ident})'.format(ident=IDENTIFIER) +PAT_PSEUDO_CLASS_CUSTOM = fr'(?P:(?=--){IDENTIFIER})' +# Nesting ampersand selector. 
Matches `&` +PAT_AMP = r'&' # Closing pseudo group (`)`) -PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC) +PAT_PSEUDO_CLOSE = fr'{WSC}*\)' # Pseudo element (`::pseudo-element`) -PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS) +PAT_PSEUDO_ELEMENT = fr':{PAT_PSEUDO_CLASS}' # At rule (`@page`, etc.) (not supported) -PAT_AT_RULE = r'@P{ident}'.format(ident=IDENTIFIER) +PAT_AT_RULE = fr'@P{IDENTIFIER}' # Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.) -PAT_PSEUDO_NTH_CHILD = r''' -(?P{name} -(?P{nth}|even|odd))(?:{wsc}*\)|(?P{comments}*{ws}{wsc}*of{comments}*{ws}{wsc}*)) -'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, wsc=WSC, comments=COMMENTS, ws=WS, nth=NTH) +PAT_PSEUDO_NTH_CHILD = fr''' +(?P{PAT_PSEUDO_CLASS_SPECIAL} +(?P{NTH}|even|odd))(?:{WSC}*\)|(?P{COMMENTS}*{WS}{WSC}*of{COMMENTS}*{WS}{WSC}*)) +''' # Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.) -PAT_PSEUDO_NTH_TYPE = r''' -(?P{name} -(?P{nth}|even|odd)){ws}*\) -'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, nth=NTH) +PAT_PSEUDO_NTH_TYPE = fr''' +(?P{PAT_PSEUDO_CLASS_SPECIAL} +(?P{NTH}|even|odd)){WSC}*\) +''' # Pseudo class language (`:lang("*-de", en)`) -PAT_PSEUDO_LANG = r'{name}(?P{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format( - name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE -) +PAT_PSEUDO_LANG = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)' # Pseudo class direction (`:dir(ltr)`) -PAT_PSEUDO_DIR = r'{name}(?Pltr|rtl){ws}*\)'.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC) +PAT_PSEUDO_DIR = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?Pltr|rtl){WSC}*\)' # Combining characters (`>`, `~`, ` `, `+`, `,`) -PAT_COMBINE = r'{wsc}*?(?P[,+>~]|{ws}(?![,+>~])){wsc}*'.format(ws=WS, wsc=WSC) +PAT_COMBINE = fr'{WSC}*?(?P[,+>~]|{WS}(?![,+>~])){WSC}*' # Extra: Contains (`:contains(text)`) -PAT_PSEUDO_CONTAINS = r'{name}(?P{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format( - name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE -) +PAT_PSEUDO_CONTAINS = 
fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)' # Regular expressions # CSS escape pattern -RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$))'.format(ws=WSC), re.I) -RE_CSS_STR_ESC = re.compile( - r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$)|(\\{nl}))'.format(ws=WS, nl=NEWLINE), re.I -) +RE_CSS_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WSC}?)|(\\[^\r\n\f])|(\\$))', re.I) +RE_CSS_STR_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$)|(\\{NEWLINE}))', re.I) # Pattern to break up `nth` specifiers -RE_NTH = re.compile( - r'(?P[-+])?(?P[0-9]+n?|n)(?:(?<=n){ws}*(?P[-+]){ws}*(?P[0-9]+))?'.format(ws=WSC), - re.I -) +RE_NTH = re.compile(fr'(?P[-+])?(?P[0-9]+n?|n)(?:(?<=n){WSC}*(?P[-+]){WSC}*(?P[0-9]+))?', re.I) # Pattern to iterate multiple values. -RE_VALUES = re.compile(r'(?:(?P{value})|(?P{ws}*,{ws}*))'.format(ws=WSC, value=VALUE), re.X) +RE_VALUES = re.compile(fr'(?:(?P{VALUE})|(?P{WSC}*,{WSC}*))', re.X) # Whitespace checks RE_WS = re.compile(WS) -RE_WS_BEGIN = re.compile('^{}*'.format(WSC)) -RE_WS_END = re.compile('{}*$'.format(WSC)) -RE_CUSTOM = re.compile(r'^{}$'.format(PAT_PSEUDO_CLASS_CUSTOM), re.X) +RE_WS_BEGIN = re.compile(fr'^{WSC}*') +RE_WS_END = re.compile(fr'{WSC}*$') +RE_CUSTOM = re.compile(fr'^{PAT_PSEUDO_CLASS_CUSTOM}$', re.X) # Constants # List split token @@ -206,8 +194,8 @@ _MAXCACHE = 500 @lru_cache(maxsize=_MAXCACHE) def _cached_css_compile( pattern: str, - namespaces: Optional[ct.Namespaces], - custom: Optional[ct.CustomSelectors], + namespaces: ct.Namespaces | None, + custom: ct.CustomSelectors | None, flags: int ) -> cm.SoupSieve: """Cached CSS compile.""" @@ -232,7 +220,7 @@ def _purge_cache() -> None: _cached_css_compile.cache_clear() -def process_custom(custom: Optional[ct.CustomSelectors]) -> Dict[str, Union[str, ct.SelectorList]]: +def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]: """Process custom.""" custom_selectors = {} @@ 
-240,9 +228,9 @@ def process_custom(custom: Optional[ct.CustomSelectors]) -> Dict[str, Union[str, for key, value in custom.items(): name = util.lower(key) if RE_CUSTOM.match(name) is None: - raise SelectorSyntaxError("The name '{}' is not a valid custom pseudo-class name".format(name)) + raise SelectorSyntaxError(f"The name '{name}' is not a valid custom pseudo-class name") if name in custom_selectors: - raise KeyError("The custom selector '{}' has already been registered".format(name)) + raise KeyError(f"The custom selector '{name}' has already been registered") custom_selectors[css_unescape(name)] = value return custom_selectors @@ -282,23 +270,23 @@ def escape(ident: str) -> str: start_dash = length > 0 and ident[0] == '-' if length == 1 and start_dash: # Need to escape identifier that is a single `-` with no other characters - string.append('\\{}'.format(ident)) + string.append(f'\\{ident}') else: for index, c in enumerate(ident): codepoint = ord(c) if codepoint == 0x00: string.append('\ufffd') elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F: - string.append('\\{:x} '.format(codepoint)) + string.append(f'\\{codepoint:x} ') elif (index == 0 or (start_dash and index == 1)) and (0x30 <= codepoint <= 0x39): - string.append('\\{:x} '.format(codepoint)) + string.append(f'\\{codepoint:x} ') elif ( codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or (0x30 <= codepoint <= 0x39) or (0x30 <= codepoint <= 0x39) or (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A) ): string.append(c) else: - string.append('\\{}'.format(c)) + string.append(f'\\{c}') return ''.join(string) @@ -316,7 +304,7 @@ class SelectorPattern: return self.name - def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]: + def match(self, selector: str, index: int, flags: int) -> Match[str] | None: """Match the selector.""" return self.re_pattern.match(selector, index) @@ -325,7 +313,7 @@ class SelectorPattern: class SpecialPseudoPattern(SelectorPattern): """Selector 
pattern.""" - def __init__(self, patterns: Tuple[Tuple[str, Tuple[str, ...], str, Type[SelectorPattern]], ...]) -> None: + def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None: """Initialize.""" self.patterns = {} @@ -335,7 +323,7 @@ class SpecialPseudoPattern(SelectorPattern): for pseudo in p[1]: self.patterns[pseudo] = pattern - self.matched_name = None # type: Optional[SelectorPattern] + self.matched_name = None # type: SelectorPattern | None self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U) def get_name(self) -> str: @@ -343,7 +331,7 @@ class SpecialPseudoPattern(SelectorPattern): return '' if self.matched_name is None else self.matched_name.get_name() - def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]: + def match(self, selector: str, index: int, flags: int) -> Match[str] | None: """Match the selector.""" pseudo = None @@ -371,20 +359,20 @@ class _Selector: def __init__(self, **kwargs: Any) -> None: """Initialize.""" - self.tag = kwargs.get('tag', None) # type: Optional[ct.SelectorTag] - self.ids = kwargs.get('ids', []) # type: List[str] - self.classes = kwargs.get('classes', []) # type: List[str] - self.attributes = kwargs.get('attributes', []) # type: List[ct.SelectorAttribute] - self.nth = kwargs.get('nth', []) # type: List[ct.SelectorNth] - self.selectors = kwargs.get('selectors', []) # type: List[ct.SelectorList] - self.relations = kwargs.get('relations', []) # type: List[_Selector] - self.rel_type = kwargs.get('rel_type', None) # type: Optional[str] - self.contains = kwargs.get('contains', []) # type: List[ct.SelectorContains] - self.lang = kwargs.get('lang', []) # type: List[ct.SelectorLang] + self.tag = kwargs.get('tag', None) # type: ct.SelectorTag | None + self.ids = kwargs.get('ids', []) # type: list[str] + self.classes = kwargs.get('classes', []) # type: list[str] + self.attributes = kwargs.get('attributes', []) # type: 
list[ct.SelectorAttribute] + self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth] + self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList] + self.relations = kwargs.get('relations', []) # type: list[_Selector] + self.rel_type = kwargs.get('rel_type', None) # type: str | None + self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains] + self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang] self.flags = kwargs.get('flags', 0) # type: int self.no_match = kwargs.get('no_match', False) # type: bool - def _freeze_relations(self, relations: List['_Selector']) -> ct.SelectorList: + def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList: """Freeze relation.""" if relations: @@ -394,7 +382,7 @@ class _Selector: else: return ct.SelectorList() - def freeze(self) -> Union[ct.Selector, ct.SelectorNull]: + def freeze(self) -> ct.Selector | ct.SelectorNull: """Freeze self.""" if self.no_match: @@ -418,11 +406,10 @@ class _Selector: """String representation.""" return ( - '_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, ' - 'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})' - ).format( - self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors, - self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match + f'_Selector(tag={self.tag!r}, ids={self.ids!r}, classes={self.classes!r}, attributes={self.attributes!r}, ' + f'nth={self.nth!r}, selectors={self.selectors!r}, relations={self.relations!r}, ' + f'rel_type={self.rel_type!r}, contains={self.contains!r}, lang={self.lang!r}, flags={self.flags!r}, ' + f'no_match={self.no_match!r})' ) __repr__ = __str__ @@ -450,6 +437,7 @@ class CSSParser: SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM), SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS), SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT), + SelectorPattern("amp", PAT_AMP), 
SelectorPattern("at_rule", PAT_AT_RULE), SelectorPattern("id", PAT_ID), SelectorPattern("class", PAT_CLASS), @@ -461,7 +449,7 @@ class CSSParser: def __init__( self, selector: str, - custom: Optional[Dict[str, Union[str, ct.SelectorList]]] = None, + custom: dict[str, str | ct.SelectorList] | None = None, flags: int = 0 ) -> None: """Initialize.""" @@ -562,7 +550,7 @@ class CSSParser: selector = self.custom.get(pseudo) if selector is None: raise SelectorSyntaxError( - "Undefined custom selector '{}' found at postion {}".format(pseudo, m.end(0)), + f"Undefined custom selector '{pseudo}' found at position {m.end(0)}", self.pattern, m.end(0) ) @@ -583,9 +571,9 @@ class CSSParser: sel: _Selector, m: Match[str], has_selector: bool, - iselector: Iterator[Tuple[str, Match[str]]], + iselector: Iterator[tuple[str, Match[str]]], is_html: bool - ) -> Tuple[bool, bool]: + ) -> tuple[bool, bool]: """Parse pseudo class.""" complex_pseudo = False @@ -662,13 +650,16 @@ class CSSParser: has_selector = True elif pseudo in PSEUDO_SUPPORTED: raise SelectorSyntaxError( - "Invalid syntax for pseudo class '{}'".format(pseudo), + f"Invalid syntax for pseudo class '{pseudo}'", self.pattern, m.start(0) ) else: - raise NotImplementedError( - "'{}' pseudo-class is not implemented at this time".format(pseudo) + raise SelectorSyntaxError( + f"'{pseudo}' was detected as a pseudo-class and is either unsupported or invalid. 
" + "If the syntax was not intended to be recognized as a pseudo-class, please escape the colon.", + self.pattern, + m.start(0) ) return has_selector, is_html @@ -678,7 +669,7 @@ class CSSParser: sel: _Selector, m: Match[str], has_selector: bool, - iselector: Iterator[Tuple[str, Match[str]]] + iselector: Iterator[tuple[str, Match[str]]] ) -> bool: """Parse `nth` pseudo.""" @@ -743,7 +734,7 @@ class CSSParser: sel: _Selector, name: str, has_selector: bool, - iselector: Iterator[Tuple[str, Match[str]]], + iselector: Iterator[tuple[str, Match[str]]], index: int ) -> bool: """Parse pseudo with opening bracket.""" @@ -752,7 +743,7 @@ class CSSParser: if name == ':not': flags |= FLG_NOT elif name == ':has': - flags |= FLG_RELATIVE | FLG_FORGIVE + flags |= FLG_RELATIVE elif name in (':where', ':is'): flags |= FLG_FORGIVE @@ -766,21 +757,16 @@ class CSSParser: sel: _Selector, m: Match[str], has_selector: bool, - selectors: List[_Selector], + selectors: list[_Selector], rel_type: str, index: int - ) -> Tuple[bool, _Selector, str]: + ) -> tuple[bool, _Selector, str]: """Parse combinator tokens.""" combinator = m.group('relation').strip() if not combinator: combinator = WS_COMBINATOR if combinator == COMMA_COMBINATOR: - if not has_selector: - # If we've not captured any selector parts, the comma is either at the beginning of the pattern - # or following another comma, both of which are unexpected. But shouldn't fail the pseudo-class. - sel.no_match = True - sel.rel_type = rel_type selectors[-1].relations.append(sel) rel_type = ":" + WS_COMBINATOR @@ -797,7 +783,7 @@ class CSSParser: # multiple non-whitespace combinators. So if the current combinator is not a whitespace, # then we've hit the multiple combinator case, so we should fail. 
raise SelectorSyntaxError( - 'The multiple combinators at position {}'.format(index), + f'The multiple combinators at position {index}', self.pattern, index ) @@ -814,12 +800,12 @@ class CSSParser: sel: _Selector, m: Match[str], has_selector: bool, - selectors: List[_Selector], - relations: List[_Selector], + selectors: list[_Selector], + relations: list[_Selector], is_pseudo: bool, is_forgive: bool, index: int - ) -> Tuple[bool, _Selector]: + ) -> tuple[bool, _Selector]: """Parse combinator tokens.""" combinator = m.group('relation').strip() @@ -828,7 +814,7 @@ class CSSParser: if not has_selector: if not is_forgive or combinator != COMMA_COMBINATOR: raise SelectorSyntaxError( - "The combinator '{}' at postion {}, must have a selector before it".format(combinator, index), + f"The combinator '{combinator}' at position {index}, must have a selector before it", self.pattern, index ) @@ -873,7 +859,7 @@ class CSSParser: pseudo = util.lower(css_unescape(m.group('name'))) if pseudo == ":contains": - warnings.warn( + warnings.warn( # noqa: B028 "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.", FutureWarning ) @@ -924,7 +910,7 @@ class CSSParser: def parse_selectors( self, - iselector: Iterator[Tuple[str, Match[str]]], + iselector: Iterator[tuple[str, Match[str]]], index: int = 0, flags: int = 0 ) -> ct.SelectorList: @@ -935,7 +921,7 @@ class CSSParser: selectors = [] has_selector = False closed = False - relations = [] # type: List[_Selector] + relations = [] # type: list[_Selector] rel_type = ":" + WS_COMBINATOR # Setup various flags @@ -986,13 +972,16 @@ class CSSParser: # Handle parts if key == "at_rule": - raise NotImplementedError("At-rules found at position {}".format(m.start(0))) + raise NotImplementedError(f"At-rules found at position {m.start(0)}") + elif key == "amp": + sel.flags |= ct.SEL_SCOPE + has_selector = True elif key == 'pseudo_class_custom': has_selector = self.parse_pseudo_class_custom(sel, m, 
has_selector) elif key == 'pseudo_class': has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html) elif key == 'pseudo_element': - raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0))) + raise NotImplementedError(f"Pseudo-element found at position {m.start(0)}") elif key == 'pseudo_contains': has_selector = self.parse_pseudo_contains(sel, m, has_selector) elif key in ('pseudo_nth_type', 'pseudo_nth_child'): @@ -1007,7 +996,7 @@ class CSSParser: if not has_selector: if not is_forgive: raise SelectorSyntaxError( - "Expected a selector at postion {}".format(m.start(0)), + f"Expected a selector at position {m.start(0)}", self.pattern, m.start(0) ) @@ -1017,7 +1006,7 @@ class CSSParser: break else: raise SelectorSyntaxError( - "Unmatched pseudo-class close at postion {}".format(m.start(0)), + f"Unmatched pseudo-class close at position {m.start(0)}", self.pattern, m.start(0) ) @@ -1035,7 +1024,7 @@ class CSSParser: elif key == 'tag': if has_selector: raise SelectorSyntaxError( - "Tag name found at position {} instead of at the start".format(m.start(0)), + f"Tag name found at position {m.start(0)} instead of at the start", self.pattern, m.start(0) ) @@ -1050,7 +1039,7 @@ class CSSParser: # Handle selectors that are not closed if is_open and not closed: raise SelectorSyntaxError( - "Unclosed pseudo-class at position {}".format(index), + f"Unclosed pseudo-class at position {index}", self.pattern, index ) @@ -1069,28 +1058,18 @@ class CSSParser: selectors.append(sel) # Forgive empty slots in pseudo-classes that have lists (and are forgiving) - elif is_forgive: - if is_relative: - # Handle relative selectors pseudo-classes with empty slots like `:has()` - if selectors and selectors[-1].rel_type is None and rel_type == ': ': - sel.rel_type = rel_type - sel.no_match = True - selectors[-1].relations.append(sel) - has_selector = True - else: - # Handle normal pseudo-classes with empty slots - if not selectors or not 
relations: - # Others like `:is()` etc. - sel.no_match = True - del relations[:] - selectors.append(sel) - has_selector = True + elif is_forgive and (not selectors or not relations): + # Handle normal pseudo-classes with empty slots like `:is()` etc. + sel.no_match = True + del relations[:] + selectors.append(sel) + has_selector = True if not has_selector: # We will always need to finish a selector when `:has()` is used as it leads with combining. # May apply to others as well. raise SelectorSyntaxError( - 'Expected a selector at position {}'.format(index), + f'Expected a selector at position {index}', self.pattern, index ) @@ -1112,7 +1091,7 @@ class CSSParser: # Return selector list return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html) - def selector_iter(self, pattern: str) -> Iterator[Tuple[str, Match[str]]]: + def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]: """Iterate selector tokens.""" # Ignore whitespace and comments at start and end of pattern @@ -1122,7 +1101,7 @@ class CSSParser: end = (m.start(0) - 1) if m else (len(pattern) - 1) if self.debug: # pragma: no cover - print('## PARSING: {!r}'.format(pattern)) + print(f'## PARSING: {pattern!r}') while index <= end: m = None for v in self.css_tokens: @@ -1130,7 +1109,7 @@ class CSSParser: if m: name = v.get_name() if self.debug: # pragma: no cover - print("TOKEN: '{}' --> {!r} at position {}".format(name, m.group(0), m.start(0))) + print(f"TOKEN: '{name}' --> {m.group(0)!r} at position {m.start(0)}") index = m.end(0) yield name, m break @@ -1140,15 +1119,15 @@ class CSSParser: # throw an exception mentioning that the known selector type is in error; # otherwise, report the invalid character. 
if c == '[': - msg = "Malformed attribute selector at position {}".format(index) + msg = f"Malformed attribute selector at position {index}" elif c == '.': - msg = "Malformed class selector at position {}".format(index) + msg = f"Malformed class selector at position {index}" elif c == '#': - msg = "Malformed id selector at position {}".format(index) + msg = f"Malformed id selector at position {index}" elif c == ':': - msg = "Malformed pseudo-class selector at position {}".format(index) + msg = f"Malformed pseudo-class selector at position {index}" else: - msg = "Invalid character {!r} position {}".format(c, index) + msg = f"Invalid character {c!r} position {index}" raise SelectorSyntaxError(msg, self.pattern, index) if self.debug: # pragma: no cover print('## END PARSING') diff --git a/lib/soupsieve/css_types.py b/lib/soupsieve/css_types.py index e5a6e49c..71a6519b 100644 --- a/lib/soupsieve/css_types.py +++ b/lib/soupsieve/css_types.py @@ -1,7 +1,8 @@ """CSS selector structure items.""" +from __future__ import annotations import copyreg from .pretty import pretty -from typing import Any, Type, Tuple, Union, Dict, Iterator, Hashable, Optional, Pattern, Iterable, Mapping +from typing import Any, Iterator, Hashable, Pattern, Iterable, Mapping __all__ = ( 'Selector', @@ -33,7 +34,7 @@ SEL_PLACEHOLDER_SHOWN = 0x400 class Immutable: """Immutable.""" - __slots__: Tuple[str, ...] = ('_hash',) + __slots__: tuple[str, ...] 
= ('_hash',) _hash: int @@ -44,11 +45,11 @@ class Immutable: for k, v in kwargs.items(): temp.append(type(v)) temp.append(v) - super(Immutable, self).__setattr__(k, v) - super(Immutable, self).__setattr__('_hash', hash(tuple(temp))) + super().__setattr__(k, v) + super().__setattr__('_hash', hash(tuple(temp))) @classmethod - def __base__(cls) -> "Type[Immutable]": + def __base__(cls) -> type[Immutable]: """Get base class.""" return cls @@ -58,7 +59,7 @@ class Immutable: return ( isinstance(other, self.__base__()) and - all([getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash']) + all(getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash') ) def __ne__(self, other: Any) -> bool: @@ -66,7 +67,7 @@ class Immutable: return ( not isinstance(other, self.__base__()) or - any([getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash']) + any(getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash') ) def __hash__(self) -> int: @@ -77,14 +78,13 @@ class Immutable: def __setattr__(self, name: str, value: Any) -> None: """Prevent mutability.""" - raise AttributeError("'{}' is immutable".format(self.__class__.__name__)) + raise AttributeError(f"'{self.__class__.__name__}' is immutable") def __repr__(self) -> str: # pragma: no cover """Representation.""" - return "{}({})".format( - self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]]) - ) + r = ', '.join([f"{k}={getattr(self, k)!r}" for k in self.__slots__[:-1]]) + return f"{self.__class__.__name__}({r})" __str__ = __repr__ @@ -99,7 +99,7 @@ class ImmutableDict(Mapping[Any, Any]): def __init__( self, - arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]] + arg: dict[Any, Any] | Iterable[tuple[Any, Any]] ) -> None: """Initialize.""" @@ -107,14 +107,14 @@ class ImmutableDict(Mapping[Any, Any]): self._d = dict(arg) self._hash = hash(tuple([(type(x), x, type(y), 
y) for x, y in sorted(self._d.items())])) - def _validate(self, arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]) -> None: + def _validate(self, arg: dict[Any, Any] | Iterable[tuple[Any, Any]]) -> None: """Validate arguments.""" if isinstance(arg, dict): - if not all([isinstance(v, Hashable) for v in arg.values()]): - raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) - elif not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg]): - raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) + if not all(isinstance(v, Hashable) for v in arg.values()): + raise TypeError(f'{self.__class__.__name__} values must be hashable') + elif not all(isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg): + raise TypeError(f'{self.__class__.__name__} values must be hashable') def __iter__(self) -> Iterator[Any]: """Iterator.""" @@ -139,7 +139,7 @@ class ImmutableDict(Mapping[Any, Any]): def __repr__(self) -> str: # pragma: no cover """Representation.""" - return "{!r}".format(self._d) + return f"{self._d!r}" __str__ = __repr__ @@ -147,37 +147,37 @@ class ImmutableDict(Mapping[Any, Any]): class Namespaces(ImmutableDict): """Namespaces.""" - def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None: + def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None: """Initialize.""" super().__init__(arg) - def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None: + def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None: """Validate arguments.""" if isinstance(arg, dict): - if not all([isinstance(v, str) for v in arg.values()]): - raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) - elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]): - raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__)) + if not all(isinstance(v, str) for v 
in arg.values()): + raise TypeError(f'{self.__class__.__name__} values must be hashable') + elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg): + raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings') class CustomSelectors(ImmutableDict): """Custom selectors.""" - def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None: + def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None: """Initialize.""" super().__init__(arg) - def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None: + def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None: """Validate arguments.""" if isinstance(arg, dict): - if not all([isinstance(v, str) for v in arg.values()]): - raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) - elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]): - raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__)) + if not all(isinstance(v, str) for v in arg.values()): + raise TypeError(f'{self.__class__.__name__} values must be hashable') + elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg): + raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings') class Selector(Immutable): @@ -188,30 +188,30 @@ class Selector(Immutable): 'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash' ) - tag: Optional['SelectorTag'] - ids: Tuple[str, ...] - classes: Tuple[str, ...] - attributes: Tuple['SelectorAttribute', ...] - nth: Tuple['SelectorNth', ...] - selectors: Tuple['SelectorList', ...] - relation: 'SelectorList' - rel_type: Optional[str] - contains: Tuple['SelectorContains', ...] - lang: Tuple['SelectorLang', ...] + tag: SelectorTag | None + ids: tuple[str, ...] + classes: tuple[str, ...] + attributes: tuple[SelectorAttribute, ...] + nth: tuple[SelectorNth, ...] + selectors: tuple[SelectorList, ...] 
+ relation: SelectorList + rel_type: str | None + contains: tuple[SelectorContains, ...] + lang: tuple[SelectorLang, ...] flags: int def __init__( self, - tag: Optional['SelectorTag'], - ids: Tuple[str, ...], - classes: Tuple[str, ...], - attributes: Tuple['SelectorAttribute', ...], - nth: Tuple['SelectorNth', ...], - selectors: Tuple['SelectorList', ...], - relation: 'SelectorList', - rel_type: Optional[str], - contains: Tuple['SelectorContains', ...], - lang: Tuple['SelectorLang', ...], + tag: SelectorTag | None, + ids: tuple[str, ...], + classes: tuple[str, ...], + attributes: tuple[SelectorAttribute, ...], + nth: tuple[SelectorNth, ...], + selectors: tuple[SelectorList, ...], + relation: SelectorList, + rel_type: str | None, + contains: tuple[SelectorContains, ...], + lang: tuple[SelectorLang, ...], flags: int ): """Initialize.""" @@ -246,9 +246,9 @@ class SelectorTag(Immutable): __slots__ = ("name", "prefix", "_hash") name: str - prefix: Optional[str] + prefix: str | None - def __init__(self, name: str, prefix: Optional[str]) -> None: + def __init__(self, name: str, prefix: str | None) -> None: """Initialize.""" super().__init__(name=name, prefix=prefix) @@ -261,15 +261,15 @@ class SelectorAttribute(Immutable): attribute: str prefix: str - pattern: Optional[Pattern[str]] - xml_type_pattern: Optional[Pattern[str]] + pattern: Pattern[str] | None + xml_type_pattern: Pattern[str] | None def __init__( self, attribute: str, prefix: str, - pattern: Optional[Pattern[str]], - xml_type_pattern: Optional[Pattern[str]] + pattern: Pattern[str] | None, + xml_type_pattern: Pattern[str] | None ) -> None: """Initialize.""" @@ -286,7 +286,7 @@ class SelectorContains(Immutable): __slots__ = ("text", "own", "_hash") - text: Tuple[str, ...] + text: tuple[str, ...] 
own: bool def __init__(self, text: Iterable[str], own: bool) -> None: @@ -305,9 +305,9 @@ class SelectorNth(Immutable): b: int of_type: bool last: bool - selectors: 'SelectorList' + selectors: SelectorList - def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: 'SelectorList') -> None: + def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: SelectorList) -> None: """Initialize.""" super().__init__( @@ -325,7 +325,7 @@ class SelectorLang(Immutable): __slots__ = ("languages", "_hash",) - languages: Tuple[str, ...] + languages: tuple[str, ...] def __init__(self, languages: Iterable[str]): """Initialize.""" @@ -353,25 +353,25 @@ class SelectorList(Immutable): __slots__ = ("selectors", "is_not", "is_html", "_hash") - selectors: Tuple[Union['Selector', 'SelectorNull'], ...] + selectors: tuple[Selector | SelectorNull, ...] is_not: bool is_html: bool def __init__( self, - selectors: Optional[Iterable[Union['Selector', 'SelectorNull']]] = None, + selectors: Iterable[Selector | SelectorNull] | None = None, is_not: bool = False, is_html: bool = False ) -> None: """Initialize.""" super().__init__( - selectors=tuple(selectors) if selectors is not None else tuple(), + selectors=tuple(selectors) if selectors is not None else (), is_not=is_not, is_html=is_html ) - def __iter__(self) -> Iterator[Union['Selector', 'SelectorNull']]: + def __iter__(self) -> Iterator[Selector | SelectorNull]: """Iterator.""" return iter(self.selectors) @@ -381,7 +381,7 @@ class SelectorList(Immutable): return len(self.selectors) - def __getitem__(self, index: int) -> Union['Selector', 'SelectorNull']: + def __getitem__(self, index: int) -> Selector | SelectorNull: """Get item.""" return self.selectors[index] diff --git a/lib/soupsieve/pretty.py b/lib/soupsieve/pretty.py index 57d16c97..193db05e 100644 --- a/lib/soupsieve/pretty.py +++ b/lib/soupsieve/pretty.py @@ -10,7 +10,7 @@ The format and various output types is fairly known (though it hasn't 
been tested extensively to make sure we aren't missing corners). Example: - +------- ``` >>> import soupsieve as sv >>> sv.compile('this > that.class[name=value]').selectors.pretty() @@ -64,7 +64,9 @@ SelectorList( is_not=False, is_html=False) ``` + """ +from __future__ import annotations import re from typing import Any @@ -122,16 +124,16 @@ def pretty(obj: Any) -> str: # pragma: no cover index = m.end(0) if name in ('class', 'lstrt', 'dstrt', 'tstrt'): indent += 4 - output.append('{}\n{}'.format(m.group(0), " " * indent)) + output.append(f'{m.group(0)}\n{" " * indent}') elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'): output.append(m.group(0)) elif name in ('lend', 'dend', 'tend'): indent -= 4 output.append(m.group(0)) elif name in ('sep',): - output.append('{}\n{}'.format(m.group(1), " " * indent)) + output.append(f'{m.group(1)}\n{" " * indent}') elif name in ('dsep',): - output.append('{} '.format(m.group(1))) + output.append(f'{m.group(1)} ') break return ''.join(output) diff --git a/lib/soupsieve/util.py b/lib/soupsieve/util.py index 2b1ed24b..9b2e64df 100644 --- a/lib/soupsieve/util.py +++ b/lib/soupsieve/util.py @@ -1,8 +1,9 @@ """Utility.""" +from __future__ import annotations from functools import wraps, lru_cache import warnings import re -from typing import Callable, Any, Optional, Tuple, List +from typing import Callable, Any DEBUG = 0x00001 @@ -26,7 +27,7 @@ def lower(string: str) -> str: class SelectorSyntaxError(Exception): """Syntax error in a CSS selector.""" - def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None: + def __init__(self, msg: str, pattern: str | None = None, index: int | None = None) -> None: """Initialize.""" self.line = None @@ -36,7 +37,7 @@ class SelectorSyntaxError(Exception): if pattern is not None and index is not None: # Format pattern to show line and column position self.context, self.line, self.col = get_pattern_context(pattern, index) - msg = '{}\n line 
{}:\n{}'.format(msg, self.line, self.context) + msg = f'{msg}\n line {self.line}:\n{self.context}' super().__init__(msg) @@ -75,15 +76,15 @@ def warn_deprecated(message: str, stacklevel: int = 2) -> None: # pragma: no co ) -def get_pattern_context(pattern: str, index: int) -> Tuple[str, int, int]: +def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]: """Get the pattern context.""" last = 0 current_line = 1 col = 1 - text = [] # type: List[str] + text = [] # type: list[str] line = 1 - offset = None # type: Optional[int] + offset = None # type: int | None # Split pattern by newline and handle the text before the newline for m in RE_PATTERN_LINE_SPLIT.finditer(pattern): @@ -104,7 +105,7 @@ def get_pattern_context(pattern: str, index: int) -> Tuple[str, int, int]: # we will render the output with just `\n`. We will still log the column # correctly though. text.append('\n') - text.append('{}{}'.format(indent, linetext)) + text.append(f'{indent}{linetext}') if offset is not None: text.append('\n') text.append(' ' * (col + offset) + '^') diff --git a/lib/soupsieve_old/__init__.py b/lib/soupsieve_old/__init__.py new file mode 100644 index 00000000..c89b7002 --- /dev/null +++ b/lib/soupsieve_old/__init__.py @@ -0,0 +1,166 @@ +""" +Soup Sieve. + +A CSS selector filter for BeautifulSoup4. + +MIT License + +Copyright (c) 2018 Isaac Muse + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +from .__meta__ import __version__, __version_info__ # noqa: F401 +from . import css_parser as cp +from . import css_match as cm +from . import css_types as ct +from .util import DEBUG, SelectorSyntaxError # noqa: F401 +import bs4 # type: ignore[import] +from typing import Dict, Optional, Any, List, Iterator, Iterable + +__all__ = ( + 'DEBUG', 'SelectorSyntaxError', 'SoupSieve', + 'closest', 'compile', 'filter', 'iselect', + 'match', 'select', 'select_one' +) + +SoupSieve = cm.SoupSieve + + +def compile( # noqa: A001 + pattern: str, + namespaces: Optional[Dict[str, str]] = None, + flags: int = 0, + *, + custom: Optional[Dict[str, str]] = None, + **kwargs: Any +) -> cm.SoupSieve: + """Compile CSS pattern.""" + + ns = ct.Namespaces(namespaces) if namespaces is not None else namespaces # type: Optional[ct.Namespaces] + cs = ct.CustomSelectors(custom) if custom is not None else custom # type: Optional[ct.CustomSelectors] + + if isinstance(pattern, SoupSieve): + if flags: + raise ValueError("Cannot process 'flags' argument on a compiled selector list") + elif namespaces is not None: + raise ValueError("Cannot process 'namespaces' argument on a compiled selector list") + elif custom is not None: + raise ValueError("Cannot process 'custom' argument on a compiled selector list") + return pattern + + return cp._cached_css_compile(pattern, ns, cs, flags) + + +def purge() -> None: + """Purge cached patterns.""" + + cp._purge_cache() + + +def closest( + select: str, + tag: 'bs4.Tag', + namespaces: 
Optional[Dict[str, str]] = None, + flags: int = 0, + *, + custom: Optional[Dict[str, str]] = None, + **kwargs: Any +) -> 'bs4.Tag': + """Match closest ancestor.""" + + return compile(select, namespaces, flags, **kwargs).closest(tag) + + +def match( + select: str, + tag: 'bs4.Tag', + namespaces: Optional[Dict[str, str]] = None, + flags: int = 0, + *, + custom: Optional[Dict[str, str]] = None, + **kwargs: Any +) -> bool: + """Match node.""" + + return compile(select, namespaces, flags, **kwargs).match(tag) + + +def filter( # noqa: A001 + select: str, + iterable: Iterable['bs4.Tag'], + namespaces: Optional[Dict[str, str]] = None, + flags: int = 0, + *, + custom: Optional[Dict[str, str]] = None, + **kwargs: Any +) -> List['bs4.Tag']: + """Filter list of nodes.""" + + return compile(select, namespaces, flags, **kwargs).filter(iterable) + + +def select_one( + select: str, + tag: 'bs4.Tag', + namespaces: Optional[Dict[str, str]] = None, + flags: int = 0, + *, + custom: Optional[Dict[str, str]] = None, + **kwargs: Any +) -> 'bs4.Tag': + """Select a single tag.""" + + return compile(select, namespaces, flags, **kwargs).select_one(tag) + + +def select( + select: str, + tag: 'bs4.Tag', + namespaces: Optional[Dict[str, str]] = None, + limit: int = 0, + flags: int = 0, + *, + custom: Optional[Dict[str, str]] = None, + **kwargs: Any +) -> List['bs4.Tag']: + """Select the specified tags.""" + + return compile(select, namespaces, flags, **kwargs).select(tag, limit) + + +def iselect( + select: str, + tag: 'bs4.Tag', + namespaces: Optional[Dict[str, str]] = None, + limit: int = 0, + flags: int = 0, + *, + custom: Optional[Dict[str, str]] = None, + **kwargs: Any +) -> Iterator['bs4.Tag']: + """Iterate the specified tags.""" + + for el in compile(select, namespaces, flags, **kwargs).iselect(tag, limit): + yield el + + +def escape(ident: str) -> str: + """Escape identifier.""" + + return cp.escape(ident) diff --git a/lib/soupsieve_old/__meta__.py b/lib/soupsieve_old/__meta__.py new 
file mode 100644
index 00000000..2d769fbf
--- /dev/null
+++ b/lib/soupsieve_old/__meta__.py
@@ -0,0 +1,196 @@
+"""Meta related things."""
+from collections import namedtuple
+import re
+
+RE_VER = re.compile(  # Version string: major[.minor[.micro]][(a|b|rc)N][.postN][.devN]; groups read by parse_version.
+    r'''(?x)
+    (?P<major>\d+)(?:\.(?P<minor>\d+))?(?:\.(?P<micro>\d+))?
+    (?:(?P<type>a|b|rc)(?P<pre>\d+))?
+    (?:\.post(?P<post>\d+))?
+    (?:\.dev(?P<dev>\d+))?
+    '''
+)
+
+REL_MAP = {  # Release type -> suffix placed before the `pre` number in the canonical string; also the set of valid types.
+    ".dev": "",
+    ".dev-alpha": "a",
+    ".dev-beta": "b",
+    ".dev-candidate": "rc",
+    "alpha": "a",
+    "beta": "b",
+    "candidate": "rc",
+    "final": ""
+}
+
+DEV_STATUS = {  # Release type -> development status string returned by `Version._get_dev_status`.
+    ".dev": "2 - Pre-Alpha",
+    ".dev-alpha": "2 - Pre-Alpha",
+    ".dev-beta": "2 - Pre-Alpha",
+    ".dev-candidate": "2 - Pre-Alpha",
+    "alpha": "3 - Alpha",
+    "beta": "4 - Beta",
+    "candidate": "4 - Beta",
+    "final": "5 - Production/Stable"
+}
+
+PRE_REL_MAP = {"a": 'alpha', "b": 'beta', "rc": 'candidate'}  # Parsed pre-release marker -> release type name.
+
+
+class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre", "post", "dev"])):
+    """
+    Get the version (PEP 440).
+
+    A biased approach to the PEP 440 semantic version.
+
+    Provides a tuple structure which is sorted for comparisons `v1 > v2` etc.
+      (major, minor, micro, release type, pre-release build, post-release build, development release build)
+    Release types are named in such a way that they are comparable with ease.
+    Accessors to check if a development, pre-release, or post-release build. Also provides accessor to get
+    development status for setup files.
+
+    How it works (currently):
+
+    - You must specify a release type as either `final`, `alpha`, `beta`, or `candidate`.
+    - To define a development release, you can use either `.dev`, `.dev-alpha`, `.dev-beta`, or `.dev-candidate`.
+      The dot is used to ensure all development specifiers are sorted before `alpha`.
+      You can specify a `dev` number for development builds, but do not have to as implicit development releases
+      are allowed.
+    - You must specify a `pre` value greater than zero if using a prerelease as this project (not PEP 440) does not
+      allow implicit prereleases.
+    - You can optionally set `post` to a value greater than zero to make the build a post release. While post releases
+      are technically allowed in prereleases, it is strongly discouraged, so we are rejecting them. It should be
+      noted that we do not allow `post0` even though PEP 440 does not restrict this. This project specifically
+      does not allow implicit post releases.
+    - It should be noted that we do not support epochs `1!` or local versions `+some-custom.version-1`.
+
+    Acceptable version releases:
+
+    ```
+    Version(1, 0, 0, "final")                    1.0
+    Version(1, 2, 0, "final")                    1.2
+    Version(1, 2, 3, "final")                    1.2.3
+    Version(1, 2, 0, "alpha", pre=4)             1.2a4
+    Version(1, 2, 0, "beta", pre=4)              1.2b4
+    Version(1, 2, 0, "candidate", pre=4)         1.2rc4
+    Version(1, 2, 0, "final", post=1)            1.2.post1
+    Version(1, 2, 3, ".dev")                     1.2.3.dev0
+    Version(1, 2, 3, ".dev", dev=1)              1.2.3.dev1
+    ```
+
+    """
+
+    def __new__(
+        cls,
+        major: int, minor: int, micro: int, release: str = "final",
+        pre: int = 0, post: int = 0, dev: int = 0
+    ) -> "Version":
+        """Validate the parts and build the tuple; raise `ValueError` for an invalid part or combination."""
+
+        # Ensure all numeric parts, including `dev`, are non-negative integers.
+        for value in (major, minor, micro, pre, post, dev):
+            if not (isinstance(value, int) and value >= 0):
+                raise ValueError("All version parts except 'release' should be integers.")
+
+        if release not in REL_MAP:
+            raise ValueError("'{}' is not a valid release type.".format(release))
+
+        # Ensure valid pre-release (we do not allow implicit pre-releases).
+        if ".dev-candidate" < release < "final":
+            if pre == 0:
+                raise ValueError("Implicit pre-releases not allowed.")
+            elif dev:
+                raise ValueError("Version is not a development release.")
+            elif post:
+                raise ValueError("Post-releases are not allowed with pre-releases.")
+
+        # Ensure valid development or development/pre release
+        elif release < "alpha":
+            if release > ".dev" and pre == 0:
+                raise ValueError("Implicit pre-release not allowed.")
+            elif post:
+                raise ValueError("Post-releases are not allowed with pre-releases.")
+
+        # Ensure a valid normal release
+        else:
+            if pre:
+                raise ValueError("Version is not a pre-release.")
+            elif dev:
+                raise ValueError("Version is not a development release.")
+
+        return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev)
+
+    def _is_pre(self) -> bool:
+        """Return True when the `pre` number is greater than zero."""
+
+        return bool(self.pre > 0)
+
+    def _is_dev(self) -> bool:
+        """Return True when the release type sorts before `alpha` (the `.dev*` types)."""
+
+        return bool(self.release < "alpha")
+
+    def _is_post(self) -> bool:
+        """Return True when the `post` number is greater than zero."""
+
+        return bool(self.post > 0)
+
+    def _get_dev_status(self) -> str:  # pragma: no cover
+        """Return the `DEV_STATUS` string for this release type."""
+
+        return DEV_STATUS[self.release]
+
+    def _get_canonical(self) -> str:
+        """Return the canonical version string (the micro part is omitted when it is zero)."""
+
+        # Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed.
+        if self.micro == 0:
+            ver = "{}.{}".format(self.major, self.minor)
+        else:
+            ver = "{}.{}.{}".format(self.major, self.minor, self.micro)
+        if self._is_pre():
+            ver += '{}{}'.format(REL_MAP[self.release], self.pre)
+        if self._is_post():
+            ver += ".post{}".format(self.post)
+        if self._is_dev():
+            ver += ".dev{}".format(self.dev)
+
+        return ver
+
+
+def parse_version(ver: str) -> Version:
+    """Parse a version string into a comparable `Version` tuple; raise `ValueError` if it does not match."""
+
+    m = RE_VER.match(ver)
+
+    if m is None:
+        raise ValueError("'{}' is not a valid version".format(ver))
+
+    # Handle major, minor, micro
+    major = int(m.group('major'))
+    minor = int(m.group('minor')) if m.group('minor') else 0
+    micro = int(m.group('micro')) if m.group('micro') else 0
+
+    # Handle pre releases
+    if m.group('type'):
+        release = PRE_REL_MAP[m.group('type')]
+        pre = int(m.group('pre'))
+    else:
+        release = "final"
+        pre = 0
+
+    # Handle development releases
+    # (a `.devN` suffix rewrites the release type to `.dev`, or `.dev-<type>` when a pre-release is present).
+    if m.group('dev'):
+        dev = int(m.group('dev'))
+        release = '.dev-' + release if pre else '.dev'
+    else:
+        dev = 0
+
+    # Handle post
+    post = int(m.group('post')) if m.group('post') else 0
+
+    return Version(major, minor, micro, release, pre, post, dev)
+
+
+__version_info__ = Version(2, 3, 1, "final")  # Package version as a comparable `Version` tuple.
+__version__ = __version_info__._get_canonical()  # Canonical string form of the tuple above ("2.3.1").
diff --git a/lib/soupsieve_old/css_match.py b/lib/soupsieve_old/css_match.py
new file mode 100644
index 00000000..79bb8707
--- /dev/null
+++ b/lib/soupsieve_old/css_match.py
@@ -0,0 +1,1584 @@
+"""CSS matcher."""
+from datetime import datetime
+from . import util
+import re
+from . import css_types as ct
+import unicodedata
+import bs4  # type: ignore[import]
+from typing import Iterator, Iterable, List, Any, Optional, Tuple, Union, Dict, Callable, Sequence, cast
+
+# Empty tag pattern (whitespace okay): matches one character that is not space, tab, CR, LF, or form feed.
+RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
+
+RE_NOT_WS = re.compile('[^ \t\r\n\f]+')  # Run of non-whitespace characters; `get_classes` uses it to split class strings.
+
+# Relationship markers (the values mirror the CSS combinator characters).
+REL_PARENT = ' '
+REL_CLOSE_PARENT = '>'
+REL_SIBLING = '~'
+REL_CLOSE_SIBLING = '+'
+
+# Relationships for :has() (forward looking); same markers as above, each prefixed with `:`.
+REL_HAS_PARENT = ': '
+REL_HAS_CLOSE_PARENT = ':>'
+REL_HAS_SIBLING = ':~'
+REL_HAS_CLOSE_SIBLING = ':+'
+
+NS_XHTML = 'http://www.w3.org/1999/xhtml'  # Namespace URI that `has_html_ns` compares an element's namespace against.
+NS_XML = 'http://www.w3.org/XML/1998/namespace'
+
+DIR_FLAGS = ct.SEL_DIR_LTR | ct.SEL_DIR_RTL  # Both direction flags combined into one mask.
+RANGES = ct.SEL_IN_RANGE | ct.SEL_OUT_OF_RANGE  # Both range flags combined into one mask.
+
+DIR_MAP = {  # Direction keyword -> direction flag; 'auto' maps to 0 (no flag).
+    'ltr': ct.SEL_DIR_LTR,
+    'rtl': ct.SEL_DIR_RTL,
+    'auto': 0
+}
+
+RE_NUM = re.compile(r"^(?P<value>-?(?:[0-9]{1,}(\.[0-9]+)?|\.[0-9]+))$")  # NOTE(review): confirm group name `value`
+RE_TIME = re.compile(r'^(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$')  # HH:MM; NOTE(review): confirm group names
+RE_MONTH = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})$')  # YYYY-MM (year may exceed four digits)
+RE_WEEK = re.compile(r'^(?P<year>[0-9]{4,})-W(?P<week>[0-9]{2})$')  # YYYY-Www
+RE_DATE = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})$')  # YYYY-MM-DD
+RE_DATETIME = re.compile(  # YYYY-MM-DDTHH:MM; NOTE(review): confirm `hour`/`minutes` group names
+    r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})T(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$'
+)
+RE_WILD_STRIP = re.compile(r'(?:(?:-\*-)(?:\*(?:-|$))*|-\*$)')
+
+MONTHS_30 = (4, 6, 9, 11)  # April, June, September, and November
+FEB = 2  # Month number of February, which `validate_day` handles separately.
+SHORT_MONTH = 30  # Maximum day for the months listed in MONTHS_30.
+LONG_MONTH = 31  # Maximum day for every other month except February.
+FEB_MONTH = 28  # Maximum day for February in a non-leap year.
+FEB_LEAP_MONTH = 29  # Maximum day for February in a leap year.
+DAYS_IN_WEEK = 7
+
+
+class _FakeParent:
+    """
+    Fake parent class.
+
+    When we have a fragment with no `BeautifulSoup` document object,
+    we can't evaluate `nth` selectors properly.  Create a temporary
+    fake parent so we can traverse the root element as a child.
+    """
+
+    def __init__(self, element: 'bs4.Tag') -> None:
+        """Store `element` as the only entry of `contents`."""
+
+        self.contents = [element]
+
+    def __len__(self) -> int:
+        """Return the number of entries in `contents`."""
+
+        return len(self.contents)
+
+
+class _DocumentNav:
+    """Helpers for inspecting and traversing Beautiful Soup nodes (type checks, relatives, attributes, text)."""
+
+    @classmethod
+    def assert_valid_input(cls, tag: Any) -> None:
+        """Raise `TypeError` unless `tag` is a `bs4.Tag` instance."""
+
+        # Fail on unexpected types.
+        if not cls.is_tag(tag):
+            raise TypeError("Expected a BeautifulSoup 'Tag', but instead received type {}".format(type(tag)))
+
+    @staticmethod
+    def is_doc(obj: 'bs4.Tag') -> bool:
+        """Return True when `obj` is a `bs4.BeautifulSoup` object."""
+        return isinstance(obj, bs4.BeautifulSoup)
+
+    @staticmethod
+    def is_tag(obj: 'bs4.PageElement') -> bool:
+        """Return True when `obj` is a `bs4.Tag`."""
+        return isinstance(obj, bs4.Tag)
+
+    @staticmethod
+    def is_declaration(obj: 'bs4.PageElement') -> bool:  # pragma: no cover
+        """Return True when `obj` is a `bs4.Declaration`."""
+        return isinstance(obj, bs4.Declaration)
+
+    @staticmethod
+    def is_cdata(obj: 'bs4.PageElement') -> bool:
+        """Return True when `obj` is a `bs4.CData`."""
+        return isinstance(obj, bs4.CData)
+
+    @staticmethod
+    def is_processing_instruction(obj: 'bs4.PageElement') -> bool:  # pragma: no cover
+        """Return True when `obj` is a `bs4.ProcessingInstruction`."""
+        return isinstance(obj, bs4.ProcessingInstruction)
+
+    @staticmethod
+    def is_navigable_string(obj: 'bs4.PageElement') -> bool:
+        """Return True when `obj` is a `bs4.NavigableString`."""
+        return isinstance(obj, bs4.NavigableString)
+
+    @staticmethod
+    def is_special_string(obj: 'bs4.PageElement') -> bool:
+        """Return True when `obj` is a comment, declaration, CDATA, processing instruction, or doctype."""
+        return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
+
+    @classmethod
+    def is_content_string(cls, obj: 'bs4.PageElement') -> bool:
+        """Return True for a navigable string that is not one of the special string types."""
+
+        return cls.is_navigable_string(obj) and not cls.is_special_string(obj)
+
+    @staticmethod
+    def create_fake_parent(el: 'bs4.Tag') -> _FakeParent:
+        """Wrap `el` in a `_FakeParent` whose only content is `el`."""
+
+        return _FakeParent(el)
+
+    @staticmethod
+    def is_xml_tree(el: 'bs4.Tag') -> bool:
+        """Check if element (or document) is from a XML tree, based on its `_is_xml` flag."""
+
+        return bool(el._is_xml)
+
+    def is_iframe(self, el: 'bs4.Tag') -> bool:
+        """Check if element is an `iframe`: name match (lowercased outside XML trees) and `is_html_tag` passes."""
+
+        return bool(
+            ((el.name if self.is_xml_tree(el) else util.lower(el.name)) == 'iframe') and
+            self.is_html_tag(el)  # type: ignore[attr-defined]
+        )
+
+    def is_root(self, el: 'bs4.Tag') -> bool:
+        """
+        Return whether element is a root element.
+
+        We check that the element is the root of the tree (which we have already pre-calculated),
+        and we check if it is the root element under an `iframe`.
+        """
+
+        root = self.root and self.root is el  # type: ignore[attr-defined]
+        if not root:
+            parent = self.get_parent(el)
+            root = parent is not None and self.is_html and self.is_iframe(parent)  # type: ignore[attr-defined]
+        return root
+
+    def get_contents(self, el: 'bs4.Tag', no_iframe: bool = False) -> Iterator['bs4.PageElement']:
+        """Yield the direct contents of `el`, or nothing when `no_iframe` is set and `el` is an `iframe`."""
+        if not no_iframe or not self.is_iframe(el):
+            # Contents are yielded in document order only; there is no reverse mode here.
+            yield from el.contents
+
+    def get_children(
+        self,
+        el: 'bs4.Tag',
+        start: Optional[int] = None,
+        reverse: bool = False,
+        tags: bool = True,
+        no_iframe: bool = False
+    ) -> Iterator['bs4.PageElement']:
+        """Yield children of `el` from index `start`, forward or in `reverse`; only tags when `tags` is true."""
+
+        if not no_iframe or not self.is_iframe(el):
+            last = len(el.contents) - 1
+            if start is None:
+                index = last if reverse else 0
+            else:
+                index = start
+            end = -1 if reverse else last + 1  # One step past the final index in the travel direction.
+            incr = -1 if reverse else 1
+
+            if 0 <= index <= last:  # An out-of-range start (or empty contents) yields nothing.
+                while index != end:
+                    node = el.contents[index]
+                    index += incr
+                    if not tags or self.is_tag(node):
+                        yield node
+
+    def get_descendants(
+        self,
+        el: 'bs4.Tag',
+        tags: bool = True,
+        no_iframe: bool = False
+    ) -> Iterator['bs4.PageElement']:
+        """Yield descendants of `el`; with `no_iframe`, an `iframe` is yielded but its content is skipped."""
+
+        if not no_iframe or not self.is_iframe(el):
+            next_good = None  # Node at which yielding resumes after skipping an `iframe`'s content.
+            for child in el.descendants:
+
+                if next_good is not None:
+                    if child is not next_good:
+                        continue
+                    next_good = None
+
+                is_tag = self.is_tag(child)
+
+                if no_iframe and is_tag and self.is_iframe(child):
+                    if child.next_sibling is not None:
+                        next_good = child.next_sibling
+                    else:
+                        last_child = child
+                        while self.is_tag(last_child) and last_child.contents:
+                            last_child = last_child.contents[-1]
+                        next_good = last_child.next_element
+                    yield child
+                    if next_good is None:
+                        break
+                    # Coverage isn't seeing this even though it's executed
+                    continue  # pragma: no cover
+
+                if not tags or is_tag:
+                    yield child
+
+    def get_parent(self, el: 'bs4.Tag', no_iframe: bool = False) -> Optional['bs4.Tag']:
+        """Return the parent of `el`, or `None` when there is none or (with `no_iframe`) it is an `iframe`."""
+
+        parent = el.parent
+        if no_iframe and parent is not None and self.is_iframe(parent):
+            parent = None
+        return parent
+
+    @staticmethod
+    def get_tag_name(el: 'bs4.Tag') -> Optional[str]:
+        """Return the element's `name`."""
+
+        return cast(Optional[str], el.name)
+
+    @staticmethod
+    def get_prefix_name(el: 'bs4.Tag') -> Optional[str]:
+        """Return the element's `prefix`."""
+
+        return cast(Optional[str], el.prefix)
+
+    @staticmethod
+    def get_uri(el: 'bs4.Tag') -> Optional[str]:
+        """Get namespace `URI` (the element's `namespace`)."""
+
+        return cast(Optional[str], el.namespace)
+
+    @classmethod
+    def get_next(cls, el: 'bs4.Tag', tags: bool = True) -> Optional['bs4.PageElement']:
+        """Return the next sibling (the next tag sibling when `tags` is true), or `None` if there is none."""
+
+        sibling = el.next_sibling
+        while tags and not cls.is_tag(sibling) and sibling is not None:
+            sibling = sibling.next_sibling
+        return sibling
+
+    @classmethod
+    def get_previous(cls, el: 'bs4.Tag', tags: bool = True) -> Optional['bs4.PageElement']:
+        """Return the previous sibling (the previous tag sibling when `tags` is true), or `None` if none."""
+
+        sibling = el.previous_sibling
+        while tags and not cls.is_tag(sibling) and sibling is not None:
+            sibling = sibling.previous_sibling
+        return sibling
+
+    @staticmethod
+    def has_html_ns(el: 'bs4.Tag') -> bool:
+        """
+        Check if element has an HTML namespace.
+
+        This is a bit different than whether a element is treated as having an HTML namespace,
+        like we do in the case of `is_html_tag`.
+        """
+
+        ns = el.namespace if el else None
+        return bool(ns and ns == NS_XHTML)
+
+    @staticmethod
+    def split_namespace(el: 'bs4.Tag', attr_name: str) -> Tuple[Optional[str], Optional[str]]:
+        """Return the `namespace` and `name` attributes of `attr_name` (`None` when absent); `el` is unused."""
+
+        return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
+
+    @classmethod
+    def normalize_value(cls, value: Any) -> Union[str, Sequence[str]]:
+        """Normalize the value to be a string or list of strings."""
+
+        # Treat `None` as empty string.
+        if value is None:
+            return ''
+
+        # Pass through strings
+        if isinstance(value, str):
+            return value
+
+        # If it's a byte string, convert it to Unicode, treating it as UTF-8.
+        if isinstance(value, bytes):
+            return value.decode("utf8")
+
+        # BeautifulSoup supports sequences of attribute values, so make sure the children are strings.
+        if isinstance(value, Sequence):
+            new_value = []
+            for v in value:
+                if not isinstance(v, (str, bytes)) and isinstance(v, Sequence):
+                    # This is most certainly a user error and will crash and burn later.
+                    # To keep things working, we'll do what we do with all objects,
+                    # And convert them to strings.
+                    new_value.append(str(v))
+                else:
+                    # Convert the child to a string
+                    new_value.append(cast(str, cls.normalize_value(v)))
+            return new_value
+
+        # Try and make anything else a string
+        return str(value)
+
+    @classmethod
+    def get_attribute_by_name(
+        cls,
+        el: 'bs4.Tag',
+        name: str,
+        default: Optional[Union[str, Sequence[str]]] = None
+    ) -> Optional[Union[str, Sequence[str]]]:
+        """Return the normalized value of attribute `name`, or `default` when the element lacks it."""
+
+        value = default
+        if el._is_xml:
+            try:
+                value = cls.normalize_value(el.attrs[name])  # XML trees: exact key lookup.
+            except KeyError:
+                pass
+        else:
+            for k, v in el.attrs.items():
+                if util.lower(k) == name:  # Otherwise: first key whose lowercased form equals `name`.
+                    value = cls.normalize_value(v)
+                    break
+        return value
+
+    @classmethod
+    def iter_attributes(cls, el: 'bs4.Tag') -> Iterator[Tuple[str, Optional[Union[str, Sequence[str]]]]]:
+        """Yield `(name, normalized value)` pairs for every attribute of `el`."""
+
+        for k, v in el.attrs.items():
+            yield k, cls.normalize_value(v)
+
+    @classmethod
+    def get_classes(cls, el: 'bs4.Tag') -> Sequence[str]:
+        """Return the `class` attribute as a sequence, splitting a string value on whitespace."""
+
+        classes = cls.get_attribute_by_name(el, 'class', [])
+        if isinstance(classes, str):
+            classes = RE_NOT_WS.findall(classes)
+        return cast(Sequence[str], classes)
+
+    def get_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> str:
+        """Return the concatenated content strings found among the descendants of `el`."""
+
+        return ''.join(
+            [node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
+        )
+
+    def get_own_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> List[str]:
+        """Return the content strings that are direct contents of `el`."""
+
+        return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
+
+
+class Inputs:
+    """Class for parsing and validating input items."""
+
+    @staticmethod
+    def validate_day(year: int, month: int, day: int) -> bool:
+        """Return True when `day` exists in the given month, using the Gregorian leap year rule for February."""
+
+        if month == FEB:
+            is_leap = ((year % 4 == 0) and (year % 100 != 0)) or (year % 400 == 0)
+            limit = FEB_LEAP_MONTH if is_leap else FEB_MONTH
+        else:
+            limit = SHORT_MONTH if month in MONTHS_30 else LONG_MONTH
+        return 1 <= day <= limit
+
+    @staticmethod
+    def validate_week(year: int, week: int) -> bool:
+        """Return True when `week` is a valid ISO 8601 week number for `year` (1-52, or 53 in long years)."""
+
+        # December 28 always lies in the last ISO week of its year. The Gregorian calendar repeats every
+        # 400 years (a whole number of weeks), so an equivalent year keeps `datetime` inside its range.
+        max_week = datetime(2000 + year % 400, 12, 28).isocalendar()[1]
+        return 1 <= week <= max_week
+
+    @staticmethod
+    def validate_month(month: int) -> bool:
+        """Validate month."""
+
+        return 1 <= month <= 12
+
+    @staticmethod
+    def validate_year(year: int) -> bool:
+        """Validate year."""
+
+        return 1 <= year
+
+    @staticmethod
+    def validate_hour(hour: int) -> bool:
+        """Validate hour."""
+
+        return 0 <= hour <= 23
+
+    @staticmethod
+    def validate_minutes(minutes: int) -> bool:
+        """Validate minutes."""
+
+        return 0 <= minutes <= 59
+
+    @classmethod
+    def parse_value(cls, itype: str, value: Optional[str]) -> Optional[Tuple[float, ...]]:
+        """Parse the input value."""
+
+        parsed = None  # type: Optional[Tuple[float, ...]]
+        if value is None:
+            return value
+        if itype == "date":
+            m = RE_DATE.match(value)
+            if m:
+                year = int(m.group('year'), 10)
+                month = int(m.group('month'), 10)
+                day = int(m.group('day'), 10)
+                if cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day):
+                    parsed = (year, month, day)
+        elif itype == "month":
+            m = RE_MONTH.match(value)
+            if m:
+                year = int(m.group('year'), 10)
+                month = int(m.group('month'), 10)
+                if cls.validate_year(year) and cls.validate_month(month):
+                    parsed = (year, month)
+        elif itype == "week":
+            m = RE_WEEK.match(value)
+            if m:
+                year = int(m.group('year'), 10)
+                week = int(m.group('week'), 10)
+                if cls.validate_year(year) and cls.validate_week(year, week):
+                    parsed = (year, week)
+        elif itype == "time":
+            m = RE_TIME.match(value)
+            if m:
+                hour = int(m.group('hour'), 10)
+                minutes = int(m.group('minutes'), 10)
+                if cls.validate_hour(hour) and cls.validate_minutes(minutes):
+                    parsed = (hour, minutes)
+        elif itype == "datetime-local":
+            m = RE_DATETIME.match(value)
+            if m:
+                year = int(m.group('year'), 10)
+                month = int(m.group('month'), 10)
+                day = int(m.group('day'), 10)
+                hour = int(m.group('hour'), 10)
+                minutes = int(m.group('minutes'), 10)
+                if (
+                    cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day) and
+                    cls.validate_hour(hour) and cls.validate_minutes(minutes)
+                ):
+                    parsed = (year, month, day, hour, minutes)
+        elif itype in ("number", "range"):
+            m = RE_NUM.match(value)
+            if m:
+                parsed = (float(m.group('value')),)
+        return parsed
+
+
+class CSSMatch(_DocumentNav):
+    """Perform CSS matching."""
+
+    def __init__(
+        self,
+        selectors: ct.SelectorList,
+        scope: 'bs4.Tag',
+        namespaces: Optional[ct.Namespaces],
+        flags: int
+    ) -> None:
+        """Record the selectors, namespaces and flags, then locate the document, root and scope."""
+
+        self.assert_valid_input(scope)
+        self.tag = scope
+        self.selectors = selectors
+        self.flags = flags
+        self.iframe_restrict = False
+        self.namespaces = {} if namespaces is None else namespaces  # type: Union[ct.Namespaces, Dict[str, str]]
+        # Caches start empty for each instance; they are filled by the `match_*` methods that read them.
+        self.cached_meta_lang = []  # type: List[Tuple[str, str]]
+        self.cached_default_forms = []  # type: List[Tuple['bs4.Tag', 'bs4.Tag']]
+        self.cached_indeterminate_forms = []  # type: List[Tuple['bs4.Tag', str, bool]]
+
+        # Climb from the scope to the top-most node of the tree.
+        doc = scope
+        ancestor = self.get_parent(doc)
+        while ancestor:
+            doc, ancestor = ancestor, self.get_parent(ancestor)
+
+        # A non-document top node is itself the root; otherwise use the document's first child (or `None`).
+        if self.is_doc(doc):
+            root = next(iter(self.get_children(doc)), None)
+        else:
+            root = doc
+
+        self.root = root
+        # When the scope is the top-most node itself, the root stands in for it.
+        self.scope = root if scope is doc else scope
+        self.has_html_namespace = self.has_html_ns(root)
+
+        # A document can be both XML and HTML (XHTML)
+        self.is_xml = self.is_xml_tree(doc)
+        self.is_html = not self.is_xml or self.has_html_namespace
+
+    def supports_namespaces(self) -> bool:
+        """Report whether namespaces apply: the tree is XML or its root has the HTML namespace."""
+        xml_tree = self.is_xml
+        return xml_tree if xml_tree else self.has_html_namespace
+
+    def get_tag_ns(self, el: 'bs4.Tag') -> str:
+        """Return the namespace that `el` is compared under.
+
+        Without namespace support this is always `NS_XHTML`; otherwise it is the value of
+        `get_uri(el)`, or an empty string when that value is falsy.
+        """
+
+        if not self.supports_namespaces():
+            return NS_XHTML
+        uri = self.get_uri(el)
+        return uri if uri else ''
+
+    def is_html_tag(self, el: 'bs4.Tag') -> bool:
+        """Report whether `get_tag_ns` places `el` in the XHTML namespace."""
+
+        return NS_XHTML == self.get_tag_ns(el)
+
+    def get_tag(self, el: 'bs4.Tag') -> Optional[str]:
+        """Return the tag name of `el`, lower-cased via `util.lower` unless the tree is XML."""
+        name = self.get_tag_name(el)
+        if name is None or self.is_xml:
+            return name
+        return util.lower(name)
+
+    def get_prefix(self, el: 'bs4.Tag') -> Optional[str]:
+        """Return the prefix of `el`, lower-cased via `util.lower` unless the tree is XML."""
+        prefix = self.get_prefix_name(el)
+        if prefix is None or self.is_xml:
+            return prefix
+        return util.lower(prefix)
+
+    def find_bidi(self, el: 'bs4.Tag') -> Optional[int]:
+        """Get directionality from the first strongly directional character in the element's text.
+
+        Returns `ct.SEL_DIR_LTR` or `ct.SEL_DIR_RTL`, or `None` when no such character is found.
+        """
+
+        for node in self.get_children(el, tags=False):
+            if not self.is_tag(node):
+                # Skip `doctype` comments, etc.; any other string is scanned character by character.
+                if not self.is_special_string(node):
+                    for char in node:
+                        bidi = unicodedata.bidirectional(char)
+                        if bidi == 'L':
+                            return ct.SEL_DIR_LTR
+                        if bidi in ('AL', 'R'):
+                            return ct.SEL_DIR_RTL
+                continue
+
+            # Child elements: the `dir` attribute is resolved first, before any other check.
+            dir_value = util.lower(self.get_attribute_by_name(node, 'dir', ''))
+            direction = DIR_MAP.get(dir_value, None)
+
+            # Avoid analyzing certain elements specified in the specification, elements outside
+            # the HTML namespace, and elements whose `dir` value is present in `DIR_MAP`.
+            excluded = (
+                self.get_tag(node) in ('bdi', 'script', 'style', 'textarea', 'iframe') or
+                not self.is_html_tag(node) or
+                direction is not None
+            )
+            if not excluded:
+                # Check directionality of this node's text; keep looking if it is undetermined.
+                value = self.find_bidi(node)
+                if value is not None:
+                    return value
+        return None
+
+    def extended_language_filter(self, lang_range: str, lang_tag: str) -> bool:
+        """Return whether `lang_tag` satisfies `lang_range`, comparing `-` separated subtags in lower case."""
+
+        match = True
+        lang_range = RE_WILD_STRIP.sub('-', lang_range).lower()  # every `RE_WILD_STRIP` match becomes one `-`
+        ranges = lang_range.split('-')
+        subtags = lang_tag.lower().split('-')
+        length = len(ranges)
+        rindex = 0  # position in `ranges`
+        sindex = 0  # position in `subtags`
+        r = ranges[rindex]
+        s = subtags[sindex]
+
+        # Primary tag needs to match (a `*` range accepts any primary subtag)
+        if r != '*' and r != s:
+            match = False
+
+        rindex += 1
+        sindex += 1
+
+        # Match until we run out of ranges (or until a failure clears `match`)
+        while match and rindex < length:
+            r = ranges[rindex]
+            try:
+                s = subtags[sindex]
+            except IndexError:
+                # Ran out of subtags,
+                # but we still have ranges
+                match = False
+                continue  # `match` is now False, so the loop condition ends the loop
+
+            # Empty range
+            if not r:
+                match = False
+                continue
+
+            # Matched range: move on to the next range (and, below, the next subtag)
+            elif s == r:
+                rindex += 1
+
+            # Implicit wildcard cannot match
+            # singletons
+            elif len(s) == 1:
+                match = False
+                continue
+
+            # Implicitly matched, so grab next subtag
+            sindex += 1
+
+        return match
+
+    def match_attribute_name(
+        self,
+        el: 'bs4.Tag',
+        attr: str,
+        prefix: Optional[str]
+    ) -> Optional[Union[str, Sequence[str]]]:
+        """Find the first attribute of `el` matching `attr` (and `prefix`) and return its value.
+
+        Returns `None` when nothing matches or when `prefix` is not a registered namespace
+        (other than `*`). Names compare case-insensitively unless the tree is XML.
+        """
+
+        if not self.supports_namespaces():
+            # No namespaces to consider: compare the whole attribute name, ignoring case.
+            for key, val in self.iter_attributes(el):
+                if util.lower(attr) == util.lower(key):
+                    return val
+            return None
+
+        # If we have not defined namespaces, we can't very well find them, so don't bother trying.
+        ns = self.namespaces.get(prefix) if prefix else None
+        if prefix and ns is None and prefix != '*':
+            return None
+
+        for key, val in self.iter_attributes(el):
+            # Get attribute parts
+            namespace, name = self.split_namespace(el, key)
+
+            if ns is None:
+                # Can't match a prefix attribute as we haven't specified one to match
+                # Try to match it normally as a whole `p:a` as selector may be trying `p\:a`.
+                if self.is_xml:
+                    same = attr == key
+                else:
+                    same = util.lower(attr) == util.lower(key)
+                if same:
+                    return val
+                continue
+
+            # We can't match our desired prefix attribute as the attribute doesn't have a prefix
+            # (or its namespace differs and the prefix is not the `*` wildcard).
+            if namespace is None or (ns != namespace and prefix != '*'):
+                continue
+
+            # The attribute name must agree: exactly for XML, ignoring case otherwise.
+            if self.is_xml:
+                same = attr == name
+            else:
+                same = util.lower(attr) == util.lower(name)
+            if same:
+                return val
+        return None
+
+    def match_namespace(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
+        """Match the element's namespace against the selector's prefix (`None`, `''`, `*` or a name)."""
+
+        prefix = tag.prefix
+        namespace = self.get_tag_ns(el)
+        default_namespace = self.namespaces.get('')
+        tag_ns = '' if prefix is None else self.namespaces.get(prefix)
+
+        # We must match the default namespace if one is not provided
+        if prefix is None:
+            return not (default_namespace is not None and namespace != default_namespace)
+        # If we specified `|tag`, we must not have a namespace.
+        if prefix == '':
+            return not namespace
+        # The wildcard prefix accepts every namespace.
+        if prefix == '*':
+            return True
+        # Verify prefix matches: it has to be registered and has to map to
+        # the namespace of the element.
+        return not (tag_ns is None or namespace != tag_ns)
+
+    def match_attributes(self, el: 'bs4.Tag', attributes: Tuple[ct.SelectorAttribute, ...]) -> bool:
+        """Match attributes: each selector's attribute must exist and satisfy its pattern, if any."""
+
+        for selector in attributes or ():
+            found = self.match_attribute_name(el, selector.attribute, selector.prefix)
+            if found is None:
+                return False
+            # XML documents use the XML specific pattern when the selector provides one.
+            if self.is_xml and selector.xml_type_pattern:
+                pattern = selector.xml_type_pattern
+            else:
+                pattern = selector.pattern
+            # A non-string value is compared as a single space separated string.
+            text = found if isinstance(found, str) else ' '.join(found)
+            # A selector without a pattern only tests for presence.
+            if pattern is not None and pattern.match(text) is None:
+                return False
+        return True
+
+    def match_tagname(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
+        """Match tag name; a selector without a name, or named `*`, accepts any element."""
+        name = tag.name
+        if name is None:
+            return True
+        if not self.is_xml:
+            name = util.lower(name)
+        return name in (self.get_tag(el), '*')
+
+    def match_tag(self, el: 'bs4.Tag', tag: Optional[ct.SelectorTag]) -> bool:
+        """Match the tag: with a selector tag, both its namespace and its name must agree."""
+
+        # Without a selector tag there is nothing to compare.
+        if tag is None:
+            return True
+
+        # Both checks always run, namespace first, before their results are combined.
+        namespace_ok = self.match_namespace(el, tag)
+        name_ok = self.match_tagname(el, tag)
+        return bool(namespace_ok and name_ok)
+
+    def match_past_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
+        """Match a relationship that looks back at the ancestors or previous siblings of `el`."""
+
+        found = False
+        # I don't think this can ever happen, but it makes `mypy` happy
+        if isinstance(relation[0], ct.SelectorNull):  # pragma: no cover
+            return found
+        rel_type = relation[0].rel_type
+        if rel_type in (REL_PARENT, REL_CLOSE_PARENT):
+            node = self.get_parent(el, no_iframe=self.iframe_restrict)
+            if rel_type == REL_CLOSE_PARENT:
+                # Only the immediate parent is a candidate.
+                return self.match_selectors(node, relation) if node else found
+            # Any ancestor may match; stop at the first one that does.
+            while node and not found:
+                found = self.match_selectors(node, relation)
+                node = self.get_parent(node, no_iframe=self.iframe_restrict)
+        elif rel_type in (REL_SIBLING, REL_CLOSE_SIBLING):
+            node = self.get_previous(el)
+            if rel_type == REL_CLOSE_SIBLING:
+                # Only the adjacent previous sibling is a candidate, and it has to be a tag.
+                return self.match_selectors(node, relation) if node and self.is_tag(node) else found
+            # Any previous sibling may match; stop at the first one that does.
+            while node and not found:
+                found = self.match_selectors(node, relation)
+                node = self.get_previous(node)
+        return found
+
+    def match_future_child(self, parent: 'bs4.Tag', relation: ct.SelectorList, recursive: bool = False) -> bool:
+        """Match future child: test the children of `parent`, or all its descendants when `recursive`."""
+
+        if recursive:
+            walk = self.get_descendants  # type: Callable[..., Iterator['bs4.Tag']]
+        else:
+            walk = self.get_children
+        # `any` stops consuming candidates at the first one that satisfies the relation,
+        # and yields `False` when there are no candidates at all.
+        return any(
+            self.match_selectors(child, relation)
+            for child in walk(parent, no_iframe=self.iframe_restrict)
+        )
+
+    def match_future_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
+        """Match a relationship that looks ahead at the descendants or following siblings of `el`."""
+
+        head = relation[0]
+        # I don't think this can ever happen, but it makes `mypy` happy
+        if isinstance(head, ct.SelectorNull):  # pragma: no cover
+            return False
+        rel_type = head.rel_type
+        if rel_type in (REL_HAS_PARENT, REL_HAS_CLOSE_PARENT):
+            # All descendants are searched for `REL_HAS_PARENT`, direct children only otherwise.
+            return self.match_future_child(el, relation, rel_type == REL_HAS_PARENT)
+        found = False
+        if rel_type == REL_HAS_SIBLING:
+            sibling = self.get_next(el)
+            while sibling and not found:
+                found = self.match_selectors(sibling, relation)
+                sibling = self.get_next(sibling)
+        elif rel_type == REL_HAS_CLOSE_SIBLING:
+            sibling = self.get_next(el)
+            if sibling and self.is_tag(sibling):
+                found = self.match_selectors(sibling, relation)
+        return found
+
+    def match_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
+        """Match relationship to other elements by dispatching on the relation type."""
+
+        head = relation[0]
+        # A null selector, or one without a relation type, has no relationship to verify.
+        if isinstance(head, ct.SelectorNull) or head.rel_type is None:
+            return False
+
+        # Relation types that start with `:` are handled by `match_future_relations`;
+        # every other relation type is handled by `match_past_relations`.
+        if head.rel_type.startswith(':'):
+            return self.match_future_relations(el, relation)
+
+        return self.match_past_relations(el, relation)
+
+    def match_id(self, el: 'bs4.Tag', ids: Tuple[str, ...]) -> bool:
+        """Match element's ID: every entry of `ids` must equal the element's `id` attribute."""
+
+        # The attribute is looked up again for each entry; a missing attribute reads as `''`.
+        for wanted in ids:
+            if wanted != self.get_attribute_by_name(el, 'id', ''):
+                # One mismatch is decisive.
+                return False
+        return True
+
+    def match_classes(self, el: 'bs4.Tag', classes: Tuple[str, ...]) -> bool:
+        """Match element's classes.
+
+        Every name in `classes` has to be among the names returned by `get_classes(el)`.
+        An empty `classes` tuple always matches.
+        """
+
+        present = self.get_classes(el)
+        # Stops at the first requested class that the element lacks.
+        return all(name in present for name in classes)
+
+    def match_root(self, el: 'bs4.Tag') -> bool:
+        """Match element as root.
+
+        Besides passing `is_root`, the element may not have any sibling that is a tag, a CDATA
+        node, or a content string with non-whitespace text, on either side of it.
+        """
+
+        is_root = self.is_root(el)
+        # Previous siblings are inspected first, then the following ones; the second walk is
+        # skipped entirely once the first has disqualified the element.
+        for advance in (self.get_previous, self.get_next):
+            if not is_root:
+                break
+            sibling = advance(el, tags=False)
+            while is_root and sibling is not None:
+                disqualifies = (
+                    self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
+                    self.is_cdata(sibling)
+                )
+                # Other nodes, such as whitespace-only strings, are stepped over.
+                if disqualifies:
+                    is_root = False
+                else:
+                    sibling = advance(sibling, tags=False)
+        return is_root
+
+    def match_scope(self, el: 'bs4.Tag') -> bool:
+        """Match element as scope: it has to be the very object stored in `self.scope`."""
+
+        return el is self.scope
+
+    def match_nth_tag_type(self, el: 'bs4.Tag', child: 'bs4.Tag') -> bool:
+        """Match tag type for `nth` matches: `child` must share the tag name and namespace of `el`."""
+
+        if self.get_tag(child) != self.get_tag(el):
+            return False
+        # The names agree, so the namespaces decide.
+        return self.get_tag_ns(child) == self.get_tag_ns(el)
+
+    def match_nth(self, el: 'bs4.Tag', nth: 'bs4.Tag') -> bool:
+        """Match `nth` elements: `el` must satisfy every entry of `nth` (read: `a`, `b`, `n`, `last`, `of_type`)."""
+        # NOTE(review): `nth` is annotated as a tag but is iterated as a collection of entries - confirm the type.
+        matched = True
+
+        for n in nth:
+            matched = False  # each entry has to prove itself; a failing entry ends the whole loop
+            if n.selectors and not self.match_selectors(el, n.selectors):
+                break
+            parent = self.get_parent(el)
+            if parent is None:
+                parent = self.create_fake_parent(el)
+            last = n.last  # truthy: children are walked in reverse, starting from the end
+            last_index = len(parent) - 1
+            index = last_index if last else 0  # child position where the walk starts
+            relative_index = 0  # number of qualifying children seen so far
+            a = n.a
+            b = n.b
+            var = n.n
+            count = 0
+            count_incr = 1
+            factor = -1 if last else 1
+            idx = last_idx = a * count + b if var else a  # parses as `(a * count + b) if var else a`
+
+            # We can only adjust bounds within a variable index
+            if var:
+                # Abort if our nth index is out of bounds and only getting further out of bounds as we increment.
+                # Otherwise, increment to try to get in bounds.
+                adjust = None
+                while idx < 1 or idx > last_index:
+                    if idx < 0:
+                        diff_low = 0 - idx
+                        if adjust is not None and adjust == 1:
+                            break
+                        adjust = -1
+                        count += count_incr
+                        idx = last_idx = a * count + b if var else a
+                        diff = 0 - idx
+                        if diff >= diff_low:
+                            break
+                    else:
+                        diff_high = idx - last_index
+                        if adjust is not None and adjust == -1:
+                            break
+                        adjust = 1
+                        count += count_incr
+                        idx = last_idx = a * count + b if var else a
+                        diff = idx - last_index
+                        if diff >= diff_high:
+                            break
+                        diff_high = diff
+
+                # If a < 0, our count is working backwards, so floor the index by increasing the count.
+                # Find the count that yields the lowest, in bound value and use that.
+                # Lastly reverse count increment so that we'll increase our index.
+                lowest = count
+                if a < 0:
+                    while idx >= 1:
+                        lowest = count
+                        count += count_incr
+                        idx = last_idx = a * count + b if var else a
+                    count_incr = -1
+                count = lowest
+                idx = last_idx = a * count + b if var else a
+
+            # Evaluate elements while our calculated nth index is still in range
+            while 1 <= idx <= last_index + 1:
+                child = None
+                # Evaluate while our child index is still in range.
+                for child in self.get_children(parent, start=index, reverse=factor < 0, tags=False):
+                    index += factor
+                    if not self.is_tag(child):
+                        continue
+                    # Handle `of S` in `nth-child`
+                    if n.selectors and not self.match_selectors(child, n.selectors):
+                        continue
+                    # Handle `of-type`
+                    if n.of_type and not self.match_nth_tag_type(el, child):
+                        continue
+                    relative_index += 1
+                    if relative_index == idx:
+                        if child is el:
+                            matched = True
+                        else:
+                            break
+                    if child is el:
+                        break
+                if child is el:
+                    break
+                last_idx = idx
+                count += count_incr
+                if count < 0:
+                    # Count is counting down and has now ventured into invalid territory.
+                    break
+                idx = a * count + b if var else a
+                if last_idx == idx:  # the index no longer changes (always the case without `var`), so stop
+                    break
+            if not matched:
+                break
+        return matched
+
+    def match_empty(self, el: 'bs4.Tag') -> bool:
+        """Check if element is empty: no child tags and no content string matched by `RE_NOT_EMPTY`."""
+
+        for child in self.get_children(el, tags=False):
+            # A child element always counts as content.
+            if self.is_tag(child):
+                return False
+            # So does a content string in which `RE_NOT_EMPTY` finds a match.
+            if self.is_content_string(child) and RE_NOT_EMPTY.search(child):
+                return False
+
+        return True
+
+    def match_subselectors(self, el: 'bs4.Tag', selectors: Tuple[ct.SelectorList, ...]) -> bool:
+        """Match selectors: the element has to satisfy every selector list in `selectors`."""
+
+        # Every selector list is evaluated, even after one has failed, before the results are
+        # combined; the element matches only when all of them do.
+        outcomes = [self.match_selectors(el, sel) for sel in selectors]
+
+        return all(outcomes)
+
+    def match_contains(self, el: 'bs4.Tag', contains: Tuple[ct.SelectorContains, ...]) -> bool:
+        """Match element if it contains text.
+
+        Every entry of `contains` must be satisfied by at least one of its `text` alternatives.
+        Entries flagged `own` are tested against each string from `get_own_text`; all other
+        entries are tested against the single string from `get_text`.
+        """
+
+        # Fetched lazily and cached per kind. A selector can mix both kinds of entry, and sharing
+        # one cache would test `own` entries against single characters of the joined text, or
+        # plain entries against a list of strings.
+        own_text = None  # type: Optional[List[str]]
+        full_text = None  # type: Optional[str]
+
+        for contain_list in contains:
+            if contain_list.own:
+                if own_text is None:
+                    own_text = self.get_own_text(el, no_iframe=self.is_html)
+                found = any(text in chunk for text in contain_list.text for chunk in own_text)
+            else:
+                if full_text is None:
+                    full_text = self.get_text(el, no_iframe=self.is_html)
+                found = any(text in full_text for text in contain_list.text)
+            # One unsatisfied entry decides the result, so the remaining ones are skipped.
+            if not found:
+                return False
+        return True
+
+    def match_default(self, el: 'bs4.Tag') -> bool:
+        """Match default.
+
+        `el` matches when it is the first `input` or `button` with a `submit` type among the
+        descendants of its form. The button found for a form is cached in
+        `cached_default_forms`, so it is searched for only once per form.
+        """
+
+        # Find this input's form: the nearest ancestor that is an HTML `form` element.
+        node = self.get_parent(el, no_iframe=True)
+        while node and not (self.get_tag(node) == 'form' and self.is_html_tag(node)):
+            node = self.get_parent(node, no_iframe=True)
+        # A falsy result means no enclosing form was found.
+        form = node if node else None
+
+        # Look in form cache to see if we've already located its default button
+        for cached_form, cached_button in self.cached_default_forms:
+            if cached_form is form:
+                return cached_button is el
+
+        # We didn't have the form cached, so look for its default button
+        for child in self.get_descendants(form, no_iframe=True):
+            name = self.get_tag(child)
+            # Can't do nested forms (haven't figured out why we never hit this)
+            if name == 'form':  # pragma: no cover
+                return False
+            # Only `input` and `button` elements are considered.
+            if name not in ('input', 'button'):
+                continue
+            # The type comparison ignores case.
+            kind = self.get_attribute_by_name(child, 'type', '')
+            if kind and util.lower(kind) == 'submit':
+                # Remember the winner so later calls for this form can skip the scan.
+                self.cached_default_forms.append((form, child))
+                return el is child
+
+        # The form has no submit button; nothing is cached in that case.
+        return False
+
+    def match_indeterminate(self, el: 'bs4.Tag') -> bool:
+        """Match indeterminate: no other same-named radio `input` of the same form has a `checked` attribute."""
+
+        match = False
+        name = cast(str, self.get_attribute_by_name(el, 'name'))  # may really be `None`; the cast is for typing only
+
+        def get_parent_form(el: 'bs4.Tag') -> Optional['bs4.Tag']:
+            """Find this input's form: the nearest HTML `form` ancestor, else the top-most ancestor."""
+            form = None
+            parent = self.get_parent(el, no_iframe=True)
+            while form is None:
+                if self.get_tag(parent) == 'form' and self.is_html_tag(parent):
+                    form = parent
+                    break
+                last_parent = parent
+                parent = self.get_parent(parent, no_iframe=True)
+                if parent is None:
+                    form = last_parent  # no `form` ancestor: fall back to the last ancestor reached
+                    break
+            return form
+
+        form = get_parent_form(el)
+
+        # Look in form cache to see if we've already evaluated that its fellow radio buttons are indeterminate
+        found_form = False
+        for f, n, i in self.cached_indeterminate_forms:  # entries are `(form, name, result)`
+            if f is form and n == name:
+                found_form = True
+                if i is True:
+                    match = True
+                break
+
+        # We didn't have the form cached, so validate that the radio button is indeterminate
+        if not found_form:
+            checked = False
+            for child in self.get_descendants(form, no_iframe=True):
+                if child is el:
+                    continue
+                tag_name = self.get_tag(child)
+                if tag_name == 'input':
+                    is_radio = False
+                    check = False
+                    has_name = False
+                    for k, v in self.iter_attributes(child):
+                        if util.lower(k) == 'type' and util.lower(v) == 'radio':
+                            is_radio = True
+                        elif util.lower(k) == 'name' and v == name:
+                            has_name = True
+                        elif util.lower(k) == 'checked':
+                            check = True
+                        if is_radio and check and has_name and get_parent_form(child) is form:  # tested per attribute
+                            checked = True
+                            break
+                if checked:
+                    break
+            if not checked:
+                match = True
+            self.cached_indeterminate_forms.append((form, name, match))  # cached for either outcome
+
+        return match
+
+    def match_lang(self, el: 'bs4.Tag', langs: Tuple[ct.SelectorLang, ...]) -> bool:
+        """Match languages: resolve the language from a `lang` attribute or meta tag and test it against `langs`."""
+
+        match = False
+        has_ns = self.supports_namespaces()
+        root = self.root
+        has_html_namespace = self.has_html_namespace
+
+        # Walk parents looking for `lang` (HTML) or `xml:lang` XML property.
+        parent = el
+        found_lang = None
+        last = None
+        while not found_lang:  # an empty attribute value is falsy, so the walk continues past it
+            has_html_ns = self.has_html_ns(parent)
+            for k, v in self.iter_attributes(parent):
+                attr_ns, attr = self.split_namespace(parent, k)
+                if (
+                    ((not has_ns or has_html_ns) and (util.lower(k) if not self.is_xml else k) == 'lang') or
+                    (
+                        has_ns and not has_html_ns and attr_ns == NS_XML and
+                        (util.lower(attr) if not self.is_xml and attr is not None else attr) == 'lang'
+                    )
+                ):
+                    found_lang = v
+                    break
+            last = parent
+            parent = self.get_parent(parent, no_iframe=self.is_html)
+
+            if parent is None:  # reached the top: the last node visited becomes the root used below
+                root = last
+                has_html_namespace = self.has_html_ns(root)
+                parent = last
+                break
+
+        # Use cached meta language.
+        if not found_lang and self.cached_meta_lang:
+            for cache in self.cached_meta_lang:  # entries are `(root, language)` pairs
+                if root is cache[0]:
+                    found_lang = cache[1]  # may be `''`, which is stored below when no meta language exists
+
+        # If we couldn't find a language, and the document is HTML, look to meta to determine language.
+        if found_lang is None and (not self.is_xml or (has_html_namespace and root.name == 'html')):
+            # Find head
+            found = False
+            for tag in ('html', 'head'):
+                found = False
+                for child in self.get_children(parent, no_iframe=self.is_html):
+                    if self.get_tag(child) == tag and self.is_html_tag(child):
+                        found = True
+                        parent = child
+                        break
+                if not found:  # pragma: no cover
+                    break
+
+            # Search meta tags
+            if found:
+                for child in parent:
+                    # NOTE(review): `is_html_tag(parent)` tests the container, not `child` - confirm this is intended.
+                    if self.is_tag(child) and self.get_tag(child) == 'meta' and self.is_html_tag(parent):
+                        c_lang = False
+                        content = None
+                        for k, v in self.iter_attributes(child):
+                            if util.lower(k) == 'http-equiv' and util.lower(v) == 'content-language':
+                                c_lang = True
+                            if util.lower(k) == 'content':
+                                content = v
+                            if c_lang and content:
+                                found_lang = content
+                                self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
+                                break
+                    if found_lang:
+                        break
+                if not found_lang:
+                    self.cached_meta_lang.append((cast(str, root), ''))  # record the miss for this root
+
+        # If we determined a language, compare: each entry of `langs` needs one pattern that accepts it.
+        if found_lang:
+            for patterns in langs:
+                match = False
+                for pattern in patterns:
+                    if self.extended_language_filter(pattern, cast(str, found_lang)):
+                        match = True
+                if not match:
+                    break
+
+        return match
+
+    def match_dir(self, el: 'bs4.Tag', directionality: int) -> bool:
+        """Check directionality."""
+
+        # If we have to match both left and right, we can't match either.
+        if directionality & ct.SEL_DIR_LTR and directionality & ct.SEL_DIR_RTL:
+            return False
+
+        if el is None or not self.is_html_tag(el):
+            return False
+
+        # Element has defined direction of left to right or right to left
+        direction = DIR_MAP.get(util.lower(self.get_attribute_by_name(el, 'dir', '')), None)
+        if direction not in (None, 0):
+            return direction == directionality
+
+        # Element is the document element (the root) and no direction assigned, assume left to right.
+        is_root = self.is_root(el)
+        if is_root and direction is None:
+            return ct.SEL_DIR_LTR == directionality
+
+        # If `input[type=tel]` and no direction is assigned, assume left to right.
+        name = self.get_tag(el)
+        is_input = name == 'input'
+        is_textarea = name == 'textarea'
+        is_bdi = name == 'bdi'
+        itype = util.lower(self.get_attribute_by_name(el, 'type', '')) if is_input else ''
+        if is_input and itype == 'tel' and direction is None:
+            return ct.SEL_DIR_LTR == directionality
+
+        # Auto handling for text inputs
+        if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0:
+            if is_textarea:
+                temp = []
+                for node in self.get_contents(el, no_iframe=True):
+                    if self.is_content_string(node):
+                        temp.append(node)
+                value = ''.join(temp)
+            else:
+                value = cast(str, self.get_attribute_by_name(el, 'value', ''))
+            if value:
+                for c in value:
+                    bidi = unicodedata.bidirectional(c)
+                    if bidi in ('AL', 'R', 'L'):
+                        direction = ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
+                        return direction == directionality
+                # Assume left to right
+                return ct.SEL_DIR_LTR == directionality
+            elif is_root:
+                return ct.SEL_DIR_LTR == directionality
+            return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
+
+        # Auto handling for `bdi` and other non text inputs.
+        if (is_bdi and direction is None) or direction == 0:
+            direction = self.find_bidi(el)
+            if direction is not None:
+                return direction == directionality
+            elif is_root:
+                return ct.SEL_DIR_LTR == directionality
+            return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
+
+        # Match parents direction
+        return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
+
+    def match_range(self, el: 'bs4.Tag', condition: int) -> bool:
+        """
+        Match range.
+
+        Behavior is modeled after what we see in browsers. Browsers seem to evaluate
+        if the value is out of range, and if not, it is in range. So a missing value
+        will not evaluate out of range; therefore, value is in range. Personally, I
+        feel like this should evaluate as neither in nor out of range.
+        """
+
+        out_of_range = False
+
+        itype = util.lower(self.get_attribute_by_name(el, 'type'))
+        mn = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'min', None)))
+        mx = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'max', None)))
+
+        # There is no valid min or max, so we cannot evaluate a range
+        if mn is None and mx is None:
+            return False
+
+        value = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'value', None)))
+        if value is not None:
+            if itype in ("date", "datetime-local", "month", "week", "number", "range"):
+                if mn is not None and value < mn:
+                    out_of_range = True
+                if not out_of_range and mx is not None and value > mx:
+                    out_of_range = True
+            elif itype == "time":
+                if mn is not None and mx is not None and mn > mx:
+                    # Time is periodic, so this is a reversed/discontinuous range
+                    if value < mn and value > mx:
+                        out_of_range = True
+                else:
+                    if mn is not None and value < mn:
+                        out_of_range = True
+                    if not out_of_range and mx is not None and value > mx:
+                        out_of_range = True
+
+        return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range
+
+    def match_defined(self, el: 'bs4.Tag') -> bool:
+        """
+        Match defined.
+
+        `:defined` is related to custom elements in a browser.
+
+        - If the document is XML (not XHTML), all tags will match.
+        - Tags that are not custom (don't have a hyphen) are marked defined.
+        - If the tag has a prefix (with or without a namespace), it is treated as defined.
+
+        This of course requires the parser to provide us with the proper prefix and namespace info;
+        if it doesn't, there is nothing we can do.
+        """
+
+        name = self.get_tag(el)
+        return (
+            name is not None and (
+                name.find('-') == -1 or
+                name.find(':') != -1 or
+                self.get_prefix(el) is not None
+            )
+        )
+
+    def match_placeholder_shown(self, el: 'bs4.Tag') -> bool:
+        """
+        Match placeholder shown according to HTML spec.
+
+        - Text areas should be checked for content. A single newline does not count as content.
+
+        """
+
+        match = False
+        content = self.get_text(el)
+        if content in ('', '\n'):
+            match = True
+
+        return match
+
+    def match_selectors(self, el: 'bs4.Tag', selectors: ct.SelectorList) -> bool:
+        """Check if element matches one of the selectors."""
+
+        match = False
+        is_not = selectors.is_not
+        is_html = selectors.is_html
+
+        # Internal selector lists that use the HTML flag, will automatically get the `html` namespace.
+        if is_html:
+            namespaces = self.namespaces
+            iframe_restrict = self.iframe_restrict
+            self.namespaces = {'html': NS_XHTML}
+            self.iframe_restrict = True
+
+        if not is_html or self.is_html:
+            for selector in selectors:
+                match = is_not
+                # We have an un-matchable situation (like `:focus`, as you cannot focus an element in this environment)
+                if isinstance(selector, ct.SelectorNull):
+                    continue
+                # Verify tag matches
+                if not self.match_tag(el, selector.tag):
+                    continue
+                # Verify tag is defined
+                if selector.flags & ct.SEL_DEFINED and not self.match_defined(el):
+                    continue
+                # Verify element is root
+                if selector.flags & ct.SEL_ROOT and not self.match_root(el):
+                    continue
+                # Verify element is scope
+                if selector.flags & ct.SEL_SCOPE and not self.match_scope(el):
+                    continue
+                # Verify element has placeholder shown
+                if selector.flags & ct.SEL_PLACEHOLDER_SHOWN and not self.match_placeholder_shown(el):
+                    continue
+                # Verify `nth` matches
+                if not self.match_nth(el, selector.nth):
+                    continue
+                if selector.flags & ct.SEL_EMPTY and not self.match_empty(el):
+                    continue
+                # Verify id matches
+                if selector.ids and not self.match_id(el, selector.ids):
+                    continue
+                # Verify classes match
+                if selector.classes and not self.match_classes(el, selector.classes):
+                    continue
+                # Verify attribute(s) match
+                if not self.match_attributes(el, selector.attributes):
+                    continue
+                # Verify ranges
+                if selector.flags & RANGES and not self.match_range(el, selector.flags & RANGES):
+                    continue
+                # Verify language patterns
+                if selector.lang and not self.match_lang(el, selector.lang):
+                    continue
+                # Verify pseudo selector patterns
+                if selector.selectors and not self.match_subselectors(el, selector.selectors):
+                    continue
+                # Verify relationship selectors
+                if selector.relation and not self.match_relations(el, selector.relation):
+                    continue
+                # Validate that the current default selector match corresponds to the first submit button in the form
+                if selector.flags & ct.SEL_DEFAULT and not self.match_default(el):
+                    continue
+                # Validate that the unset radio button is among radio buttons with the same name in a form that are
+                # also not set.
+                if selector.flags & ct.SEL_INDETERMINATE and not self.match_indeterminate(el):
+                    continue
+                # Validate element directionality
+                if selector.flags & DIR_FLAGS and not self.match_dir(el, selector.flags & DIR_FLAGS):
+                    continue
+                # Validate that the tag contains the specified text.
+                if selector.contains and not self.match_contains(el, selector.contains):
+                    continue
+                match = not is_not
+                break
+
+        # Restore actual namespaces being used for external selector lists
+        if is_html:
+            self.namespaces = namespaces
+            self.iframe_restrict = iframe_restrict
+
+        return match
+
+    def select(self, limit: int = 0) -> Iterator['bs4.Tag']:
+        """Match all tags under the targeted tag."""
+
+        lim = None if limit < 1 else limit
+
+        for child in self.get_descendants(self.tag):
+            if self.match(child):
+                yield child
+                if lim is not None:
+                    lim -= 1
+                    if lim < 1:
+                        break
+
+    def closest(self) -> Optional['bs4.Tag']:
+        """Match closest ancestor."""
+
+        current = self.tag
+        closest = None
+        while closest is None and current is not None:
+            if self.match(current):
+                closest = current
+            else:
+                current = self.get_parent(current)
+        return closest
+
+    def filter(self) -> List['bs4.Tag']:  # noqa A001
+        """Filter tag's children."""
+
+        return [tag for tag in self.get_contents(self.tag) if not self.is_navigable_string(tag) and self.match(tag)]
+
+    def match(self, el: 'bs4.Tag') -> bool:
+        """Match."""
+
+        return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)
+
+
+class SoupSieve(ct.Immutable):
+    """Compiled Soup Sieve selector matching object."""
+
+    pattern: str
+    selectors: ct.SelectorList
+    namespaces: Optional[ct.Namespaces]
+    custom: Dict[str, str]
+    flags: int
+
+    __slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")
+
+    def __init__(
+        self,
+        pattern: str,
+        selectors: ct.SelectorList,
+        namespaces: Optional[ct.Namespaces],
+        custom: Optional[ct.CustomSelectors],
+        flags: int
+    ):
+        """Initialize."""
+
+        super().__init__(
+            pattern=pattern,
+            selectors=selectors,
+            namespaces=namespaces,
+            custom=custom,
+            flags=flags
+        )
+
+    def match(self, tag: 'bs4.Tag') -> bool:
+        """Match."""
+
+        return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag)
+
+    def closest(self, tag: 'bs4.Tag') -> 'bs4.Tag':
+        """Match closest ancestor."""
+
+        return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest()
+
+    def filter(self, iterable: Iterable['bs4.Tag']) -> List['bs4.Tag']:  # noqa A001
+        """
+        Filter.
+
+        `CSSMatch` can cache certain searches for tags of the same document,
+        so if we are given a tag, all tags are from the same document,
+        and we can take advantage of the optimization.
+
+        Any other kind of iterable could have tags from different documents or detached tags,
+        so for those, we use a new `CSSMatch` for each item in the iterable.
+        """
+
+        if CSSMatch.is_tag(iterable):
+            return CSSMatch(self.selectors, iterable, self.namespaces, self.flags).filter()
+        else:
+            return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
+
+    def select_one(self, tag: 'bs4.Tag') -> 'bs4.Tag':
+        """Select a single tag."""
+
+        tags = self.select(tag, limit=1)
+        return tags[0] if tags else None
+
+    def select(self, tag: 'bs4.Tag', limit: int = 0) -> List['bs4.Tag']:
+        """Select the specified tags."""
+
+        return list(self.iselect(tag, limit))
+
+    def iselect(self, tag: 'bs4.Tag', limit: int = 0) -> Iterator['bs4.Tag']:
+        """Iterate the specified tags."""
+
+        for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit):
+            yield el
+
+    def __repr__(self) -> str:  # pragma: no cover
+        """Representation."""
+
+        return "SoupSieve(pattern={!r}, namespaces={!r}, custom={!r}, flags={!r})".format(
+            self.pattern,
+            self.namespaces,
+            self.custom,
+            self.flags
+        )
+
+    __str__ = __repr__
+
+
+ct.pickle_register(SoupSieve)
diff --git a/lib/soupsieve_old/css_parser.py b/lib/soupsieve_old/css_parser.py
new file mode 100644
index 00000000..0536b80f
--- /dev/null
+++ b/lib/soupsieve_old/css_parser.py
@@ -0,0 +1,1310 @@
+"""CSS selector parser."""
+import re
+from functools import lru_cache
+from . import util
+from . import css_match as cm
+from . import css_types as ct
+from .util import SelectorSyntaxError
+import warnings
+from typing import Optional, Dict, Match, Tuple, Type, Any, List, Union, Iterator, cast
+
+UNICODE_REPLACEMENT_CHAR = 0xFFFD
+
+# Simple pseudo classes that take no parameters
+PSEUDO_SIMPLE = {
+    ":any-link",
+    ":empty",
+    ":first-child",
+    ":first-of-type",
+    ":in-range",
+    ":out-of-range",
+    ":last-child",
+    ":last-of-type",
+    ":link",
+    ":only-child",
+    ":only-of-type",
+    ":root",
+    ':checked',
+    ':default',
+    ':disabled',
+    ':enabled',
+    ':indeterminate',
+    ':optional',
+    ':placeholder-shown',
+    ':read-only',
+    ':read-write',
+    ':required',
+    ':scope',
+    ':defined'
+}
+
+# Supported, simple pseudo classes that match nothing in the Soup Sieve environment
+PSEUDO_SIMPLE_NO_MATCH = {
+    ':active',
+    ':current',
+    ':focus',
+    ':focus-visible',
+    ':focus-within',
+    ':future',
+    ':host',
+    ':hover',
+    ':local-link',
+    ':past',
+    ':paused',
+    ':playing',
+    ':target',
+    ':target-within',
+    ':user-invalid',
+    ':visited'
+}
+
+# Complex pseudo classes that take selector lists
+PSEUDO_COMPLEX = {
+    ':contains',
+    ':-soup-contains',
+    ':-soup-contains-own',
+    ':has',
+    ':is',
+    ':matches',
+    ':not',
+    ':where'
+}
+
+PSEUDO_COMPLEX_NO_MATCH = {
+    ':current',
+    ':host',
+    ':host-context'
+}
+
+# Complex pseudo classes that take very specific parameters and are handled special
+PSEUDO_SPECIAL = {
+    ':dir',
+    ':lang',
+    ':nth-child',
+    ':nth-last-child',
+    ':nth-last-of-type',
+    ':nth-of-type'
+}
+
+PSEUDO_SUPPORTED = PSEUDO_SIMPLE | PSEUDO_SIMPLE_NO_MATCH | PSEUDO_COMPLEX | PSEUDO_COMPLEX_NO_MATCH | PSEUDO_SPECIAL
+
+# Sub-patterns parts
+# Whitespace
+NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
+WS = r'(?:[ \t]|{})'.format(NEWLINE)
+# Comments
+COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
+# Whitespace with comments included
+WSC = r'(?:{ws}|{comments})'.format(ws=WS, comments=COMMENTS)
+# CSS escapes
+CSS_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$))'.format(ws=WS)
+CSS_STRING_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$|{nl}))'.format(ws=WS, nl=NEWLINE)
+# CSS Identifier
+IDENTIFIER = r'''
+(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})+|--)
+(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})*)
+'''.format(esc=CSS_ESCAPES)
+# `nth` content
+NTH = r'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){ws}*(?:[-+]){ws}*(?:[0-9]+))?'.format(ws=WSC)
+# Value: quoted string or identifier
+VALUE = r'''
+(?:"(?:\\(?:.|{nl})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{nl})|[^\\'\r\n\f]+)*?'|{ident}+)
+'''.format(nl=NEWLINE, ident=IDENTIFIER)
+# Attribute value comparison. `!=` is handled special as it is non-standard.
+ATTR = r'''
+(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
+'''.format(ws=WSC, value=VALUE)
+
+# Selector patterns
+# IDs (`#id`)
+PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
+# Classes (`.class`)
+PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
+# Prefix:Tag (`prefix|tag`)
+PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
+# Attributes (`[attr]`, `[attr=value]`, etc.)
+PAT_ATTR = r'''
+\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
+'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
+# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
+PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
+# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
+PAT_PSEUDO_CLASS_SPECIAL = r'(?P<name>:{ident})(?P<open>\({ws}*)'.format(ws=WSC, ident=IDENTIFIER)
+# Custom pseudo class (`:--custom-pseudo`)
+PAT_PSEUDO_CLASS_CUSTOM = r'(?P<name>:(?=--){ident})'.format(ident=IDENTIFIER)
+# Closing pseudo group (`)`)
+PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC)
+# Pseudo element (`::pseudo-element`)
+PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS)
+# At rule (`@page`, etc.) (not supported)
+PAT_AT_RULE = r'@P{ident}'.format(ident=IDENTIFIER)
+# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
+PAT_PSEUDO_NTH_CHILD = r'''
+(?P<pseudo_nth_child>{name}
+(?P<nth_child>{nth}|even|odd))(?:{wsc}*\)|(?P<of>{comments}*{ws}{wsc}*of{comments}*{ws}{wsc}*))
+'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, wsc=WSC, comments=COMMENTS, ws=WS, nth=NTH)
+# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
+PAT_PSEUDO_NTH_TYPE = r'''
+(?P<pseudo_nth_type>{name}
+(?P<nth_type>{nth}|even|odd)){ws}*\)
+'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, nth=NTH)
+# Pseudo class language (`:lang("*-de", en)`)
+PAT_PSEUDO_LANG = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
+    name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
+)
+# Pseudo class direction (`:dir(ltr)`)
+PAT_PSEUDO_DIR = r'{name}(?P<dir>ltr|rtl){ws}*\)'.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC)
+# Combining characters (`>`, `~`, ` `, `+`, `,`)
+PAT_COMBINE = r'{wsc}*?(?P<relation>[,+>~]|{ws}(?![,+>~])){wsc}*'.format(ws=WS, wsc=WSC)
+# Extra: Contains (`:contains(text)`)
+PAT_PSEUDO_CONTAINS = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
+    name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
+)
+
+# Regular expressions
+# CSS escape pattern
+RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$))'.format(ws=WSC), re.I)
+RE_CSS_STR_ESC = re.compile(
+    r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$)|(\\{nl}))'.format(ws=WS, nl=NEWLINE), re.I
+)
+# Pattern to break up `nth` specifiers
+RE_NTH = re.compile(
+    r'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){ws}*(?P<s2>[-+]){ws}*(?P<b>[0-9]+))?'.format(ws=WSC),
+    re.I
+)
+# Pattern to iterate multiple values.
+RE_VALUES = re.compile(r'(?:(?P<value>{value})|(?P<split>{ws}*,{ws}*))'.format(ws=WSC, value=VALUE), re.X)
+# Whitespace checks
+RE_WS = re.compile(WS)
+RE_WS_BEGIN = re.compile('^{}*'.format(WSC))
+RE_WS_END = re.compile('{}*$'.format(WSC))
+RE_CUSTOM = re.compile(r'^{}$'.format(PAT_PSEUDO_CLASS_CUSTOM), re.X)
+
+# Constants
+# List split token
+COMMA_COMBINATOR = ','
+# Relation token for descendant
+WS_COMBINATOR = " "
+
+# Parse flags
+FLG_PSEUDO = 0x01
+FLG_NOT = 0x02
+FLG_RELATIVE = 0x04
+FLG_DEFAULT = 0x08
+FLG_HTML = 0x10
+FLG_INDETERMINATE = 0x20
+FLG_OPEN = 0x40
+FLG_IN_RANGE = 0x80
+FLG_OUT_OF_RANGE = 0x100
+FLG_PLACEHOLDER_SHOWN = 0x200
+FLG_FORGIVE = 0x400
+
+# Maximum cached patterns to store
+_MAXCACHE = 500
+
+
+@lru_cache(maxsize=_MAXCACHE)
+def _cached_css_compile(
+    pattern: str,
+    namespaces: Optional[ct.Namespaces],
+    custom: Optional[ct.CustomSelectors],
+    flags: int
+) -> cm.SoupSieve:
+    """Cached CSS compile."""
+
+    custom_selectors = process_custom(custom)
+    return cm.SoupSieve(
+        pattern,
+        CSSParser(
+            pattern,
+            custom=custom_selectors,
+            flags=flags
+        ).process_selectors(),
+        namespaces,
+        custom,
+        flags
+    )
+
+
+def _purge_cache() -> None:
+    """Purge the cache."""
+
+    _cached_css_compile.cache_clear()
+
+
+def process_custom(custom: Optional[ct.CustomSelectors]) -> Dict[str, Union[str, ct.SelectorList]]:
+    """Process custom."""
+
+    custom_selectors = {}
+    if custom is not None:
+        for key, value in custom.items():
+            name = util.lower(key)
+            if RE_CUSTOM.match(name) is None:
+                raise SelectorSyntaxError("The name '{}' is not a valid custom pseudo-class name".format(name))
+            if name in custom_selectors:
+                raise KeyError("The custom selector '{}' has already been registered".format(name))
+            custom_selectors[css_unescape(name)] = value
+    return custom_selectors
+
+
+def css_unescape(content: str, string: bool = False) -> str:
+    """
+    Unescape CSS value.
+
+    Strings allow for spanning the value on multiple lines by escaping a new line.
+    """
+
+    def replace(m: Match[str]) -> str:
+        """Replace with the appropriate substitute."""
+
+        if m.group(1):
+            codepoint = int(m.group(1)[1:], 16)
+            if codepoint == 0:
+                codepoint = UNICODE_REPLACEMENT_CHAR
+            value = chr(codepoint)
+        elif m.group(2):
+            value = m.group(2)[1:]
+        elif m.group(3):
+            value = '\ufffd'
+        else:
+            value = ''
+
+        return value
+
+    return (RE_CSS_ESC if not string else RE_CSS_STR_ESC).sub(replace, content)
+
+
+def escape(ident: str) -> str:
+    """Escape identifier."""
+
+    string = []
+    length = len(ident)
+    start_dash = length > 0 and ident[0] == '-'
+    if length == 1 and start_dash:
+        # Need to escape identifier that is a single `-` with no other characters
+        string.append('\\{}'.format(ident))
+    else:
+        for index, c in enumerate(ident):
+            codepoint = ord(c)
+            if codepoint == 0x00:
+                string.append('\ufffd')
+            elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
+                string.append('\\{:x} '.format(codepoint))
+            elif (index == 0 or (start_dash and index == 1)) and (0x30 <= codepoint <= 0x39):
+                string.append('\\{:x} '.format(codepoint))
+            elif (
+                codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or (0x30 <= codepoint <= 0x39) or
+                (0x30 <= codepoint <= 0x39) or (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A)
+            ):
+                string.append(c)
+            else:
+                string.append('\\{}'.format(c))
+    return ''.join(string)
+
+
+class SelectorPattern:
+    """Selector pattern."""
+
+    def __init__(self, name: str, pattern: str) -> None:
+        """Initialize."""
+
+        self.name = name
+        self.re_pattern = re.compile(pattern, re.I | re.X | re.U)
+
+    def get_name(self) -> str:
+        """Get name."""
+
+        return self.name
+
+    def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
+        """Match the selector."""
+
+        return self.re_pattern.match(selector, index)
+
+
+class SpecialPseudoPattern(SelectorPattern):
+    """Selector pattern."""
+
+    def __init__(self, patterns: Tuple[Tuple[str, Tuple[str, ...], str, Type[SelectorPattern]], ...]) -> None:
+        """Initialize."""
+
+        self.patterns = {}
+        for p in patterns:
+            name = p[0]
+            pattern = p[3](name, p[2])
+            for pseudo in p[1]:
+                self.patterns[pseudo] = pattern
+
+        self.matched_name = None  # type: Optional[SelectorPattern]
+        self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)
+
+    def get_name(self) -> str:
+        """Get name."""
+
+        return '' if self.matched_name is None else self.matched_name.get_name()
+
+    def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
+        """Match the selector."""
+
+        pseudo = None
+        m = self.re_pseudo_name.match(selector, index)
+        if m:
+            name = util.lower(css_unescape(m.group('name')))
+            pattern = self.patterns.get(name)
+            if pattern:
+                pseudo = pattern.match(selector, index, flags)
+                if pseudo:
+                    self.matched_name = pattern
+
+        return pseudo
+
+
+class _Selector:
+    """
+    Intermediate selector class.
+
+    This stores selector data for a compound selector as we are acquiring them.
+    Once we are done collecting the data for a compound selector, we freeze
+    the data in an object that can be pickled and hashed.
+    """
+
+    def __init__(self, **kwargs: Any) -> None:
+        """Initialize."""
+
+        self.tag = kwargs.get('tag', None)  # type: Optional[ct.SelectorTag]
+        self.ids = kwargs.get('ids', [])  # type: List[str]
+        self.classes = kwargs.get('classes', [])  # type: List[str]
+        self.attributes = kwargs.get('attributes', [])  # type: List[ct.SelectorAttribute]
+        self.nth = kwargs.get('nth', [])  # type: List[ct.SelectorNth]
+        self.selectors = kwargs.get('selectors', [])  # type: List[ct.SelectorList]
+        self.relations = kwargs.get('relations', [])  # type: List[_Selector]
+        self.rel_type = kwargs.get('rel_type', None)  # type: Optional[str]
+        self.contains = kwargs.get('contains', [])  # type: List[ct.SelectorContains]
+        self.lang = kwargs.get('lang', [])  # type: List[ct.SelectorLang]
+        self.flags = kwargs.get('flags', 0)  # type: int
+        self.no_match = kwargs.get('no_match', False)  # type: bool
+
+    def _freeze_relations(self, relations: List['_Selector']) -> ct.SelectorList:
+        """Freeze relation."""
+
+        if relations:
+            sel = relations[0]
+            sel.relations.extend(relations[1:])
+            return ct.SelectorList([sel.freeze()])
+        else:
+            return ct.SelectorList()
+
+    def freeze(self) -> Union[ct.Selector, ct.SelectorNull]:
+        """Freeze self."""
+
+        if self.no_match:
+            return ct.SelectorNull()
+        else:
+            return ct.Selector(
+                self.tag,
+                tuple(self.ids),
+                tuple(self.classes),
+                tuple(self.attributes),
+                tuple(self.nth),
+                tuple(self.selectors),
+                self._freeze_relations(self.relations),
+                self.rel_type,
+                tuple(self.contains),
+                tuple(self.lang),
+                self.flags
+            )
+
+    def __str__(self) -> str:  # pragma: no cover
+        """String representation."""
+
+        return (
+            '_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, '
+            'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})'
+        ).format(
+            self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors,
+            self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match
+        )
+
+    __repr__ = __str__
+
+
+class CSSParser:
+    """Parse CSS selectors."""
+
+    css_tokens = (
+        SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
+        SpecialPseudoPattern(
+            (
+                (
+                    "pseudo_contains",
+                    (':contains', ':-soup-contains', ':-soup-contains-own'),
+                    PAT_PSEUDO_CONTAINS,
+                    SelectorPattern
+                ),
+                ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
+                ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
+                ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
+                ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
+            )
+        ),
+        SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
+        SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS),
+        SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT),
+        SelectorPattern("at_rule", PAT_AT_RULE),
+        SelectorPattern("id", PAT_ID),
+        SelectorPattern("class", PAT_CLASS),
+        SelectorPattern("tag", PAT_TAG),
+        SelectorPattern("attribute", PAT_ATTR),
+        SelectorPattern("combine", PAT_COMBINE)
+    )
+
+    def __init__(
+        self,
+        selector: str,
+        custom: Optional[Dict[str, Union[str, ct.SelectorList]]] = None,
+        flags: int = 0
+    ) -> None:
+        """Initialize."""
+
+        self.pattern = selector.replace('\x00', '\ufffd')
+        self.flags = flags
+        self.debug = self.flags & util.DEBUG
+        self.custom = {} if custom is None else custom
+
+    def parse_attribute_selector(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
+        """Build a `ct.SelectorAttribute` from an attribute token and add it to `sel`; always returns `True`."""
+
+        inverse = False
+        op = m.group('cmp')
+        case = util.lower(m.group('case')) if m.group('case') else None
+        ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
+        attr = css_unescape(m.group('attr_name'))
+        is_type = False
+        pattern2 = None
+        value = ''
+
+        if case:
+            flags = (re.I if case == 'i' else 0) | re.DOTALL
+        elif util.lower(attr) == 'type':  # `type` without an explicit case flag is matched case-insensitively
+            flags = re.I | re.DOTALL
+            is_type = True
+        else:
+            flags = re.DOTALL
+
+        if op:
+            if m.group('value').startswith(('"', "'")):
+                value = css_unescape(m.group('value')[1:-1], True)
+            else:
+                value = css_unescape(m.group('value'))
+
+        if not op:
+            # Attribute name only (no operator), so no value pattern is needed
+            pattern = None
+        elif op.startswith('^'):
+            # Value starts with
+            pattern = re.compile(r'^%s.*' % re.escape(value), flags)
+        elif op.startswith('$'):
+            # Value ends with
+            pattern = re.compile(r'.*?%s$' % re.escape(value), flags)
+        elif op.startswith('*'):
+            # Value contains
+            pattern = re.compile(r'.*?%s.*' % re.escape(value), flags)
+        elif op.startswith('~'):
+            # Value contains word within space separated list
+            # `~=` should match nothing if it is empty or contains whitespace,
+            # so if either of these cases is present, use `[^\s\S]` which cannot be matched.
+            value = r'[^\s\S]' if not value or RE_WS.search(value) else re.escape(value)
+            pattern = re.compile(r'.*?(?:(?<=^)|(?<=[ \t\r\n\f]))%s(?=(?:[ \t\r\n\f]|$)).*' % value, flags)
+        elif op.startswith('|'):
+            # Value starts with word in dash separated list
+            pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
+        else:
+            # Value matches exactly (`=`), or must not match (`!=`)
+            pattern = re.compile(r'^%s$' % re.escape(value), flags)
+            if op.startswith('!'):
+                # Equivalent to `:not([attr=value])`
+                inverse = True
+        if is_type and pattern:
+            pattern2 = re.compile(pattern.pattern)  # same expression recompiled without `flags`
+
+        # Append the attribute selector
+        sel_attr = ct.SelectorAttribute(attr, ns, pattern, pattern2)
+        if inverse:
+            # If we are using `!=`, we need to nest the pattern under a `:not()`.
+            sub_sel = _Selector()
+            sub_sel.attributes.append(sel_attr)
+            not_list = ct.SelectorList([sub_sel.freeze()], True, False)
+            sel.selectors.append(not_list)
+        else:
+            sel.attributes.append(sel_attr)
+
+        has_selector = True
+        return has_selector
+
+    def parse_tag_pattern(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
+        """Set `sel.tag` from the token's unescaped name and optional namespace prefix; always returns `True`."""
+
+        prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None
+        tag = css_unescape(m.group('tag_name'))
+        sel.tag = ct.SelectorTag(tag, prefix)
+        has_selector = True
+        return has_selector
+
+    def parse_pseudo_class_custom(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
+        """
+        Parse custom pseudo class alias.
+
+        Custom selectors are compiled lazily, on first use. While one is being compiled its entry is
+        removed from the dictionary, so a self-referencing definition raises instead of looping forever.
+        """
+
+        pseudo = util.lower(css_unescape(m.group('name')))
+        selector = self.custom.get(pseudo)
+        if selector is None:
+            raise SelectorSyntaxError(
+                "Undefined custom selector '{}' found at position {}".format(pseudo, m.end(0)),
+                self.pattern,
+                m.end(0)
+            )
+
+        if not isinstance(selector, ct.SelectorList):
+            del self.custom[pseudo]  # hide the entry while it compiles (recursion guard)
+            selector = CSSParser(
+                selector, custom=self.custom, flags=self.flags
+            ).process_selectors(flags=FLG_PSEUDO)
+            self.custom[pseudo] = selector  # cache the compiled list for later uses
+
+        sel.selectors.append(selector)
+        has_selector = True
+        return has_selector
+
+    def parse_pseudo_class(
+        self,
+        sel: _Selector,
+        m: Match[str],
+        has_selector: bool,
+        iselector: Iterator[Tuple[str, Match[str]]],
+        is_html: bool
+    ) -> Tuple[bool, bool]:
+        """Dispatch a pseudo-class token; returns `(has_selector, is_html)`, raises if unsupported or malformed."""
+
+        complex_pseudo = False
+        pseudo = util.lower(css_unescape(m.group('name')))
+        if m.group('open'):
+            complex_pseudo = True
+        if complex_pseudo and pseudo in PSEUDO_COMPLEX:
+            has_selector = self.parse_pseudo_open(sel, pseudo, has_selector, iselector, m.end(0))
+        elif not complex_pseudo and pseudo in PSEUDO_SIMPLE:
+            if pseudo == ':root':
+                sel.flags |= ct.SEL_ROOT
+            elif pseudo == ':defined':
+                sel.flags |= ct.SEL_DEFINED
+                is_html = True  # the only simple pseudo-class here that switches the list to HTML
+            elif pseudo == ':scope':
+                sel.flags |= ct.SEL_SCOPE
+            elif pseudo == ':empty':
+                sel.flags |= ct.SEL_EMPTY
+            elif pseudo in (':link', ':any-link'):
+                sel.selectors.append(CSS_LINK)
+            elif pseudo == ':checked':
+                sel.selectors.append(CSS_CHECKED)
+            elif pseudo == ':default':
+                sel.selectors.append(CSS_DEFAULT)
+            elif pseudo == ':indeterminate':
+                sel.selectors.append(CSS_INDETERMINATE)
+            elif pseudo == ":disabled":
+                sel.selectors.append(CSS_DISABLED)
+            elif pseudo == ":enabled":
+                sel.selectors.append(CSS_ENABLED)
+            elif pseudo == ":required":
+                sel.selectors.append(CSS_REQUIRED)
+            elif pseudo == ":optional":
+                sel.selectors.append(CSS_OPTIONAL)
+            elif pseudo == ":read-only":
+                sel.selectors.append(CSS_READ_ONLY)
+            elif pseudo == ":read-write":
+                sel.selectors.append(CSS_READ_WRITE)
+            elif pseudo == ":in-range":
+                sel.selectors.append(CSS_IN_RANGE)
+            elif pseudo == ":out-of-range":
+                sel.selectors.append(CSS_OUT_OF_RANGE)
+            elif pseudo == ":placeholder-shown":
+                sel.selectors.append(CSS_PLACEHOLDER_SHOWN)
+            elif pseudo == ':first-child':
+                sel.nth.append(ct.SelectorNth(1, False, 0, False, False, ct.SelectorList()))
+            elif pseudo == ':last-child':
+                sel.nth.append(ct.SelectorNth(1, False, 0, False, True, ct.SelectorList()))
+            elif pseudo == ':first-of-type':
+                sel.nth.append(ct.SelectorNth(1, False, 0, True, False, ct.SelectorList()))
+            elif pseudo == ':last-of-type':
+                sel.nth.append(ct.SelectorNth(1, False, 0, True, True, ct.SelectorList()))
+            elif pseudo == ':only-child':
+                sel.nth.extend(
+                    [
+                        ct.SelectorNth(1, False, 0, False, False, ct.SelectorList()),
+                        ct.SelectorNth(1, False, 0, False, True, ct.SelectorList())
+                    ]
+                )
+            elif pseudo == ':only-of-type':
+                sel.nth.extend(
+                    [
+                        ct.SelectorNth(1, False, 0, True, False, ct.SelectorList()),
+                        ct.SelectorNth(1, False, 0, True, True, ct.SelectorList())
+                    ]
+                )
+            has_selector = True
+        elif complex_pseudo and pseudo in PSEUDO_COMPLEX_NO_MATCH:
+            self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)  # consume the arguments; result unused
+            sel.no_match = True
+            has_selector = True
+        elif not complex_pseudo and pseudo in PSEUDO_SIMPLE_NO_MATCH:
+            sel.no_match = True
+            has_selector = True
+        elif pseudo in PSEUDO_SUPPORTED:
+            raise SelectorSyntaxError(
+                "Invalid syntax for pseudo class '{}'".format(pseudo),
+                self.pattern,
+                m.start(0)
+            )
+        else:
+            raise NotImplementedError(
+                "'{}' pseudo-class is not implemented at this time".format(pseudo)
+            )
+
+        return has_selector, is_html
+
+    def parse_pseudo_nth(
+        self,
+        sel: _Selector,
+        m: Match[str],
+        has_selector: bool,
+        iselector: Iterator[Tuple[str, Match[str]]]
+    ) -> bool:
+        """Parse an `:nth-*()` token (`even`, `odd` or an `an+b` form) into a `ct.SelectorNth` on `sel.nth`."""
+
+        mdict = m.groupdict()
+        if mdict.get('pseudo_nth_child'):
+            postfix = '_child'
+        else:
+            postfix = '_type'
+        mdict['name'] = util.lower(css_unescape(mdict['name']))
+        content = util.lower(mdict.get('nth' + postfix))
+        if content == 'even':
+            # 2n
+            s1 = 2
+            s2 = 0
+            var = True
+        elif content == 'odd':
+            # 2n+1
+            s1 = 2
+            s2 = 1
+            var = True
+        else:
+            nth_parts = cast(Match[str], RE_NTH.match(content))
+            _s1 = '-' if nth_parts.group('s1') and nth_parts.group('s1') == '-' else ''
+            a = nth_parts.group('a')
+            var = a.endswith('n')  # True when the first term carries `n`, False for a bare number
+            if a.startswith('n'):
+                _s1 += '1'  # a bare `n` means a coefficient of 1
+            elif var:
+                _s1 += a[:-1]
+            else:
+                _s1 += a
+            _s2 = '-' if nth_parts.group('s2') and nth_parts.group('s2') == '-' else ''
+            if nth_parts.group('b'):
+                _s2 += nth_parts.group('b')
+            else:
+                _s2 = '0'
+            s1 = int(_s1, 10)
+            s2 = int(_s2, 10)
+
+        pseudo_sel = mdict['name']
+        if postfix == '_child':
+            if m.group('of'):
+                # Parse the rest of `of S`.
+                nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
+            else:
+                # Use default `*|*` for `of S`.
+                nth_sel = CSS_NTH_OF_S_DEFAULT
+            if pseudo_sel == ':nth-child':
+                sel.nth.append(ct.SelectorNth(s1, var, s2, False, False, nth_sel))
+            elif pseudo_sel == ':nth-last-child':
+                sel.nth.append(ct.SelectorNth(s1, var, s2, False, True, nth_sel))
+        else:
+            if pseudo_sel == ':nth-of-type':
+                sel.nth.append(ct.SelectorNth(s1, var, s2, True, False, ct.SelectorList()))
+            elif pseudo_sel == ':nth-last-of-type':
+                sel.nth.append(ct.SelectorNth(s1, var, s2, True, True, ct.SelectorList()))
+        has_selector = True
+        return has_selector
+
+    def parse_pseudo_open(
+        self,
+        sel: _Selector,
+        name: str,
+        has_selector: bool,
+        iselector: Iterator[Tuple[str, Match[str]]],
+        index: int
+    ) -> bool:
+        """Parse the selector list of an opening pseudo-class such as `:not(`, `:has(`, `:is(` or `:where(`."""
+
+        flags = FLG_PSEUDO | FLG_OPEN
+        if name == ':not':
+            flags |= FLG_NOT
+        elif name == ':has':
+            flags |= FLG_RELATIVE | FLG_FORGIVE
+        elif name in (':where', ':is'):
+            flags |= FLG_FORGIVE
+
+        sel.selectors.append(self.parse_selectors(iselector, index, flags))
+        has_selector = True
+
+        return has_selector
+
+    def parse_has_combinator(
+        self,
+        sel: _Selector,
+        m: Match[str],
+        has_selector: bool,
+        selectors: List[_Selector],
+        rel_type: str,
+        index: int
+    ) -> Tuple[bool, _Selector, str]:
+        """Parse a combinator token inside a relative selector list; returns `(has_selector, sel, rel_type)`."""
+
+        combinator = m.group('relation').strip()
+        if not combinator:
+            combinator = WS_COMBINATOR
+        if combinator == COMMA_COMBINATOR:
+            if not has_selector:
+                # If we've not captured any selector parts, the comma is either at the beginning of the pattern
+                # or following another comma, both of which are unexpected. But shouldn't fail the pseudo-class.
+                sel.no_match = True
+
+            sel.rel_type = rel_type
+            selectors[-1].relations.append(sel)
+            rel_type = ":" + WS_COMBINATOR  # reset to the default leading combinator for the next list entry
+            selectors.append(_Selector())
+        else:
+            if has_selector:
+                # End the current selector and associate the leading combinator with this selector.
+                sel.rel_type = rel_type
+                selectors[-1].relations.append(sel)
+            elif rel_type[1:] != WS_COMBINATOR:
+                # It's impossible to have two whitespace combinators after each other as the patterns
+                # will gobble up trailing whitespace. It is also impossible to have a whitespace
+                # combinator after any other kind for the same reason. But we could have
+                # multiple non-whitespace combinators. So if the current combinator is not a whitespace,
+                # then we've hit the multiple combinator case, so we should fail.
+                raise SelectorSyntaxError(
+                    'The multiple combinators at position {}'.format(index),
+                    self.pattern,
+                    index
+                )
+
+            # Set the leading combinator for the next selector.
+            rel_type = ':' + combinator
+
+        sel = _Selector()
+        has_selector = False
+        return has_selector, sel, rel_type
+
+    def parse_combinator(
+        self,
+        sel: _Selector,
+        m: Match[str],
+        has_selector: bool,
+        selectors: List[_Selector],
+        relations: List[_Selector],
+        is_pseudo: bool,
+        is_forgive: bool,
+        index: int
+    ) -> Tuple[bool, _Selector]:
+        """Parse a combinator or comma token in a non-relative list; returns `(False, <fresh selector>)`."""
+
+        combinator = m.group('relation').strip()
+        if not combinator:
+            combinator = WS_COMBINATOR
+        if not has_selector:
+            if not is_forgive or combinator != COMMA_COMBINATOR:
+                raise SelectorSyntaxError(
+                    "The combinator '{}' at position {}, must have a selector before it".format(combinator, index),
+                    self.pattern,
+                    index
+                )
+
+            # If we are in a forgiving pseudo class, just make the selector a "no match"
+            if combinator == COMMA_COMBINATOR:
+                sel.no_match = True
+                del relations[:]
+                selectors.append(sel)
+        else:
+            if combinator == COMMA_COMBINATOR:
+                if not sel.tag and not is_pseudo:
+                    # Implied `*`
+                    sel.tag = ct.SelectorTag('*', None)
+                sel.relations.extend(relations)
+                selectors.append(sel)
+                del relations[:]
+            else:
+                sel.relations.extend(relations)
+                sel.rel_type = combinator
+                del relations[:]
+                relations.append(sel)  # the finished compound becomes the pending relation of the next one
+
+        sel = _Selector()
+        has_selector = False
+
+        return has_selector, sel
+
+    def parse_class_id(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
+        """Append an unescaped class (token starts with `.`) or id (otherwise) to `sel`; always returns `True`."""
+
+        selector = m.group(0)
+        if selector.startswith('.'):
+            sel.classes.append(css_unescape(selector[1:]))
+        else:
+            sel.ids.append(css_unescape(selector[1:]))
+        has_selector = True
+        return has_selector
+
+    def parse_pseudo_contains(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
+        """Parse a contains-style pseudo-class into a `ct.SelectorContains`; `:contains` emits a `FutureWarning`."""
+
+        pseudo = util.lower(css_unescape(m.group('name')))
+        if pseudo == ":contains":
+            warnings.warn(
+                "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
+                FutureWarning
+            )
+        contains_own = pseudo == ":-soup-contains-own"
+        values = css_unescape(m.group('values'))
+        patterns = []
+        for token in RE_VALUES.finditer(values):
+            if token.group('split'):
+                continue
+            value = token.group('value')
+            if value.startswith(("'", '"')):
+                value = css_unescape(value[1:-1], True)  # strip the surrounding quotes before unescaping
+            else:
+                value = css_unescape(value)
+            patterns.append(value)
+        sel.contains.append(ct.SelectorContains(patterns, contains_own))
+        has_selector = True
+        return has_selector
+
+    def parse_pseudo_lang(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
+        """Parse the value list of a language pseudo-class into a `ct.SelectorLang` appended to `sel.lang`."""
+
+        values = m.group('values')
+        patterns = []
+        for token in RE_VALUES.finditer(values):
+            if token.group('split'):
+                continue
+            value = token.group('value')
+            if value.startswith(('"', "'")):
+                value = css_unescape(value[1:-1], True)  # strip the surrounding quotes before unescaping
+            else:
+                value = css_unescape(value)
+
+            patterns.append(value)
+
+        sel.lang.append(ct.SelectorLang(patterns))
+        has_selector = True
+
+        return has_selector
+
+    def parse_pseudo_dir(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
+        """Set `SEL_DIR_LTR` on `sel.flags` when the lower-cased direction is `ltr`, otherwise `SEL_DIR_RTL`."""
+
+        value = ct.SEL_DIR_LTR if util.lower(m.group('dir')) == 'ltr' else ct.SEL_DIR_RTL
+        sel.flags |= value
+        has_selector = True
+        return has_selector
+
+    def parse_selectors(
+        self,
+        iselector: Iterator[Tuple[str, Match[str]]],
+        index: int = 0,
+        flags: int = 0
+    ) -> ct.SelectorList:
+        """Consume tokens until exhausted (or until the closing `)` with `FLG_OPEN`) and build a `SelectorList`."""
+
+        # Initialize important variables
+        sel = _Selector()
+        selectors = []
+        has_selector = False
+        closed = False
+        relations = []  # type: List[_Selector]
+        rel_type = ":" + WS_COMBINATOR
+
+        # Setup various flags
+        is_open = bool(flags & FLG_OPEN)
+        is_pseudo = bool(flags & FLG_PSEUDO)
+        is_relative = bool(flags & FLG_RELATIVE)
+        is_not = bool(flags & FLG_NOT)
+        is_html = bool(flags & FLG_HTML)
+        is_default = bool(flags & FLG_DEFAULT)
+        is_indeterminate = bool(flags & FLG_INDETERMINATE)
+        is_in_range = bool(flags & FLG_IN_RANGE)
+        is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
+        is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)
+        is_forgive = bool(flags & FLG_FORGIVE)
+
+        # Print out useful debug stuff
+        if self.debug:  # pragma: no cover
+            if is_pseudo:
+                print('    is_pseudo: True')
+            if is_open:
+                print('    is_open: True')
+            if is_relative:
+                print('    is_relative: True')
+            if is_not:
+                print('    is_not: True')
+            if is_html:
+                print('    is_html: True')
+            if is_default:
+                print('    is_default: True')
+            if is_indeterminate:
+                print('    is_indeterminate: True')
+            if is_in_range:
+                print('    is_in_range: True')
+            if is_out_of_range:
+                print('    is_out_of_range: True')
+            if is_placeholder_shown:
+                print('    is_placeholder_shown: True')
+            if is_forgive:
+                print('    is_forgive: True')
+
+        # The algorithm for relative selectors require an initial selector in the selector list
+        if is_relative:
+            selectors.append(_Selector())
+
+        try:
+            while True:
+                key, m = next(iselector)
+
+                # Handle parts
+                if key == "at_rule":
+                    raise NotImplementedError("At-rules found at position {}".format(m.start(0)))
+                elif key == 'pseudo_class_custom':
+                    has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
+                elif key == 'pseudo_class':
+                    has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
+                elif key == 'pseudo_element':
+                    raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
+                elif key == 'pseudo_contains':
+                    has_selector = self.parse_pseudo_contains(sel, m, has_selector)
+                elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
+                    has_selector = self.parse_pseudo_nth(sel, m, has_selector, iselector)
+                elif key == 'pseudo_lang':
+                    has_selector = self.parse_pseudo_lang(sel, m, has_selector)
+                elif key == 'pseudo_dir':
+                    has_selector = self.parse_pseudo_dir(sel, m, has_selector)
+                    # Currently only supports HTML
+                    is_html = True
+                elif key == 'pseudo_close':
+                    if not has_selector:
+                        if not is_forgive:
+                            raise SelectorSyntaxError(
+                                "Expected a selector at position {}".format(m.start(0)),
+                                self.pattern,
+                                m.start(0)
+                            )
+                        sel.no_match = True
+                    if is_open:
+                        closed = True
+                        break
+                    else:
+                        raise SelectorSyntaxError(
+                            "Unmatched pseudo-class close at position {}".format(m.start(0)),
+                            self.pattern,
+                            m.start(0)
+                        )
+                elif key == 'combine':
+                    if is_relative:
+                        has_selector, sel, rel_type = self.parse_has_combinator(
+                            sel, m, has_selector, selectors, rel_type, index
+                        )
+                    else:
+                        has_selector, sel = self.parse_combinator(
+                            sel, m, has_selector, selectors, relations, is_pseudo, is_forgive, index
+                        )
+                elif key == 'attribute':
+                    has_selector = self.parse_attribute_selector(sel, m, has_selector)
+                elif key == 'tag':
+                    if has_selector:
+                        raise SelectorSyntaxError(
+                            "Tag name found at position {} instead of at the start".format(m.start(0)),
+                            self.pattern,
+                            m.start(0)
+                        )
+                    has_selector = self.parse_tag_pattern(sel, m, has_selector)
+                elif key in ('class', 'id'):
+                    has_selector = self.parse_class_id(sel, m, has_selector)
+
+                index = m.end(0)
+        except StopIteration:
+            pass
+
+        # Handle selectors that are not closed
+        if is_open and not closed:
+            raise SelectorSyntaxError(
+                "Unclosed pseudo-class at position {}".format(index),
+                self.pattern,
+                index
+            )
+
+        # Cleanup completed selector piece
+        if has_selector:
+            if not sel.tag and not is_pseudo:
+                # Implied `*`
+                sel.tag = ct.SelectorTag('*', None)
+            if is_relative:
+                sel.rel_type = rel_type
+                selectors[-1].relations.append(sel)
+            else:
+                sel.relations.extend(relations)
+                del relations[:]
+                selectors.append(sel)
+
+        # Forgive empty slots in pseudo-classes that have lists (and are forgiving)
+        elif is_forgive:
+            if is_relative:
+                # Handle relative selectors pseudo-classes with empty slots like `:has()`
+                if selectors and selectors[-1].rel_type is None and rel_type == ': ':
+                    sel.rel_type = rel_type
+                    sel.no_match = True
+                    selectors[-1].relations.append(sel)
+                    has_selector = True
+            else:
+                # Handle normal pseudo-classes with empty slots
+                if not selectors or not relations:
+                    # Others like `:is()` etc.
+                    sel.no_match = True
+                    del relations[:]
+                    selectors.append(sel)
+                    has_selector = True
+
+        if not has_selector:
+            # We will always need to finish a selector when `:has()` is used as it leads with combining.
+            # May apply to others as well.
+            raise SelectorSyntaxError(
+                'Expected a selector at position {}'.format(index),
+                self.pattern,
+                index
+            )
+
+        # Some patterns require additional logic, such as default. We try to make these the
+        # last pattern, and append the appropriate flag to that selector which communicates
+        # to the matcher what additional logic is required.
+        if is_default:
+            selectors[-1].flags = ct.SEL_DEFAULT  # NOTE: plain assignment, replaces any flags already set
+        if is_indeterminate:
+            selectors[-1].flags = ct.SEL_INDETERMINATE
+        if is_in_range:
+            selectors[-1].flags = ct.SEL_IN_RANGE
+        if is_out_of_range:
+            selectors[-1].flags = ct.SEL_OUT_OF_RANGE
+        if is_placeholder_shown:
+            selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN
+
+        # Return selector list
+        return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
+
+    def selector_iter(self, pattern: str) -> Iterator[Tuple[str, Match[str]]]:
+        """Yield `(token_name, match)` pairs for `pattern`, trying `css_tokens` in order; raise on unmatched input."""
+
+        # Ignore whitespace and comments at start and end of pattern
+        m = RE_WS_BEGIN.search(pattern)
+        index = m.end(0) if m else 0
+        m = RE_WS_END.search(pattern)
+        end = (m.start(0) - 1) if m else (len(pattern) - 1)  # `end` is the last index to scan (inclusive)
+
+        if self.debug:  # pragma: no cover
+            print('## PARSING: {!r}'.format(pattern))
+        while index <= end:
+            m = None
+            for v in self.css_tokens:
+                m = v.match(pattern, index, self.flags)
+                if m:
+                    name = v.get_name()
+                    if self.debug:  # pragma: no cover
+                        print("TOKEN: '{}' --> {!r} at position {}".format(name, m.group(0), m.start(0)))
+                    index = m.end(0)
+                    yield name, m
+                    break
+            if m is None:
+                c = pattern[index]
+                # If the character represents the start of one of the known selector types,
+                # throw an exception mentioning that the known selector type is in error;
+                # otherwise, report the invalid character.
+                if c == '[':
+                    msg = "Malformed attribute selector at position {}".format(index)
+                elif c == '.':
+                    msg = "Malformed class selector at position {}".format(index)
+                elif c == '#':
+                    msg = "Malformed id selector at position {}".format(index)
+                elif c == ':':
+                    msg = "Malformed pseudo-class selector at position {}".format(index)
+                else:
+                    msg = "Invalid character {!r} position {}".format(c, index)
+                raise SelectorSyntaxError(msg, self.pattern, index)
+        if self.debug:  # pragma: no cover
+            print('## END PARSING')
+
+    def process_selectors(self, index: int = 0, flags: int = 0) -> ct.SelectorList:
+        """Tokenize `self.pattern` with `selector_iter` and parse the tokens with `parse_selectors`."""
+
+        return self.parse_selectors(self.selector_iter(self.pattern), index, flags)
+
+
+# Precompile CSS selector lists for pseudo-classes (additional logic may be required beyond the pattern)
+# A few patterns are order dependent as they use previously compiled patterns.
+
+# CSS pattern for `:link` and `:any-link`
+CSS_LINK = CSSParser(
+    'html|*:is(a, area)[href]'
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:checked` (used by CSS_DEFAULT below)
+CSS_CHECKED = CSSParser(
+    '''
+    html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
+    '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:default` (must compile CSS_CHECKED first)
+CSS_DEFAULT = CSSParser(
+    '''
+    :checked,
+
+    /*
+    This pattern must be at the end.
+    Special logic is applied to the last selector.
+    */
+    html|form html|*:is(button, input)[type="submit"]
+    '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_DEFAULT)
+# CSS pattern for `:indeterminate`
+CSS_INDETERMINATE = CSSParser(
+    '''
+    html|input[type="checkbox"][indeterminate],
+    html|input[type="radio"]:is(:not([name]), [name=""]):not([checked]),
+    html|progress:not([value]),
+
+    /*
+    This pattern must be at the end.
+    Special logic is applied to the last selector.
+    */
+    html|input[type="radio"][name]:not([name='']):not([checked])
+    '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
+# CSS pattern for `:disabled` (used by CSS_ENABLED and CSS_READ_WRITE below)
+CSS_DISABLED = CSSParser(
+    '''
+    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
+    html|optgroup[disabled] > html|option,
+    html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
+    html|fieldset[disabled] >
+        html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
+    '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:enabled` (CSS_DISABLED must be compiled first)
+CSS_ENABLED = CSSParser(
+    '''
+    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
+    '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:required`
+CSS_REQUIRED = CSSParser(
+    'html|*:is(input, textarea, select)[required]'
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:optional`
+CSS_OPTIONAL = CSSParser(
+    'html|*:is(input, textarea, select):not([required])'
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:placeholder-shown`
+CSS_PLACEHOLDER_SHOWN = CSSParser(
+    '''
+    html|input:is(
+        :not([type]),
+        [type=""],
+        [type=text],
+        [type=search],
+        [type=url],
+        [type=tel],
+        [type=email],
+        [type=password],
+        [type=number]
+    )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
+    html|textarea[placeholder]:not([placeholder=''])
+    '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
+# CSS pattern default for `:nth-child` "of S" feature
+CSS_NTH_OF_S_DEFAULT = CSSParser(
+    '*|*'
+).process_selectors(flags=FLG_PSEUDO)
+# CSS pattern for `:read-write` (CSS_DISABLED must be compiled first)
+CSS_READ_WRITE = CSSParser(
+    '''
+    html|*:is(
+        textarea,
+        input:is(
+            :not([type]),
+            [type=""],
+            [type=text],
+            [type=search],
+            [type=url],
+            [type=tel],
+            [type=email],
+            [type=number],
+            [type=password],
+            [type=date],
+            [type=datetime-local],
+            [type=month],
+            [type=time],
+            [type=week]
+        )
+    ):not([readonly], :disabled),
+    html|*:is([contenteditable=""], [contenteditable="true" i])
+    '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:read-only` (CSS_READ_WRITE must be compiled first)
+CSS_READ_ONLY = CSSParser(
+    '''
+    html|*:not(:read-write)
+    '''
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+# CSS pattern for `:in-range`
+CSS_IN_RANGE = CSSParser(
+    '''
+    html|input:is(
+        [type="date"],
+        [type="month"],
+        [type="week"],
+        [type="time"],
+        [type="datetime-local"],
+        [type="number"],
+        [type="range"]
+    ):is(
+        [min],
+        [max]
+    )
+    '''
+).process_selectors(flags=FLG_PSEUDO | FLG_IN_RANGE | FLG_HTML)
+# CSS pattern for `:out-of-range`
+CSS_OUT_OF_RANGE = CSSParser(
+    '''
+    html|input:is(
+        [type="date"],
+        [type="month"],
+        [type="week"],
+        [type="time"],
+        [type="datetime-local"],
+        [type="number"],
+        [type="range"]
+    ):is(
+        [min],
+        [max]
+    )
+    '''
+).process_selectors(flags=FLG_PSEUDO | FLG_OUT_OF_RANGE | FLG_HTML)
diff --git a/lib/soupsieve_old/css_types.py b/lib/soupsieve_old/css_types.py
new file mode 100644
index 00000000..e5a6e49c
--- /dev/null
+++ b/lib/soupsieve_old/css_types.py
@@ -0,0 +1,407 @@
+"""CSS selector structure items."""
+import copyreg
+from .pretty import pretty
+from typing import Any, Type, Tuple, Union, Dict, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
+
+__all__ = (  # exported names; the `Immutable` and `ImmutableDict` base classes are not listed
+    'Selector',
+    'SelectorNull',
+    'SelectorTag',
+    'SelectorAttribute',
+    'SelectorContains',
+    'SelectorNth',
+    'SelectorLang',
+    'SelectorList',
+    'Namespaces',
+    'CustomSelectors'
+)
+
+
+SEL_EMPTY = 0x1  # every SEL_* value is a distinct single bit, so several can be OR-ed into one int
+SEL_ROOT = 0x2
+SEL_DEFAULT = 0x4
+SEL_INDETERMINATE = 0x8
+SEL_SCOPE = 0x10
+SEL_DIR_LTR = 0x20
+SEL_DIR_RTL = 0x40
+SEL_IN_RANGE = 0x80
+SEL_OUT_OF_RANGE = 0x100
+SEL_DEFINED = 0x200
+SEL_PLACEHOLDER_SHOWN = 0x400
+
+
+class Immutable:
+    """Base for value objects whose attributes are set once at construction and never reassigned."""
+
+    __slots__: Tuple[str, ...] = ('_hash',)
+
+    _hash: int
+
+    def __init__(self, **kwargs: Any) -> None:
+        """Store every keyword as an attribute and cache a hash of the (type, value) pairs."""
+
+        temp = []
+        for k, v in kwargs.items():
+            temp.extend((type(v), v))
+            # Bypass our own `__setattr__`, which always raises.
+            super().__setattr__(k, v)
+        super().__setattr__('_hash', hash(tuple(temp)))
+
+    @classmethod
+    def __base__(cls) -> "Type[Immutable]":
+        """Return the class that comparisons use for their `isinstance` check."""
+
+        return cls
+
+    def __eq__(self, other: Any) -> bool:
+        """Return True when `other` is an instance of `__base__()` and every slot except `_hash` is equal."""
+
+        return (
+            isinstance(other, self.__base__()) and
+            all(getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash')
+        )
+
+    def __ne__(self, other: Any) -> bool:
+        """Return True when `other` is not an instance of `__base__()` or any slot except `_hash` differs."""
+
+        return (
+            not isinstance(other, self.__base__()) or
+            any(getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash')
+        )
+
+    def __hash__(self) -> int:
+        """Return the hash cached by `__init__`."""
+
+        return self._hash
+
+    def __setattr__(self, name: str, value: Any) -> None:
+        """Reject every attribute assignment with `AttributeError`."""
+
+        raise AttributeError("'{}' is immutable".format(self.__class__.__name__))
+
+    def __repr__(self) -> str:  # pragma: no cover
+        """Return `ClassName(slot=value, ...)`, leaving out the cached `_hash`."""
+
+        # Skip `_hash` by name, as `__eq__`/`__ne__` do, rather than assuming it is the last slot.
+        fields = ("{}={!r}".format(k, getattr(self, k)) for k in self.__slots__ if k != '_hash')
+        return "{}({})".format(self.__class__.__name__, ', '.join(fields))
+
+    __str__ = __repr__
+
+    def pretty(self) -> None:  # pragma: no cover
+        """Print the indented form of this object produced by the imported `pretty` function."""
+
+        print(pretty(self))
+
+
+class ImmutableDict(Mapping[Any, Any]):
+    """Hashable, read-only mapping built once from a dict or an iterable of key/value pairs."""
+
+    def __init__(self, arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]) -> None:
+        """Validate `arg`, copy it into a private dict and cache the hash of its sorted items."""
+
+        # Snapshot non-dict input first: validating a one-shot iterator would otherwise
+        # exhaust it and `dict()` below would silently build an empty mapping.
+        pairs = arg if isinstance(arg, dict) else tuple(arg)
+        self._validate(pairs)
+        self._d = dict(pairs)
+        self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
+
+    def _validate(self, arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]) -> None:
+        """Raise `TypeError` unless the values (and, for pair input, the keys) are hashable."""
+
+        if isinstance(arg, dict):
+            if not all(isinstance(v, Hashable) for v in arg.values()):
+                raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
+        elif not all(isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg):
+            raise TypeError('{} keys and values must be hashable'.format(self.__class__.__name__))
+
+    def __iter__(self) -> Iterator[Any]:
+        """Iterate over the keys of the underlying dict."""
+
+        return iter(self._d)
+
+    def __len__(self) -> int:
+        """Return the number of stored items."""
+
+        return len(self._d)
+
+    def __getitem__(self, key: Any) -> Any:
+        """Return the value stored for `key`: `namespace['key']`."""
+
+        return self._d[key]
+
+    def __hash__(self) -> int:
+        """Return the hash cached by `__init__`."""
+
+        return self._hash
+
+    def __repr__(self) -> str:  # pragma: no cover
+        """Return the `repr` of the underlying dict."""
+
+        return "{!r}".format(self._d)
+
+    __str__ = __repr__
+
+
+class Namespaces(ImmutableDict):
+    """Immutable namespace mapping restricted to string entries."""
+
+    def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
+        """Build the mapping from a dict or an iterable of pairs."""
+
+        super().__init__(arg)
+
+    def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
+        """Raise `TypeError` unless dict values, or both members of every pair, are `str`."""
+
+        if isinstance(arg, dict):
+            if not all(isinstance(v, str) for v in arg.values()):
+                raise TypeError('{} values must be Unicode strings'.format(self.__class__.__name__))
+        elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
+            raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
+
+
+class CustomSelectors(ImmutableDict):
+    """Immutable custom-selector mapping restricted to string entries."""
+
+    def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
+        """Build the mapping from a dict or an iterable of pairs."""
+
+        super().__init__(arg)
+
+    def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
+        """Raise `TypeError` unless dict values, or both members of every pair, are `str`."""
+
+        if isinstance(arg, dict):
+            if not all(isinstance(v, str) for v in arg.values()):
+                raise TypeError('{} values must be Unicode strings'.format(self.__class__.__name__))
+        elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
+            raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
+
+
+class Selector(Immutable):
+    """Immutable record of one parsed selector; `__slots__` lists the parts it carries."""
+
+    __slots__ = (
+        'tag', 'ids', 'classes', 'attributes', 'nth', 'selectors',
+        'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'  # `_hash` last: `_pickle` drops the final slot
+    )
+
+    tag: Optional['SelectorTag']
+    ids: Tuple[str, ...]
+    classes: Tuple[str, ...]
+    attributes: Tuple['SelectorAttribute', ...]
+    nth: Tuple['SelectorNth', ...]
+    selectors: Tuple['SelectorList', ...]
+    relation: 'SelectorList'
+    rel_type: Optional[str]
+    contains: Tuple['SelectorContains', ...]
+    lang: Tuple['SelectorLang', ...]
+    flags: int  # presumably a combination of the SEL_* bits; confirm against the parser
+
+    def __init__(
+        self,
+        tag: Optional['SelectorTag'],
+        ids: Tuple[str, ...],
+        classes: Tuple[str, ...],
+        attributes: Tuple['SelectorAttribute', ...],
+        nth: Tuple['SelectorNth', ...],
+        selectors: Tuple['SelectorList', ...],
+        relation: 'SelectorList',
+        rel_type: Optional[str],
+        contains: Tuple['SelectorContains', ...],
+        lang: Tuple['SelectorLang', ...],
+        flags: int
+    ) -> None:
+        """Hand every part to `Immutable.__init__`, which stores it and computes the hash."""
+
+        super().__init__(
+            tag=tag,
+            ids=ids,
+            classes=classes,
+            attributes=attributes,
+            nth=nth,
+            selectors=selectors,
+            relation=relation,
+            rel_type=rel_type,
+            contains=contains,
+            lang=lang,
+            flags=flags
+        )
+
+
+class SelectorNull(Immutable):
+    """Selector placeholder that carries no fields of its own."""
+
+    def __init__(self) -> None:
+        """Initialize with no attributes; only the base-class hash is set."""
+
+        super().__init__()
+
+
+class SelectorTag(Immutable):
+    """Immutable pair of a tag `name` and an optional `prefix`."""
+
+    __slots__ = ("name", "prefix", "_hash")
+
+    name: str
+    prefix: Optional[str]
+
+    def __init__(self, name: str, prefix: Optional[str]) -> None:
+        """Store `name` and `prefix` unchanged."""
+
+        super().__init__(name=name, prefix=prefix)
+
+
+class SelectorAttribute(Immutable):
+    """Immutable attribute rule: an attribute name, its prefix and up to two compiled patterns."""
+
+    __slots__ = ("attribute", "prefix", "pattern", "xml_type_pattern", "_hash")
+
+    attribute: str
+    prefix: str
+    pattern: Optional[Pattern[str]]
+    xml_type_pattern: Optional[Pattern[str]]
+
+    def __init__(
+        self,
+        attribute: str,
+        prefix: str,
+        pattern: Optional[Pattern[str]],
+        xml_type_pattern: Optional[Pattern[str]]
+    ) -> None:
+        """Store the four fields unchanged; either pattern may be `None`."""
+
+        super().__init__(
+            attribute=attribute,
+            prefix=prefix,
+            pattern=pattern,
+            xml_type_pattern=xml_type_pattern
+        )
+
+
+class SelectorContains(Immutable):
+    """Immutable contains rule: a tuple of text strings plus the `own` flag."""
+
+    __slots__ = ("text", "own", "_hash")
+
+    text: Tuple[str, ...]
+    own: bool
+
+    def __init__(self, text: Iterable[str], own: bool) -> None:
+        """Freeze `text` into a tuple and store it together with `own`."""
+
+        super().__init__(text=tuple(text), own=own)
+
+
+class SelectorNth(Immutable):
+    """Immutable nth rule: the `a`, `n`, `b` values, the `of_type`/`last` flags and a selector list."""
+
+    __slots__ = ("a", "n", "b", "of_type", "last", "selectors", "_hash")
+
+    a: int
+    n: bool
+    b: int
+    of_type: bool
+    last: bool
+    selectors: 'SelectorList'
+
+    def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: 'SelectorList') -> None:
+        """Store the six fields unchanged."""
+
+        super().__init__(
+            a=a,
+            n=n,
+            b=b,
+            of_type=of_type,
+            last=last,
+            selectors=selectors
+        )
+
+
+class SelectorLang(Immutable):
+    """Immutable, sequence-like holder for a tuple of language strings."""
+
+    __slots__ = ("languages", "_hash",)
+
+    languages: Tuple[str, ...]
+
+    def __init__(self, languages: Iterable[str]) -> None:
+        """Freeze `languages` into a tuple and store it."""
+
+        super().__init__(languages=tuple(languages))
+
+    def __iter__(self) -> Iterator[str]:
+        """Iterate over the stored languages."""
+
+        return iter(self.languages)
+
+    def __len__(self) -> int:  # pragma: no cover
+        """Return the number of stored languages."""
+
+        return len(self.languages)
+
+    def __getitem__(self, index: int) -> str:  # pragma: no cover
+        """Return the language at position `index`."""
+
+        return self.languages[index]
+
+
+class SelectorList(Immutable):
+    """Immutable, sequence-like tuple of selectors plus the `is_not` and `is_html` flags."""
+
+    __slots__ = ("selectors", "is_not", "is_html", "_hash")
+
+    selectors: Tuple[Union['Selector', 'SelectorNull'], ...]
+    is_not: bool
+    is_html: bool
+
+    def __init__(
+        self,
+        selectors: Optional[Iterable[Union['Selector', 'SelectorNull']]] = None,
+        is_not: bool = False,
+        is_html: bool = False
+    ) -> None:
+        """Freeze `selectors` into a tuple (empty when `None`) and store both flags."""
+
+        super().__init__(
+            selectors=tuple(selectors) if selectors is not None else tuple(),
+            is_not=is_not,
+            is_html=is_html
+        )
+
+    def __iter__(self) -> Iterator[Union['Selector', 'SelectorNull']]:
+        """Iterate over the stored selectors."""
+
+        return iter(self.selectors)
+
+    def __len__(self) -> int:
+        """Return the number of stored selectors."""
+
+        return len(self.selectors)
+
+    def __getitem__(self, index: int) -> Union['Selector', 'SelectorNull']:
+        """Return the selector at position `index`."""
+
+        return self.selectors[index]
+
+
+def _pickle(p: Any) -> Any:  # reducer for `copyreg`: (constructor, constructor args)
+    return p.__base__(), tuple(getattr(p, slot) for slot in p.__slots__[:-1])  # `[:-1]` drops the final slot
+
+
+def pickle_register(obj: Any) -> None:
+    """Register `_pickle` with `copyreg` as the reduction function for the class `obj`."""
+
+    copyreg.pickle(obj, _pickle)
+
+
+pickle_register(Selector)  # each selector class below is registered with the same reducer
+pickle_register(SelectorNull)
+pickle_register(SelectorTag)
+pickle_register(SelectorAttribute)
+pickle_register(SelectorContains)
+pickle_register(SelectorNth)
+pickle_register(SelectorLang)
+pickle_register(SelectorList)
diff --git a/lib/soupsieve_old/pretty.py b/lib/soupsieve_old/pretty.py
new file mode 100644
index 00000000..57d16c97
--- /dev/null
+++ b/lib/soupsieve_old/pretty.py
@@ -0,0 +1,137 @@
+"""
+Format a pretty string of a `SoupSieve` object for easy debugging.
+
+This won't necessarily support all types, and it definitely does
+not support custom outputs.
+
+It is mainly geared towards our own types, as the `SelectorList`
+object is a beast to look at without some indentation and newlines.
+The format and the various output types are fairly well known (though this
+hasn't been tested extensively to make sure we aren't missing corner cases).
+
+Example:
+
+```
+>>> import soupsieve as sv
+>>> sv.compile('this > that.class[name=value]').selectors.pretty()
+SelectorList(
+    selectors=(
+        Selector(
+            tag=SelectorTag(
+                name='that',
+                prefix=None),
+            ids=(),
+            classes=(
+                'class',
+                ),
+            attributes=(
+                SelectorAttribute(
+                    attribute='name',
+                    prefix='',
+                    pattern=re.compile(
+                        '^value$'),
+                    xml_type_pattern=None),
+                ),
+            nth=(),
+            selectors=(),
+            relation=SelectorList(
+                selectors=(
+                    Selector(
+                        tag=SelectorTag(
+                            name='this',
+                            prefix=None),
+                        ids=(),
+                        classes=(),
+                        attributes=(),
+                        nth=(),
+                        selectors=(),
+                        relation=SelectorList(
+                            selectors=(),
+                            is_not=False,
+                            is_html=False),
+                        rel_type='>',
+                        contains=(),
+                        lang=(),
+                        flags=0),
+                    ),
+                is_not=False,
+                is_html=False),
+            rel_type=None,
+            contains=(),
+            lang=(),
+            flags=0),
+        ),
+    is_not=False,
+    is_html=False)
+```
+"""
+import re
+from typing import Any
+
+RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]+\(')  # name of 2+ chars (dots allowed) directly followed by `(`
+RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=')  # name of 2+ chars directly followed by `=`
+RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')  # an empty `()`, `[]` or `{}` pair
+RE_LSTRT = re.compile(r'\[')  # opening bracket of each kind
+RE_DSTRT = re.compile(r'\{')
+RE_TSTRT = re.compile(r'\(')
+RE_LEND = re.compile(r'\]')  # closing bracket of each kind
+RE_DEND = re.compile(r'\}')
+RE_TEND = re.compile(r'\)')
+RE_INT = re.compile(r'\d+')  # unsigned run of digits
+RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]+')  # bare word of 2+ chars
+RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')  # double-quoted string with backslash escapes
+RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")  # single-quoted string with backslash escapes
+RE_SEP = re.compile(r'\s*(,)\s*')  # comma plus any surrounding whitespace
+RE_DSEP = re.compile(r'\s*(:)\s*')  # colon plus any surrounding whitespace
+
+TOKENS = {  # `pretty` tries these in insertion order and uses the first pattern that matches
+    'class': RE_CLASS,
+    'param': RE_PARAM,
+    'empty': RE_EMPTY,
+    'lstrt': RE_LSTRT,
+    'dstrt': RE_DSTRT,
+    'tstrt': RE_TSTRT,
+    'lend': RE_LEND,
+    'dend': RE_DEND,
+    'tend': RE_TEND,
+    'sqstr': RE_SQSTR,
+    'sep': RE_SEP,
+    'dsep': RE_DSEP,
+    'int': RE_INT,
+    'kword': RE_KWORD,
+    'dqstr': RE_DQSTR
+}
+
+
+def pretty(obj: Any) -> str:  # pragma: no cover
+    """Return `str(obj)` re-flowed with newlines and 4-space indentation; unknown characters pass through."""
+
+    sel = str(obj)
+    index = indent = 0
+    output = []
+
+    while index < len(sel):
+        for name, regex in TOKENS.items():
+            m = regex.match(sel, index)
+            if not m:
+                continue
+            index = m.end(0)
+            if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
+                indent += 4
+                output.append('{}\n{}'.format(m.group(0), " " * indent))
+            elif name in ('lend', 'dend', 'tend'):
+                indent -= 4
+                output.append(m.group(0))
+            elif name == 'sep':
+                output.append('{}\n{}'.format(m.group(1), " " * indent))
+            elif name == 'dsep':
+                output.append('{} '.format(m.group(1)))
+            else:
+                output.append(m.group(0))
+            break
+        else:
+            # No token matched (e.g. `-`, `.`, a one-letter name): copy the character so the scan advances.
+            output.append(sel[index])
+            index += 1
+
+    return ''.join(output)
diff --git a/lib/soupsieve_old/py.typed b/lib/soupsieve_old/py.typed
new file mode 100644
index 00000000..e69de29b
diff --git a/lib/soupsieve_old/util.py b/lib/soupsieve_old/util.py
new file mode 100644
index 00000000..2b1ed24b
--- /dev/null
+++ b/lib/soupsieve_old/util.py
@@ -0,0 +1,116 @@
+"""Utility."""
+from functools import wraps, lru_cache
+import warnings
+import re
+from typing import Callable, Any, Optional, Tuple, List
+
+DEBUG = 0x00001
+
+RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$')  # a `\r\n`, a lone `\n`/`\r`, or end of text
+
+UC_A = ord('A')  # code point bounds that `lower` uses to detect ASCII uppercase letters
+UC_Z = ord('Z')
+
+
+@lru_cache(maxsize=512)
+def lower(string: str) -> str:
+    """Lowercase only ASCII `A`-`Z`; every other character is returned untouched."""
+
+    def fold(char: str) -> str:
+        code = ord(char)
+        return chr(code + 32) if UC_A <= code <= UC_Z else char
+
+    return ''.join(map(fold, string))
+
+
+class SelectorSyntaxError(Exception):
+    """Syntax error in a CSS selector, optionally carrying the line, column and marked-up context."""
+
+    def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None:
+        """Build the error; when both `pattern` and `index` are given, append the located context to `msg`."""
+
+        self.line = None
+        self.col = None
+        self.context = None
+
+        if pattern is not None and index is not None:
+            # Format pattern to show line and column position
+            self.context, self.line, self.col = get_pattern_context(pattern, index)
+            msg = '{}\n  line {}:\n{}'.format(msg, self.line, self.context)
+
+        super().__init__(msg)
+
+
+def deprecated(message: str, stacklevel: int = 2) -> Callable[..., Any]:  # pragma: no cover
+    """
+    Emit a `DeprecationWarning` each time the wrapped function/method is called.
+
+    Usage:
+
+        @deprecated("This method will be removed in version X; use Y instead.")
+        def some_method():
+            pass
+    """
+
+    def _wrapper(func: Callable[..., Any]) -> Callable[..., Any]:
+        @wraps(func)
+        def _deprecated_func(*args: Any, **kwargs: Any) -> Any:
+            warnings.warn(
+                f"'{func.__name__}' is deprecated. {message}",
+                category=DeprecationWarning,
+                stacklevel=stacklevel
+            )
+            return func(*args, **kwargs)
+        return _deprecated_func
+    return _wrapper
+
+
+def warn_deprecated(message: str, stacklevel: int = 2) -> None:  # pragma: no cover
+    """Emit `message` as a `DeprecationWarning`, passing `stacklevel` through to `warnings.warn`."""
+
+    warnings.warn(
+        message,
+        category=DeprecationWarning,
+        stacklevel=stacklevel
+    )
+
+
+def get_pattern_context(pattern: str, index: int) -> Tuple[str, int, int]:
+    """Render `pattern` with a `^` under `index`; return `(text, line, col)` with 1-based line and column."""
+
+    last = 0
+    current_line = 1
+    col = 1
+    text = []  # type: List[str]
+    line = 1
+    offset = None  # type: Optional[int]
+
+    # Split pattern by newline and handle the text before the newline
+    for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):
+        linetext = pattern[last:m.start(0)]
+        if not len(m.group(0)) and not len(text):
+            indent = ''
+            offset = -1
+            col = index - last + 1
+        # The final line ends with the empty `$` match, so it also owns an index equal to the pattern length.
+        elif last <= index < m.end(0) or (not len(m.group(0)) and index == m.end(0)):
+            indent = '--> '
+            offset = (-1 if index > m.start(0) else 0) + 3
+            col = index - last + 1
+        else:
+            indent = '    '
+            offset = None
+        if len(text):
+            # Whether the pattern used `\r\n`, `\r`, or `\n`, the output is rendered with just `\n`;
+            # the reported column is still computed from the original text.
+            text.append('\n')
+        text.append('{}{}'.format(indent, linetext))
+        if offset is not None:
+            text.append('\n')
+            text.append(' ' * (col + offset) + '^')
+            line = current_line
+
+        current_line += 1
+        last = m.end(0)
+
+    return ''.join(text), line, col