diff --git a/CHANGELOG.md b/CHANGELOG.md
index 79556c15..01e9909a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,17 @@
 # Changelog
 
+## v0.6.2
+Released 26 May 2024
+
+Highlights:
+* Added soulseek support
+* Added bandcamp support
+* Changes and dependency updates to work with Python >= 3.12
+
+The full list of commits can be found [here](https://github.com/rembo10/headphones/compare/v0.6.1...v0.6.2).
+
 ## v0.6.1
 Released 26 November 2023
 
 Highlights:
 * Dependency updates to work with > Python 3.11
diff --git a/data/interfaces/default/config.html b/data/interfaces/default/config.html
index 9da1a1df..77d3f27c 100644
--- a/data/interfaces/default/config.html
+++ b/data/interfaces/default/config.html
@@ -310,6 +310,16 @@
+
+ Bandcamp +
+ + + Full path where raw MP3s will be stored, e.g. /Users/name/Downloads/bandcamp +
+
@@ -317,7 +327,7 @@ Black Hole Transmission uTorrent (Beta) - Deluge (Beta) + Deluge QBitTorrent
@@ -438,6 +448,11 @@ Labels shouldn't contain spaces (requires Label plugin) +
+ + + Directory where Deluge should download to +
@@ -467,7 +482,33 @@ NZBs Torrents - No Preference + Soulseek + No Preference +
+
+ + +
+ Soulseek +
+ + +
+
+ + +
+
+ + +
+
+ +
@@ -579,6 +620,19 @@ +
+ Other +
+
+ +
+
+
+
+ +
+
+
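Worth noting how the UI fields above persist: each maps to a definition added to `_CONFIG_DEFINITIONS` in headphones/config.py later in this diff, as `(type, ini section, default)` triples. A hypothetical fragment of the resulting config.ini, built with Python's configparser — the section names come from the diff, but the lowercase on-disk key spelling and all values are assumptions:

```python
# Sketch only: sections/keys mirror the _CONFIG_DEFINITIONS entries added
# in this diff; the lowercase on-disk spelling and the values are assumed.
import configparser

config = configparser.ConfigParser()
config['Soulseek'] = {
    'soulseek': '1',                              # enable the provider
    'soulseek_api_url': 'http://localhost:5030',  # slskd instance (example)
    'soulseek_api_key': 'changeme',
    'soulseek_download_dir': '/music/complete',
    'soulseek_incomplete_download_dir': '/music/incomplete',
}
config['General'] = {
    'bandcamp': '1',
    'bandcamp_dir': '/Users/name/Downloads/bandcamp',  # matches the hint above
}
config['Deluge'] = {
    'deluge_download_directory': '/downloads/deluge',
}
with open('config.ini', 'w') as fp:
    config.write(fp)
```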
diff --git a/data/interfaces/default/history.html b/data/interfaces/default/history.html
index c76875e1..a93c5157 100644
--- a/data/interfaces/default/history.html
+++ b/data/interfaces/default/history.html
@@ -56,6 +56,8 @@
             fileid = 'torrent'
         if item['URL'].find('codeshy') != -1:
             fileid = 'nzb'
+        if item['URL'].find('bandcamp') != -1:
+            fileid = 'bandcamp'
 
         folder = 'Folder: ' + item['FolderName']
diff --git a/headphones/bandcamp.py b/headphones/bandcamp.py
new file mode 100644
index 00000000..ed78276f
--- /dev/null
+++ b/headphones/bandcamp.py
@@ -0,0 +1,166 @@
+# This file is part of Headphones.
+#
+# Headphones is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Headphones is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Headphones.  If not, see <http://www.gnu.org/licenses/>.
+
+import headphones
+import json
+import os
+import re
+
+from headphones import logger, helpers, metadata, request
+from headphones.common import USER_AGENT
+from headphones.types import Result
+
+from mediafile import MediaFile, UnreadableFileError
+from bs4 import BeautifulSoup
+from bs4 import FeatureNotFound
+
+
+def search(album, albumlength=None, page=1, resultlist=None):
+    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
+           '"': '', ',': '', '*': '', '.': '', ':': ''}
+    if resultlist is None:
+        resultlist = []
+
+    cleanalbum = helpers.latinToAscii(
+        helpers.replace_all(album['AlbumTitle'], dic)
+    ).strip()
+    cleanartist = helpers.latinToAscii(
+        helpers.replace_all(album['ArtistName'], dic)
+    ).strip()
+
+    headers = {'User-Agent': USER_AGENT}
+    params = {
+        "page": page,
+        "q": cleanalbum,
+    }
+    logger.info("Looking up https://bandcamp.com/search with {}".format(
+        params))
+    content = request.request_content(
+        url='https://bandcamp.com/search',
+        params=params,
+        headers=headers
+    ).decode('utf8')
+    try:
+        soup = BeautifulSoup(content, "html5lib")
+    except FeatureNotFound:
+        soup = BeautifulSoup(content, "html.parser")
+
+    for item in soup.find_all("li", class_="searchresult"):
+        type = item.find('div', class_='itemtype').text.strip().lower()
+        if type == "album":
+            data = parse_album(item)
+
+            cleanartist_found = helpers.latinToAscii(data['artist'])
+            cleanalbum_found = helpers.latinToAscii(data['album'])
+
+            logger.debug(u"{} - {}".format(data['album'], cleanalbum_found))
+
+            logger.debug("Comparing {} to {}".format(
+                cleanalbum, cleanalbum_found))
+            if (cleanartist.lower() == cleanartist_found.lower() and
+                    cleanalbum.lower() == cleanalbum_found.lower()):
+                resultlist.append(Result(
+                    data['title'], data['size'], data['url'],
+                    'bandcamp', 'bandcamp', True))
+            else:
+                continue
+
+    if soup.find('a', class_='next'):
+        page += 1
+        logger.debug("Calling next page ({})".format(page))
+        search(album, albumlength=albumlength,
+               page=page, resultlist=resultlist)
+
+    return resultlist
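A note on the scraping step in download() just below: Bandcamp embeds the track list as JSON inside an HTML-escaped data attribute, so the pattern matches the escaped quote entity (`&quot;`) and the captured list is unescaped before json.loads. A self-contained sketch of just that step, run on a fabricated page fragment:

```python
# Fabricated page fragment; real Bandcamp pages embed this JSON
# HTML-escaped inside a data-tralbum attribute.
import json
import re

html = 'data-tralbum="{&quot;trackinfo&quot;:[{&quot;title&quot;:&quot;Intro&quot;}],&quot;current&quot;:1}"'
raw = re.search(r"trackinfo&quot;:(\[.*?\]),", html).group(1)
tracks = json.loads(raw.replace('&quot;', '"'))
print(tracks[0]['title'])  # -> Intro
```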
json: {}".format(e)) + + directory = os.path.join( + headphones.CONFIG.BANDCAMP_DIR, + u'{} - {}'.format( + album['ArtistName'].replace('/', '_'), + album['AlbumTitle'].replace('/', '_'))) + directory = helpers.latinToAscii(directory) + + if not os.path.exists(directory): + try: + os.makedirs(directory) + except Exception as e: + logger.warn("Could not create directory ({})".format(e)) + + index = 1 + for track in trackinfo: + filename = helpers.replace_illegal_chars( + u'{:02d} - {}.mp3'.format(index, track['title'])) + fullname = os.path.join(directory.encode('utf-8'), + filename.encode('utf-8')) + logger.debug("Downloading to {}".format(fullname)) + + if 'file' in track and track['file'] != None and 'mp3-128' in track['file']: + content = request.request_content(track['file']['mp3-128']) + open(fullname, 'wb').write(content) + try: + f = MediaFile(fullname) + date, year = metadata._date_year(album) + f.update({ + 'artist': album['ArtistName'].encode('utf-8'), + 'album': album['AlbumTitle'].encode('utf-8'), + 'title': track['title'].encode('utf-8'), + 'track': track['track_num'], + 'tracktotal': len(trackinfo), + 'year': year, + }) + f.save() + except UnreadableFileError as ex: + logger.warn("MediaFile couldn't parse: %s (%s)", + fullname, + str(ex)) + + index += 1 + + return directory + + +def parse_album(item): + album = item.find('div', class_='heading').text.strip() + artist = item.find('div', class_='subhead').text.strip().replace("by ", "") + released = item.find('div', class_='released').text.strip().replace( + "released ", "") + year = re.search(r"(\d{4})", released).group(1) + + url = item.find('div', class_='heading').find('a')['href'].split("?")[0] + + length = item.find('div', class_='length').text.strip() + tracks, minutes = length.split(",") + tracks = tracks.replace(" tracks", "").replace(" track", "").strip() + minutes = minutes.replace(" minutes", "").strip() + # bandcamp offers mp3 128b with should be 960KB/minute + size = int(minutes) * 983040 + + data = {"title": u'{} - {} [{}]'.format(artist, album, year), + "artist": artist, "album": album, + "url": url, "size": size} + + return data diff --git a/headphones/common.py b/headphones/common.py index a311860a..3461a18e 100644 --- a/headphones/common.py +++ b/headphones/common.py @@ -102,36 +102,6 @@ class Quality: return (anyQualities, bestQualities) - @staticmethod - def nameQuality(name): - - def checkName(list, func): - return func([re.search(x, name, re.I) for x in list]) - - name = os.path.basename(name) - - # if we have our exact text then assume we put it there - for x in Quality.qualityStrings: - if x == Quality.UNKNOWN: - continue - - regex = '\W' + Quality.qualityStrings[x].replace(' ', '\W') + '\W' - regex_match = re.search(regex, name, re.I) - if regex_match: - return x - - # TODO: fix quality checking here - if checkName(["mp3", "192"], any) and not checkName(["flac"], all): - return Quality.B192 - elif checkName(["mp3", "256"], any) and not checkName(["flac"], all): - return Quality.B256 - elif checkName(["mp3", "vbr"], any) and not checkName(["flac"], all): - return Quality.VBR - elif checkName(["mp3", "320"], any) and not checkName(["flac"], all): - return Quality.B320 - else: - return Quality.UNKNOWN - @staticmethod def assumeQuality(name): if name.lower().endswith(".mp3"): @@ -158,13 +128,6 @@ class Quality: return (Quality.NONE, status) - @staticmethod - def statusFromName(name, assume=True): - quality = Quality.nameQuality(name) - if assume and quality == Quality.UNKNOWN: - quality = 
diff --git a/headphones/common.py b/headphones/common.py
index a311860a..3461a18e 100644
--- a/headphones/common.py
+++ b/headphones/common.py
@@ -102,36 +102,6 @@ class Quality:
 
         return (anyQualities, bestQualities)
 
-    @staticmethod
-    def nameQuality(name):
-
-        def checkName(list, func):
-            return func([re.search(x, name, re.I) for x in list])
-
-        name = os.path.basename(name)
-
-        # if we have our exact text then assume we put it there
-        for x in Quality.qualityStrings:
-            if x == Quality.UNKNOWN:
-                continue
-
-            regex = '\W' + Quality.qualityStrings[x].replace(' ', '\W') + '\W'
-            regex_match = re.search(regex, name, re.I)
-            if regex_match:
-                return x
-
-        # TODO: fix quality checking here
-        if checkName(["mp3", "192"], any) and not checkName(["flac"], all):
-            return Quality.B192
-        elif checkName(["mp3", "256"], any) and not checkName(["flac"], all):
-            return Quality.B256
-        elif checkName(["mp3", "vbr"], any) and not checkName(["flac"], all):
-            return Quality.VBR
-        elif checkName(["mp3", "320"], any) and not checkName(["flac"], all):
-            return Quality.B320
-        else:
-            return Quality.UNKNOWN
-
     @staticmethod
     def assumeQuality(name):
         if name.lower().endswith(".mp3"):
@@ -158,13 +128,6 @@
 
         return (Quality.NONE, status)
 
-    @staticmethod
-    def statusFromName(name, assume=True):
-        quality = Quality.nameQuality(name)
-        if assume and quality == Quality.UNKNOWN:
-            quality = Quality.assumeQuality(name)
-        return Quality.compositeStatus(DOWNLOADED, quality)
-
     DOWNLOADED = None
     SNATCHED = None
     SNATCHED_PROPER = None
diff --git a/headphones/config.py b/headphones/config.py
index d7a19f32..e7f579b2 100644
--- a/headphones/config.py
+++ b/headphones/config.py
@@ -80,6 +80,7 @@
     'DELUGE_PASSWORD': (str, 'Deluge', ''),
     'DELUGE_LABEL': (str, 'Deluge', ''),
     'DELUGE_DONE_DIRECTORY': (str, 'Deluge', ''),
+    'DELUGE_DOWNLOAD_DIRECTORY': (str, 'Deluge', ''),
     'DELUGE_PAUSED': (int, 'Deluge', 0),
     'DESTINATION_DIR': (str, 'General', ''),
     'DETECT_BITRATE': (int, 'General', 0),
@@ -269,6 +270,11 @@
     'SONGKICK_ENABLED': (int, 'Songkick', 1),
     'SONGKICK_FILTER_ENABLED': (int, 'Songkick', 0),
     'SONGKICK_LOCATION': (str, 'Songkick', ''),
+    'SOULSEEK_API_URL': (str, 'Soulseek', ''),
+    'SOULSEEK_API_KEY': (str, 'Soulseek', ''),
+    'SOULSEEK_DOWNLOAD_DIR': (str, 'Soulseek', ''),
+    'SOULSEEK_INCOMPLETE_DOWNLOAD_DIR': (str, 'Soulseek', ''),
+    'SOULSEEK': (int, 'Soulseek', 0),
     'SUBSONIC_ENABLED': (int, 'Subsonic', 0),
     'SUBSONIC_HOST': (str, 'Subsonic', ''),
     'SUBSONIC_PASSWORD': (str, 'Subsonic', ''),
@@ -317,7 +323,9 @@
     'XBMC_PASSWORD': (str, 'XBMC', ''),
     'XBMC_UPDATE': (int, 'XBMC', 0),
     'XBMC_USERNAME': (str, 'XBMC', ''),
-    'XLDPROFILE': (str, 'General', '')
+    'XLDPROFILE': (str, 'General', ''),
+    'BANDCAMP': (int, 'General', 0),
+    'BANDCAMP_DIR': (path, 'General', '')
 }
diff --git a/headphones/deluge.py b/headphones/deluge.py
index 578adc6d..5130fb96 100644
--- a/headphones/deluge.py
+++ b/headphones/deluge.py
@@ -58,12 +58,12 @@ def _scrubber(text):
     if scrub_logs:
         try:
             # URL parameter values
-            text = re.sub('=[0-9a-zA-Z]*', '=REMOVED', text)
+            text = re.sub(r'=[0-9a-zA-Z]*', r'=REMOVED', text)
             # Local host with port
             # text = re.sub('\:\/\/.*\:', '://REMOVED:', text)  # just host
-            text = re.sub('\:\/\/.*\:[0-9]*', '://REMOVED:', text)
+            text = re.sub(r'\:\/\/.*\:[0-9]*', r'://REMOVED:', text)
             # Session cookie
-            text = re.sub("_session_id'\: '.*'", "_session_id': 'REMOVED'", text)
+            text = re.sub(r"_session_id'\: '.*'", r"_session_id': 'REMOVED'", text)
             # Local Windows user path
             if text.lower().startswith('c:\\users\\'):
                 k = text.split('\\')
@@ -128,9 +128,9 @@ def addTorrent(link, data=None, name=None):
             # Extract torrent name from .torrent
             try:
                 logger.debug('Deluge: Getting torrent name length')
-                name_length = int(re.findall('name([0-9]*)\:.*?\:', str(torrentfile))[0])
+                name_length = int(re.findall(r'name([0-9]*)\:.*?\:', str(torrentfile))[0])
                 logger.debug('Deluge: Getting torrent name')
-                name = re.findall('name[0-9]*\:(.*?)\:', str(torrentfile))[0][:name_length]
+                name = re.findall(r'name[0-9]*\:(.*?)\:', str(torrentfile))[0][:name_length]
             except Exception as e:
                 logger.debug('Deluge: Could not get torrent name, getting file name')
                 # get last part of link/path (name only)
@@ -160,9 +160,9 @@ def addTorrent(link, data=None, name=None):
             # Extract torrent name from .torrent
             try:
                 logger.debug('Deluge: Getting torrent name length')
-                name_length = int(re.findall('name([0-9]*)\:.*?\:', str(torrentfile))[0])
+                name_length = int(re.findall(r'name([0-9]*)\:.*?\:', str(torrentfile))[0])
                 logger.debug('Deluge: Getting torrent name')
-                name = re.findall('name[0-9]*\:(.*?)\:', str(torrentfile))[0][:name_length]
+                name = re.findall(r'name[0-9]*\:(.*?)\:', str(torrentfile))[0][:name_length]
             except Exception as e:
                 logger.debug('Deluge: Could not get torrent name, getting file name')
                 # get last part of link/path (name only)
@@ -466,19 
+466,56 @@ def _add_torrent_url(result): def _add_torrent_file(result): logger.debug('Deluge: Adding file') + + options = {} + + if headphones.CONFIG.DELUGE_DOWNLOAD_DIRECTORY: + options['download_location'] = headphones.CONFIG.DELUGE_DOWNLOAD_DIRECTORY + + if headphones.CONFIG.DELUGE_DONE_DIRECTORY or headphones.CONFIG.DOWNLOAD_TORRENT_DIR: + options['move_completed'] = 1 + if headphones.CONFIG.DELUGE_DONE_DIRECTORY: + options['move_completed_path'] = headphones.CONFIG.DELUGE_DONE_DIRECTORY + else: + options['move_completed_path'] = headphones.CONFIG.DOWNLOAD_TORRENT_DIR + + if headphones.CONFIG.DELUGE_PAUSED: + options['add_paused'] = headphones.CONFIG.DELUGE_PAUSED + if not any(delugeweb_auth): _get_auth() try: # content is torrent file contents that needs to be encoded to base64 post_data = json.dumps({"method": "core.add_torrent_file", "params": [result['name'] + '.torrent', - b64encode(result['content']).decode(), {}], + b64encode(result['content'].encode('utf8')), + options], "id": 2}) response = requests.post(delugeweb_url, data=post_data.encode('utf-8'), cookies=delugeweb_auth, verify=deluge_verify_cert, headers=headers) result['hash'] = json.loads(response.text)['result'] logger.debug('Deluge: Response was %s' % str(json.loads(response.text))) return json.loads(response.text)['result'] + except UnicodeDecodeError: + try: + # content is torrent file contents that needs to be encoded to base64 + # this time let's try leaving the encoding as is + logger.debug('Deluge: There was a decoding issue, let\'s try again') + post_data = json.dumps({"method": "core.add_torrent_file", + "params": [result['name'].decode('utf8') + '.torrent', + b64encode(result['content']), + options], + "id": 22}) + response = requests.post(delugeweb_url, data=post_data.encode('utf-8'), cookies=delugeweb_auth, + verify=deluge_verify_cert, headers=headers) + result['hash'] = json.loads(response.text)['result'] + logger.debug('Deluge: Response was %s' % str(json.loads(response.text))) + return json.loads(response.text)['result'] + except Exception as e: + logger.error('Deluge: Adding torrent file failed after decode: %s' % str(e)) + formatted_lines = traceback.format_exc().splitlines() + logger.error('; '.join(formatted_lines)) + return False except Exception as e: logger.error('Deluge: Adding torrent file failed: %s' % str(e)) formatted_lines = traceback.format_exc().splitlines() @@ -566,61 +603,3 @@ def setSeedRatio(result): return None -def setTorrentPath(result): - logger.debug('Deluge: Setting download path') - if not any(delugeweb_auth): - _get_auth() - - try: - if headphones.CONFIG.DELUGE_DONE_DIRECTORY or headphones.CONFIG.DOWNLOAD_TORRENT_DIR: - post_data = json.dumps({"method": "core.set_torrent_move_completed", - "params": [result['hash'], True], - "id": 7}) - response = requests.post(delugeweb_url, data=post_data.encode('utf-8'), cookies=delugeweb_auth, - verify=deluge_verify_cert, headers=headers) - - if headphones.CONFIG.DELUGE_DONE_DIRECTORY: - move_to = headphones.CONFIG.DELUGE_DONE_DIRECTORY - else: - move_to = headphones.CONFIG.DOWNLOAD_TORRENT_DIR - - if not os.path.exists(move_to): - logger.debug('Deluge: %s directory doesn\'t exist, let\'s create it' % move_to) - os.makedirs(move_to) - post_data = json.dumps({"method": "core.set_torrent_move_completed_path", - "params": [result['hash'], move_to], - "id": 8}) - response = requests.post(delugeweb_url, data=post_data.encode('utf-8'), cookies=delugeweb_auth, - verify=deluge_verify_cert, headers=headers) - - return not 
json.loads(response.text)['error'] - - return True - except Exception as e: - logger.error('Deluge: Setting torrent move-to directory failed: %s' % str(e)) - formatted_lines = traceback.format_exc().splitlines() - logger.error('; '.join(formatted_lines)) - return None - - -def setTorrentPause(result): - logger.debug('Deluge: Pausing torrent') - if not any(delugeweb_auth): - _get_auth() - - try: - if headphones.CONFIG.DELUGE_PAUSED: - post_data = json.dumps({"method": "core.pause_torrent", - "params": [[result['hash']]], - "id": 9}) - response = requests.post(delugeweb_url, data=post_data.encode('utf-8'), cookies=delugeweb_auth, - verify=deluge_verify_cert, headers=headers) - - return not json.loads(response.text)['error'] - - return True - except Exception as e: - logger.error('Deluge: Setting torrent paused failed: %s' % str(e)) - formatted_lines = traceback.format_exc().splitlines() - logger.error('; '.join(formatted_lines)) - return None diff --git a/headphones/helpers.py b/headphones/helpers.py index c8f53c64..ea8960f2 100644 --- a/headphones/helpers.py +++ b/headphones/helpers.py @@ -184,7 +184,7 @@ def bytes_to_mb(bytes): def mb_to_bytes(mb_str): - result = re.search('^(\d+(?:\.\d+)?)\s?(?:mb)?', mb_str, flags=re.I) + result = re.search(r"^(\d+(?:\.\d+)?)\s?(?:mb)?", mb_str, flags=re.I) if result: return int(float(result.group(1)) * 1048576) @@ -253,9 +253,9 @@ def replace_all(text, dic): def replace_illegal_chars(string, type="file"): if type == "file": - string = re.sub('[\?"*:|<>/]', '_', string) + string = re.sub(r"[\?\"*:|<>/]", "_", string) if type == "folder": - string = re.sub('[:\?<>"|*]', '_', string) + string = re.sub(r"[:\?<>\"|*]", "_", string) return string @@ -386,7 +386,7 @@ def clean_musicbrainz_name(s, return_as_string=True): def cleanTitle(title): - title = re.sub('[\.\-\/\_]', ' ', title).lower() + title = re.sub(r"[\.\-\/\_]", " ", title).lower() # Strip out extra whitespace title = ' '.join(title.split()) @@ -1050,3 +1050,10 @@ def have_pct_have_total(db_artist): have_pct = have_tracks / total_tracks if total_tracks else 0 return (have_pct, total_tracks) + +def has_token(title, token): + return bool( + re.search(rf'(?:\W|^)+{token}(?:\W|$)+', + title, + re.IGNORECASE | re.UNICODE) + ) diff --git a/headphones/helpers_test.py b/headphones/helpers_test.py index 16753f91..09a1783c 100644 --- a/headphones/helpers_test.py +++ b/headphones/helpers_test.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- from .unittestcompat import TestCase -from headphones.helpers import clean_name, is_valid_date, age +from headphones.helpers import clean_name, is_valid_date, age, has_token class HelpersTest(TestCase): @@ -56,3 +56,18 @@ class HelpersTest(TestCase): ] for input, expected, desc in test_cases: self.assertEqual(is_valid_date(input), expected, desc) + + def test_has_token(self): + """helpers: has_token()""" + self.assertEqual( + has_token("a cat ran", "cat"), + True, + "return True if token is in string" + ) + self.assertEqual( + has_token("acatran", "cat"), + False, + "return False if token is part of another word" + ) + + diff --git a/headphones/postprocessor.py b/headphones/postprocessor.py index dc7ac271..b0b67cc2 100755 --- a/headphones/postprocessor.py +++ b/headphones/postprocessor.py @@ -27,7 +27,7 @@ from beets import config as beetsconfig from beets import logging as beetslogging from mediafile import MediaFile, FileTypeError, UnreadableFileError from beetsplug import lyrics as beetslyrics -from headphones import notifiers, utorrent, transmission, deluge, qbittorrent 
+from headphones import notifiers, utorrent, transmission, deluge, qbittorrent, soulseek from headphones import db, albumart, librarysync from headphones import logger, helpers, mb, music_encoder from headphones import metadata @@ -36,18 +36,44 @@ postprocessor_lock = threading.Lock() def checkFolder(): - logger.debug("Checking download folder for completed downloads (only snatched ones).") + logger.info("Checking download folder for completed downloads (only snatched ones).") with postprocessor_lock: myDB = db.DBConnection() snatched = myDB.select('SELECT * from snatched WHERE Status="Snatched"') + # If soulseek is used, this part will get the status from the soulseek api and return completed and errored albums + completed_albums, errored_albums = set(), set() + if any(album['Kind'] == 'soulseek' for album in snatched): + completed_albums, errored_albums = soulseek.download_completed() + for album in snatched: if album['FolderName']: folder_name = album['FolderName'] single = False - if album['Kind'] == 'nzb': - download_dir = headphones.CONFIG.DOWNLOAD_DIR + if album['Kind'] == 'soulseek': + if folder_name in errored_albums: + # If the album had any tracks with errors in it, the whole download is considered faulty. Status will be reset to wanted. + logger.info(f"Album with folder '{folder_name}' had errors during download. Setting status to 'Wanted'.") + myDB.action('UPDATE albums SET Status="Wanted" WHERE AlbumID=? AND Status="Snatched"', (album['AlbumID'],)) + + # Folder will be removed from configured complete and Incomplete directory + complete_path = os.path.join(headphones.CONFIG.SOULSEEK_DOWNLOAD_DIR, folder_name) + incomplete_path = os.path.join(headphones.CONFIG.SOULSEEK_INCOMPLETE_DOWNLOAD_DIR, folder_name) + for path in [complete_path, incomplete_path]: + try: + shutil.rmtree(path) + except Exception as e: + pass + continue + elif folder_name in completed_albums: + download_dir = headphones.CONFIG.SOULSEEK_DOWNLOAD_DIR + else: + continue + elif album['Kind'] == 'nzb': + download_dir = headphones.CONFIG.DOWNLOAD_DIR + elif album['Kind'] == 'bandcamp': + download_dir = headphones.CONFIG.BANDCAMP_DIR else: if headphones.CONFIG.DELUGE_DONE_DIRECTORY and headphones.CONFIG.TORRENT_DOWNLOADER == 3: download_dir = headphones.CONFIG.DELUGE_DONE_DIRECTORY @@ -289,7 +315,7 @@ def verify(albumid, albumpath, Kind=None, forced=False, keep_original_folder=Fal logger.debug('Metadata check failed. 
Verifying filenames...') for downloaded_track in downloaded_track_list: track_name = os.path.splitext(downloaded_track)[0] - split_track_name = re.sub('[\.\-\_]', ' ', track_name).lower() + split_track_name = re.sub(r'[\.\-\_]', r' ', track_name).lower() for track in tracks: if not track['TrackTitle']: @@ -1170,8 +1196,14 @@ def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None, keep_orig download_dirs.append(dir) if headphones.CONFIG.DOWNLOAD_DIR and not dir: download_dirs.append(headphones.CONFIG.DOWNLOAD_DIR) + if headphones.CONFIG.SOULSEEK_DOWNLOAD_DIR and not dir: + download_dirs.append(headphones.CONFIG.SOULSEEK_DOWNLOAD_DIR) if headphones.CONFIG.DOWNLOAD_TORRENT_DIR and not dir: - download_dirs.append(headphones.CONFIG.DOWNLOAD_TORRENT_DIR) + download_dirs.append( + headphones.CONFIG.DOWNLOAD_TORRENT_DIR.encode(headphones.SYS_ENCODING, 'replace')) + if headphones.CONFIG.BANDCAMP and not dir: + download_dirs.append( + headphones.CONFIG.BANDCAMP_DIR.encode(headphones.SYS_ENCODING, 'replace')) # If DOWNLOAD_DIR and DOWNLOAD_TORRENT_DIR are the same, remove the duplicate to prevent us from trying to process the same folder twice. download_dirs = list(set(download_dirs)) diff --git a/headphones/rutracker.py b/headphones/rutracker.py index 988e05b1..21d2abd4 100644 --- a/headphones/rutracker.py +++ b/headphones/rutracker.py @@ -42,19 +42,22 @@ class Rutracker(object): 'login_password': headphones.CONFIG.RUTRACKER_PASSWORD, 'login': b'\xc2\xf5\xee\xe4' # '%C2%F5%EE%E4' } + headers = { + 'User-Agent' : 'Headphones' + } logger.info("Attempting to log in to rutracker...") try: - r = self.session.post(loginpage, data=post_params, timeout=self.timeout, allow_redirects=False) + r = self.session.post(loginpage, data=post_params, timeout=self.timeout, allow_redirects=False, headers=headers) # try again if not self.has_bb_session_cookie(r): time.sleep(10) if headphones.CONFIG.RUTRACKER_COOKIE: logger.info("Attempting to log in using predefined cookie...") - r = self.session.post(loginpage, data=post_params, timeout=self.timeout, allow_redirects=False, cookies={'bb_session': headphones.CONFIG.RUTRACKER_COOKIE}) + r = self.session.post(loginpage, data=post_params, timeout=self.timeout, allow_redirects=False, headers=headers, cookies={'bb_session': headphones.CONFIG.RUTRACKER_COOKIE}) else: - r = self.session.post(loginpage, data=post_params, timeout=self.timeout, allow_redirects=False) + r = self.session.post(loginpage, data=post_params, timeout=self.timeout, allow_redirects=False, headers=headers) if self.has_bb_session_cookie(r): self.loggedin = True logger.info("Successfully logged in to rutracker") @@ -113,7 +116,10 @@ class Rutracker(object): Parse the search results and return valid torrent list """ try: - headers = {'Referer': self.search_referer} + headers = { + 'Referer': self.search_referer, + 'User-Agent' : 'Headphones' + } r = self.session.get(url=searchurl, headers=headers, timeout=self.timeout) soup = BeautifulSoup(r.content, 'html.parser') @@ -183,7 +189,10 @@ class Rutracker(object): downloadurl = 'https://rutracker.org/forum/dl.php?t=' + torrent_id cookie = {'bb_dl': torrent_id} try: - headers = {'Referer': url} + headers = { + 'Referer': url, + 'User-Agent' : 'Headphones' + } r = self.session.post(url=downloadurl, cookies=cookie, headers=headers, timeout=self.timeout) return r.content diff --git a/headphones/searcher.py b/headphones/searcher.py index e4762373..d6c8478e 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -37,10 +37,29 @@ from unidecode 
import unidecode import headphones from headphones.common import USER_AGENT +from headphones.helpers import ( + bytes_to_mb, + has_token, + piratesize, + replace_all, + replace_illegal_chars, + sab_replace_dots, + sab_replace_spaces, + sab_sanitize_foldername, + split_string + ) from headphones.types import Result -from headphones import logger, db, helpers, classes, sab, nzbget, request -from headphones import utorrent, transmission, notifiers, rutracker, deluge, qbittorrent - +from headphones import logger, db, classes, sab, nzbget, request +from headphones import ( + bandcamp, + deluge, + notifiers, + qbittorrent, + rutracker, + soulseek, + transmission, + utorrent, + ) # Magnet to torrent services, for Black hole. Stolen from CouchPotato. TORRENT_TO_MAGNET_SERVICES = [ @@ -137,7 +156,7 @@ def calculate_torrent_hash(link, data=None): """ if link.startswith("magnet:"): - torrent_hash = re.findall("urn:btih:([\w]{32,40})", link)[0] + torrent_hash = re.findall(r"urn:btih:([\w]{32,40})", link)[0] if len(torrent_hash) == 32: torrent_hash = b16encode(b32decode(torrent_hash)).lower() elif data: @@ -261,6 +280,8 @@ def strptime_musicbrainz(date_str): def do_sorted_search(album, new, losslessOnly, choose_specific_download=False): + + NZB_PROVIDERS = (headphones.CONFIG.HEADPHONES_INDEXER or headphones.CONFIG.NEWZNAB or headphones.CONFIG.NZBSORG or @@ -284,25 +305,33 @@ def do_sorted_search(album, new, losslessOnly, choose_specific_download=False): [album['AlbumID']])[0][0] if headphones.CONFIG.PREFER_TORRENTS == 0 and not choose_specific_download: - if NZB_PROVIDERS and NZB_DOWNLOADERS: results = searchNZB(album, new, losslessOnly, albumlength) if not results and TORRENT_PROVIDERS: results = searchTorrent(album, new, losslessOnly, albumlength) - elif headphones.CONFIG.PREFER_TORRENTS == 1 and not choose_specific_download: + if not results and headphones.CONFIG.BANDCAMP: + results = searchBandcamp(album, new, albumlength) + elif headphones.CONFIG.PREFER_TORRENTS == 1 and not choose_specific_download: if TORRENT_PROVIDERS: results = searchTorrent(album, new, losslessOnly, albumlength) if not results and NZB_PROVIDERS and NZB_DOWNLOADERS: results = searchNZB(album, new, losslessOnly, albumlength) + if not results and headphones.CONFIG.BANDCAMP: + results = searchBandcamp(album, new, albumlength) + + elif headphones.CONFIG.PREFER_TORRENTS == 2 and not choose_specific_download: + results = searchSoulseek(album, new, losslessOnly, albumlength) + else: nzb_results = None torrent_results = None + bandcamp_results = None if NZB_PROVIDERS and NZB_DOWNLOADERS: nzb_results = searchNZB(album, new, losslessOnly, albumlength, choose_specific_download) @@ -311,13 +340,16 @@ def do_sorted_search(album, new, losslessOnly, choose_specific_download=False): torrent_results = searchTorrent(album, new, losslessOnly, albumlength, choose_specific_download) + if headphones.CONFIG.BANDCAMP: + bandcamp_results = searchBandcamp(album, new, albumlength) + if not nzb_results: nzb_results = [] if not torrent_results: torrent_results = [] - results = nzb_results + torrent_results + results = nzb_results + torrent_results + bandcamp_results if choose_specific_download: return results @@ -338,6 +370,7 @@ def do_sorted_search(album, new, losslessOnly, choose_specific_download=False): (data, result) = preprocess(sorted_search_results) if data and result: + #print(f'going to send stuff to downloader. 
data: {data}, album: {album}') send_to_downloader(data, result, album) @@ -360,7 +393,7 @@ def more_filtering(results, album, albumlength, new): logger.debug('Target bitrate: %s kbps' % headphones.CONFIG.PREFERRED_BITRATE) if albumlength: targetsize = albumlength / 1000 * int(headphones.CONFIG.PREFERRED_BITRATE) * 128 - logger.info('Target size: %s' % helpers.bytes_to_mb(targetsize)) + logger.info('Target size: %s' % bytes_to_mb(targetsize)) if headphones.CONFIG.PREFERRED_BITRATE_LOW_BUFFER: low_size_limit = targetsize * int( headphones.CONFIG.PREFERRED_BITRATE_LOW_BUFFER) / 100 @@ -377,14 +410,14 @@ def more_filtering(results, album, albumlength, new): if low_size_limit and result.size < low_size_limit: logger.info( f"{result.title} from {result.provider} is too small for this album. " - f"(Size: {result.size}, MinSize: {helpers.bytes_to_mb(low_size_limit)})" + f"(Size: {result.size}, MinSize: {bytes_to_mb(low_size_limit)})" ) continue if high_size_limit and result.size > high_size_limit: logger.info( f"{result.title} from {result.provider} is too large for this album. " - f"(Size: {result.size}, MaxSize: {helpers.bytes_to_mb(high_size_limit)})" + f"(Size: {result.size}, MaxSize: {bytes_to_mb(high_size_limit)})" ) # Keep lossless results if there are no good lossy matches if not (allow_lossless and 'flac' in result.title.lower()): @@ -424,7 +457,7 @@ def sort_search_results(resultlist, album, new, albumlength): # Add a priority if it has any of the preferred words results_with_priority = [] - preferred_words = helpers.split_string(headphones.CONFIG.PREFERRED_WORDS) + preferred_words = split_string(headphones.CONFIG.PREFERRED_WORDS) for result in resultlist: priority = 0 for word in preferred_words: @@ -502,6 +535,10 @@ def get_year_from_release_date(release_date): return year +def searchBandcamp(album, new=False, albumlength=None): + return bandcamp.search(album) + + def searchNZB(album, new=False, losslessOnly=False, albumlength=None, choose_specific_download=False): reldate = album['ReleaseDate'] @@ -521,8 +558,8 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, ':': '' } - cleanalbum = unidecode(helpers.replace_all(album['AlbumTitle'], replacements)).strip() - cleanartist = unidecode(helpers.replace_all(album['ArtistName'], replacements)).strip() + cleanalbum = unidecode(replace_all(album['AlbumTitle'], replacements)).strip() + cleanartist = unidecode(replace_all(album['ArtistName'], replacements)).strip() # Use the provided search term if available, otherwise build a search term if album['SearchTerm']: @@ -542,8 +579,8 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, term = cleanartist + ' ' + cleanalbum # Replace bad characters in the term - term = re.sub('[\.\-\/]', ' ', term) - artistterm = re.sub('[\.\-\/]', ' ', cleanartist) + term = re.sub(r'[\.\-\/]', r' ', term) + artistterm = re.sub(r'[\.\-\/]', r' ', cleanartist) # If Preferred Bitrate and High Limit and Allow Lossless then get both lossy and lossless if headphones.CONFIG.PREFERRED_QUALITY == 2 and headphones.CONFIG.PREFERRED_BITRATE and headphones.CONFIG.PREFERRED_BITRATE_HIGH_BUFFER and headphones.CONFIG.PREFERRED_BITRATE_ALLOW_LOSSLESS: @@ -599,7 +636,7 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, size = int(item.links[1]['length']) resultlist.append(Result(title, size, url, provider, 'nzb', True)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + logger.info('Found %s. 
Size: %s' % (title, bytes_to_mb(size))) except Exception as e: logger.error("An unknown error occurred trying to parse the feed: %s" % e) @@ -670,7 +707,7 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, size = int(item.links[1]['length']) if all(word.lower() in title.lower() for word in term.split()): logger.info( - 'Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + 'Found %s. Size: %s' % (title, bytes_to_mb(size))) resultlist.append(Result(title, size, url, provider, 'nzb', True)) else: logger.info('Skipping %s, not all search term words found' % title) @@ -720,7 +757,7 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, size = int(item.links[1]['length']) resultlist.append(Result(title, size, url, provider, 'nzb', True)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + logger.info('Found %s. Size: %s' % (title, bytes_to_mb(size))) except Exception as e: logger.exception("Unhandled exception while parsing feed") @@ -767,7 +804,7 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, size = int(item['sizebytes']) resultlist.append(Result(title, size, url, provider, 'nzb', True)) - logger.info('Found %s. Size: %s', title, helpers.bytes_to_mb(size)) + logger.info('Found %s. Size: %s', title, bytes_to_mb(size)) except Exception as e: logger.exception("Unhandled exception") @@ -790,7 +827,7 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, def send_to_downloader(data, result, album): logger.info( f"Found best result from {result.provider}: " - f"{result.title} - {helpers.bytes_to_mb(result.size)}" + f"{result.title} - {bytes_to_mb(result.size)}" ) # Get rid of any dodgy chars here so we can prevent sab from renaming our downloads kind = result.kind @@ -798,7 +835,7 @@ def send_to_downloader(data, result, album): torrentid = None if kind == 'nzb': - folder_name = helpers.sab_sanitize_foldername(result.title) + folder_name = sab_sanitize_foldername(result.title) if headphones.CONFIG.NZB_DOWNLOADER == 1: @@ -820,9 +857,9 @@ def send_to_downloader(data, result, album): (replace_spaces, replace_dots) = sab.checkConfig() if replace_dots: - folder_name = helpers.sab_replace_dots(folder_name) + folder_name = sab_replace_dots(folder_name) if replace_spaces: - folder_name = helpers.sab_replace_spaces(folder_name) + folder_name = sab_replace_spaces(folder_name) else: nzb_name = folder_name + '.nzb' @@ -839,6 +876,15 @@ def send_to_downloader(data, result, album): except Exception as e: logger.error('Couldn\'t write NZB file: %s', e) return + elif kind == 'bandcamp': + folder_name = bandcamp.download(album, result) + logger.info("Setting folder_name to: {}".format(folder_name)) + + + elif kind == 'soulseek': + soulseek.download(user=result.user, filelist=result.files) + folder_name = result.folder + else: folder_name = '%s - %s [%s]' % ( unidecode(album['ArtistName']).replace('/', '_'), @@ -849,7 +895,7 @@ def send_to_downloader(data, result, album): if headphones.CONFIG.TORRENT_DOWNLOADER == 0: # Get torrent name from .torrent, this is usually used by the torrent client as the folder name - torrent_name = helpers.replace_illegal_chars(folder_name) + '.torrent' + torrent_name = replace_illegal_chars(folder_name) + '.torrent' download_path = os.path.join(headphones.CONFIG.TORRENTBLACKHOLE_DIR, torrent_name) if result.url.lower().startswith("magnet:"): @@ -954,10 +1000,6 @@ def send_to_downloader(data, result, album): logger.error("Error sending torrent to Deluge. 
Are you sure it's running? Maybe the torrent already exists?") return - # This pauses the torrent right after it is added - if headphones.CONFIG.DELUGE_PAUSED: - deluge.setTorrentPause({'hash': torrentid}) - # Set Label if headphones.CONFIG.DELUGE_LABEL: deluge.setTorrentLabel({'hash': torrentid}) @@ -967,10 +1009,6 @@ def send_to_downloader(data, result, album): if seed_ratio is not None: deluge.setSeedRatio({'hash': torrentid, 'ratio': seed_ratio}) - # Set move-to directory - if headphones.CONFIG.DELUGE_DONE_DIRECTORY or headphones.CONFIG.DOWNLOAD_TORRENT_DIR: - deluge.setTorrentPath({'hash': torrentid}) - # Get folder name from Deluge, it's usually the torrent name folder_name = deluge.getTorrentFolder({'hash': torrentid}) if folder_name: @@ -1156,7 +1194,7 @@ def send_to_downloader(data, result, album): def verifyresult(title, artistterm, term, lossless): - title = re.sub('[\.\-\/\_]', ' ', title) + title = re.sub(r'[\.\-\/\_]', r' ', title) # if artistterm != 'Various Artists': # @@ -1188,16 +1226,16 @@ def verifyresult(title, artistterm, term, lossless): return False if headphones.CONFIG.IGNORED_WORDS: - for each_word in helpers.split_string(headphones.CONFIG.IGNORED_WORDS): + for each_word in split_string(headphones.CONFIG.IGNORED_WORDS): if each_word.lower() in title.lower(): logger.info("Removed '%s' from results because it contains ignored word: '%s'", title, each_word) return False if headphones.CONFIG.REQUIRED_WORDS: - for each_word in helpers.split_string(headphones.CONFIG.REQUIRED_WORDS): + for each_word in split_string(headphones.CONFIG.REQUIRED_WORDS): if ' OR ' in each_word: - or_words = helpers.split_string(each_word, 'OR') + or_words = split_string(each_word, 'OR') if any(word.lower() in title.lower() for word in or_words): continue else: @@ -1219,23 +1257,23 @@ def verifyresult(title, artistterm, term, lossless): title, each_word) return False - tokens = re.split('\W', term, re.IGNORECASE | re.UNICODE) + tokens = re.split(r'\W', term, re.IGNORECASE | re.UNICODE) + for token in tokens: if not token: continue if token == 'Various' or token == 'Artists' or token == 'VA': continue - if not re.search('(?:\W|^)+' + token + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE): + if not has_token(title, token): cleantoken = ''.join(c for c in token if c not in string.punctuation) - if not not re.search('(?:\W|^)+' + cleantoken + '(?:\W|$)+', title, - re.IGNORECASE | re.UNICODE): + if not has_token(title, cleantoken): dic = {'!': 'i', '$': 's'} - dumbtoken = helpers.replace_all(token, dic) - if not not re.search('(?:\W|^)+' + dumbtoken + '(?:\W|$)+', title, - re.IGNORECASE | re.UNICODE): - logger.info("Removed from results: %s (missing tokens: %s and %s)", title, - token, cleantoken) + dumbtoken = replace_all(token, dic) + if not has_token(title, dumbtoken): + logger.info( + "Removed from results: %s (missing tokens: [%s, %s, %s])", + title, token, cleantoken, dumbtoken) return False return True @@ -1264,9 +1302,9 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, '*': '' } - semi_cleanalbum = helpers.replace_all(album['AlbumTitle'], replacements) + semi_cleanalbum = replace_all(album['AlbumTitle'], replacements) cleanalbum = unidecode(semi_cleanalbum) - semi_cleanartist = helpers.replace_all(album['ArtistName'], replacements) + semi_cleanartist = replace_all(album['ArtistName'], replacements) cleanartist = unidecode(semi_cleanartist) # Use provided term if available, otherwise build our own (this code needs to be cleaned up since a lot @@ -1293,12 +1331,12 
@@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, else: usersearchterm = '' - semi_clean_artist_term = re.sub('[\.\-\/]', ' ', semi_cleanartist) - semi_clean_album_term = re.sub('[\.\-\/]', ' ', semi_cleanalbum) + semi_clean_artist_term = re.sub(r'[\.\-\/]', r' ', semi_cleanartist) + semi_clean_album_term = re.sub(r'[\.\-\/]', r' ', semi_cleanalbum) # Replace bad characters in the term - term = re.sub('[\.\-\/]', ' ', term) - artistterm = re.sub('[\.\-\/]', ' ', cleanartist) - albumterm = re.sub('[\.\-\/]', ' ', cleanalbum) + term = re.sub(r'[\.\-\/]', r' ', term) + artistterm = re.sub(r'[\.\-\/]', r' ', cleanartist) + albumterm = re.sub(r'[\.\-\/]', r' ', cleanalbum) # If Preferred Bitrate and High Limit and Allow Lossless then get both lossy and lossless if headphones.CONFIG.PREFERRED_QUALITY == 2 and headphones.CONFIG.PREFERRED_BITRATE and headphones.CONFIG.PREFERRED_BITRATE_HIGH_BUFFER and headphones.CONFIG.PREFERRED_BITRATE_ALLOW_LOSSLESS: @@ -1401,7 +1439,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, if all(word.lower() in title.lower() for word in term.split()): if size < maxsize and minimumseeders < seeders: - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + logger.info('Found %s. Size: %s' % (title, bytes_to_mb(size))) resultlist.append(Result(title, size, url, provider, 'torrent', True)) else: logger.info( @@ -1477,7 +1515,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, size = int(desc_match.group(1)) url = item.link resultlist.append(Result(title, size, url, provider, 'torrent', True)) - logger.info('Found %s. Size: %s', title, helpers.bytes_to_mb(size)) + logger.info('Found %s. Size: %s', title, bytes_to_mb(size)) except Exception as e: logger.error( "An error occurred while trying to parse the response from Waffles.ch: %s", @@ -1761,7 +1799,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, # Pirate Bay if headphones.CONFIG.PIRATEBAY: provider = "The Pirate Bay" - tpb_term = term.replace("!", "").replace("'", " ") + tpb_term = term.replace("!", "").replace("'", " ").replace(" ", "%20") # Use proxy if specified if headphones.CONFIG.PIRATEBAY_PROXY_URL: @@ -1793,6 +1831,8 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, # Process content if data: rows = data.select('table tbody tr') + if not rows: + rows = data.select('table tr') if not rows: logger.info("No results found from The Pirate Bay using term: %s" % tpb_term) @@ -1820,7 +1860,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, formatted_size = re.search('Size (.*),', str(item)).group(1).replace( '\xa0', ' ') - size = helpers.piratesize(formatted_size) + size = piratesize(formatted_size) if size < maxsize and minimumseeders < seeds and url is not None: match = True @@ -1874,7 +1914,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, "href"] # Magnet link. 
The actual download link is not based on the URL
                         formatted_size = item.select("td.size-row")[0].text
-                        size = helpers.piratesize(formatted_size)
+                        size = piratesize(formatted_size)
 
                         if size < maxsize and minimumseeders < seeds and url is not None:
                             match = True
@@ -1901,22 +1941,49 @@
     return results
 
 
+def searchSoulseek(album, new=False, losslessOnly=False, albumlength=None):
+    # Not using some of the input stuff for now or ever
+    replacements = {
+        '...': '',
+        ' & ': ' ',
+        ' = ': ' ',
+        '?': '',
+        '$': '',
+        ' + ': ' ',
+        '"': '',
+        ',': '',
+        '*': '',
+        '.': '',
+        ':': ''
+    }
+
+    num_tracks = get_album_track_count(album['AlbumID'])
+    year = get_year_from_release_date(album['ReleaseDate'])
+    cleanalbum = unidecode(replace_all(album['AlbumTitle'], replacements)).strip()
+    cleanartist = unidecode(replace_all(album['ArtistName'], replacements)).strip()
+
+    results = soulseek.search(artist=cleanartist, album=cleanalbum, year=year, losslessOnly=losslessOnly, num_tracks=num_tracks)
+
+    return results
+
+
+def get_album_track_count(album_id):
+    # Not sure if this should be considered a helper function.
+    myDB = db.DBConnection()
+    track_count = myDB.select('SELECT COUNT(*) as count FROM tracks WHERE AlbumID=?', [album_id])[0]['count']
+    return track_count
+
+
 # THIS IS KIND OF A MESS AND PROBABLY NEEDS TO BE CLEANED UP
 def preprocess(resultlist):
     for result in resultlist:
+        headers = {'User-Agent': USER_AGENT}
 
-        if result.provider in ["The Pirate Bay", "Old Pirate Bay"]:
-            headers = {
-                'User-Agent':
-                    'Mozilla/5.0 (Windows NT 6.3; Win64; x64) \
-                    AppleWebKit/537.36 (KHTML, like Gecko) \
-                    Chrome/41.0.2243.2 Safari/537.36'
-            }
-        else:
-            headers = {'User-Agent': USER_AGENT}
-
+        if result.kind == 'soulseek':
+            return True, result
+
         if result.kind == 'torrent':
 
             # rutracker always needs the torrent data
@@ -1962,12 +2029,24 @@
                 return True, result
 
             # Download the torrent file
+
+            if result.provider in ["The Pirate Bay", "Old Pirate Bay"]:
+                headers = {
+                    'User-Agent':
+                        'Mozilla/5.0 (Windows NT 6.3; Win64; x64) \
+                        AppleWebKit/537.36 (KHTML, like Gecko) \
+                        Chrome/41.0.2243.2 Safari/537.36'
+                }
+
             return request.request_content(url=result.url, headers=headers), result
 
-        if result.kind == 'magnet':
+        elif result.kind == 'magnet':
             magnet_link = result.url
             return "d10:magnet-uri%d:%se" % (len(magnet_link), magnet_link), result
 
+        elif result.kind == 'bandcamp':
+            return True, result
+
         else:
             if result.provider == 'headphones':
                 return request.request_content(
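The preprocess() changes above formalize a (data, result) contract: kinds that need no upfront payload (soulseek, bandcamp) hand True back as the data and let send_to_downloader() do the provider-specific work. A condensed sketch of that control flow (simplified; the real function also handles rutracker specially and fetches the .torrent/.nzb payload):

```python
# Condensed, illustrative version of the dispatch in preprocess():
def preprocess_sketch(result):
    if result.kind in ('soulseek', 'bandcamp'):
        # Nothing to fetch yet; send_to_downloader() enqueues the slskd
        # transfer or scrapes the Bandcamp page itself.
        return True, result
    if result.kind == 'magnet':
        magnet_link = result.url
        # Wrap the magnet URI in a minimal bencoded container
        return "d10:magnet-uri%d:%se" % (len(magnet_link), magnet_link), result
    return None, result  # torrents/NZBs: the real code downloads the payload here
```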
diff --git a/headphones/soulseek.py b/headphones/soulseek.py
new file mode 100644
index 00000000..2802c35c
--- /dev/null
+++ b/headphones/soulseek.py
@@ -0,0 +1,185 @@
+from collections import defaultdict, namedtuple
+import os
+import time
+import slskd_api
+import headphones
+from headphones import logger
+from datetime import datetime, timedelta
+
+Result = namedtuple('Result', ['title', 'size', 'user', 'provider', 'type', 'matches', 'bandwidth', 'hasFreeUploadSlot', 'queueLength', 'files', 'kind', 'url', 'folder'])
+
+def initialize_soulseek_client():
+    host = headphones.CONFIG.SOULSEEK_API_URL
+    api_key = headphones.CONFIG.SOULSEEK_API_KEY
+    return slskd_api.SlskdClient(host=host, api_key=api_key)
+
+    # Search logic, calling search and processing functions
+def search(artist, album, year, num_tracks, losslessOnly):
+    client = initialize_soulseek_client()
+
+    # Stage 1: Search with artist, album, year, and num_tracks
+    results = execute_search(client, artist, album, year, losslessOnly)
+    processed_results = process_results(results, losslessOnly, num_tracks)
+    if processed_results:
+        return processed_results
+
+    # Stage 2: If Stage 1 fails, search with artist, album, and num_tracks (excluding year)
+    logger.info("Soulseek search stage 1 did not meet criteria. Retrying without year...")
+    results = execute_search(client, artist, album, None, losslessOnly)
+    processed_results = process_results(results, losslessOnly, num_tracks)
+    if processed_results:
+        return processed_results
+
+    # Stage 3: Final attempt; same artist/album search, but accept results even when the track count doesn't match
+    logger.info("Soulseek search stage 2 did not meet criteria. Final attempt with only artist and album.")
+    results = execute_search(client, artist, album, None, losslessOnly)
+    processed_results = process_results(results, losslessOnly, num_tracks, ignore_track_count=True)
+
+    return processed_results
+
+def execute_search(client, artist, album, year, losslessOnly):
+    search_text = f"{artist} {album}"
+    if year:
+        search_text += f" {year}"
+    if losslessOnly:
+        search_text += ".flac"
+
+    # Actual search
+    search_response = client.searches.search_text(searchText=search_text, filterResponses=True)
+    search_id = search_response.get('id')
+
+    # Wait for search completion and return response
+    while not client.searches.state(id=search_id).get('isComplete'):
+        time.sleep(2)
+
+    return client.searches.search_responses(id=search_id)
+
+# Processing the search result passed
+def process_results(results, losslessOnly, num_tracks, ignore_track_count=False):
+    valid_extensions = {'.flac'} if losslessOnly else {'.mp3', '.flac'}
+    albums = defaultdict(lambda: {'files': [], 'user': None, 'hasFreeUploadSlot': None, 'queueLength': None, 'uploadSpeed': None})
+
+    # Extract info from the api response and combine files at album level
+    for result in results:
+        user = result.get('username')
+        hasFreeUploadSlot = result.get('hasFreeUploadSlot')
+        queueLength = result.get('queueLength')
+        uploadSpeed = result.get('uploadSpeed')
+
+        # Only handle .mp3 and .flac
+        for file in result.get('files', []):
+            filename = file.get('filename')
+            file_extension = os.path.splitext(filename)[1].lower()
+            if file_extension in valid_extensions:
+                album_directory = os.path.dirname(filename)
+                albums[album_directory]['files'].append(file)
+
+                # Update metadata only once per album_directory
+                if albums[album_directory]['user'] is None:
+                    albums[album_directory].update({
+                        'user': user,
+                        'hasFreeUploadSlot': hasFreeUploadSlot,
+                        'queueLength': queueLength,
+                        'uploadSpeed': uploadSpeed,
+                    })
+
+    # Filter albums based on num_tracks, add bunch of useful info to the compiled album
+    final_results = []
+    for directory, album_data in albums.items():
+        if ignore_track_count or len(album_data['files']) == num_tracks:
+            album_title = os.path.basename(directory)
+            total_size = sum(file.get('size', 0) for file in album_data['files'])
+            final_results.append(Result(
+                title=album_title,
+                size=int(total_size),
+                user=album_data['user'],
+                provider="soulseek",
+                type="soulseek",
+                matches=True,
+                bandwidth=album_data['uploadSpeed'],
+                hasFreeUploadSlot=album_data['hasFreeUploadSlot'],
+                queueLength=album_data['queueLength'],
+                files=album_data['files'],
+                kind='soulseek',
+                url='http://thisisnot.needed',  # URL is needed in other parts of the program.
+                folder=os.path.basename(directory)
+            ))
+
+    return final_results
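process_results() above flattens each slskd response into per-folder albums: files are grouped by their remote directory, per-user metadata is recorded once per folder, and each surviving folder becomes one Result. A toy run with a fabricated response — the field names follow the slskd usage above, forward slashes stand in for Soulseek's backslash paths so os.path behaves the same on any platform, and the import assumes a Headphones checkout:

```python
from headphones import soulseek

fake_responses = [{
    'username': 'someuser',          # illustrative values throughout
    'hasFreeUploadSlot': True,
    'queueLength': 0,
    'uploadSpeed': 1000,
    'files': [
        {'filename': 'Music/Artist - Album/01 - One.flac', 'size': 30000000},
        {'filename': 'Music/Artist - Album/02 - Two.flac', 'size': 28000000},
    ],
}]

results = soulseek.process_results(fake_responses, losslessOnly=True, num_tracks=2)
print(results[0].title, results[0].size)  # -> Artist - Album 58000000
```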
+
+
+def download(user, filelist):
+    client = initialize_soulseek_client()
+    client.transfers.enqueue(username=user, files=filelist)
+
+
+def download_completed():
+    client = initialize_soulseek_client()
+    all_downloads = client.transfers.get_all_downloads(includeRemoved=False)
+    album_completion_tracker = {}  # Tracks completion state of each album's songs
+    album_errored_tracker = {}  # Tracks albums with errored downloads
+
+    # Anything older than 24 hours will be canceled
+    cutoff_time = datetime.now() - timedelta(hours=24)
+
+    # Identify errored and completed albums
+    for download in all_downloads:
+        directories = download.get('directories', [])
+        for directory in directories:
+            album_part = directory.get('directory', '').split('\\')[-1]
+            files = directory.get('files', [])
+            for file_data in files:
+                state = file_data.get('state', '')
+                requested_at_str = file_data.get('requestedAt', '1900-01-01 00:00:00')
+                requested_at = parse_datetime(requested_at_str)
+
+                # Initialize or update album entry in trackers
+                if album_part not in album_completion_tracker:
+                    album_completion_tracker[album_part] = {'total': 0, 'completed': 0, 'errored': 0}
+                if album_part not in album_errored_tracker:
+                    album_errored_tracker[album_part] = False
+
+                album_completion_tracker[album_part]['total'] += 1
+
+                if 'Completed, Succeeded' in state:
+                    album_completion_tracker[album_part]['completed'] += 1
+                elif 'Completed, Errored' in state or requested_at < cutoff_time:
+                    album_completion_tracker[album_part]['errored'] += 1
+                    album_errored_tracker[album_part] = True  # Mark album as having errored downloads
+
+    # Identify errored albums
+    errored_albums = {album for album, errored in album_errored_tracker.items() if errored}
+
+    # Cancel downloads for errored albums
+    for download in all_downloads:
+        directories = download.get('directories', [])
+        for directory in directories:
+            album_part = directory.get('directory', '').split('\\')[-1]
+            files = directory.get('files', [])
+            for file_data in files:
+                if album_part in errored_albums:
+                    # Extract 'id' and 'username' for each file to cancel the download
+                    file_id = file_data.get('id', '')
+                    username = file_data.get('username', '')
+                    success = client.transfers.cancel_download(username, file_id)
+                    if not success:
+                        print(f"Failed to cancel download for file ID: {file_id}")
+
+    # Clear completed/canceled/errored stuff from client downloads
+    try:
+        client.transfers.remove_completed_downloads()
+    except Exception as e:
+        print(f"Failed to remove completed downloads: {e}")
+
+    # Identify completed albums
+    completed_albums = {album for album, counts in album_completion_tracker.items() if counts['total'] == counts['completed']}
+
+    # Return both completed and errored albums
+    return completed_albums, errored_albums
+
+
+def parse_datetime(datetime_string):
+    # Parse the datetime api response
+    if '.' in datetime_string:
+        datetime_string = datetime_string[:datetime_string.index('.')+7]
+    return datetime.strptime(datetime_string, '%Y-%m-%dT%H:%M:%S.%f')
\ No newline at end of file
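parse_datetime() trims the fractional part because slskd can report more digits than strptime's %f (at most six) accepts; `index('.') + 7` keeps the dot plus six digits. For example (timestamp value invented):

```python
from datetime import datetime

raw = '2024-05-26T12:34:56.1234567'   # 7 fractional digits
trimmed = raw[:raw.index('.') + 7]    # '2024-05-26T12:34:56.123456'
print(datetime.strptime(trimmed, '%Y-%m-%dT%H:%M:%S.%f'))
```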
diff --git a/headphones/webserve.py b/headphones/webserve.py
index 758b2267..005f4146 100644
--- a/headphones/webserve.py
+++ b/headphones/webserve.py
@@ -1183,6 +1183,7 @@
             "deluge_password": headphones.CONFIG.DELUGE_PASSWORD,
             "deluge_label": headphones.CONFIG.DELUGE_LABEL,
             "deluge_done_directory": headphones.CONFIG.DELUGE_DONE_DIRECTORY,
+            "deluge_download_directory": headphones.CONFIG.DELUGE_DOWNLOAD_DIRECTORY,
             "deluge_paused": checked(headphones.CONFIG.DELUGE_PAUSED),
             "utorrent_host": headphones.CONFIG.UTORRENT_HOST,
             "utorrent_username": headphones.CONFIG.UTORRENT_USERNAME,
@@ -1197,6 +1198,8 @@
             "torrent_downloader_deluge": radio(headphones.CONFIG.TORRENT_DOWNLOADER, 3),
             "torrent_downloader_qbittorrent": radio(headphones.CONFIG.TORRENT_DOWNLOADER, 4),
             "download_dir": headphones.CONFIG.DOWNLOAD_DIR,
+            "soulseek_download_dir": headphones.CONFIG.SOULSEEK_DOWNLOAD_DIR,
+            "soulseek_incomplete_download_dir": headphones.CONFIG.SOULSEEK_INCOMPLETE_DOWNLOAD_DIR,
             "use_blackhole": checked(headphones.CONFIG.BLACKHOLE),
             "blackhole_dir": headphones.CONFIG.BLACKHOLE_DIR,
             "usenet_retention": headphones.CONFIG.USENET_RETENTION,
@@ -1296,6 +1299,7 @@
             "prefer_torrents_0": radio(headphones.CONFIG.PREFER_TORRENTS, 0),
             "prefer_torrents_1": radio(headphones.CONFIG.PREFER_TORRENTS, 1),
             "prefer_torrents_2": radio(headphones.CONFIG.PREFER_TORRENTS, 2),
+            "prefer_torrents_3": radio(headphones.CONFIG.PREFER_TORRENTS, 3),
             "magnet_links_0": radio(headphones.CONFIG.MAGNET_LINKS, 0),
             "magnet_links_1": radio(headphones.CONFIG.MAGNET_LINKS, 1),
             "magnet_links_2": radio(headphones.CONFIG.MAGNET_LINKS, 2),
@@ -1413,7 +1417,12 @@
             "join_enabled": checked(headphones.CONFIG.JOIN_ENABLED),
             "join_onsnatch": checked(headphones.CONFIG.JOIN_ONSNATCH),
             "join_apikey": headphones.CONFIG.JOIN_APIKEY,
-            "join_deviceid": headphones.CONFIG.JOIN_DEVICEID
+            "join_deviceid": headphones.CONFIG.JOIN_DEVICEID,
+            "use_bandcamp": checked(headphones.CONFIG.BANDCAMP),
+            "bandcamp_dir": headphones.CONFIG.BANDCAMP_DIR,
+            'soulseek_api_url': headphones.CONFIG.SOULSEEK_API_URL,
+            'soulseek_api_key': headphones.CONFIG.SOULSEEK_API_KEY,
+            'use_soulseek': checked(headphones.CONFIG.SOULSEEK)
         }
 
         for k, v in config.items():
@@ -1482,7 +1491,7 @@
             "songkick_enabled", "songkick_filter_enabled", "mpc_enabled",
             "email_enabled", "email_ssl", "email_tls", "email_onsnatch",
             "customauth", "idtag", "deluge_paused",
-            "join_enabled", "join_onsnatch"
+            "join_enabled", "join_onsnatch", "use_bandcamp"
         ]
         for checked_config in checked_configs:
             if checked_config not in kwargs:
diff --git a/lib/cherrypy/__init__.py b/lib/cherrypy/__init__.py
index 8e27c812..49e955f8 100644
--- a/lib/cherrypy/__init__.py
+++ b/lib/cherrypy/__init__.py
@@ -57,9 +57,11 @@ These API's are described in the `CherryPy specification
 """
 
 try:
-    import pkg_resources
+    import importlib.metadata as importlib_metadata
 except ImportError:
-    pass
+    # fall back for python <= 3.7
+    # This try/except can be removed when py <= 3.7 support is dropped
+    import importlib_metadata
 
 from threading import local as _local
 
@@ -109,7 +111,7 @@
 tree = _cptree.Tree()
 
 try:
-    __version__ = pkg_resources.require('cherrypy')[0].version
+    __version__ = importlib_metadata.version('cherrypy')
 except Exception:
     __version__ =
'unknown' @@ -181,24 +183,28 @@ def quickstart(root=None, script_name='', config=None): class _Serving(_local): """An interface for registering request and response objects. - Rather than have a separate "thread local" object for the request and - the response, this class works as a single threadlocal container for - both objects (and any others which developers wish to define). In this - way, we can easily dump those objects when we stop/start a new HTTP - conversation, yet still refer to them as module-level globals in a - thread-safe way. + Rather than have a separate "thread local" object for the request + and the response, this class works as a single threadlocal container + for both objects (and any others which developers wish to define). + In this way, we can easily dump those objects when we stop/start a + new HTTP conversation, yet still refer to them as module-level + globals in a thread-safe way. """ request = _cprequest.Request(_httputil.Host('127.0.0.1', 80), _httputil.Host('127.0.0.1', 1111)) + """The request object for the current thread. + + In the main thread, and any threads which are not receiving HTTP + requests, this is None. """ - The request object for the current thread. In the main thread, - and any threads which are not receiving HTTP requests, this is None.""" response = _cprequest.Response() + """The response object for the current thread. + + In the main thread, and any threads which are not receiving HTTP + requests, this is None. """ - The response object for the current thread. In the main thread, - and any threads which are not receiving HTTP requests, this is None.""" def load(self, request, response): self.request = request @@ -316,8 +322,8 @@ class _GlobalLogManager(_cplogging.LogManager): def __call__(self, *args, **kwargs): """Log the given message to the app.log or global log. - Log the given message to the app.log or global - log as appropriate. + Log the given message to the app.log or global log as + appropriate. """ # Do NOT use try/except here. See # https://github.com/cherrypy/cherrypy/issues/945 @@ -330,8 +336,8 @@ class _GlobalLogManager(_cplogging.LogManager): def access(self): """Log an access message to the app.log or global log. - Log the given message to the app.log or global - log as appropriate. + Log the given message to the app.log or global log as + appropriate. """ try: return request.app.log.access() diff --git a/lib/cherrypy/_cpchecker.py b/lib/cherrypy/_cpchecker.py index f26f319c..096b19c3 100644 --- a/lib/cherrypy/_cpchecker.py +++ b/lib/cherrypy/_cpchecker.py @@ -313,7 +313,10 @@ class Checker(object): # -------------------- Specific config warnings -------------------- # def check_localhost(self): - """Warn if any socket_host is 'localhost'. See #711.""" + """Warn if any socket_host is 'localhost'. + + See #711. + """ for k, v in cherrypy.config.items(): if k == 'server.socket_host' and v == 'localhost': warnings.warn("The use of 'localhost' as a socket host can " diff --git a/lib/cherrypy/_cpconfig.py b/lib/cherrypy/_cpconfig.py index 8e3fd612..c22937d3 100644 --- a/lib/cherrypy/_cpconfig.py +++ b/lib/cherrypy/_cpconfig.py @@ -1,5 +1,4 @@ -""" -Configuration system for CherryPy. +"""Configuration system for CherryPy. Configuration in CherryPy is implemented via dictionaries. Keys are strings which name the mapped value, which may be of any type. @@ -132,8 +131,8 @@ def _if_filename_register_autoreload(ob): def merge(base, other): """Merge one app config (from a dict, file, or filename) into another. 
- If the given config is a filename, it will be appended to - the list of files to monitor for "autoreload" changes. + If the given config is a filename, it will be appended to the list + of files to monitor for "autoreload" changes. """ _if_filename_register_autoreload(other) diff --git a/lib/cherrypy/_cpdispatch.py b/lib/cherrypy/_cpdispatch.py index 5c506e99..5a3a8ad6 100644 --- a/lib/cherrypy/_cpdispatch.py +++ b/lib/cherrypy/_cpdispatch.py @@ -1,9 +1,10 @@ """CherryPy dispatchers. A 'dispatcher' is the object which looks up the 'page handler' callable -and collects config for the current request based on the path_info, other -request attributes, and the application architecture. The core calls the -dispatcher as early as possible, passing it a 'path_info' argument. +and collects config for the current request based on the path_info, +other request attributes, and the application architecture. The core +calls the dispatcher as early as possible, passing it a 'path_info' +argument. The default dispatcher discovers the page handler by matching path_info to a hierarchical arrangement of objects, starting at request.app.root. @@ -21,7 +22,6 @@ import cherrypy class PageHandler(object): - """Callable which sets response.body.""" def __init__(self, callable, *args, **kwargs): @@ -64,8 +64,7 @@ class PageHandler(object): def test_callable_spec(callable, callable_args, callable_kwargs): - """ - Inspect callable and test to see if the given args are suitable for it. + """Inspect callable and test to see if the given args are suitable for it. When an error occurs during the handler's invoking stage there are 2 erroneous cases: @@ -252,16 +251,16 @@ else: class Dispatcher(object): - """CherryPy Dispatcher which walks a tree of objects to find a handler. - The tree is rooted at cherrypy.request.app.root, and each hierarchical - component in the path_info argument is matched to a corresponding nested - attribute of the root object. Matching handlers must have an 'exposed' - attribute which evaluates to True. The special method name "index" - matches a URI which ends in a slash ("/"). The special method name - "default" may match a portion of the path_info (but only when no longer - substring of the path_info matches some other object). + The tree is rooted at cherrypy.request.app.root, and each + hierarchical component in the path_info argument is matched to a + corresponding nested attribute of the root object. Matching handlers + must have an 'exposed' attribute which evaluates to True. The + special method name "index" matches a URI which ends in a slash + ("/"). The special method name "default" may match a portion of the + path_info (but only when no longer substring of the path_info + matches some other object). This is the default, built-in dispatcher for CherryPy. """ @@ -306,9 +305,9 @@ class Dispatcher(object): The second object returned will be a list of names which are 'virtual path' components: parts of the URL which are dynamic, - and were not used when looking up the handler. - These virtual path components are passed to the handler as - positional arguments. + and were not used when looking up the handler. These virtual + path components are passed to the handler as positional + arguments. """ request = cherrypy.serving.request app = request.app @@ -448,13 +447,11 @@ class Dispatcher(object): class MethodDispatcher(Dispatcher): - """Additional dispatch based on cherrypy.request.method.upper(). - Methods named GET, POST, etc will be called on an exposed class. 
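(Reviewer note: the `MethodDispatcher` docstring being reflowed here reads more easily next to a sketch. Class and handler names are assumptions; the pattern itself is the documented one: the containing class carries `exposed`, and upper-case method names map to HTTP verbs.)

```python
import cherrypy

class Resource:
    exposed = True  # the containing class must be exposed, not the methods

    def GET(self):
        return 'current state'

    def PUT(self, **kwargs):
        return 'updated'

conf = {'/': {'request.dispatch': cherrypy.dispatch.MethodDispatcher()}}
cherrypy.quickstart(Resource(), '/', conf)
```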
- The method names must be all caps; the appropriate Allow header - will be output showing all capitalized method names as allowable - HTTP verbs. + Methods named GET, POST, etc will be called on an exposed class. The + method names must be all caps; the appropriate Allow header will be + output showing all capitalized method names as allowable HTTP verbs. Note that the containing class must be exposed, not the methods. """ @@ -492,16 +489,14 @@ class MethodDispatcher(Dispatcher): class RoutesDispatcher(object): - """A Routes based dispatcher for CherryPy.""" def __init__(self, full_result=False, **mapper_options): - """ - Routes dispatcher + """Routes dispatcher. - Set full_result to True if you wish the controller - and the action to be passed on to the page handler - parameters. By default they won't be. + Set full_result to True if you wish the controller and the + action to be passed on to the page handler parameters. By + default they won't be. """ import routes self.full_result = full_result @@ -617,8 +612,7 @@ def XMLRPCDispatcher(next_dispatcher=Dispatcher()): def VirtualHost(next_dispatcher=Dispatcher(), use_x_forwarded_host=True, **domains): - """ - Select a different handler based on the Host header. + """Select a different handler based on the Host header. This can be useful when running multiple sites within one CP server. It allows several domains to point to different parts of a single diff --git a/lib/cherrypy/_cperror.py b/lib/cherrypy/_cperror.py index f6ff2913..203fabf5 100644 --- a/lib/cherrypy/_cperror.py +++ b/lib/cherrypy/_cperror.py @@ -136,19 +136,17 @@ from cherrypy.lib import httputil as _httputil class CherryPyException(Exception): - """A base class for CherryPy exceptions.""" pass class InternalRedirect(CherryPyException): - """Exception raised to switch to the handler for a different URL. - This exception will redirect processing to another path within the site - (without informing the client). Provide the new path as an argument when - raising the exception. Provide any params in the querystring for the new - URL. + This exception will redirect processing to another path within the + site (without informing the client). Provide the new path as an + argument when raising the exception. Provide any params in the + querystring for the new URL. """ def __init__(self, path, query_string=''): @@ -173,7 +171,6 @@ class InternalRedirect(CherryPyException): class HTTPRedirect(CherryPyException): - """Exception raised when the request should be redirected. This exception will force a HTTP redirect to the URL or URL's you give it. @@ -202,7 +199,7 @@ class HTTPRedirect(CherryPyException): """The list of URL's to emit.""" encoding = 'utf-8' - """The encoding when passed urls are not native strings""" + """The encoding when passed urls are not native strings.""" def __init__(self, urls, status=None, encoding=None): self.urls = abs_urls = [ @@ -230,8 +227,7 @@ class HTTPRedirect(CherryPyException): @classproperty def default_status(cls): - """ - The default redirect status for the request. + """The default redirect status for the request. RFC 2616 indicates a 301 response code fits our goal; however, browser support for 301 is quite messy. Use 302/303 instead. See @@ -249,8 +245,9 @@ class HTTPRedirect(CherryPyException): """Modify cherrypy.response status, headers, and body to represent self. - CherryPy uses this internally, but you can also use it to create an - HTTPRedirect object and set its output without *raising* the exception. 
+ CherryPy uses this internally, but you can also use it to create + an HTTPRedirect object and set its output without *raising* the + exception. """ response = cherrypy.serving.response response.status = status = self.status @@ -339,7 +336,6 @@ def clean_headers(status): class HTTPError(CherryPyException): - """Exception used to return an HTTP error code (4xx-5xx) to the client. This exception can be used to automatically send a response using a @@ -358,7 +354,9 @@ class HTTPError(CherryPyException): """ status = None - """The HTTP status code. May be of type int or str (with a Reason-Phrase). + """The HTTP status code. + + May be of type int or str (with a Reason-Phrase). """ code = None @@ -386,8 +384,9 @@ class HTTPError(CherryPyException): """Modify cherrypy.response status, headers, and body to represent self. - CherryPy uses this internally, but you can also use it to create an - HTTPError object and set its output without *raising* the exception. + CherryPy uses this internally, but you can also use it to create + an HTTPError object and set its output without *raising* the + exception. """ response = cherrypy.serving.response @@ -426,11 +425,10 @@ class HTTPError(CherryPyException): class NotFound(HTTPError): - """Exception raised when a URL could not be mapped to any handler (404). - This is equivalent to raising - :class:`HTTPError("404 Not Found") `. + This is equivalent to raising :class:`HTTPError("404 Not Found") + `. """ def __init__(self, path=None): @@ -477,8 +475,8 @@ _HTTPErrorTemplate = ''' cherrypy.server -> HTTPServer. + cheroot has been designed to not reference CherryPy in any way, so + that it can be used in other frameworks and applications. Therefore, + we wrap it here, so we can apply some attributes from config -> + cherrypy.server -> HTTPServer. """ def __init__(self, server_adapter=cherrypy.server): diff --git a/lib/cherrypy/_cpreqbody.py b/lib/cherrypy/_cpreqbody.py index 4d3cefe7..7e0d98be 100644 --- a/lib/cherrypy/_cpreqbody.py +++ b/lib/cherrypy/_cpreqbody.py @@ -248,7 +248,10 @@ def process_multipart_form_data(entity): def _old_process_multipart(entity): - """The behavior of 3.2 and lower. Deprecated and will be changed in 3.3.""" + """The behavior of 3.2 and lower. + + Deprecated and will be changed in 3.3. + """ process_multipart(entity) params = entity.params @@ -277,7 +280,6 @@ def _old_process_multipart(entity): # -------------------------------- Entities --------------------------------- # class Entity(object): - """An HTTP request body, or MIME multipart body. This class collects information about the HTTP request entity. When a @@ -346,13 +348,15 @@ class Entity(object): content_type = None """The value of the Content-Type request header. - If the Entity is part of a multipart payload, this will be the Content-Type - given in the MIME headers for this part. + If the Entity is part of a multipart payload, this will be the + Content-Type given in the MIME headers for this part. """ default_content_type = 'application/x-www-form-urlencoded' """This defines a default ``Content-Type`` to use if no Content-Type header - is given. The empty string is used for RequestBody, which results in the + is given. + + The empty string is used for RequestBody, which results in the request body not being read or parsed at all. This is by design; a missing ``Content-Type`` header in the HTTP request entity is an error at best, and a security hole at worst. 
For multipart parts, however, the MIME spec @@ -402,8 +406,8 @@ class Entity(object): part_class = None """The class used for multipart parts. - You can replace this with custom subclasses to alter the processing of - multipart parts. + You can replace this with custom subclasses to alter the processing + of multipart parts. """ def __init__(self, fp, headers, params=None, parts=None): @@ -509,7 +513,8 @@ class Entity(object): """Return a file-like object into which the request body will be read. By default, this will return a TemporaryFile. Override as needed. - See also :attr:`cherrypy._cpreqbody.Part.maxrambytes`.""" + See also :attr:`cherrypy._cpreqbody.Part.maxrambytes`. + """ return tempfile.TemporaryFile() def fullvalue(self): @@ -525,7 +530,7 @@ class Entity(object): return value def decode_entity(self, value): - """Return a given byte encoded value as a string""" + """Return a given byte encoded value as a string.""" for charset in self.attempt_charsets: try: value = value.decode(charset) @@ -569,7 +574,6 @@ class Entity(object): class Part(Entity): - """A MIME part entity, part of a multipart entity.""" # "The default character set, which must be assumed in the absence of a @@ -653,8 +657,8 @@ class Part(Entity): def read_lines_to_boundary(self, fp_out=None): """Read bytes from self.fp and return or write them to a file. - If the 'fp_out' argument is None (the default), all bytes read are - returned in a single byte string. + If the 'fp_out' argument is None (the default), all bytes read + are returned in a single byte string. If the 'fp_out' argument is not None, it must be a file-like object that supports the 'write' method; all bytes read will be @@ -755,15 +759,15 @@ class SizedReader: def read(self, size=None, fp_out=None): """Read bytes from the request body and return or write them to a file. - A number of bytes less than or equal to the 'size' argument are read - off the socket. The actual number of bytes read are tracked in - self.bytes_read. The number may be smaller than 'size' when 1) the - client sends fewer bytes, 2) the 'Content-Length' request header - specifies fewer bytes than requested, or 3) the number of bytes read - exceeds self.maxbytes (in which case, 413 is raised). + A number of bytes less than or equal to the 'size' argument are + read off the socket. The actual number of bytes read are tracked + in self.bytes_read. The number may be smaller than 'size' when + 1) the client sends fewer bytes, 2) the 'Content-Length' request + header specifies fewer bytes than requested, or 3) the number of + bytes read exceeds self.maxbytes (in which case, 413 is raised). - If the 'fp_out' argument is None (the default), all bytes read are - returned in a single byte string. + If the 'fp_out' argument is None (the default), all bytes read + are returned in a single byte string. 
If the 'fp_out' argument is not None, it must be a file-like object that supports the 'write' method; all bytes read will be @@ -918,7 +922,6 @@ class SizedReader: class RequestBody(Entity): - """The entity of the HTTP request.""" bufsize = 8 * 1024 diff --git a/lib/cherrypy/_cprequest.py b/lib/cherrypy/_cprequest.py index a661112c..a4ad298b 100644 --- a/lib/cherrypy/_cprequest.py +++ b/lib/cherrypy/_cprequest.py @@ -16,7 +16,6 @@ from cherrypy.lib import httputil, reprconf, encoding class Hook(object): - """A callback and its metadata: failsafe, priority, and kwargs.""" callback = None @@ -30,10 +29,12 @@ class Hook(object): from the same call point raise exceptions.""" priority = 50 + """Defines the order of execution for a list of Hooks. + + Priority numbers should be limited to the closed interval [0, 100], + but values outside this range are acceptable, as are fractional + values. """ - Defines the order of execution for a list of Hooks. Priority numbers - should be limited to the closed interval [0, 100], but values outside - this range are acceptable, as are fractional values.""" kwargs = {} """ @@ -74,7 +75,6 @@ class Hook(object): class HookMap(dict): - """A map of call points to lists of callbacks (Hook objects).""" def __new__(cls, points=None): @@ -190,23 +190,23 @@ hookpoints = ['on_start_resource', 'before_request_body', class Request(object): - """An HTTP request. - This object represents the metadata of an HTTP request message; - that is, it contains attributes which describe the environment - in which the request URL, headers, and body were sent (if you - want tools to interpret the headers and body, those are elsewhere, - mostly in Tools). This 'metadata' consists of socket data, - transport characteristics, and the Request-Line. This object - also contains data regarding the configuration in effect for - the given URL, and the execution plan for generating a response. + This object represents the metadata of an HTTP request message; that + is, it contains attributes which describe the environment in which + the request URL, headers, and body were sent (if you want tools to + interpret the headers and body, those are elsewhere, mostly in + Tools). This 'metadata' consists of socket data, transport + characteristics, and the Request-Line. This object also contains + data regarding the configuration in effect for the given URL, and + the execution plan for generating a response. """ prev = None + """The previous Request object (if any). + + This should be None unless we are processing an InternalRedirect. """ - The previous Request object (if any). This should be None - unless we are processing an InternalRedirect.""" # Conversation/connection attributes local = httputil.Host('127.0.0.1', 80) @@ -216,9 +216,10 @@ class Request(object): 'An httputil.Host(ip, port, hostname) object for the client socket.' scheme = 'http' + """The protocol used between client and server. + + In most cases, this will be either 'http' or 'https'. """ - The protocol used between client and server. In most cases, - this will be either 'http' or 'https'.""" server_protocol = 'HTTP/1.1' """ @@ -227,25 +228,30 @@ class Request(object): base = '' """The (scheme://host) portion of the requested URL. + In some cases (e.g. when proxying via mod_rewrite), this may contain path segments which cherrypy.url uses when constructing url's, but - which otherwise are ignored by CherryPy. Regardless, this value - MUST NOT end in a slash.""" + which otherwise are ignored by CherryPy. 
Regardless, this value MUST + NOT end in a slash. + """ # Request-Line attributes request_line = '' + """The complete Request-Line received from the client. + + This is a single string consisting of the request method, URI, and + protocol version (joined by spaces). Any final CRLF is removed. """ - The complete Request-Line received from the client. This is a - single string consisting of the request method, URI, and protocol - version (joined by spaces). Any final CRLF is removed.""" method = 'GET' + """Indicates the HTTP method to be performed on the resource identified by + the Request-URI. + + Common methods include GET, HEAD, POST, PUT, and DELETE. CherryPy + allows any extension method; however, various HTTP servers and + gateways may restrict the set of allowable methods. CherryPy + applications SHOULD restrict the set (on a per-URI basis). """ - Indicates the HTTP method to be performed on the resource identified - by the Request-URI. Common methods include GET, HEAD, POST, PUT, and - DELETE. CherryPy allows any extension method; however, various HTTP - servers and gateways may restrict the set of allowable methods. - CherryPy applications SHOULD restrict the set (on a per-URI basis).""" query_string = '' """ @@ -277,22 +283,26 @@ class Request(object): A dict which combines query string (GET) and request entity (POST) variables. This is populated in two stages: GET params are added before the 'on_start_resource' hook, and POST params are added - between the 'before_request_body' and 'before_handler' hooks.""" + between the 'before_request_body' and 'before_handler' hooks. + """ # Message attributes header_list = [] + """A list of the HTTP request headers as (name, value) tuples. + + In general, you should use request.headers (a dict) instead. """ - A list of the HTTP request headers as (name, value) tuples. - In general, you should use request.headers (a dict) instead.""" headers = httputil.HeaderMap() - """ - A dict-like object containing the request headers. Keys are header + """A dict-like object containing the request headers. + + Keys are header names (in Title-Case format); however, you may get and set them in a case-insensitive manner. That is, headers['Content-Type'] and headers['content-type'] refer to the same value. Values are header values (decoded according to :rfc:`2047` if necessary). See also: - httputil.HeaderMap, httputil.HeaderElement.""" + httputil.HeaderMap, httputil.HeaderElement. + """ cookie = SimpleCookie() """See help(Cookie).""" @@ -336,7 +346,8 @@ class Request(object): or multipart, this will be None. Otherwise, this will be an instance of :class:`RequestBody` (which you can .read()); this value is set between the 'before_request_body' and - 'before_handler' hooks (assuming that process_request_body is True).""" + 'before_handler' hooks (assuming that process_request_body is True). + """ # Dispatch attributes dispatch = cherrypy.dispatch.Dispatcher() @@ -347,23 +358,24 @@ class Request(object): calls the dispatcher as early as possible, passing it a 'path_info' argument. - The default dispatcher discovers the page handler by matching path_info - to a hierarchical arrangement of objects, starting at request.app.root. - See help(cherrypy.dispatch) for more information.""" + The default dispatcher discovers the page handler by matching + path_info to a hierarchical arrangement of objects, starting at + request.app.root. See help(cherrypy.dispatch) for more information. 
+ """ script_name = '' - """ - The 'mount point' of the application which is handling this request. + """The 'mount point' of the application which is handling this request. This attribute MUST NOT end in a slash. If the script_name refers to the root of the URI, it MUST be an empty string (not "/"). """ path_info = '/' + """The 'relative path' portion of the Request-URI. + + This is relative to the script_name ('mount point') of the + application which is handling this request. """ - The 'relative path' portion of the Request-URI. This is relative - to the script_name ('mount point') of the application which is - handling this request.""" login = None """ @@ -391,14 +403,16 @@ class Request(object): of the form: {Toolbox.namespace: {Tool.name: config dict}}.""" config = None + """A flat dict of all configuration entries which apply to the current + request. + + These entries are collected from global config, application config + (based on request.path_info), and from handler config (exactly how + is governed by the request.dispatch object in effect for this + request; by default, handler config can be attached anywhere in the + tree between request.app.root and the final handler, and inherits + downward). """ - A flat dict of all configuration entries which apply to the - current request. These entries are collected from global config, - application config (based on request.path_info), and from handler - config (exactly how is governed by the request.dispatch object in - effect for this request; by default, handler config can be attached - anywhere in the tree between request.app.root and the final handler, - and inherits downward).""" is_index = None """ @@ -409,13 +423,14 @@ class Request(object): the trailing slash. See cherrypy.tools.trailing_slash.""" hooks = HookMap(hookpoints) - """ - A HookMap (dict-like object) of the form: {hookpoint: [hook, ...]}. + """A HookMap (dict-like object) of the form: {hookpoint: [hook, ...]}. + Each key is a str naming the hook point, and each value is a list of hooks which will be called at that hook point during this request. The list of hooks is generally populated as early as possible (mostly from Tools specified in config), but may be extended at any time. - See also: _cprequest.Hook, _cprequest.HookMap, and cherrypy.tools.""" + See also: _cprequest.Hook, _cprequest.HookMap, and cherrypy.tools. + """ error_response = cherrypy.HTTPError(500).set_response """ @@ -428,12 +443,11 @@ class Request(object): error response to the user-agent.""" error_page = {} - """ - A dict of {error code: response filename or callable} pairs. + """A dict of {error code: response filename or callable} pairs. The error code must be an int representing a given HTTP error code, - or the string 'default', which will be used if no matching entry - is found for a given numeric code. + or the string 'default', which will be used if no matching entry is + found for a given numeric code. If a filename is provided, the file should contain a Python string- formatting template, and can expect by default to receive format @@ -447,8 +461,8 @@ class Request(object): iterable of strings which will be set to response.body. It may also override headers or perform any other processing. - If no entry is given for an error code, and no 'default' entry exists, - a default template will be used. + If no entry is given for an error code, and no 'default' entry + exists, a default template will be used. 
""" show_tracebacks = True @@ -473,9 +487,10 @@ class Request(object): """True once the close method has been called, False otherwise.""" stage = None + """A string containing the stage reached in the request-handling process. + + This is useful when debugging a live server with hung requests. """ - A string containing the stage reached in the request-handling process. - This is useful when debugging a live server with hung requests.""" unique_id = None """A lazy object generating and memorizing UUID4 on ``str()`` render.""" @@ -492,9 +507,10 @@ class Request(object): server_protocol='HTTP/1.1'): """Populate a new Request object. - local_host should be an httputil.Host object with the server info. - remote_host should be an httputil.Host object with the client info. - scheme should be a string, either "http" or "https". + local_host should be an httputil.Host object with the server + info. remote_host should be an httputil.Host object with the + client info. scheme should be a string, either "http" or + "https". """ self.local = local_host self.remote = remote_host @@ -514,7 +530,10 @@ class Request(object): self.unique_id = LazyUUID4() def close(self): - """Run cleanup code. (Core)""" + """Run cleanup code. + + (Core) + """ if not self.closed: self.closed = True self.stage = 'on_end_request' @@ -551,7 +570,6 @@ class Request(object): Consumer code (HTTP servers) should then access these response attributes to build the outbound stream. - """ response = cherrypy.serving.response self.stage = 'run' @@ -631,7 +649,10 @@ class Request(object): return response def respond(self, path_info): - """Generate a response for the resource at self.path_info. (Core)""" + """Generate a response for the resource at self.path_info. + + (Core) + """ try: try: try: @@ -702,7 +723,10 @@ class Request(object): response.finalize() def process_query_string(self): - """Parse the query string into Python structures. (Core)""" + """Parse the query string into Python structures. + + (Core) + """ try: p = httputil.parse_query_string( self.query_string, encoding=self.query_string_encoding) @@ -715,7 +739,10 @@ class Request(object): self.params.update(p) def process_headers(self): - """Parse HTTP header data into Python structures. (Core)""" + """Parse HTTP header data into Python structures. + + (Core) + """ # Process the headers into self.headers headers = self.headers for name, value in self.header_list: @@ -751,7 +778,10 @@ class Request(object): self.base = '%s://%s' % (self.scheme, host) def get_resource(self, path): - """Call a dispatcher (which sets self.handler and .config). (Core)""" + """Call a dispatcher (which sets self.handler and .config). + + (Core) + """ # First, see if there is a custom dispatch at this URI. Custom # dispatchers can only be specified in app.config, not in _cp_config # (since custom dispatchers may not even have an app.root). @@ -762,7 +792,10 @@ class Request(object): dispatch(path) def handle_error(self): - """Handle the last unanticipated exception. (Core)""" + """Handle the last unanticipated exception. + + (Core) + """ try: self.hooks.run('before_error_response') if self.error_response: @@ -776,7 +809,6 @@ class Request(object): class ResponseBody(object): - """The body of the HTTP response (the response entity).""" unicode_err = ('Page handlers MUST return bytes. 
Use tools.encode ' @@ -802,18 +834,18 @@ class ResponseBody(object): class Response(object): - """An HTTP Response, including status, headers, and body.""" status = '' """The HTTP Status-Code and Reason-Phrase.""" header_list = [] - """ - A list of the HTTP response headers as (name, value) tuples. + """A list of the HTTP response headers as (name, value) tuples. + In general, you should use response.headers (a dict) instead. This attribute is generated from response.headers and is not valid until - after the finalize phase.""" + after the finalize phase. + """ headers = httputil.HeaderMap() """ @@ -833,7 +865,10 @@ class Response(object): """The body (entity) of the HTTP response.""" time = None - """The value of time.time() when created. Use in HTTP dates.""" + """The value of time.time() when created. + + Use in HTTP dates. + """ stream = False """If False, buffer the response body.""" @@ -861,15 +896,15 @@ class Response(object): return new_body def _flush_body(self): - """ - Discard self.body but consume any generator such that - any finalization can occur, such as is required by - caching.tee_output(). - """ + """Discard self.body but consume any generator such that any + finalization can occur, such as is required by caching.tee_output().""" consume(iter(self.body)) def finalize(self): - """Transform headers (and cookies) into self.header_list. (Core)""" + """Transform headers (and cookies) into self.header_list. + + (Core) + """ try: code, reason, _ = httputil.valid_status(self.status) except ValueError: diff --git a/lib/cherrypy/_cpserver.py b/lib/cherrypy/_cpserver.py index 5f8d98fa..62331673 100644 --- a/lib/cherrypy/_cpserver.py +++ b/lib/cherrypy/_cpserver.py @@ -50,7 +50,8 @@ class Server(ServerAdapter): """If given, the name of the UNIX socket to use instead of TCP/IP. When this option is not None, the `socket_host` and `socket_port` options - are ignored.""" + are ignored. + """ socket_queue_size = 5 """The 'backlog' argument to socket.listen(); specifies the maximum number @@ -79,17 +80,24 @@ class Server(ServerAdapter): """The number of worker threads to start up in the pool.""" thread_pool_max = -1 - """The maximum size of the worker-thread pool. Use -1 to indicate no limit. + """The maximum size of the worker-thread pool. + + Use -1 to indicate no limit. """ max_request_header_size = 500 * 1024 """The maximum number of bytes allowable in the request headers. - If exceeded, the HTTP server should return "413 Request Entity Too Large". + + If exceeded, the HTTP server should return "413 Request Entity Too + Large". """ max_request_body_size = 100 * 1024 * 1024 - """The maximum number of bytes allowable in the request body. If exceeded, - the HTTP server should return "413 Request Entity Too Large".""" + """The maximum number of bytes allowable in the request body. + + If exceeded, the HTTP server should return "413 Request Entity Too + Large". + """ instance = None """If not None, this should be an HTTP server instance (such as @@ -119,7 +127,8 @@ class Server(ServerAdapter): the builtin WSGI server. Builtin options are: 'builtin' (to use the SSL library built into recent versions of Python). You may also register your own classes in the - cheroot.server.ssl_adapters dict.""" + cheroot.server.ssl_adapters dict. + """ statistics = False """Turns statistics-gathering on or off for aware HTTP servers.""" @@ -129,11 +138,13 @@ class Server(ServerAdapter): wsgi_version = (1, 0) """The WSGI version tuple to use with the builtin WSGI server. 
- The provided options are (1, 0) [which includes support for PEP 3333, - which declares it covers WSGI version 1.0.1 but still mandates the - wsgi.version (1, 0)] and ('u', 0), an experimental unicode version. - You may create and register your own experimental versions of the WSGI - protocol by adding custom classes to the cheroot.server.wsgi_gateways dict. + + The provided options are (1, 0) [which includes support for PEP + 3333, which declares it covers WSGI version 1.0.1 but still mandates + the wsgi.version (1, 0)] and ('u', 0), an experimental unicode + version. You may create and register your own experimental versions + of the WSGI protocol by adding custom classes to the + cheroot.server.wsgi_gateways dict. """ peercreds = False @@ -184,7 +195,8 @@ class Server(ServerAdapter): def bind_addr(self): """Return bind address. - A (host, port) tuple for TCP sockets or a str for Unix domain sockts. + A (host, port) tuple for TCP sockets or a str for Unix domain + sockets. """ if self.socket_file: return self.socket_file diff --git a/lib/cherrypy/_cptools.py b/lib/cherrypy/_cptools.py index 716f99a4..e47c046e 100644 --- a/lib/cherrypy/_cptools.py +++ b/lib/cherrypy/_cptools.py @@ -1,7 +1,7 @@ """CherryPy tools. A "tool" is any helper, adapted to CP. -Tools are usually designed to be used in a variety of ways (although some -may only offer one if they choose): +Tools are usually designed to be used in a variety of ways (although +some may only offer one if they choose): Library calls All tools are callables that can be used wherever needed. @@ -48,10 +48,10 @@ _attr_error = ( class Tool(object): - """A registered function for use with CherryPy request-processing hooks. - help(tool.callable) should give you more information about this Tool. + help(tool.callable) should give you more information about this + Tool. """ namespace = 'tools' @@ -135,8 +135,8 @@ class Tool(object): def _setup(self): """Hook this tool into cherrypy.request. - The standard CherryPy request object will automatically call this - method when the tool is "turned on" in config. + The standard CherryPy request object will automatically call + this method when the tool is "turned on" in config. """ conf = self._merged_args() p = conf.pop('priority', None) @@ -147,15 +147,15 @@ class Tool(object): class HandlerTool(Tool): - """Tool which is called 'before main', that may skip normal handlers. - If the tool successfully handles the request (by setting response.body), - if should return True. This will cause CherryPy to skip any 'normal' page - handler. If the tool did not handle the request, it should return False - to tell CherryPy to continue on and call the normal page handler. If the - tool is declared AS a page handler (see the 'handler' method), returning - False will raise NotFound. + If the tool successfully handles the request (by setting + response.body), if should return True. This will cause CherryPy to + skip any 'normal' page handler. If the tool did not handle the + request, it should return False to tell CherryPy to continue on and + call the normal page handler. If the tool is declared AS a page + handler (see the 'handler' method), returning False will raise + NotFound. """ def __init__(self, callable, name=None): @@ -185,8 +185,8 @@ class HandlerTool(Tool): def _setup(self): """Hook this tool into cherrypy.request. - The standard CherryPy request object will automatically call this - method when the tool is "turned on" in config. 
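(Reviewer note: several `_setup` docstrings are reflowed in this file; here is a short sketch of what "turned on" in config means in practice. The hook function and header name are assumptions.)

```python
import cherrypy

def stamp_response():
    cherrypy.response.headers['X-Example'] = 'yes'  # illustrative header

# Register the callable as a Tool; enabling it in config makes the request
# object call the tool's _setup(), which attaches the hook shown above.
cherrypy.tools.stamp = cherrypy.Tool('before_finalize', stamp_response)

conf = {'/': {'tools.stamp.on': True}}
```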
+ The standard CherryPy request object will automatically call + this method when the tool is "turned on" in config. """ conf = self._merged_args() p = conf.pop('priority', None) @@ -197,7 +197,6 @@ class HandlerTool(Tool): class HandlerWrapperTool(Tool): - """Tool which wraps request.handler in a provided wrapper function. The 'newhandler' arg must be a handler wrapper function that takes a @@ -232,7 +231,6 @@ class HandlerWrapperTool(Tool): class ErrorTool(Tool): - """Tool which is used to replace the default request.error_response.""" def __init__(self, callable, name=None): @@ -244,8 +242,8 @@ class ErrorTool(Tool): def _setup(self): """Hook this tool into cherrypy.request. - The standard CherryPy request object will automatically call this - method when the tool is "turned on" in config. + The standard CherryPy request object will automatically call + this method when the tool is "turned on" in config. """ cherrypy.serving.request.error_response = self._wrapper @@ -254,7 +252,6 @@ class ErrorTool(Tool): class SessionTool(Tool): - """Session Tool for CherryPy. sessions.locking @@ -282,8 +279,8 @@ class SessionTool(Tool): def _setup(self): """Hook this tool into cherrypy.request. - The standard CherryPy request object will automatically call this - method when the tool is "turned on" in config. + The standard CherryPy request object will automatically call + this method when the tool is "turned on" in config. """ hooks = cherrypy.serving.request.hooks @@ -325,7 +322,6 @@ class SessionTool(Tool): class XMLRPCController(object): - """A Controller (page handler collection) for XML-RPC. To use it, have your controllers subclass this base class (it will @@ -392,7 +388,6 @@ class SessionAuthTool(HandlerTool): class CachingTool(Tool): - """Caching Tool for CherryPy.""" def _wrapper(self, **kwargs): @@ -416,11 +411,11 @@ class CachingTool(Tool): class Toolbox(object): - """A collection of Tools. This object also functions as a config namespace handler for itself. - Custom toolboxes should be added to each Application's toolboxes dict. + Custom toolboxes should be added to each Application's toolboxes + dict. """ def __init__(self, namespace): diff --git a/lib/cherrypy/_cptree.py b/lib/cherrypy/_cptree.py index 917c5b1a..3dea1c29 100644 --- a/lib/cherrypy/_cptree.py +++ b/lib/cherrypy/_cptree.py @@ -10,19 +10,22 @@ from cherrypy.lib import httputil, reprconf class Application(object): """A CherryPy Application. - Servers and gateways should not instantiate Request objects directly. - Instead, they should ask an Application object for a request object. + Servers and gateways should not instantiate Request objects + directly. Instead, they should ask an Application object for a + request object. - An instance of this class may also be used as a WSGI callable - (WSGI application object) for itself. + An instance of this class may also be used as a WSGI callable (WSGI + application object) for itself. """ root = None - """The top-most container of page handlers for this app. Handlers should - be arranged in a hierarchy of attributes, matching the expected URI - hierarchy; the default dispatcher then searches this hierarchy for a - matching handler. When using a dispatcher other than the default, - this value may be None.""" + """The top-most container of page handlers for this app. + + Handlers should be arranged in a hierarchy of attributes, matching + the expected URI hierarchy; the default dispatcher then searches + this hierarchy for a matching handler. 
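(Reviewer note: the `root` docstring reflowed above is easiest to read against the attribute-to-URI mapping it describes; class names here are illustrative.)

```python
import cherrypy

class Admin:
    @cherrypy.expose
    def index(self):
        return 'admin index'   # served at /admin/

class Root:
    @cherrypy.expose
    def index(self):
        return 'site index'    # served at /

root = Root()
root.admin = Admin()  # the attribute hierarchy mirrors the URI hierarchy
cherrypy.quickstart(root)
```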
When using a dispatcher other + than the default, this value may be None. + """ config = {} """A dict of {path: pathconf} pairs, where 'pathconf' is itself a dict @@ -32,10 +35,16 @@ class Application(object): toolboxes = {'tools': cherrypy.tools} log = None - """A LogManager instance. See _cplogging.""" + """A LogManager instance. + + See _cplogging. + """ wsgiapp = None - """A CPWSGIApp instance. See _cpwsgi.""" + """A CPWSGIApp instance. + + See _cpwsgi. + """ request_class = _cprequest.Request response_class = _cprequest.Response @@ -82,12 +91,15 @@ class Application(object): def script_name(self): # noqa: D401; irrelevant for properties """The URI "mount point" for this app. - A mount point is that portion of the URI which is constant for all URIs - that are serviced by this application; it does not include scheme, - host, or proxy ("virtual host") portions of the URI. + A mount point is that portion of the URI which is constant for + all URIs that are serviced by this application; it does not + include scheme, host, or proxy ("virtual host") portions of the + URI. - For example, if script_name is "/my/cool/app", then the URL - "http://www.example.com/my/cool/app/page1" might be handled by a + For example, if script_name is "/my/cool/app", then the URL " + + http://www.example.com/my/cool/app/page1" + might be handled by a "page1" method on the root object. The value of script_name MUST NOT end in a slash. If the script_name @@ -171,9 +183,9 @@ class Application(object): class Tree(object): """A registry of CherryPy applications, mounted at diverse points. - An instance of this class may also be used as a WSGI callable - (WSGI application object), in which case it dispatches to all - mounted apps. + An instance of this class may also be used as a WSGI callable (WSGI + application object), in which case it dispatches to all mounted + apps. """ apps = {} diff --git a/lib/cherrypy/_cpwsgi.py b/lib/cherrypy/_cpwsgi.py index b4f55fd6..b2a6da52 100644 --- a/lib/cherrypy/_cpwsgi.py +++ b/lib/cherrypy/_cpwsgi.py @@ -1,10 +1,10 @@ """WSGI interface (see PEP 333 and 3333). Note that WSGI environ keys and values are 'native strings'; that is, -whatever the type of "" is. For Python 2, that's a byte string; for Python 3, -it's a unicode string. But PEP 3333 says: "even if Python's str type is -actually Unicode "under the hood", the content of native strings must -still be translatable to bytes via the Latin-1 encoding!" +whatever the type of "" is. For Python 2, that's a byte string; for +Python 3, it's a unicode string. But PEP 3333 says: "even if Python's +str type is actually Unicode "under the hood", the content of native +strings must still be translatable to bytes via the Latin-1 encoding!" """ import sys as _sys @@ -34,7 +34,6 @@ def downgrade_wsgi_ux_to_1x(environ): class VirtualHost(object): - """Select a different WSGI application based on the Host header. This can be useful when running multiple sites within one CP server. @@ -56,7 +55,10 @@ class VirtualHost(object): cherrypy.tree.graft(vhost) """ default = None - """Required. The default WSGI application.""" + """Required. + + The default WSGI application. + """ use_x_forwarded_host = True """If True (the default), any "X-Forwarded-Host" @@ -65,11 +67,12 @@ class VirtualHost(object): domains = {} """A dict of {host header value: application} pairs. - The incoming "Host" request header is looked up in this dict, - and, if a match is found, the corresponding WSGI application - will be called instead of the default. 
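(Reviewer note: the `domains` docstring pairs Host header values with WSGI applications; a minimal sketch under assumed hostnames and app classes.)

```python
import cherrypy

class Main:
    @cherrypy.expose
    def index(self):
        return 'main site'

class Blog:
    @cherrypy.expose
    def index(self):
        return 'blog'

main = cherrypy.Application(Main())
blog = cherrypy.Application(Blog())
# Requests whose Host header matches a key go to that app; anything else
# falls through to the required default application.
vhost = cherrypy._cpwsgi.VirtualHost(main, domains={'blog.example.com': blog})
cherrypy.tree.graft(vhost)
```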
Note that you often need - separate entries for "example.com" and "www.example.com". - In addition, "Host" headers may contain the port number. + + The incoming "Host" request header is looked up in this dict, and, + if a match is found, the corresponding WSGI application will be + called instead of the default. Note that you often need separate + entries for "example.com" and "www.example.com". In addition, "Host" + headers may contain the port number. """ def __init__(self, default, domains=None, use_x_forwarded_host=True): @@ -89,7 +92,6 @@ class VirtualHost(object): class InternalRedirector(object): - """WSGI middleware that handles raised cherrypy.InternalRedirect.""" def __init__(self, nextapp, recursive=False): @@ -137,7 +139,6 @@ class InternalRedirector(object): class ExceptionTrapper(object): - """WSGI middleware that traps exceptions.""" def __init__(self, nextapp, throws=(KeyboardInterrupt, SystemExit)): @@ -226,7 +227,6 @@ class _TrappedResponse(object): class AppResponse(object): - """WSGI response iterable for CherryPy applications.""" def __init__(self, environ, start_response, cpapp): @@ -277,7 +277,10 @@ class AppResponse(object): return next(self.iter_response) def close(self): - """Close and de-reference the current request and response. (Core)""" + """Close and de-reference the current request and response. + + (Core) + """ streaming = _cherrypy.serving.response.stream self.cpapp.release_serving() @@ -380,18 +383,20 @@ class AppResponse(object): class CPWSGIApp(object): - """A WSGI application object for a CherryPy Application.""" pipeline = [ ('ExceptionTrapper', ExceptionTrapper), ('InternalRedirector', InternalRedirector), ] - """A list of (name, wsgiapp) pairs. Each 'wsgiapp' MUST be a - constructor that takes an initial, positional 'nextapp' argument, - plus optional keyword arguments, and returns a WSGI application - (that takes environ and start_response arguments). The 'name' can - be any you choose, and will correspond to keys in self.config.""" + """A list of (name, wsgiapp) pairs. + + Each 'wsgiapp' MUST be a constructor that takes an initial, + positional 'nextapp' argument, plus optional keyword arguments, and + returns a WSGI application (that takes environ and start_response + arguments). The 'name' can be any you choose, and will correspond to + keys in self.config. + """ head = None """Rather than nest all apps in the pipeline on each call, it's only @@ -399,9 +404,12 @@ class CPWSGIApp(object): this to None again if you change self.pipeline after calling self.""" config = {} - """A dict whose keys match names listed in the pipeline. Each - value is a further dict which will be passed to the corresponding - named WSGI callable (from the pipeline) as keyword arguments.""" + """A dict whose keys match names listed in the pipeline. + + Each value is a further dict which will be passed to the + corresponding named WSGI callable (from the pipeline) as keyword + arguments. + """ response_class = AppResponse """The class to instantiate and return as the next app in the WSGI chain. @@ -417,8 +425,8 @@ class CPWSGIApp(object): def tail(self, environ, start_response): """WSGI application callable for the actual CherryPy application. - You probably shouldn't call this; call self.__call__ instead, - so that any WSGI middleware in self.pipeline can run first. + You probably shouldn't call this; call self.__call__ instead, so + that any WSGI middleware in self.pipeline can run first. 
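(Reviewer note: a sketch of the `pipeline` contract described above, i.e. each entry is a constructor taking the next app as its first positional argument. The middleware and its name are hypothetical, and rebinding the list on the instance is one way to avoid mutating the shared class attribute.)

```python
import cherrypy

class PathLogger:
    def __init__(self, nextapp):  # required positional 'nextapp' argument
        self.nextapp = nextapp

    def __call__(self, environ, start_response):
        cherrypy.log('WSGI saw %s' % environ.get('PATH_INFO', ''))
        return self.nextapp(environ, start_response)

class Root:
    @cherrypy.expose
    def index(self):
        return 'ok'

app = cherrypy.tree.mount(Root(), '/')
# Extend before the first request, while the cached 'head' is still None.
app.wsgiapp.pipeline = app.wsgiapp.pipeline + [('path_logger', PathLogger)]
```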
""" return self.response_class(environ, start_response, self.cpapp) diff --git a/lib/cherrypy/_cpwsgi_server.py b/lib/cherrypy/_cpwsgi_server.py index 11dd846a..b8e96deb 100644 --- a/lib/cherrypy/_cpwsgi_server.py +++ b/lib/cherrypy/_cpwsgi_server.py @@ -1,7 +1,7 @@ -""" -WSGI server interface (see PEP 333). +"""WSGI server interface (see PEP 333). -This adds some CP-specific bits to the framework-agnostic cheroot package. +This adds some CP-specific bits to the framework-agnostic cheroot +package. """ import sys @@ -35,10 +35,11 @@ class CPWSGIHTTPRequest(cheroot.server.HTTPRequest): class CPWSGIServer(cheroot.wsgi.Server): """Wrapper for cheroot.wsgi.Server. - cheroot has been designed to not reference CherryPy in any way, - so that it can be used in other frameworks and applications. Therefore, - we wrap it here, so we can set our own mount points from cherrypy.tree - and apply some attributes from config -> cherrypy.server -> wsgi.Server. + cheroot has been designed to not reference CherryPy in any way, so + that it can be used in other frameworks and applications. Therefore, + we wrap it here, so we can set our own mount points from + cherrypy.tree and apply some attributes from config -> + cherrypy.server -> wsgi.Server. """ fmt = 'CherryPy/{cherrypy.__version__} {cheroot.wsgi.Server.version}' diff --git a/lib/cherrypy/_helper.py b/lib/cherrypy/_helper.py index d57cd1f9..497438eb 100644 --- a/lib/cherrypy/_helper.py +++ b/lib/cherrypy/_helper.py @@ -137,7 +137,6 @@ def popargs(*args, **kwargs): class Root: def index(self): #... - """ # Since keyword arg comes after *args, we have to process it ourselves # for lower versions of python. @@ -201,16 +200,17 @@ def url(path='', qs='', script_name=None, base=None, relative=None): If it does not start with a slash, this returns (base + script_name [+ request.path_info] + path + qs). - If script_name is None, cherrypy.request will be used - to find a script_name, if available. + If script_name is None, cherrypy.request will be used to find a + script_name, if available. If base is None, cherrypy.request.base will be used (if available). Note that you can use cherrypy.tools.proxy to change this. - Finally, note that this function can be used to obtain an absolute URL - for the current request path (minus the querystring) by passing no args. - If you call url(qs=cherrypy.request.query_string), you should get the - original browser URL (assuming no internal redirections). + Finally, note that this function can be used to obtain an absolute + URL for the current request path (minus the querystring) by passing + no args. If you call url(qs=cherrypy.request.query_string), you + should get the original browser URL (assuming no internal + redirections). If relative is None or not provided, request.app.relative_urls will be used (if available, else False). If False, the output will be an @@ -320,8 +320,8 @@ def normalize_path(path): class _ClassPropertyDescriptor(object): """Descript for read-only class-based property. - Turns a classmethod-decorated func into a read-only property of that class - type (means the value cannot be set). + Turns a classmethod-decorated func into a read-only property of that + class type (means the value cannot be set). """ def __init__(self, fget, fset=None): diff --git a/lib/cherrypy/_json.py b/lib/cherrypy/_json.py index 0c2a0f0e..4ef85580 100644 --- a/lib/cherrypy/_json.py +++ b/lib/cherrypy/_json.py @@ -1,5 +1,4 @@ -""" -JSON support. +"""JSON support. 
Expose preferred json module as json and provide encode/decode convenience functions. diff --git a/lib/cherrypy/lib/__init__.py b/lib/cherrypy/lib/__init__.py index 0edaaf20..9788ffdf 100644 --- a/lib/cherrypy/lib/__init__.py +++ b/lib/cherrypy/lib/__init__.py @@ -6,8 +6,8 @@ def is_iterator(obj): (i.e. like a generator). - This will return False for objects which are iterable, - but not iterators themselves. + This will return False for objects which are iterable, but not + iterators themselves. """ from types import GeneratorType if isinstance(obj, GeneratorType): diff --git a/lib/cherrypy/lib/auth_basic.py b/lib/cherrypy/lib/auth_basic.py index ad379a26..b938a560 100644 --- a/lib/cherrypy/lib/auth_basic.py +++ b/lib/cherrypy/lib/auth_basic.py @@ -18,7 +18,6 @@ as the credentials store:: 'tools.auth_basic.accept_charset': 'UTF-8', } app_config = { '/' : basic_auth } - """ import binascii diff --git a/lib/cherrypy/lib/auth_digest.py b/lib/cherrypy/lib/auth_digest.py index 981e9a5d..46749268 100644 --- a/lib/cherrypy/lib/auth_digest.py +++ b/lib/cherrypy/lib/auth_digest.py @@ -55,7 +55,7 @@ def TRACE(msg): def get_ha1_dict_plain(user_password_dict): - """Returns a get_ha1 function which obtains a plaintext password from a + """Return a get_ha1 function which obtains a plaintext password from a dictionary of the form: {username : password}. If you want a simple dictionary-based authentication scheme, with plaintext @@ -72,7 +72,7 @@ def get_ha1_dict_plain(user_password_dict): def get_ha1_dict(user_ha1_dict): - """Returns a get_ha1 function which obtains a HA1 password hash from a + """Return a get_ha1 function which obtains a HA1 password hash from a dictionary of the form: {username : HA1}. If you want a dictionary-based authentication scheme, but with @@ -87,7 +87,7 @@ def get_ha1_dict(user_ha1_dict): def get_ha1_file_htdigest(filename): - """Returns a get_ha1 function which obtains a HA1 password hash from a + """Return a get_ha1 function which obtains a HA1 password hash from a flat file with lines of the same format as that produced by the Apache htdigest utility. For example, for realm 'wonderland', username 'alice', and password '4x5istwelve', the htdigest line would be:: @@ -135,7 +135,7 @@ def synthesize_nonce(s, key, timestamp=None): def H(s): - """The hash function H""" + """The hash function H.""" return md5_hex(s) @@ -259,10 +259,11 @@ class HttpDigestAuthorization(object): return False def is_nonce_stale(self, max_age_seconds=600): - """Returns True if a validated nonce is stale. The nonce contains a - timestamp in plaintext and also a secure hash of the timestamp. - You should first validate the nonce to ensure the plaintext - timestamp is not spoofed. + """Return True if a validated nonce is stale. + + The nonce contains a timestamp in plaintext and also a secure + hash of the timestamp. You should first validate the nonce to + ensure the plaintext timestamp is not spoofed. """ try: timestamp, hashpart = self.nonce.split(':', 1) @@ -275,7 +276,10 @@ class HttpDigestAuthorization(object): return True def HA2(self, entity_body=''): - """Returns the H(A2) string. See :rfc:`2617` section 3.2.2.3.""" + """Return the H(A2) string. + + See :rfc:`2617` section 3.2.2.3. + """ # RFC 2617 3.2.2.3 # If the "qop" directive's value is "auth" or is unspecified, # then A2 is: @@ -306,7 +310,6 @@ class HttpDigestAuthorization(object): 4.3. This refers to the entity the user agent sent in the request which has the Authorization header. 
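(Reviewer note: the docstring examples above (realm 'wonderland', user 'alice', password '4x5istwelve') translate to the usual tool configuration; the `key` value below is an arbitrary placeholder secret.)

```python
import cherrypy
from cherrypy.lib import auth_digest

conf = {'/': {
    'tools.auth_digest.on': True,
    'tools.auth_digest.realm': 'wonderland',
    'tools.auth_digest.get_ha1': auth_digest.get_ha1_dict_plain(
        {'alice': '4x5istwelve'}),
    # A secret known only to the server, used to synthesize nonces.
    'tools.auth_digest.key': 'a565c27146791cfb',
}}
```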
Typically GET requests don't have an entity, and POST requests do. - """ ha2 = self.HA2(entity_body) # Request-Digest -- RFC 2617 3.2.2.1 @@ -395,7 +398,6 @@ def digest_auth(realm, get_ha1, key, debug=False, accept_charset='utf-8'): key A secret string known only to the server, used in the synthesis of nonces. - """ request = cherrypy.serving.request @@ -447,9 +449,7 @@ def digest_auth(realm, get_ha1, key, debug=False, accept_charset='utf-8'): def _respond_401(realm, key, accept_charset, debug, **kwargs): - """ - Respond with 401 status and a WWW-Authenticate header - """ + """Respond with 401 status and a WWW-Authenticate header.""" header = www_authenticate( realm, key, accept_charset=accept_charset, diff --git a/lib/cherrypy/lib/caching.py b/lib/cherrypy/lib/caching.py index 08d2d8e4..89cb0442 100644 --- a/lib/cherrypy/lib/caching.py +++ b/lib/cherrypy/lib/caching.py @@ -42,7 +42,6 @@ from cherrypy.lib import cptools, httputil class Cache(object): - """Base class for Cache implementations.""" def get(self): @@ -64,17 +63,16 @@ class Cache(object): # ------------------------------ Memory Cache ------------------------------- # class AntiStampedeCache(dict): - """A storage system for cached items which reduces stampede collisions.""" def wait(self, key, timeout=5, debug=False): """Return the cached value for the given key, or None. - If timeout is not None, and the value is already - being calculated by another thread, wait until the given timeout has - elapsed. If the value is available before the timeout expires, it is - returned. If not, None is returned, and a sentinel placed in the cache - to signal other threads to wait. + If timeout is not None, and the value is already being + calculated by another thread, wait until the given timeout has + elapsed. If the value is available before the timeout expires, + it is returned. If not, None is returned, and a sentinel placed + in the cache to signal other threads to wait. If timeout is None, no waiting is performed nor sentinels used. """ @@ -127,7 +125,6 @@ class AntiStampedeCache(dict): class MemoryCache(Cache): - """An in-memory cache for varying response content. Each key in self.store is a URI, and each value is an AntiStampedeCache. @@ -381,7 +378,10 @@ def get(invalid_methods=('POST', 'PUT', 'DELETE'), debug=False, **kwargs): def tee_output(): - """Tee response output to cache storage. Internal.""" + """Tee response output to cache storage. + + Internal. + """ # Used by CachingTool by attaching to request.hooks request = cherrypy.serving.request @@ -441,7 +441,6 @@ def expires(secs=0, force=False, debug=False): * Expires If any are already present, none of the above response headers are set. - """ response = cherrypy.serving.response diff --git a/lib/cherrypy/lib/cpstats.py b/lib/cherrypy/lib/cpstats.py index 111af063..5dff319b 100644 --- a/lib/cherrypy/lib/cpstats.py +++ b/lib/cherrypy/lib/cpstats.py @@ -184,7 +184,6 @@ To report statistics:: To format statistics reports:: See 'Reporting', above. - """ import logging @@ -254,7 +253,6 @@ def proc_time(s): class ByteCountWrapper(object): - """Wraps a file-like object, counting the number of bytes read.""" def __init__(self, rfile): @@ -307,7 +305,6 @@ def _get_threading_ident(): class StatsTool(cherrypy.Tool): - """Record various information about the current request.""" def __init__(self): @@ -316,8 +313,8 @@ class StatsTool(cherrypy.Tool): def _setup(self): """Hook this tool into cherrypy.request. 
- The standard CherryPy request object will automatically call this - method when the tool is "turned on" in config. + The standard CherryPy request object will automatically call + this method when the tool is "turned on" in config. """ if appstats.get('Enabled', False): cherrypy.Tool._setup(self) diff --git a/lib/cherrypy/lib/cptools.py b/lib/cherrypy/lib/cptools.py index 613a8995..61d4d36b 100644 --- a/lib/cherrypy/lib/cptools.py +++ b/lib/cherrypy/lib/cptools.py @@ -94,8 +94,8 @@ def validate_etags(autotags=False, debug=False): def validate_since(): """Validate the current Last-Modified against If-Modified-Since headers. - If no code has set the Last-Modified response header, then no validation - will be performed. + If no code has set the Last-Modified response header, then no + validation will be performed. """ response = cherrypy.serving.response lastmod = response.headers.get('Last-Modified') @@ -123,9 +123,9 @@ def validate_since(): def allow(methods=None, debug=False): """Raise 405 if request.method not in methods (default ['GET', 'HEAD']). - The given methods are case-insensitive, and may be in any order. - If only one method is allowed, you may supply a single string; - if more than one, supply a list of strings. + The given methods are case-insensitive, and may be in any order. If + only one method is allowed, you may supply a single string; if more + than one, supply a list of strings. Regardless of whether the current method is allowed or not, this also emits an 'Allow' response header, containing the given methods. @@ -154,22 +154,23 @@ def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For', scheme='X-Forwarded-Proto', debug=False): """Change the base URL (scheme://host[:port][/path]). - For running a CP server behind Apache, lighttpd, or other HTTP server. + For running a CP server behind Apache, lighttpd, or other HTTP + server. - For Apache and lighttpd, you should leave the 'local' argument at the - default value of 'X-Forwarded-Host'. For Squid, you probably want to set - tools.proxy.local = 'Origin'. + For Apache and lighttpd, you should leave the 'local' argument at + the default value of 'X-Forwarded-Host'. For Squid, you probably + want to set tools.proxy.local = 'Origin'. - If you want the new request.base to include path info (not just the host), - you must explicitly set base to the full base path, and ALSO set 'local' - to '', so that the X-Forwarded-Host request header (which never includes - path info) does not override it. Regardless, the value for 'base' MUST - NOT end in a slash. + If you want the new request.base to include path info (not just the + host), you must explicitly set base to the full base path, and ALSO + set 'local' to '', so that the X-Forwarded-Host request header + (which never includes path info) does not override it. Regardless, + the value for 'base' MUST NOT end in a slash. cherrypy.request.remote.ip (the IP address of the client) will be - rewritten if the header specified by the 'remote' arg is valid. - By default, 'remote' is set to 'X-Forwarded-For'. If you do not - want to rewrite remote.ip, set the 'remote' arg to an empty string. + rewritten if the header specified by the 'remote' arg is valid. By + default, 'remote' is set to 'X-Forwarded-For'. If you do not want to + rewrite remote.ip, set the 'remote' arg to an empty string. 
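(Reviewer note: a configuration sketch matching the reflowed `proxy` docstring; the header names shown are the documented defaults, and Squid deployments would set `local` to 'Origin'.)

```python
config = {'/': {
    'tools.proxy.on': True,
    'tools.proxy.local': 'X-Forwarded-Host',    # header carrying the host
    'tools.proxy.remote': 'X-Forwarded-For',    # header carrying client IP
    'tools.proxy.scheme': 'X-Forwarded-Proto',  # header carrying the scheme
}}
```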
""" request = cherrypy.serving.request @@ -217,8 +218,8 @@ def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For', def ignore_headers(headers=('Range',), debug=False): """Delete request headers whose field names are included in 'headers'. - This is a useful tool for working behind certain HTTP servers; - for example, Apache duplicates the work that CP does for 'Range' + This is a useful tool for working behind certain HTTP servers; for + example, Apache duplicates the work that CP does for 'Range' headers, and will doubly-truncate the response. """ request = cherrypy.serving.request @@ -281,7 +282,6 @@ def referer(pattern, accept=True, accept_missing=False, error=403, class SessionAuth(object): - """Assert that the user is logged in.""" session_key = 'username' @@ -319,7 +319,10 @@ Message: %(error_msg)s """) % vars()).encode('utf-8') def do_login(self, username, password, from_page='..', **kwargs): - """Login. May raise redirect, or return True if request handled.""" + """Login. + + May raise redirect, or return True if request handled. + """ response = cherrypy.serving.response error_msg = self.check_username_and_password(username, password) if error_msg: @@ -336,7 +339,10 @@ Message: %(error_msg)s raise cherrypy.HTTPRedirect(from_page or '/') def do_logout(self, from_page='..', **kwargs): - """Logout. May raise redirect, or return True if request handled.""" + """Logout. + + May raise redirect, or return True if request handled. + """ sess = cherrypy.session username = sess.get(self.session_key) sess[self.session_key] = None @@ -346,7 +352,9 @@ Message: %(error_msg)s raise cherrypy.HTTPRedirect(from_page) def do_check(self): - """Assert username. Raise redirect, or return True if request handled. + """Assert username. + + Raise redirect, or return True if request handled. """ sess = cherrypy.session request = cherrypy.serving.request @@ -408,8 +416,7 @@ def session_auth(**kwargs): Any attribute of the SessionAuth class may be overridden via a keyword arg to this function: - - """ + '\n '.join( + """ + '\n' + '\n '.join( '{!s}: {!s}'.format(k, type(getattr(SessionAuth, k)).__name__) for k in dir(SessionAuth) if not k.startswith('__') @@ -490,8 +497,8 @@ def trailing_slash(missing=True, extra=False, status=None, debug=False): def flatten(debug=False): """Wrap response.body in a generator that recursively iterates over body. - This allows cherrypy.response.body to consist of 'nested generators'; - that is, a set of generators that yield generators. + This allows cherrypy.response.body to consist of 'nested + generators'; that is, a set of generators that yield generators. """ def flattener(input): numchunks = 0 @@ -622,13 +629,15 @@ def autovary(ignore=None, debug=False): def convert_params(exception=ValueError, error=400): - """Convert request params based on function annotations, with error handling. + """Convert request params based on function annotations. - exception - Exception class to catch. + This function also processes errors that are subclasses of ``exception``. - status - The HTTP error code to return to the client on failure. + :param BaseException exception: Exception class to catch. + :type exception: BaseException + + :param error: The HTTP status code to return to the client on failure. 
+ :type error: int """ request = cherrypy.serving.request types = request.handler.callable.__annotations__ diff --git a/lib/cherrypy/lib/encoding.py b/lib/cherrypy/lib/encoding.py index c2c478a5..068c88d2 100644 --- a/lib/cherrypy/lib/encoding.py +++ b/lib/cherrypy/lib/encoding.py @@ -261,9 +261,7 @@ class ResponseEncoder: def prepare_iter(value): - """ - Ensure response body is iterable and resolves to False when empty. - """ + """Ensure response body is iterable and resolves to False when empty.""" if isinstance(value, text_or_bytes): # strings get wrapped in a list because iterating over a single # item list is much faster than iterating over every character @@ -360,7 +358,6 @@ def gzip(compress_level=5, mime_types=['text/html', 'text/plain'], * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header * No 'gzip' or 'x-gzip' with a qvalue > 0 is present * The 'identity' value is given with a qvalue > 0. - """ request = cherrypy.serving.request response = cherrypy.serving.response diff --git a/lib/cherrypy/lib/gctools.py b/lib/cherrypy/lib/gctools.py index 26746d78..14d3ba7e 100644 --- a/lib/cherrypy/lib/gctools.py +++ b/lib/cherrypy/lib/gctools.py @@ -14,7 +14,6 @@ from cherrypy.process.plugins import SimplePlugin class ReferrerTree(object): - """An object which gathers all referrers of an object to a given depth.""" peek_length = 40 @@ -132,7 +131,6 @@ def get_context(obj): class GCRoot(object): - """A CherryPy page handler for testing reference leaks.""" classes = [ diff --git a/lib/cherrypy/lib/httputil.py b/lib/cherrypy/lib/httputil.py index ced310a0..62bc1bd0 100644 --- a/lib/cherrypy/lib/httputil.py +++ b/lib/cherrypy/lib/httputil.py @@ -71,10 +71,10 @@ def protocol_from_http(protocol_str): def get_ranges(headervalue, content_length): """Return a list of (start, stop) indices from a Range header, or None. - Each (start, stop) tuple will be composed of two ints, which are suitable - for use in a slicing operation. That is, the header "Range: bytes=3-6", - if applied against a Python string, is requesting resource[3:7]. This - function will return the list [(3, 7)]. + Each (start, stop) tuple will be composed of two ints, which are + suitable for use in a slicing operation. That is, the header "Range: + bytes=3-6", if applied against a Python string, is requesting + resource[3:7]. This function will return the list [(3, 7)]. If this function returns an empty list, you should return HTTP 416. """ @@ -127,7 +127,6 @@ def get_ranges(headervalue, content_length): class HeaderElement(object): - """An element (with parameters) from an HTTP header's element list.""" def __init__(self, value, params=None): @@ -169,14 +168,14 @@ q_separator = re.compile(r'; *q *=') class AcceptElement(HeaderElement): - """An element (with parameters) from an Accept* header's element list. - AcceptElement objects are comparable; the more-preferred object will be - "less than" the less-preferred object. They are also therefore sortable; - if you sort a list of AcceptElement objects, they will be listed in - priority order; the most preferred value will be first. Yes, it should - have been the other way around, but it's too late to fix now. + AcceptElement objects are comparable; the more-preferred object will + be "less than" the less-preferred object. They are also therefore + sortable; if you sort a list of AcceptElement objects, they will be + listed in priority order; the most preferred value will be first. + Yes, it should have been the other way around, but it's too late to + fix now. 
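The `get_ranges` contract reflowed earlier in this file's hunks can be checked directly; a quick sketch, assuming the vendored copy imports as `cherrypy.lib.httputil`::

    from cherrypy.lib import httputil

    # The docstring's own example: "Range: bytes=3-6" against an 8-byte
    # resource yields slice-ready (start, stop) indices.
    print(httputil.get_ranges('bytes=3-6', 8))  # [(3, 7)], i.e. resource[3:7]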
""" @classmethod @@ -249,8 +248,7 @@ def header_elements(fieldname, fieldvalue): def decode_TEXT(value): - r""" - Decode :rfc:`2047` TEXT + r"""Decode :rfc:`2047` TEXT. >>> decode_TEXT("=?utf-8?q?f=C3=BCr?=") == b'f\xfcr'.decode('latin-1') True @@ -265,9 +263,7 @@ def decode_TEXT(value): def decode_TEXT_maybe(value): - """ - Decode the text but only if '=?' appears in it. - """ + """Decode the text but only if '=?' appears in it.""" return decode_TEXT(value) if '=?' in value else value @@ -388,7 +384,6 @@ def parse_query_string(query_string, keep_blank_values=True, encoding='utf-8'): class CaseInsensitiveDict(jaraco.collections.KeyTransformingDict): - """A case-insensitive dict subclass. Each key is changed on entry to title case. @@ -417,7 +412,6 @@ else: class HeaderMap(CaseInsensitiveDict): - """A dict subclass for HTTP request and response headers. Each key is changed on entry to str(key).title(). This allows headers @@ -494,7 +488,6 @@ class HeaderMap(CaseInsensitiveDict): class Host(object): - """An internet address. name diff --git a/lib/cherrypy/lib/locking.py b/lib/cherrypy/lib/locking.py index 317fb58c..ea76450a 100644 --- a/lib/cherrypy/lib/locking.py +++ b/lib/cherrypy/lib/locking.py @@ -7,22 +7,22 @@ class NeverExpires(object): class Timer(object): - """ - A simple timer that will indicate when an expiration time has passed. - """ + """A simple timer that will indicate when an expiration time has passed.""" def __init__(self, expiration): 'Create a timer that expires at `expiration` (UTC datetime)' self.expiration = expiration @classmethod def after(cls, elapsed): - """ - Return a timer that will expire after `elapsed` passes. - """ - return cls(datetime.datetime.utcnow() + elapsed) + """Return a timer that will expire after `elapsed` passes.""" + return cls( + datetime.datetime.now(datetime.timezone.utc) + elapsed, + ) def expired(self): - return datetime.datetime.utcnow() >= self.expiration + return datetime.datetime.now( + datetime.timezone.utc, + ) >= self.expiration class LockTimeout(Exception): @@ -30,9 +30,7 @@ class LockTimeout(Exception): class LockChecker(object): - """ - Keep track of the time and detect if a timeout has expired - """ + """Keep track of the time and detect if a timeout has expired.""" def __init__(self, session_id, timeout): self.session_id = session_id if timeout: diff --git a/lib/cherrypy/lib/profiler.py b/lib/cherrypy/lib/profiler.py index fccf2eb8..9cda41d8 100644 --- a/lib/cherrypy/lib/profiler.py +++ b/lib/cherrypy/lib/profiler.py @@ -30,7 +30,6 @@ to get a quick sanity-check on overall CP performance. Use the ``--profile`` flag when running the test suite. Then, use the ``serve()`` function to browse the results in a web browser. If you run this module from the command line, it will call ``serve()`` for you. - """ import io @@ -47,7 +46,9 @@ try: import pstats def new_func_strip_path(func_name): - """Make profiler output more readable by adding `__init__` modules' parents + """Add ``__init__`` modules' parents. + + This makes the profiler output more readable. """ filename, line, name = func_name if filename.endswith('__init__.py'): diff --git a/lib/cherrypy/lib/reprconf.py b/lib/cherrypy/lib/reprconf.py index 76381d7b..53261094 100644 --- a/lib/cherrypy/lib/reprconf.py +++ b/lib/cherrypy/lib/reprconf.py @@ -27,18 +27,17 @@ from cherrypy._cpcompat import text_or_bytes class NamespaceSet(dict): - """A dict of config namespace names and handlers. 
- Each config entry should begin with a namespace name; the corresponding - namespace handler will be called once for each config entry in that - namespace, and will be passed two arguments: the config key (with the - namespace removed) and the config value. + Each config entry should begin with a namespace name; the + corresponding namespace handler will be called once for each config + entry in that namespace, and will be passed two arguments: the + config key (with the namespace removed) and the config value. Namespace handlers may be any Python callable; they may also be - context managers, in which case their __enter__ - method should return a callable to be used as the handler. - See cherrypy.tools (the Toolbox class) for an example. + context managers, in which case their __enter__ method should return + a callable to be used as the handler. See cherrypy.tools (the + Toolbox class) for an example. """ def __call__(self, config): @@ -48,9 +47,10 @@ class NamespaceSet(dict): A flat dict, where keys use dots to separate namespaces, and values are arbitrary. - The first name in each config key is used to look up the corresponding - namespace handler. For example, a config entry of {'tools.gzip.on': v} - will call the 'tools' namespace handler with the args: ('gzip.on', v) + The first name in each config key is used to look up the + corresponding namespace handler. For example, a config entry of + {'tools.gzip.on': v} will call the 'tools' namespace handler + with the args: ('gzip.on', v) """ # Separate the given config into namespaces ns_confs = {} @@ -103,7 +103,6 @@ class NamespaceSet(dict): class Config(dict): - """A dict-like set of configuration data, with defaults and namespaces. May take a file, filename, or dict. @@ -167,7 +166,7 @@ class Parser(configparser.ConfigParser): self._read(fp, filename) def as_dict(self, raw=False, vars=None): - """Convert an INI file to a dictionary""" + """Convert an INI file to a dictionary.""" # Load INI file into a dict result = {} for section in self.sections(): @@ -188,7 +187,7 @@ class Parser(configparser.ConfigParser): def dict_from_file(self, file): if hasattr(file, 'read'): - self.readfp(file) + self.read_file(file) else: self.read(file) return self.as_dict() diff --git a/lib/cherrypy/lib/sessions.py b/lib/cherrypy/lib/sessions.py index 0f56a4fa..68badd93 100644 --- a/lib/cherrypy/lib/sessions.py +++ b/lib/cherrypy/lib/sessions.py @@ -120,7 +120,6 @@ missing = object() class Session(object): - """A CherryPy dict-like Session object (one per request).""" _id = None @@ -148,9 +147,11 @@ class Session(object): to session data.""" loaded = False + """If True, data has been retrieved from storage. + + This should happen automatically on the first attempt to access + session data. """ - If True, data has been retrieved from storage. This should happen - automatically on the first attempt to access session data.""" clean_thread = None 'Class-level Monitor which calls self.clean_up.' @@ -165,9 +166,10 @@ class Session(object): 'True if the session requested by the client did not exist.' regenerated = False + """True if the application called session.regenerate(). + + This is not set by internal calls to regenerate the session id. """ - True if the application called session.regenerate(). This is not set by - internal calls to regenerate the session id.""" debug = False 'If True, log debug information.' @@ -335,8 +337,9 @@ class Session(object): def pop(self, key, default=missing): """Remove the specified key and return the corresponding value. 
- If key is not found, default is returned if given, - otherwise KeyError is raised. + + If key is not found, default is returned if given, otherwise + KeyError is raised. """ if not self.loaded: self.load() @@ -351,13 +354,19 @@ class Session(object): return key in self._data def get(self, key, default=None): - """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.""" + """D.get(k[,d]) -> D[k] if k in D, else d. + + d defaults to None. + """ if not self.loaded: self.load() return self._data.get(key, default) def update(self, d): - """D.update(E) -> None. Update D from E: for k in E: D[k] = E[k].""" + """D.update(E) -> None. + + Update D from E: for k in E: D[k] = E[k]. + """ if not self.loaded: self.load() self._data.update(d) @@ -369,7 +378,10 @@ class Session(object): return self._data.setdefault(key, default) def clear(self): - """D.clear() -> None. Remove all items from D.""" + """D.clear() -> None. + + Remove all items from D. + """ if not self.loaded: self.load() self._data.clear() @@ -492,7 +504,8 @@ class FileSession(Session): """Set up the storage system for file-based sessions. This should only be called once per process; this will be done - automatically when using sessions.init (as the built-in Tool does). + automatically when using sessions.init (as the built-in Tool + does). """ # The 'storage_path' arg is required for file-based sessions. kwargs['storage_path'] = os.path.abspath(kwargs['storage_path']) @@ -616,7 +629,8 @@ class MemcachedSession(Session): """Set up the storage system for memcached-based sessions. This should only be called once per process; this will be done - automatically when using sessions.init (as the built-in Tool does). + automatically when using sessions.init (as the built-in Tool + does). """ for k, v in kwargs.items(): setattr(cls, k, v) diff --git a/lib/cherrypy/lib/static.py b/lib/cherrypy/lib/static.py index 66a5a947..a942b1b5 100644 --- a/lib/cherrypy/lib/static.py +++ b/lib/cherrypy/lib/static.py @@ -1,19 +1,18 @@ """Module with helpers for serving static files.""" +import mimetypes import os import platform import re import stat -import mimetypes -import urllib.parse import unicodedata - +import urllib.parse from email.generator import _make_boundary as make_boundary from io import UnsupportedOperation import cherrypy from cherrypy._cpcompat import ntob -from cherrypy.lib import cptools, httputil, file_generator_limited +from cherrypy.lib import cptools, file_generator_limited, httputil def _setup_mimetypes(): @@ -57,15 +56,15 @@ def serve_file(path, content_type=None, disposition=None, name=None, debug=False): """Set status, headers, and body in order to serve the given path. - The Content-Type header will be set to the content_type arg, if provided. - If not provided, the Content-Type will be guessed by the file extension - of the 'path' argument. + The Content-Type header will be set to the content_type arg, if + provided. If not provided, the Content-Type will be guessed by the + file extension of the 'path' argument. - If disposition is not None, the Content-Disposition header will be set - to "; filename=; filename*=utf-8''" - as described in :rfc:`6266#appendix-D`. - If name is None, it will be set to the basename of path. - If disposition is None, no Content-Disposition header will be written. + If disposition is not None, the Content-Disposition header will be + set to "; filename=; filename*=utf-8''" as + described in :rfc:`6266#appendix-D`. If name is None, it will be set + to the basename of path. 
If disposition is None, no
+ Content-Disposition header will be written.
"""
response = cherrypy.serving.response
@@ -185,7 +184,10 @@ def serve_fileobj(fileobj, content_type=None, disposition=None, name=None,
def _serve_fileobj(fileobj, content_type, content_length, debug=False):
- """Internal. Set response.body to the given file object, perhaps ranged."""
+ """Set ``response.body`` to the given file object, perhaps ranged.
+
+ Internal helper.
+ """
response = cherrypy.serving.response
# HTTP/1.0 didn't have Range/Accept-Ranges headers, or the 206 code
diff --git a/lib/cherrypy/process/plugins.py b/lib/cherrypy/process/plugins.py
index e96fb1ce..20bf1c66 100644
--- a/lib/cherrypy/process/plugins.py
+++ b/lib/cherrypy/process/plugins.py
@@ -31,7 +31,6 @@ _module__file__base = os.getcwd()
class SimplePlugin(object):
-
"""Plugin base class which auto-subscribes methods for known channels."""
bus = None
@@ -59,7 +58,6 @@ class SimplePlugin(object):
class SignalHandler(object):
-
"""Register bus channels (and listeners) for system signals.
You can modify what signals your application listens for, and what it does
@@ -171,8 +169,8 @@ class SignalHandler(object):
If the optional 'listener' argument is provided, it will be subscribed as a listener for the given signal's channel.
- If the given signal name or number is not available on the current
- platform, ValueError is raised.
+ If the given signal name or number is not available on the
+ current platform, ValueError is raised.
"""
if isinstance(signal, text_or_bytes):
signum = getattr(_signal, signal, None)
@@ -218,11 +216,10 @@ except ImportError:
class DropPrivileges(SimplePlugin):
-
"""Drop privileges. uid/gid arguments not available on Windows.
Special thanks to `Gavin Baker
- `_
+ `_.
"""
def __init__(self, bus, umask=None, uid=None, gid=None):
@@ -234,7 +231,10 @@ class DropPrivileges(SimplePlugin):
@property
def uid(self):
- """The uid under which to run. Availability: Unix."""
+ """The uid under which to run.
+
+ Availability: Unix.
+ """
return self._uid
@uid.setter
@@ -250,7 +250,10 @@ class DropPrivileges(SimplePlugin):
@property
def gid(self):
- """The gid under which to run. Availability: Unix."""
+ """The gid under which to run.
+
+ Availability: Unix.
+ """
return self._gid
@gid.setter
@@ -332,7 +335,6 @@ class DropPrivileges(SimplePlugin):
class Daemonizer(SimplePlugin):
-
"""Daemonize the running script.
Use this with a Web Site Process Bus via::
@@ -423,7 +425,6 @@ class Daemonizer(SimplePlugin):
class PIDFile(SimplePlugin):
-
"""Maintain a PID file via a WSPBus."""
def __init__(self, bus, pidfile):
@@ -453,12 +454,11 @@ class PIDFile(SimplePlugin):
class PerpetualTimer(threading.Timer):
-
"""A responsive subclass of threading.Timer whose run() method repeats.
Use this timer only when you really need a very interruptible timer;
- this checks its 'finished' condition up to 20 times a second, which can
- results in pretty high CPU usage
+ this checks its 'finished' condition up to 20 times a second, which
+ can result in pretty high CPU usage
"""
def __init__(self, *args, **kwargs):
@@ -483,14 +483,14 @@ class PerpetualTimer(threading.Timer):
class BackgroundTask(threading.Thread):
-
"""A subclass of threading.Thread whose run() method repeats.
- Use this class for most repeating tasks. It uses time.sleep() to wait
- for each interval, which isn't very responsive; that is, even if you call
- self.cancel(), you'll have to wait until the sleep() call finishes before
- the thread stops.
To compensate, it defaults to being daemonic, which means - it won't delay stopping the whole process. + Use this class for most repeating tasks. It uses time.sleep() to + wait for each interval, which isn't very responsive; that is, even + if you call self.cancel(), you'll have to wait until the sleep() + call finishes before the thread stops. To compensate, it defaults to + being daemonic, which means it won't delay stopping the whole + process. """ def __init__(self, interval, function, args=[], kwargs={}, bus=None): @@ -525,7 +525,6 @@ class BackgroundTask(threading.Thread): class Monitor(SimplePlugin): - """WSPBus listener to periodically run a callback in its own thread.""" callback = None @@ -582,7 +581,6 @@ class Monitor(SimplePlugin): class Autoreloader(Monitor): - """Monitor which re-executes the process when files change. This :ref:`plugin` restarts the process (via :func:`os.execv`) @@ -699,20 +697,20 @@ class Autoreloader(Monitor): class ThreadManager(SimplePlugin): - """Manager for HTTP request threads. If you have control over thread creation and destruction, publish to - the 'acquire_thread' and 'release_thread' channels (for each thread). - This will register/unregister the current thread and publish to - 'start_thread' and 'stop_thread' listeners in the bus as needed. + the 'acquire_thread' and 'release_thread' channels (for each + thread). This will register/unregister the current thread and + publish to 'start_thread' and 'stop_thread' listeners in the bus as + needed. If threads are created and destroyed by code you do not control (e.g., Apache), then, at the beginning of every HTTP request, publish to 'acquire_thread' only. You should not publish to - 'release_thread' in this case, since you do not know whether - the thread will be re-used or not. The bus will call - 'stop_thread' listeners for you when it stops. + 'release_thread' in this case, since you do not know whether the + thread will be re-used or not. The bus will call 'stop_thread' + listeners for you when it stops. """ threads = None diff --git a/lib/cherrypy/process/servers.py b/lib/cherrypy/process/servers.py index 717a8de0..fdd4bb68 100644 --- a/lib/cherrypy/process/servers.py +++ b/lib/cherrypy/process/servers.py @@ -132,7 +132,6 @@ class Timeouts: class ServerAdapter(object): - """Adapter for an HTTP server. If you need to start more than one HTTP server (to serve on multiple @@ -188,9 +187,7 @@ class ServerAdapter(object): @property def description(self): - """ - A description about where this server is bound. - """ + """A description about where this server is bound.""" if self.bind_addr is None: on_what = 'unknown interface (dynamic?)' elif isinstance(self.bind_addr, tuple): @@ -292,7 +289,6 @@ class ServerAdapter(object): class FlupCGIServer(object): - """Adapter for a flup.server.cgi.WSGIServer.""" def __init__(self, *args, **kwargs): @@ -316,7 +312,6 @@ class FlupCGIServer(object): class FlupFCGIServer(object): - """Adapter for a flup.server.fcgi.WSGIServer.""" def __init__(self, *args, **kwargs): @@ -362,7 +357,6 @@ class FlupFCGIServer(object): class FlupSCGIServer(object): - """Adapter for a flup.server.scgi.WSGIServer.""" def __init__(self, *args, **kwargs): diff --git a/lib/cherrypy/process/win32.py b/lib/cherrypy/process/win32.py index b7a79b1b..305596ba 100644 --- a/lib/cherrypy/process/win32.py +++ b/lib/cherrypy/process/win32.py @@ -1,4 +1,7 @@ -"""Windows service. Requires pywin32.""" +"""Windows service. + +Requires pywin32. 
+""" import os import win32api @@ -11,7 +14,6 @@ from cherrypy.process import wspbus, plugins class ConsoleCtrlHandler(plugins.SimplePlugin): - """A WSPBus plugin for handling Win32 console events (like Ctrl-C).""" def __init__(self, bus): @@ -69,10 +71,10 @@ class ConsoleCtrlHandler(plugins.SimplePlugin): class Win32Bus(wspbus.Bus): - """A Web Site Process Bus implementation for Win32. - Instead of time.sleep, this bus blocks using native win32event objects. + Instead of time.sleep, this bus blocks using native win32event + objects. """ def __init__(self): @@ -120,7 +122,6 @@ class Win32Bus(wspbus.Bus): class _ControlCodes(dict): - """Control codes used to "signal" a service via ControlService. User-defined control codes are in the range 128-255. We generally use @@ -152,7 +153,6 @@ def signal_child(service, command): class PyWebService(win32serviceutil.ServiceFramework): - """Python Web Service.""" _svc_name_ = 'Python Web Service' diff --git a/lib/cherrypy/process/wspbus.py b/lib/cherrypy/process/wspbus.py index 1d2789b1..57b08b83 100644 --- a/lib/cherrypy/process/wspbus.py +++ b/lib/cherrypy/process/wspbus.py @@ -57,7 +57,6 @@ the new state.:: | \ | | V V STARTED <-- STARTING - """ import atexit @@ -65,7 +64,7 @@ import atexit try: import ctypes except ImportError: - """Google AppEngine is shipped without ctypes + """Google AppEngine is shipped without ctypes. :seealso: http://stackoverflow.com/a/6523777/70170 """ @@ -165,8 +164,8 @@ class Bus(object): All listeners for a given channel are guaranteed to be called even if others at the same channel fail. Each failure is logged, but execution proceeds on to the next listener. The only way to stop all - processing from inside a listener is to raise SystemExit and stop the - whole server. + processing from inside a listener is to raise SystemExit and stop + the whole server. """ states = states @@ -312,8 +311,9 @@ class Bus(object): def restart(self): """Restart the process (may close connections). - This method does not restart the process from the calling thread; - instead, it stops the bus and asks the main thread to call execv. + This method does not restart the process from the calling + thread; instead, it stops the bus and asks the main thread to + call execv. """ self.execv = True self.exit() @@ -327,10 +327,11 @@ class Bus(object): """Wait for the EXITING state, KeyboardInterrupt or SystemExit. This function is intended to be called only by the main thread. - After waiting for the EXITING state, it also waits for all threads - to terminate, and then calls os.execv if self.execv is True. This - design allows another thread to call bus.restart, yet have the main - thread perform the actual execv call (required on some platforms). + After waiting for the EXITING state, it also waits for all + threads to terminate, and then calls os.execv if self.execv is + True. This design allows another thread to call bus.restart, yet + have the main thread perform the actual execv call (required on + some platforms). """ try: self.wait(states.EXITING, interval=interval, channel='main') @@ -379,13 +380,14 @@ class Bus(object): def _do_execv(self): """Re-execute the current process. - This must be called from the main thread, because certain platforms - (OS X) don't allow execv to be called in a child thread very well. + This must be called from the main thread, because certain + platforms (OS X) don't allow execv to be called in a child + thread very well. 
""" try: args = self._get_true_argv() except NotImplementedError: - """It's probably win32 or GAE""" + """It's probably win32 or GAE.""" args = [sys.executable] + self._get_interpreter_argv() + sys.argv self.log('Re-spawning %s' % ' '.join(args)) @@ -472,7 +474,7 @@ class Bus(object): c_ind = None if is_module: - """It's containing `-m -m` sequence of arguments""" + """It's containing `-m -m` sequence of arguments.""" if is_command and c_ind < m_ind: """There's `-c -c` before `-m`""" raise RuntimeError( @@ -481,7 +483,7 @@ class Bus(object): # Survive module argument here original_module = sys.argv[0] if not os.access(original_module, os.R_OK): - """There's no such module exist""" + """There's no such module exist.""" raise AttributeError( "{} doesn't seem to be a module " 'accessible by current user'.format(original_module)) @@ -489,12 +491,12 @@ class Bus(object): # ... and substitute it with the original module path: _argv.insert(m_ind, original_module) elif is_command: - """It's containing just `-c -c` sequence of arguments""" + """It's containing just `-c -c` sequence of arguments.""" raise RuntimeError( "Cannot reconstruct command from '-c'. " 'Ref: https://github.com/cherrypy/cherrypy/issues/1545') except AttributeError: - """It looks Py_GetArgcArgv is completely absent in some environments + """It looks Py_GetArgcArgv's completely absent in some environments It is known, that there's no Py_GetArgcArgv in MS Windows and ``ctypes`` module is completely absent in Google AppEngine @@ -512,13 +514,13 @@ class Bus(object): """Prepend current working dir to PATH environment variable if needed. If sys.path[0] is an empty string, the interpreter was likely - invoked with -m and the effective path is about to change on - re-exec. Add the current directory to $PYTHONPATH to ensure - that the new process sees the same path. + invoked with -m and the effective path is about to change on re- + exec. Add the current directory to $PYTHONPATH to ensure that + the new process sees the same path. This issue cannot be addressed in the general case because - Python cannot reliably reconstruct the - original command line (http://bugs.python.org/issue14208). + Python cannot reliably reconstruct the original command line ( + http://bugs.python.org/issue14208). (This idea filched from tornado.autoreload) """ @@ -536,10 +538,10 @@ class Bus(object): """Set the CLOEXEC flag on all open files (except stdin/out/err). If self.max_cloexec_files is an integer (the default), then on - platforms which support it, it represents the max open files setting - for the operating system. This function will be called just before - the process is restarted via os.execv() to prevent open files - from persisting into the new process. + platforms which support it, it represents the max open files + setting for the operating system. This function will be called + just before the process is restarted via os.execv() to prevent + open files from persisting into the new process. Set self.max_cloexec_files to 0 to disable this behavior. """ @@ -578,7 +580,10 @@ class Bus(object): return t def log(self, msg='', level=20, traceback=False): - """Log the given message. Append the last traceback if requested.""" + """Log the given message. + + Append the last traceback if requested. 
+ """ if traceback: msg += '\n' + ''.join(_traceback.format_exception(*sys.exc_info())) self.publish('log', msg, level) diff --git a/lib/cherrypy/scaffold/__init__.py b/lib/cherrypy/scaffold/__init__.py index bcddba2d..1fa26bee 100644 --- a/lib/cherrypy/scaffold/__init__.py +++ b/lib/cherrypy/scaffold/__init__.py @@ -9,7 +9,6 @@ Even before any tweaking, this should serve a few demonstration pages. Change to this directory and run: cherryd -c site.conf - """ import cherrypy diff --git a/lib/munkres.py b/lib/munkres.py index ab982d7e..2f2edbc5 100644 --- a/lib/munkres.py +++ b/lib/munkres.py @@ -1,8 +1,3 @@ -#!/usr/bin/env python -# -*- coding: iso-8859-1 -*- - -# Documentation is intended to be processed by Epydoc. - """ Introduction ============ @@ -11,266 +6,10 @@ The Munkres module provides an implementation of the Munkres algorithm (also called the Hungarian algorithm or the Kuhn-Munkres algorithm), useful for solving the Assignment Problem. -Assignment Problem -================== - -Let *C* be an *n*\ x\ *n* matrix representing the costs of each of *n* workers -to perform any of *n* jobs. The assignment problem is to assign jobs to -workers in a way that minimizes the total cost. Since each worker can perform -only one job and each job can be assigned to only one worker the assignments -represent an independent set of the matrix *C*. - -One way to generate the optimal set is to create all permutations of -the indexes necessary to traverse the matrix so that no row and column -are used more than once. For instance, given this matrix (expressed in -Python):: - - matrix = [[5, 9, 1], - [10, 3, 2], - [8, 7, 4]] - -You could use this code to generate the traversal indexes:: - - def permute(a, results): - if len(a) == 1: - results.insert(len(results), a) - - else: - for i in range(0, len(a)): - element = a[i] - a_copy = [a[j] for j in range(0, len(a)) if j != i] - subresults = [] - permute(a_copy, subresults) - for subresult in subresults: - result = [element] + subresult - results.insert(len(results), result) - - results = [] - permute(range(len(matrix)), results) # [0, 1, 2] for a 3x3 matrix - -After the call to permute(), the results matrix would look like this:: - - [[0, 1, 2], - [0, 2, 1], - [1, 0, 2], - [1, 2, 0], - [2, 0, 1], - [2, 1, 0]] - -You could then use that index matrix to loop over the original cost matrix -and calculate the smallest cost of the combinations:: - - n = len(matrix) - minval = sys.maxsize - for row in range(n): - cost = 0 - for col in range(n): - cost += matrix[row][col] - minval = min(cost, minval) - - print minval - -While this approach works fine for small matrices, it does not scale. It -executes in O(*n*!) time: Calculating the permutations for an *n*\ x\ *n* -matrix requires *n*! operations. For a 12x12 matrix, that's 479,001,600 -traversals. Even if you could manage to perform each traversal in just one -millisecond, it would still take more than 133 hours to perform the entire -traversal. A 20x20 matrix would take 2,432,902,008,176,640,000 operations. At -an optimistic millisecond per operation, that's more than 77 million years. - -The Munkres algorithm runs in O(*n*\ ^3) time, rather than O(*n*!). This -package provides an implementation of that algorithm. - -This version is based on -http://www.public.iastate.edu/~ddoty/HungarianAlgorithm.html. - -This version was written for Python by Brian Clapper from the (Ada) algorithm -at the above web site. (The ``Algorithm::Munkres`` Perl version, in CPAN, was -clearly adapted from the same web site.) 
- -Usage -===== - -Construct a Munkres object:: - - from munkres import Munkres - - m = Munkres() - -Then use it to compute the lowest cost assignment from a cost matrix. Here's -a sample program:: - - from munkres import Munkres, print_matrix - - matrix = [[5, 9, 1], - [10, 3, 2], - [8, 7, 4]] - m = Munkres() - indexes = m.compute(matrix) - print_matrix(matrix, msg='Lowest cost through this matrix:') - total = 0 - for row, column in indexes: - value = matrix[row][column] - total += value - print '(%d, %d) -> %d' % (row, column, value) - print 'total cost: %d' % total - -Running that program produces:: - - Lowest cost through this matrix: - [5, 9, 1] - [10, 3, 2] - [8, 7, 4] - (0, 0) -> 5 - (1, 1) -> 3 - (2, 2) -> 4 - total cost=12 - -The instantiated Munkres object can be used multiple times on different -matrices. - -Non-square Cost Matrices -======================== - -The Munkres algorithm assumes that the cost matrix is square. However, it's -possible to use a rectangular matrix if you first pad it with 0 values to make -it square. This module automatically pads rectangular cost matrices to make -them square. - -Notes: - -- The module operates on a *copy* of the caller's matrix, so any padding will - not be seen by the caller. -- The cost matrix must be rectangular or square. An irregular matrix will - *not* work. - -Calculating Profit, Rather than Cost -==================================== - -The cost matrix is just that: A cost matrix. The Munkres algorithm finds -the combination of elements (one from each row and column) that results in -the smallest cost. It's also possible to use the algorithm to maximize -profit. To do that, however, you have to convert your profit matrix to a -cost matrix. The simplest way to do that is to subtract all elements from a -large value. For example:: - - from munkres import Munkres, print_matrix - - matrix = [[5, 9, 1], - [10, 3, 2], - [8, 7, 4]] - cost_matrix = [] - for row in matrix: - cost_row = [] - for col in row: - cost_row += [sys.maxsize - col] - cost_matrix += [cost_row] - - m = Munkres() - indexes = m.compute(cost_matrix) - print_matrix(matrix, msg='Highest profit through this matrix:') - total = 0 - for row, column in indexes: - value = matrix[row][column] - total += value - print '(%d, %d) -> %d' % (row, column, value) - - print 'total profit=%d' % total - -Running that program produces:: - - Highest profit through this matrix: - [5, 9, 1] - [10, 3, 2] - [8, 7, 4] - (0, 1) -> 9 - (1, 0) -> 10 - (2, 2) -> 4 - total profit=23 - -The ``munkres`` module provides a convenience method for creating a cost -matrix from a profit matrix. Since it doesn't know whether the matrix contains -floating point numbers, decimals, or integers, you have to provide the -conversion function; but the convenience method takes care of the actual -creation of the cost matrix:: - - import munkres - - cost_matrix = munkres.make_cost_matrix(matrix, - lambda cost: sys.maxsize - cost) - -So, the above profit-calculation program can be recast as:: - - from munkres import Munkres, print_matrix, make_cost_matrix - - matrix = [[5, 9, 1], - [10, 3, 2], - [8, 7, 4]] - cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxsize - cost) - m = Munkres() - indexes = m.compute(cost_matrix) - print_matrix(matrix, msg='Lowest cost through this matrix:') - total = 0 - for row, column in indexes: - value = matrix[row][column] - total += value - print '(%d, %d) -> %d' % (row, column, value) - print 'total profit=%d' % total - -References -========== - -1. 
http://www.public.iastate.edu/~ddoty/HungarianAlgorithm.html - -2. Harold W. Kuhn. The Hungarian Method for the assignment problem. - *Naval Research Logistics Quarterly*, 2:83-97, 1955. - -3. Harold W. Kuhn. Variants of the Hungarian method for assignment - problems. *Naval Research Logistics Quarterly*, 3: 253-258, 1956. - -4. Munkres, J. Algorithms for the Assignment and Transportation Problems. - *Journal of the Society of Industrial and Applied Mathematics*, - 5(1):32-38, March, 1957. - -5. http://en.wikipedia.org/wiki/Hungarian_algorithm - -Copyright and License -===================== - -This software is released under a BSD license, adapted from - - -Copyright (c) 2008 Brian M. Clapper -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name "clapper.org" nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. +For complete usage documentation, see: https://software.clapper.org/munkres/ """ -__docformat__ = 'restructuredtext' +__docformat__ = 'markdown' # --------------------------------------------------------------------------- # Imports @@ -278,23 +17,43 @@ __docformat__ = 'restructuredtext' import sys import copy +from typing import Union, NewType, Sequence, Tuple, Optional, Callable # --------------------------------------------------------------------------- # Exports # --------------------------------------------------------------------------- -__all__ = ['Munkres', 'make_cost_matrix'] +__all__ = ['Munkres', 'make_cost_matrix', 'DISALLOWED'] # --------------------------------------------------------------------------- # Globals # --------------------------------------------------------------------------- +AnyNum = NewType('AnyNum', Union[int, float]) +Matrix = NewType('Matrix', Sequence[Sequence[AnyNum]]) + # Info about the module -__version__ = "1.0.6" +__version__ = "1.1.4" __author__ = "Brian Clapper, bmc@clapper.org" -__url__ = "http://software.clapper.org/munkres/" -__copyright__ = "(c) 2008 Brian M. Clapper" -__license__ = "BSD-style license" +__url__ = "https://software.clapper.org/munkres/" +__copyright__ = "(c) 2008-2020 Brian M. 
Clapper" +__license__ = "Apache Software License" + +# Constants +class DISALLOWED_OBJ(object): + pass +DISALLOWED = DISALLOWED_OBJ() +DISALLOWED_PRINTVAL = "D" + +# --------------------------------------------------------------------------- +# Exceptions +# --------------------------------------------------------------------------- + +class UnsolvableMatrix(Exception): + """ + Exception raised for unsolvable matrices + """ + pass # --------------------------------------------------------------------------- # Classes @@ -317,30 +76,18 @@ class Munkres: self.marked = None self.path = None - def make_cost_matrix(profit_matrix, inversion_function): - """ - **DEPRECATED** - - Please use the module function ``make_cost_matrix()``. - """ - import munkres - return munkres.make_cost_matrix(profit_matrix, inversion_function) - - make_cost_matrix = staticmethod(make_cost_matrix) - - def pad_matrix(self, matrix, pad_value=0): + def pad_matrix(self, matrix: Matrix, pad_value: int=0) -> Matrix: """ Pad a possibly non-square matrix to make it square. - :Parameters: - matrix : list of lists - matrix to pad + **Parameters** - pad_value : int - value to use to pad the matrix + - `matrix` (list of lists of numbers): matrix to pad + - `pad_value` (`int`): value to use to pad the matrix - :rtype: list of lists - :return: a new, possibly padded, matrix + **Returns** + + a new, possibly padded, matrix """ max_columns = 0 total_rows = len(matrix) @@ -356,34 +103,35 @@ class Munkres: new_row = row[:] if total_rows > row_len: # Row too short. Pad it. - new_row += [0] * (total_rows - row_len) + new_row += [pad_value] * (total_rows - row_len) new_matrix += [new_row] while len(new_matrix) < total_rows: - new_matrix += [[0] * total_rows] + new_matrix += [[pad_value] * total_rows] return new_matrix - def compute(self, cost_matrix): + def compute(self, cost_matrix: Matrix) -> Sequence[Tuple[int, int]]: """ Compute the indexes for the lowest-cost pairings between rows and - columns in the database. Returns a list of (row, column) tuples + columns in the database. Returns a list of `(row, column)` tuples that can be used to traverse the matrix. - :Parameters: - cost_matrix : list of lists - The cost matrix. If this cost matrix is not square, it - will be padded with zeros, via a call to ``pad_matrix()``. - (This method does *not* modify the caller's matrix. It - operates on a copy of the matrix.) + **WARNING**: This code handles square and rectangular matrices. It + does *not* handle irregular matrices. - **WARNING**: This code handles square and rectangular - matrices. It does *not* handle irregular matrices. + **Parameters** - :rtype: list - :return: A list of ``(row, column)`` tuples that describe the lowest - cost path through the matrix + - `cost_matrix` (list of lists of numbers): The cost matrix. If this + cost matrix is not square, it will be padded with zeros, via a call + to `pad_matrix()`. (This method does *not* modify the caller's + matrix. It operates on a copy of the matrix.) 
+ + **Returns** + + A list of `(row, column)` tuples that describe the lowest cost path + through the matrix """ self.C = self.pad_matrix(cost_matrix) self.n = len(self.C) @@ -422,18 +170,18 @@ class Munkres: return results - def __copy_matrix(self, matrix): + def __copy_matrix(self, matrix: Matrix) -> Matrix: """Return an exact copy of the supplied matrix""" return copy.deepcopy(matrix) - def __make_matrix(self, n, val): + def __make_matrix(self, n: int, val: AnyNum) -> Matrix: """Create an *n*x*n* matrix, populating it with the specific value.""" matrix = [] for i in range(n): matrix += [[val for j in range(n)]] return matrix - def __step1(self): + def __step1(self) -> int: """ For each row of the matrix, find the smallest element and subtract it from every element in its row. Go to Step 2. @@ -441,15 +189,22 @@ class Munkres: C = self.C n = self.n for i in range(n): - minval = min(self.C[i]) + vals = [x for x in self.C[i] if x is not DISALLOWED] + if len(vals) == 0: + # All values in this row are DISALLOWED. This matrix is + # unsolvable. + raise UnsolvableMatrix( + "Row {0} is entirely DISALLOWED.".format(i) + ) + minval = min(vals) # Find the minimum value for this row and subtract that minimum # from every element in the row. for j in range(n): - self.C[i][j] -= minval - + if self.C[i][j] is not DISALLOWED: + self.C[i][j] -= minval return 2 - def __step2(self): + def __step2(self) -> int: """ Find a zero (Z) in the resulting matrix. If there is no starred zero in its row or column, star Z. Repeat for each element in the @@ -464,11 +219,12 @@ class Munkres: self.marked[i][j] = 1 self.col_covered[j] = True self.row_covered[i] = True + break self.__clear_covers() return 3 - def __step3(self): + def __step3(self) -> int: """ Cover each column containing a starred zero. If K columns are covered, the starred zeros describe a complete set of unique @@ -478,7 +234,7 @@ class Munkres: count = 0 for i in range(n): for j in range(n): - if self.marked[i][j] == 1: + if self.marked[i][j] == 1 and not self.col_covered[j]: self.col_covered[j] = True count += 1 @@ -489,7 +245,7 @@ class Munkres: return step - def __step4(self): + def __step4(self) -> int: """ Find a noncovered zero and prime it. If there is no starred zero in the row containing this primed zero, Go to Step 5. Otherwise, @@ -499,11 +255,11 @@ class Munkres: """ step = 0 done = False - row = -1 - col = -1 + row = 0 + col = 0 star_col = -1 while not done: - (row, col) = self.__find_a_zero() + (row, col) = self.__find_a_zero(row, col) if row < 0: done = True step = 6 @@ -522,7 +278,7 @@ class Munkres: return step - def __step5(self): + def __step5(self) -> int: """ Construct a series of alternating primed and starred zeros as follows. Let Z0 represent the uncovered primed zero found in Step 4. @@ -558,7 +314,7 @@ class Munkres: self.__erase_primes() return 3 - def __step6(self): + def __step6(self) -> int: """ Add the value found in Step 4 to every element of each covered row, and subtract it from every element of each uncovered column. @@ -566,34 +322,44 @@ class Munkres: lines. 
""" minval = self.__find_smallest() + events = 0 # track actual changes to matrix for i in range(self.n): for j in range(self.n): + if self.C[i][j] is DISALLOWED: + continue if self.row_covered[i]: self.C[i][j] += minval + events += 1 if not self.col_covered[j]: self.C[i][j] -= minval + events += 1 + if self.row_covered[i] and not self.col_covered[j]: + events -= 2 # change reversed, no real difference + if (events == 0): + raise UnsolvableMatrix("Matrix cannot be solved!") return 4 - def __find_smallest(self): + def __find_smallest(self) -> AnyNum: """Find the smallest uncovered value in the matrix.""" minval = sys.maxsize for i in range(self.n): for j in range(self.n): if (not self.row_covered[i]) and (not self.col_covered[j]): - if minval > self.C[i][j]: + if self.C[i][j] is not DISALLOWED and minval > self.C[i][j]: minval = self.C[i][j] return minval - def __find_a_zero(self): + + def __find_a_zero(self, i0: int = 0, j0: int = 0) -> Tuple[int, int]: """Find the first uncovered element with value 0""" row = -1 col = -1 - i = 0 + i = i0 n = self.n done = False while not done: - j = 0 + j = j0 while True: if (self.C[i][j] == 0) and \ (not self.row_covered[i]) and \ @@ -601,16 +367,16 @@ class Munkres: row = i col = j done = True - j += 1 - if j >= n: + j = (j + 1) % n + if j == j0: break - i += 1 - if i >= n: + i = (i + 1) % n + if i == i0: done = True return (row, col) - def __find_star_in_row(self, row): + def __find_star_in_row(self, row: Sequence[AnyNum]) -> int: """ Find the first starred element in the specified row. Returns the column index, or -1 if no starred element was found. @@ -623,7 +389,7 @@ class Munkres: return col - def __find_star_in_col(self, col): + def __find_star_in_col(self, col: Sequence[AnyNum]) -> int: """ Find the first starred element in the specified row. Returns the row index, or -1 if no starred element was found. @@ -636,7 +402,7 @@ class Munkres: return row - def __find_prime_in_row(self, row): + def __find_prime_in_row(self, row) -> int: """ Find the first prime element in the specified row. Returns the column index, or -1 if no starred element was found. @@ -649,20 +415,22 @@ class Munkres: return col - def __convert_path(self, path, count): + def __convert_path(self, + path: Sequence[Sequence[int]], + count: int) -> None: for i in range(count+1): if self.marked[path[i][0]][path[i][1]] == 1: self.marked[path[i][0]][path[i][1]] = 0 else: self.marked[path[i][0]][path[i][1]] = 1 - def __clear_covers(self): + def __clear_covers(self) -> None: """Clear all covered matrix cells""" for i in range(self.n): self.row_covered[i] = False self.col_covered[i] = False - def __erase_primes(self): + def __erase_primes(self) -> None: """Erase all prime markings""" for i in range(self.n): for j in range(self.n): @@ -673,51 +441,56 @@ class Munkres: # Functions # --------------------------------------------------------------------------- -def make_cost_matrix(profit_matrix, inversion_function): +def make_cost_matrix( + profit_matrix: Matrix, + inversion_function: Optional[Callable[[AnyNum], AnyNum]] = None + ) -> Matrix: """ - Create a cost matrix from a profit matrix by calling - 'inversion_function' to invert each value. The inversion - function must take one numeric argument (of any type) and return - another numeric argument which is presumed to be the cost inverse - of the original profit. + Create a cost matrix from a profit matrix by calling `inversion_function()` + to invert each value. 
The inversion function must take one numeric argument + (of any type) and return another numeric argument which is presumed to be + the cost inverse of the original profit value. If the inversion function + is not provided, a given cell's inverted value is calculated as + `max(matrix) - value`. This is a static method. Call it like this: - .. python:: - + from munkres import Munkres cost_matrix = Munkres.make_cost_matrix(matrix, inversion_func) For example: - .. python:: - + from munkres import Munkres cost_matrix = Munkres.make_cost_matrix(matrix, lambda x : sys.maxsize - x) - :Parameters: - profit_matrix : list of lists - The matrix to convert from a profit to a cost matrix + **Parameters** - inversion_function : function - The function to use to invert each entry in the profit matrix + - `profit_matrix` (list of lists of numbers): The matrix to convert from + profit to cost values. + - `inversion_function` (`function`): The function to use to invert each + entry in the profit matrix. - :rtype: list of lists - :return: The converted matrix + **Returns** + + A new matrix representing the inversion of `profix_matrix`. """ + if not inversion_function: + maximum = max(max(row) for row in profit_matrix) + inversion_function = lambda x: maximum - x + cost_matrix = [] for row in profit_matrix: cost_matrix.append([inversion_function(value) for value in row]) return cost_matrix -def print_matrix(matrix, msg=None): +def print_matrix(matrix: Matrix, msg: Optional[str] = None) -> None: """ - Convenience function: Displays the contents of a matrix of integers. + Convenience function: Displays the contents of a matrix. - :Parameters: - matrix : list of lists - Matrix to print + **Parameters** - msg : str - Optional message to print before displaying the matrix + - `matrix` (list of lists of numbers): The matrix to print + - `msg` (`str`): Optional message to print before displaying the matrix """ import math @@ -728,16 +501,21 @@ def print_matrix(matrix, msg=None): width = 0 for row in matrix: for val in row: - width = max(width, int(math.log10(val)) + 1) + if val is DISALLOWED: + val = DISALLOWED_PRINTVAL + width = max(width, len(str(val))) # Make the format string - format = '%%%dd' % width + format = ('%%%d' % width) # Print the matrix for row in matrix: sep = '[' for val in row: - sys.stdout.write(sep + format % val) + if val is DISALLOWED: + val = DISALLOWED_PRINTVAL + formatted = ((format + 's') % val) + sys.stdout.write(sep + formatted) sep = ', ' sys.stdout.write(']\n') @@ -767,11 +545,51 @@ if __name__ == '__main__': [9, 7, 4]], 18), + # Square variant with floating point value + ([[10.1, 10.2, 8.3], + [9.4, 8.5, 1.6], + [9.7, 7.8, 4.9]], + 19.5), + # Rectangular variant ([[10, 10, 8, 11], [9, 8, 1, 1], [9, 7, 4, 10]], - 15)] + 15), + + # Rectangular variant with floating point value + ([[10.01, 10.02, 8.03, 11.04], + [9.05, 8.06, 1.07, 1.08], + [9.09, 7.1, 4.11, 10.12]], + 15.2), + + # Rectangular with DISALLOWED + ([[4, 5, 6, DISALLOWED], + [1, 9, 12, 11], + [DISALLOWED, 5, 4, DISALLOWED], + [12, 12, 12, 10]], + 20), + + # Rectangular variant with DISALLOWED and floating point value + ([[4.001, 5.002, 6.003, DISALLOWED], + [1.004, 9.005, 12.006, 11.007], + [DISALLOWED, 5.008, 4.009, DISALLOWED], + [12.01, 12.011, 12.012, 10.013]], + 20.028), + + # DISALLOWED to force pairings + ([[1, DISALLOWED, DISALLOWED, DISALLOWED], + [DISALLOWED, 2, DISALLOWED, DISALLOWED], + [DISALLOWED, DISALLOWED, 3, DISALLOWED], + [DISALLOWED, DISALLOWED, DISALLOWED, 4]], + 10), + + # DISALLOWED to force pairings 
+            ([[1.1, DISALLOWED, DISALLOWED, DISALLOWED],
+              [DISALLOWED, 2.2, DISALLOWED, DISALLOWED],
+              [DISALLOWED, DISALLOWED, 3.3, DISALLOWED],
+              [DISALLOWED, DISALLOWED, DISALLOWED, 4.4]],
+             11.0)]
 
     m = Munkres()
     for cost_matrix, expected_total in matrices:
@@ -781,6 +599,6 @@ if __name__ == '__main__':
         for r, c in indexes:
             x = cost_matrix[r][c]
             total_cost += x
-            print(('(%d, %d) -> %d' % (r, c, x)))
-            print(('lowest cost=%d' % total_cost))
+            print(('(%d, %d) -> %s' % (r, c, x)))
+            print(('lowest cost=%s' % total_cost))
         assert expected_total == total_cost
diff --git a/lib/pkg_resources/__init__.py b/lib/pkg_resources/__init__.py
index f98516d1..ab6afe95 100644
--- a/lib/pkg_resources/__init__.py
+++ b/lib/pkg_resources/__init__.py
@@ -12,6 +12,9 @@
 The package resource API is designed to work with normal filesystem packages,
 .egg files, and unpacked .egg files. It can also work in a limited way with
 .zip files and with custom PEP 302 loaders that support the ``get_data()``
 method.
+
+This module is deprecated. Users are directed to :mod:`importlib.resources`,
+:mod:`importlib.metadata` and :pypi:`packaging` instead.
 """
 
 import sys
@@ -34,7 +37,6 @@ import email.parser
 import errno
 import tempfile
 import textwrap
-import itertools
 import inspect
 import ntpath
 import posixpath
@@ -54,8 +56,10 @@ except NameError:
 # capture these to bypass sandboxing
 from os import utime
 
+
 try:
     from os import mkdir, rename, unlink
+
     WRITE_SUPPORT = True
 except ImportError:
     # no write support, probably under GAE
@@ -66,17 +70,26 @@ from os.path import isdir, split
 
 try:
     import importlib.machinery as importlib_machinery
+
     # access attribute to force import under delayed import mechanisms.
     importlib_machinery.__name__
 except ImportError:
     importlib_machinery = None
 
-from pkg_resources.extern import appdirs
+from pkg_resources.extern.jaraco.text import (
+    yield_lines,
+    drop_comment,
+    join_continuation,
+)
+
+from pkg_resources.extern import platformdirs
 from pkg_resources.extern import packaging
+
 __import__('pkg_resources.extern.packaging.version')
 __import__('pkg_resources.extern.packaging.specifiers')
 __import__('pkg_resources.extern.packaging.requirements')
 __import__('pkg_resources.extern.packaging.markers')
+__import__('pkg_resources.extern.packaging.utils')
 
 if sys.version_info < (3, 5):
     raise RuntimeError("Python 3.5 or later is required")
@@ -102,6 +115,17 @@
 _namespace_handlers = None
 _namespace_packages = None
 
 
+warnings.warn(
+    "pkg_resources is deprecated as an API. "
+    "See https://setuptools.pypa.io/en/latest/pkg_resources.html",
+    DeprecationWarning,
+    stacklevel=2,
+)
+
+
+_PEP440_FALLBACK = re.compile(r"^v?(?P<safe>(?:[0-9]+!)?[0-9]+(?:\.[0-9]+)*)", re.I)
+
+
 class PEP440Warning(RuntimeWarning):
     """
     Used when there is an issue with a version or specifier not complying with
@@ -109,16 +133,7 @@ class PEP440Warning(RuntimeWarning):
     """
 
 
-def parse_version(v):
-    try:
-        return packaging.version.Version(v)
-    except packaging.version.InvalidVersion:
-        warnings.warn(
-            f"{v} is an invalid version and will not be supported in "
-            "a future release",
-            PkgResourcesDeprecationWarning,
-        )
-        return packaging.version.LegacyVersion(v)
+parse_version = packaging.version.Version
 
 
 _state_vars = {}
@@ -190,51 +205,87 @@ def get_supported_platform():
 
 __all__ = [
     # Basic resource access and distribution/entry point discovery
-    'require', 'run_script', 'get_provider', 'get_distribution',
-    'load_entry_point', 'get_entry_map', 'get_entry_info',
+    'require',
+    'run_script',
+    'get_provider',
+    'get_distribution',
+    'load_entry_point',
+    'get_entry_map',
+    'get_entry_info',
     'iter_entry_points',
-    'resource_string', 'resource_stream', 'resource_filename',
-    'resource_listdir', 'resource_exists', 'resource_isdir',
-
+    'resource_string',
+    'resource_stream',
+    'resource_filename',
+    'resource_listdir',
+    'resource_exists',
+    'resource_isdir',
     # Environmental control
-    'declare_namespace', 'working_set', 'add_activation_listener',
-    'find_distributions', 'set_extraction_path', 'cleanup_resources',
+    'declare_namespace',
+    'working_set',
+    'add_activation_listener',
+    'find_distributions',
+    'set_extraction_path',
+    'cleanup_resources',
     'get_default_cache',
-
     # Primary implementation classes
-    'Environment', 'WorkingSet', 'ResourceManager',
-    'Distribution', 'Requirement', 'EntryPoint',
-
+    'Environment',
+    'WorkingSet',
+    'ResourceManager',
+    'Distribution',
+    'Requirement',
+    'EntryPoint',
     # Exceptions
-    'ResolutionError', 'VersionConflict', 'DistributionNotFound',
-    'UnknownExtra', 'ExtractionError',
-
+    'ResolutionError',
+    'VersionConflict',
+    'DistributionNotFound',
+    'UnknownExtra',
+    'ExtractionError',
     # Warnings
     'PEP440Warning',
-
     # Parsing functions and string utilities
-    'parse_requirements', 'parse_version', 'safe_name', 'safe_version',
-    'get_platform', 'compatible_platforms', 'yield_lines', 'split_sections',
-    'safe_extra', 'to_filename', 'invalid_marker', 'evaluate_marker',
-
+    'parse_requirements',
+    'parse_version',
+    'safe_name',
+    'safe_version',
+    'get_platform',
+    'compatible_platforms',
+    'yield_lines',
+    'split_sections',
+    'safe_extra',
+    'to_filename',
+    'invalid_marker',
+    'evaluate_marker',
     # filesystem utilities
-    'ensure_directory', 'normalize_path',
-
+    'ensure_directory',
+    'normalize_path',
     # Distribution "precedence" constants
-    'EGG_DIST', 'BINARY_DIST', 'SOURCE_DIST', 'CHECKOUT_DIST', 'DEVELOP_DIST',
-
+    'EGG_DIST',
+    'BINARY_DIST',
+    'SOURCE_DIST',
+    'CHECKOUT_DIST',
+    'DEVELOP_DIST',
     # "Provider" interfaces, implementations, and registration/lookup APIs
-    'IMetadataProvider', 'IResourceProvider', 'FileMetadata',
-    'PathMetadata', 'EggMetadata', 'EmptyProvider', 'empty_provider',
-    'NullProvider', 'EggProvider', 'DefaultProvider', 'ZipProvider',
-    'register_finder', 'register_namespace_handler', 'register_loader_type',
-    'fixup_namespace_packages', 'get_importer',
-
+    'IMetadataProvider',
+    'IResourceProvider',
+    'FileMetadata',
+    'PathMetadata',
+    'EggMetadata',
+    'EmptyProvider',
+    'empty_provider',
+    'NullProvider',
+    'EggProvider',
+
'DefaultProvider', + 'ZipProvider', + 'register_finder', + 'register_namespace_handler', + 'register_loader_type', + 'fixup_namespace_packages', + 'get_importer', # Warnings 'PkgResourcesDeprecationWarning', - # Deprecated/backward compatibility only - 'run_main', 'AvailableDistributions', + 'run_main', + 'AvailableDistributions', ] @@ -293,8 +344,10 @@ class ContextualVersionConflict(VersionConflict): class DistributionNotFound(ResolutionError): """A requested distribution was not found""" - _template = ("The '{self.req}' distribution was not found " - "and is required by {self.requirers_str}") + _template = ( + "The '{self.req}' distribution was not found " + "and is required by {self.requirers_str}" + ) @property def req(self): @@ -388,7 +441,8 @@ def get_build_platform(): version = _macos_vers() machine = os.uname()[4].replace(" ", "_") return "macosx-%d.%d-%s" % ( - int(version[0]), int(version[1]), + int(version[0]), + int(version[1]), _macos_arch(machine), ) except ValueError: @@ -429,15 +483,18 @@ def compatible_platforms(provided, required): if provDarwin: dversion = int(provDarwin.group(1)) macosversion = "%s.%s" % (reqMac.group(1), reqMac.group(2)) - if dversion == 7 and macosversion >= "10.3" or \ - dversion == 8 and macosversion >= "10.4": + if ( + dversion == 7 + and macosversion >= "10.3" + or dversion == 8 + and macosversion >= "10.4" + ): return True # egg isn't macOS or legacy darwin return False # are they the same major version and machine type? - if provMac.group(1) != reqMac.group(1) or \ - provMac.group(3) != reqMac.group(3): + if provMac.group(1) != reqMac.group(1) or provMac.group(3) != reqMac.group(3): return False # is the required OS major update >= the provided one? @@ -499,8 +556,8 @@ class IMetadataProvider: def get_metadata_lines(name): """Yield named metadata resource as list of non-blank non-comment lines - Leading and trailing whitespace is stripped from each line, and lines - with ``#`` as the first non-blank character are omitted.""" + Leading and trailing whitespace is stripped from each line, and lines + with ``#`` as the first non-blank character are omitted.""" def metadata_isdir(name): """Is the named metadata a directory? (like ``os.path.isdir()``)""" @@ -548,6 +605,7 @@ class WorkingSet: self.entries = [] self.entry_keys = {} self.by_key = {} + self.normalized_to_canonical_keys = {} self.callbacks = [] if entries is None: @@ -628,6 +686,14 @@ class WorkingSet: is returned. """ dist = self.by_key.get(req.key) + + if dist is None: + canonical_key = self.normalized_to_canonical_keys.get(req.key) + + if canonical_key is not None: + req.key = canonical_key + dist = self.by_key.get(canonical_key) + if dist is not None and dist not in req: # XXX add more info raise VersionConflict(dist, req) @@ -696,15 +762,22 @@ class WorkingSet: return self.by_key[dist.key] = dist + normalized_name = packaging.utils.canonicalize_name(dist.key) + self.normalized_to_canonical_keys[normalized_name] = dist.key if dist.key not in keys: keys.append(dist.key) if dist.key not in keys2: keys2.append(dist.key) self._added_new(dist) - # FIXME: 'WorkingSet.resolve' is too complex (11) - def resolve(self, requirements, env=None, installer=None, # noqa: C901 - replace_conflicting=False, extras=None): + def resolve( + self, + requirements, + env=None, + installer=None, + replace_conflicting=False, + extras=None, + ): """List all distributions needed to (recursively) meet `requirements` `requirements` must be a sequence of ``Requirement`` objects. 
`env`, @@ -753,33 +826,9 @@ class WorkingSet: if not req_extras.markers_pass(req, extras): continue - dist = best.get(req.key) - if dist is None: - # Find the best distribution and add it to the map - dist = self.by_key.get(req.key) - if dist is None or (dist not in req and replace_conflicting): - ws = self - if env is None: - if dist is None: - env = Environment(self.entries) - else: - # Use an empty environment and workingset to avoid - # any further conflicts with the conflicting - # distribution - env = Environment([]) - ws = WorkingSet([]) - dist = best[req.key] = env.best_match( - req, ws, installer, - replace_conflicting=replace_conflicting - ) - if dist is None: - requirers = required_by.get(req, None) - raise DistributionNotFound(req, requirers) - to_activate.append(dist) - if dist not in req: - # Oops, the "best" so far conflicts with a dependency - dependent_req = required_by[req] - raise VersionConflict(dist, req).with_context(dependent_req) + dist = self._resolve_dist( + req, best, replace_conflicting, env, installer, required_by, to_activate + ) # push the new requirements onto the stack new_requirements = dist.requires(req.extras)[::-1] @@ -795,8 +844,38 @@ class WorkingSet: # return list of distros to activate return to_activate - def find_plugins( - self, plugin_env, full_env=None, installer=None, fallback=True): + def _resolve_dist( + self, req, best, replace_conflicting, env, installer, required_by, to_activate + ): + dist = best.get(req.key) + if dist is None: + # Find the best distribution and add it to the map + dist = self.by_key.get(req.key) + if dist is None or (dist not in req and replace_conflicting): + ws = self + if env is None: + if dist is None: + env = Environment(self.entries) + else: + # Use an empty environment and workingset to avoid + # any further conflicts with the conflicting + # distribution + env = Environment([]) + ws = WorkingSet([]) + dist = best[req.key] = env.best_match( + req, ws, installer, replace_conflicting=replace_conflicting + ) + if dist is None: + requirers = required_by.get(req, None) + raise DistributionNotFound(req, requirers) + to_activate.append(dist) + if dist not in req: + # Oops, the "best" so far conflicts with a dependency + dependent_req = required_by[req] + raise VersionConflict(dist, req).with_context(dependent_req) + return dist + + def find_plugins(self, plugin_env, full_env=None, installer=None, fallback=True): """Find all activatable distributions in `plugin_env` Example usage:: @@ -849,9 +928,7 @@ class WorkingSet: list(map(shadow_set.add, self)) for project_name in plugin_projects: - for dist in plugin_env[project_name]: - req = [dist.as_requirement()] try: @@ -915,15 +992,19 @@ class WorkingSet: def __getstate__(self): return ( - self.entries[:], self.entry_keys.copy(), self.by_key.copy(), - self.callbacks[:] + self.entries[:], + self.entry_keys.copy(), + self.by_key.copy(), + self.normalized_to_canonical_keys.copy(), + self.callbacks[:], ) - def __setstate__(self, e_k_b_c): - entries, keys, by_key, callbacks = e_k_b_c + def __setstate__(self, e_k_b_n_c): + entries, keys, by_key, normalized_to_canonical_keys, callbacks = e_k_b_n_c self.entries = entries[:] self.entry_keys = keys.copy() self.by_key = by_key.copy() + self.normalized_to_canonical_keys = normalized_to_canonical_keys.copy() self.callbacks = callbacks[:] @@ -951,8 +1032,8 @@ class Environment: """Searchable snapshot of distributions on a search path""" def __init__( - self, search_path=None, platform=get_supported_platform(), - python=PY_MAJOR): + self, 
search_path=None, platform=get_supported_platform(), python=PY_MAJOR
+    ):
         """Snapshot distributions available on a search path
 
         Any distributions found on `search_path` are added to the environment.
@@ -1019,16 +1100,14 @@
         return self._distmap.get(distribution_key, [])
 
     def add(self, dist):
-        """Add `dist` if we ``can_add()`` it and it has not already been added
-        """
+        """Add `dist` if we ``can_add()`` it and it has not already been added"""
         if self.can_add(dist) and dist.has_version():
             dists = self._distmap.setdefault(dist.key, [])
             if dist not in dists:
                 dists.append(dist)
                 dists.sort(key=operator.attrgetter('hashcmp'), reverse=True)
 
-    def best_match(
-        self, req, working_set, installer=None, replace_conflicting=False):
+    def best_match(self, req, working_set, installer=None, replace_conflicting=False):
         """Find distribution best matching `req` and usable on `working_set`
 
         This calls the ``find(req)`` method of the `working_set` to see if a
@@ -1115,6 +1194,7 @@ class ExtractionError(RuntimeError):
 
 class ResourceManager:
     """Manage resource extraction and packages"""
+
     extraction_path = None
 
     def __init__(self):
@@ -1126,9 +1206,7 @@
 
     def resource_isdir(self, package_or_requirement, resource_name):
         """Is the named resource an existing directory?"""
-        return get_provider(package_or_requirement).resource_isdir(
-            resource_name
-        )
+        return get_provider(package_or_requirement).resource_isdir(resource_name)
 
     def resource_filename(self, package_or_requirement, resource_name):
         """Return a true filesystem path for specified resource"""
@@ -1150,9 +1228,7 @@
     def resource_listdir(self, package_or_requirement, resource_name):
         """List the contents of the named resource directory"""
-        return get_provider(package_or_requirement).resource_listdir(
-            resource_name
-        )
+        return get_provider(package_or_requirement).resource_listdir(resource_name)
 
     def extraction_error(self):
         """Give an error message for problems extracting file(s)"""
 
         old_exc = sys.exc_info()[1]
         cache_path = self.extraction_path or get_default_cache()
 
-        tmpl = textwrap.dedent("""
+        tmpl = textwrap.dedent(
+            """
             Can't extract file(s) to egg cache
 
             The following error occurred while trying to extract file(s)
@@ -1175,7 +1252,8 @@
             Perhaps your account does not have write access to this directory?
             You can change the cache directory by setting the PYTHON_EGG_CACHE
             environment variable to point to an accessible directory.
-            """).lstrip()
+            """
+        ).lstrip()
         err = ExtractionError(tmpl.format(**locals()))
         err.manager = self
         err.cache_path = cache_path
@@ -1274,9 +1352,7 @@
         ``cleanup_resources()``.)
         """
         if self.cached_files:
-            raise ValueError(
-                "Can't change extraction path, files already extracted"
-            )
+            raise ValueError("Can't change extraction path, files already extracted")
 
         self.extraction_path = path
@@ -1300,9 +1376,8 @@ def get_default_cache():
     or a platform-relevant user cache dir for an app named "Python-Eggs".
     """
-    return (
-        os.environ.get('PYTHON_EGG_CACHE')
-        or appdirs.user_cache_dir(appname='Python-Eggs')
+    return os.environ.get('PYTHON_EGG_CACHE') or platformdirs.user_cache_dir(
+        appname='Python-Eggs'
     )
 
 
@@ -1326,6 +1401,38 @@ def safe_version(version):
     return re.sub('[^A-Za-z0-9.]+', '-', version)
 
 
+def _forgiving_version(version):
+    """Fallback when ``safe_version`` is not safe enough
+    >>> parse_version(_forgiving_version('0.23ubuntu1'))
+    <Version('0.23.dev0+sanitized.ubuntu1')>
+    >>> parse_version(_forgiving_version('0.23-'))
+    <Version('0.23.dev0+sanitized')>
+    >>> parse_version(_forgiving_version('0.-_'))
+    <Version('0.dev0+sanitized')>
+    >>> parse_version(_forgiving_version('42.+?1'))
+    <Version('42.dev0+sanitized.1')>
+    >>> parse_version(_forgiving_version('hello world'))
+    <Version('0.dev0+sanitized.hello.world')>
+    """
+    version = version.replace(' ', '.')
+    match = _PEP440_FALLBACK.search(version)
+    if match:
+        safe = match["safe"]
+        rest = version[len(safe) :]
+    else:
+        safe = "0"
+        rest = version
+    local = f"sanitized.{_safe_segment(rest)}".strip(".")
+    return f"{safe}.dev0+{local}"
+
+
+def _safe_segment(segment):
+    """Convert an arbitrary string into a safe segment"""
+    segment = re.sub('[^A-Za-z0-9.]+', '-', segment)
+    segment = re.sub('-[^A-Za-z0-9]+', '-', segment)
+    return re.sub(r'\.[^A-Za-z0-9]+', '.', segment).strip(".-")
+
+
 def safe_extra(extra):
     """Convert an arbitrary string to a standard 'extra' name
 
@@ -1439,8 +1546,9 @@ class NullProvider:
         script = 'scripts/' + script_name
         if not self.has_metadata(script):
             raise ResolutionError(
-                "Script {script!r} not found in metadata at {self.egg_info!r}"
-                .format(**locals()),
+                "Script {script!r} not found in metadata at {self.egg_info!r}".format(
+                    **locals()
+                ),
             )
         script_text = self.get_metadata(script).replace('\r\n', '\n')
         script_text = script_text.replace('\r', '\n')
@@ -1453,8 +1561,12 @@ class NullProvider:
             exec(code, namespace, namespace)
         else:
             from linecache import cache
+
             cache[script_filename] = (
-                len(script_text), 0, script_text.split('\n'), script_filename
+                len(script_text),
+                0,
+                script_text.split('\n'),
+                script_filename,
             )
             script_code = compile(script_text, script_filename, 'exec')
             exec(script_code, namespace, namespace)
@@ -1534,9 +1646,9 @@ is not allowed.
     AttributeError: ...
     """
     invalid = (
-        os.path.pardir in path.split(posixpath.sep) or
-        posixpath.isabs(path) or
-        ntpath.isabs(path)
+        os.path.pardir in path.split(posixpath.sep)
+        or posixpath.isabs(path)
+        or ntpath.isabs(path)
     )
     if not invalid:
         return
@@ -1549,10 +1661,9 @@ is not allowed.
 
     # for compatibility, warn; in future
     # raise ValueError(msg)
-    warnings.warn(
+    issue_warning(
         msg[:-1] + " and will raise exceptions in a future release.",
         DeprecationWarning,
-        stacklevel=4,
     )
 
     def _get(self, path):
@@ -1581,7 +1692,7 @@ class EggProvider(NullProvider):
     """Provider based on a virtual filesystem"""
 
     def __init__(self, module):
-        NullProvider.__init__(self, module)
+        super().__init__(module)
         self._setup_prefix()
 
     def _setup_prefix(self):
@@ -1618,7 +1729,10 @@ class DefaultProvider(EggProvider):
 
     @classmethod
    def _register(cls):
-        loader_names = 'SourceFileLoader', 'SourcelessFileLoader',
+        loader_names = (
+            'SourceFileLoader',
+            'SourcelessFileLoader',
+        )
         for name in loader_names:
             loader_cls = getattr(importlib_machinery, name, type(None))
             register_loader_type(loader_cls, cls)
@@ -1678,6 +1792,7 @@ class MemoizedZipManifests(ZipManifests):
     """
     Memoized zipfile manifests.
""" + manifest_mod = collections.namedtuple('manifest_mod', 'manifest mtime') def load(self, path): @@ -1701,7 +1816,7 @@ class ZipProvider(EggProvider): _zip_manifests = MemoizedZipManifests() def __init__(self, module): - EggProvider.__init__(self, module) + super().__init__(module) self.zip_pre = self.loader.archive + os.sep def _zipinfo_name(self, fspath): @@ -1711,20 +1826,16 @@ class ZipProvider(EggProvider): if fspath == self.loader.archive: return '' if fspath.startswith(self.zip_pre): - return fspath[len(self.zip_pre):] - raise AssertionError( - "%s is not a subpath of %s" % (fspath, self.zip_pre) - ) + return fspath[len(self.zip_pre) :] + raise AssertionError("%s is not a subpath of %s" % (fspath, self.zip_pre)) def _parts(self, zip_path): # Convert a zipfile subpath into an egg-relative path part list. # pseudo-fs path fspath = self.zip_pre + zip_path if fspath.startswith(self.egg_root + os.sep): - return fspath[len(self.egg_root) + 1:].split(os.sep) - raise AssertionError( - "%s is not a subpath of %s" % (fspath, self.egg_root) - ) + return fspath[len(self.egg_root) + 1 :].split(os.sep) + raise AssertionError("%s is not a subpath of %s" % (fspath, self.egg_root)) @property def zipinfo(self): @@ -1754,25 +1865,20 @@ class ZipProvider(EggProvider): # FIXME: 'ZipProvider._extract_resource' is too complex (12) def _extract_resource(self, manager, zip_path): # noqa: C901 - if zip_path in self._index(): for name in self._index()[zip_path]: - last = self._extract_resource( - manager, os.path.join(zip_path, name) - ) + last = self._extract_resource(manager, os.path.join(zip_path, name)) # return the extracted directory name return os.path.dirname(last) timestamp, size = self._get_date_and_size(self.zipinfo[zip_path]) if not WRITE_SUPPORT: - raise IOError('"os.rename" and "os.unlink" are not supported ' - 'on this platform') - try: - - real_path = manager.get_cache_path( - self.egg_name, self._parts(zip_path) + raise OSError( + '"os.rename" and "os.unlink" are not supported ' 'on this platform' ) + try: + real_path = manager.get_cache_path(self.egg_name, self._parts(zip_path)) if self._is_current(real_path, zip_path): return real_path @@ -1895,7 +2001,7 @@ class FileMetadata(EmptyProvider): if name != 'PKG-INFO': raise KeyError("No metadata except PKG-INFO is available") - with io.open(self.path, encoding='utf-8', errors="replace") as f: + with open(self.path, encoding='utf-8', errors="replace") as f: metadata = f.read() self._warn_on_replacement(metadata) return metadata @@ -1989,8 +2095,7 @@ def find_eggs_in_zip(importer, path_item, only=False): if _is_egg_path(subitem): subpath = os.path.join(path_item, subitem) dists = find_eggs_in_zip(zipimport.zipimporter(subpath), subpath) - for dist in dists: - yield dist + yield from dists elif subitem.lower().endswith(('.dist-info', '.egg-info')): subpath = os.path.join(path_item, subitem) submeta = EggMetadata(zipimport.zipimporter(subpath)) @@ -2008,93 +2113,42 @@ def find_nothing(importer, path_item, only=False): register_finder(object, find_nothing) -def _by_version_descending(names): - """ - Given a list of filenames, return them in descending order - by version number. 
- - >>> names = 'bar', 'foo', 'Python-2.7.10.egg', 'Python-2.7.2.egg' - >>> _by_version_descending(names) - ['Python-2.7.10.egg', 'Python-2.7.2.egg', 'bar', 'foo'] - >>> names = 'Setuptools-1.2.3b1.egg', 'Setuptools-1.2.3.egg' - >>> _by_version_descending(names) - ['Setuptools-1.2.3.egg', 'Setuptools-1.2.3b1.egg'] - >>> names = 'Setuptools-1.2.3b1.egg', 'Setuptools-1.2.3.post1.egg' - >>> _by_version_descending(names) - ['Setuptools-1.2.3.post1.egg', 'Setuptools-1.2.3b1.egg'] - """ - def try_parse(name): - """ - Attempt to parse as a version or return a null version. - """ - try: - return packaging.version.Version(name) - except Exception: - return packaging.version.Version('0') - - def _by_version(name): - """ - Parse each component of the filename - """ - name, ext = os.path.splitext(name) - parts = itertools.chain(name.split('-'), [ext]) - return [try_parse(part) for part in parts] - - return sorted(names, key=_by_version, reverse=True) - - def find_on_path(importer, path_item, only=False): """Yield distributions accessible on a sys.path directory""" path_item = _normalize_cached(path_item) if _is_unpacked_egg(path_item): yield Distribution.from_filename( - path_item, metadata=PathMetadata( - path_item, os.path.join(path_item, 'EGG-INFO') - ) + path_item, + metadata=PathMetadata(path_item, os.path.join(path_item, 'EGG-INFO')), ) return - entries = ( - os.path.join(path_item, child) - for child in safe_listdir(path_item) - ) - - # for performance, before sorting by version, - # screen entries for only those that will yield - # distributions - filtered = ( - entry - for entry in entries - if dist_factory(path_item, entry, only) - ) + entries = (os.path.join(path_item, child) for child in safe_listdir(path_item)) # scan for .egg and .egg-info in directory - path_item_entries = _by_version_descending(filtered) - for entry in path_item_entries: + for entry in sorted(entries): fullpath = os.path.join(path_item, entry) factory = dist_factory(path_item, entry, only) - for dist in factory(fullpath): - yield dist + yield from factory(fullpath) def dist_factory(path_item, entry, only): """Return a dist_factory for the given entry.""" lower = entry.lower() is_egg_info = lower.endswith('.egg-info') - is_dist_info = ( - lower.endswith('.dist-info') and - os.path.isdir(os.path.join(path_item, entry)) + is_dist_info = lower.endswith('.dist-info') and os.path.isdir( + os.path.join(path_item, entry) ) is_meta = is_egg_info or is_dist_info return ( distributions_from_metadata - if is_meta else - find_distributions - if not only and _is_egg_path(entry) else - resolve_egg_link - if not only and lower.endswith('.egg-link') else - NoDists() + if is_meta + else find_distributions + if not only and _is_egg_path(entry) + else resolve_egg_link + if not only and lower.endswith('.egg-link') + else NoDists() ) @@ -2106,6 +2160,7 @@ class NoDists: >>> list(NoDists()('anything')) [] """ + def __bool__(self): return False @@ -2140,7 +2195,10 @@ def distributions_from_metadata(path): metadata = FileMetadata(path) entry = os.path.basename(path) yield Distribution.from_location( - root, entry, metadata, precedence=DEVELOP_DIST, + root, + entry, + metadata, + precedence=DEVELOP_DIST, ) @@ -2162,17 +2220,16 @@ def resolve_egg_link(path): """ referenced_paths = non_empty_lines(path) resolved_paths = ( - os.path.join(os.path.dirname(path), ref) - for ref in referenced_paths + os.path.join(os.path.dirname(path), ref) for ref in referenced_paths ) dist_groups = map(find_distributions, resolved_paths) return next(dist_groups, ()) 
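Editor's note: the `find_on_path` hunk above drops the version-aware ordering
(the removed `_by_version_descending` helper) in favour of a plain `sorted()`
over directory entries. The following self-contained sketch, reconstructed from
the deleted helper and its doctests, shows what changes in practice; it is not
part of the patch, and the `pkg-*.egg` file names (and the use of the
standalone `packaging` distribution rather than the vendored copy) are
illustrative assumptions.

```python
import itertools
import os

from packaging.version import Version


def _try_parse(name):
    # Mirror the removed fallback: anything unparsable sorts as version 0.
    try:
        return Version(name)
    except Exception:
        return Version('0')


def _by_version_descending(names):
    # Key each filename by the version-like fragments between dashes, so
    # 'pkg-10.egg' outranks 'pkg-2.egg' (numeric, not lexicographic order).
    def _by_version(name):
        stem, ext = os.path.splitext(name)
        parts = itertools.chain(stem.split('-'), [ext])
        return [_try_parse(part) for part in parts]

    return sorted(names, key=_by_version, reverse=True)


names = ['pkg-1.egg', 'pkg-10.egg', 'pkg-2.egg']
print(_by_version_descending(names))  # ['pkg-10.egg', 'pkg-2.egg', 'pkg-1.egg']
print(sorted(names))                  # ['pkg-1.egg', 'pkg-10.egg', 'pkg-2.egg']
```

After this change, `find_on_path` simply yields candidates in lexicographic
order; choosing the best version is left to later resolution (for example,
`Environment.add` still sorts distributions by `hashcmp`).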
-register_finder(pkgutil.ImpImporter, find_on_path) +if hasattr(pkgutil, 'ImpImporter'): + register_finder(pkgutil.ImpImporter, find_on_path) -if hasattr(importlib_machinery, 'FileFinder'): - register_finder(importlib_machinery.FileFinder, find_on_path) +register_finder(importlib_machinery.FileFinder, find_on_path) _declare_state('dict', _namespace_handlers={}) _declare_state('dict', _namespace_packages={}) @@ -2205,12 +2262,14 @@ def _handle_ns(packageName, path_item): # use find_spec (PEP 451) and fall-back to find_module (PEP 302) try: - loader = importer.find_spec(packageName).loader + spec = importer.find_spec(packageName) except AttributeError: # capture warnings due to #1111 with warnings.catch_warnings(): warnings.simplefilter("ignore") loader = importer.find_module(packageName) + else: + loader = spec.loader if spec else None if loader is None: return None @@ -2268,6 +2327,15 @@ def _rebuild_mod_path(orig_path, package_name, module): def declare_namespace(packageName): """Declare that package 'packageName' is a namespace package""" + msg = ( + f"Deprecated call to `pkg_resources.declare_namespace({packageName!r})`.\n" + "Implementing implicit namespace packages (as specified in PEP 420) " + "is preferred to `pkg_resources.declare_namespace`. " + "See https://setuptools.pypa.io/en/latest/references/" + "keywords.html#keyword-namespace-packages" + ) + warnings.warn(msg, DeprecationWarning, stacklevel=2) + _imp.acquire_lock() try: if packageName in _namespace_packages: @@ -2324,11 +2392,11 @@ def file_ns_handler(importer, path_item, packageName, module): return subpath -register_namespace_handler(pkgutil.ImpImporter, file_ns_handler) -register_namespace_handler(zipimport.zipimporter, file_ns_handler) +if hasattr(pkgutil, 'ImpImporter'): + register_namespace_handler(pkgutil.ImpImporter, file_ns_handler) -if hasattr(importlib_machinery, 'FileFinder'): - register_namespace_handler(importlib_machinery.FileFinder, file_ns_handler) +register_namespace_handler(zipimport.zipimporter, file_ns_handler) +register_namespace_handler(importlib_machinery.FileFinder, file_ns_handler) def null_ns_handler(importer, path_item, packageName, module): @@ -2340,8 +2408,7 @@ register_namespace_handler(object, null_ns_handler) def normalize_path(filename): """Normalize a file/dir name for comparison purposes""" - return os.path.normcase(os.path.realpath(os.path.normpath( - _cygwin_patch(filename)))) + return os.path.normcase(os.path.realpath(os.path.normpath(_cygwin_patch(filename)))) def _cygwin_patch(filename): # pragma: nocover @@ -2372,9 +2439,9 @@ def _is_egg_path(path): def _is_zip_egg(path): return ( - path.lower().endswith('.egg') and - os.path.isfile(path) and - zipfile.is_zipfile(path) + path.lower().endswith('.egg') + and os.path.isfile(path) + and zipfile.is_zipfile(path) ) @@ -2382,9 +2449,8 @@ def _is_unpacked_egg(path): """ Determine if given path appears to be an unpacked egg. 
""" - return ( - path.lower().endswith('.egg') and - os.path.isfile(os.path.join(path, 'EGG-INFO', 'PKG-INFO')) + return path.lower().endswith('.egg') and os.path.isfile( + os.path.join(path, 'EGG-INFO', 'PKG-INFO') ) @@ -2396,21 +2462,6 @@ def _set_parent_ns(packageName): setattr(sys.modules[parent], name, sys.modules[packageName]) -def _nonblank(str): - return str and not str.startswith('#') - - -@functools.singledispatch -def yield_lines(iterable): - """Yield valid lines of a string or iterable""" - return itertools.chain.from_iterable(map(yield_lines, iterable)) - - -@yield_lines.register(str) -def _(text): - return filter(_nonblank, map(str.strip, text.splitlines())) - - MODULE = re.compile(r"\w+(\.\w+)*$").match EGG_NAME = re.compile( r""" @@ -2563,8 +2614,10 @@ def _version_from_file(lines): Given an iterable of lines from a Metadata file, return the value of the Version field, if present, or None otherwise. """ + def is_version_line(line): return line.lower().startswith('version:') + version_lines = filter(is_version_line, lines) line = next(iter(version_lines), '') _, _, value = line.partition(':') @@ -2573,12 +2626,19 @@ def _version_from_file(lines): class Distribution: """Wrap an actual or potential sys.path entry w/metadata""" + PKG_INFO = 'PKG-INFO' def __init__( - self, location=None, metadata=None, project_name=None, - version=None, py_version=PY_MAJOR, platform=None, - precedence=EGG_DIST): + self, + location=None, + metadata=None, + project_name=None, + version=None, + py_version=PY_MAJOR, + platform=None, + precedence=EGG_DIST, + ): self.project_name = safe_name(project_name or 'Unknown') if version is not None: self._version = safe_version(version) @@ -2601,8 +2661,13 @@ class Distribution: 'name', 'ver', 'pyver', 'plat' ) return cls( - location, metadata, project_name=project_name, version=version, - py_version=py_version, platform=platform, **kw + location, + metadata, + project_name=project_name, + version=version, + py_version=py_version, + platform=platform, + **kw, )._reload_version() def _reload_version(self): @@ -2611,7 +2676,7 @@ class Distribution: @property def hashcmp(self): return ( - self.parsed_version, + self._forgiving_parsed_version, self.precedence, self.key, self.location, @@ -2658,35 +2723,42 @@ class Distribution: @property def parsed_version(self): if not hasattr(self, "_parsed_version"): - self._parsed_version = parse_version(self.version) + try: + self._parsed_version = parse_version(self.version) + except packaging.version.InvalidVersion as ex: + info = f"(package: {self.project_name})" + if hasattr(ex, "add_note"): + ex.add_note(info) # PEP 678 + raise + raise packaging.version.InvalidVersion(f"{str(ex)} {info}") from None return self._parsed_version - def _warn_legacy_version(self): - LV = packaging.version.LegacyVersion - is_legacy = isinstance(self._parsed_version, LV) - if not is_legacy: - return + @property + def _forgiving_parsed_version(self): + try: + return self.parsed_version + except packaging.version.InvalidVersion as ex: + self._parsed_version = parse_version(_forgiving_version(self.version)) - # While an empty version is technically a legacy version and - # is not a valid PEP 440 version, it's also unlikely to - # actually come from someone and instead it is more likely that - # it comes from setuptools attempting to parse a filename and - # including it in the list. So for that we'll gate this warning - # on if the version is anything at all or not. 
- if not self.version: - return + notes = "\n".join(getattr(ex, "__notes__", [])) # PEP 678 + msg = f"""!!\n\n + ************************************************************************* + {str(ex)}\n{notes} - tmpl = textwrap.dedent(""" - '{project_name} ({version})' is being parsed as a legacy, - non PEP 440, - version. You may find odd behavior and sort order. - In particular it will be sorted as less than 0.0. It - is recommended to migrate to PEP 440 compatible - versions. - """).strip().replace('\n', ' ') + This is a long overdue deprecation. + For the time being, `pkg_resources` will use `{self._parsed_version}` + as a replacement to avoid breaking existing environments, + but no future compatibility is guaranteed. - warnings.warn(tmpl.format(**vars(self)), PEP440Warning) + If you maintain package {self.project_name} you should implement + the relevant changes to adequate the project to PEP 440 immediately. + ************************************************************************* + \n\n!! + """ + warnings.warn(msg, DeprecationWarning) + + return self._parsed_version @property def version(self): @@ -2696,9 +2768,9 @@ class Distribution: version = self._get_version() if version is None: path = self._get_metadata_path_for_display(self.PKG_INFO) - msg = ( - "Missing 'Version:' header and/or {} file at path: {}" - ).format(self.PKG_INFO, path) + msg = ("Missing 'Version:' header and/or {} file at path: {}").format( + self.PKG_INFO, path + ) raise ValueError(msg, self) from e return version @@ -2727,8 +2799,7 @@ class Distribution: reqs = dm.pop(extra) new_extra, _, marker = extra.partition(':') fails_marker = marker and ( - invalid_marker(marker) - or not evaluate_marker(marker) + invalid_marker(marker) or not evaluate_marker(marker) ) if fails_marker: reqs = [] @@ -2777,8 +2848,7 @@ class Distribution: def _get_metadata(self, name): if self.has_metadata(name): - for line in self.get_metadata_lines(name): - yield line + yield from self.get_metadata_lines(name) def _get_version(self): lines = self._get_metadata(self.PKG_INFO) @@ -2800,8 +2870,9 @@ class Distribution: def egg_name(self): """Return what this distribution's standard .egg filename should be""" filename = "%s-%s-py%s" % ( - to_filename(self.project_name), to_filename(self.version), - self.py_version or PY_MAJOR + to_filename(self.project_name), + to_filename(self.version), + self.py_version or PY_MAJOR, ) if self.platform: @@ -2831,17 +2902,13 @@ class Distribution: def __dir__(self): return list( set(super(Distribution, self).__dir__()) - | set( - attr for attr in self._provider.__dir__() - if not attr.startswith('_') - ) + | set(attr for attr in self._provider.__dir__() if not attr.startswith('_')) ) @classmethod def from_filename(cls, filename, metadata=None, **kw): return cls.from_location( - _normalize_cached(filename), os.path.basename(filename), metadata, - **kw + _normalize_cached(filename), os.path.basename(filename), metadata, **kw ) def as_requirement(self): @@ -2953,14 +3020,18 @@ class Distribution: nsp = dict.fromkeys(self._get_metadata('namespace_packages.txt')) loc = normalize_path(self.location) for modname in self._get_metadata('top_level.txt'): - if (modname not in sys.modules or modname in nsp - or modname in _namespace_packages): + if ( + modname not in sys.modules + or modname in nsp + or modname in _namespace_packages + ): continue if modname in ('pkg_resources', 'setuptools', 'site'): continue fn = getattr(sys.modules[modname], '__file__', None) - if fn and (normalize_path(fn).startswith(loc) or - 
fn.startswith(self.location)): + if fn and ( + normalize_path(fn).startswith(loc) or fn.startswith(self.location) + ): continue issue_warning( "Module %s was already imported from %s, but %s is being added" @@ -2973,6 +3044,9 @@ class Distribution: except ValueError: issue_warning("Unbuilt egg for " + repr(self)) return False + except SystemError: + # TODO: remove this except clause when python/cpython#103632 is fixed. + return False return True def clone(self, **kw): @@ -3012,6 +3086,7 @@ class DistInfoDistribution(Distribution): Wrap an actual or potential sys.path entry w/metadata, .dist-info style. """ + PKG_INFO = 'METADATA' EQEQ = re.compile(r"([\(,])\s*(\d.*?)\s*([,\)])") @@ -3078,25 +3153,12 @@ def issue_warning(*args, **kw): def parse_requirements(strs): - """Yield ``Requirement`` objects for each specification in `strs` + """ + Yield ``Requirement`` objects for each specification in `strs`. `strs` must be a string, or a (possibly-nested) iterable thereof. """ - # create a steppable iterator, so we can handle \-continuations - lines = iter(yield_lines(strs)) - - for line in lines: - # Drop comments -- a hash without a space may be in a URL. - if ' #' in line: - line = line[:line.find(' #')] - # If there is a line continuation, drop it, and append the next line. - if line.endswith('\\'): - line = line[:-2].strip() - try: - line += next(lines) - except StopIteration: - return - yield Requirement(line) + return map(Requirement, join_continuation(map(drop_comment, yield_lines(strs)))) class RequirementParseError(packaging.requirements.InvalidRequirement): @@ -3110,8 +3172,7 @@ class Requirement(packaging.requirements.Requirement): self.unsafe_name = self.name project_name = safe_name(self.name) self.project_name, self.key = project_name, project_name.lower() - self.specs = [ - (spec.operator, spec.version) for spec in self.specifier] + self.specs = [(spec.operator, spec.version) for spec in self.specifier] self.extras = tuple(map(safe_extra, self.extras)) self.hashCmp = ( self.key, @@ -3123,10 +3184,7 @@ class Requirement(packaging.requirements.Requirement): self.__hash = hash(self.hashCmp) def __eq__(self, other): - return ( - isinstance(other, Requirement) and - self.hashCmp == other.hashCmp - ) + return isinstance(other, Requirement) and self.hashCmp == other.hashCmp def __ne__(self, other): return not self == other @@ -3151,7 +3209,7 @@ class Requirement(packaging.requirements.Requirement): @staticmethod def parse(s): - req, = parse_requirements(s) + (req,) = parse_requirements(s) return req @@ -3182,7 +3240,7 @@ def ensure_directory(path): def _bypass_ensure_directory(path): """Sandbox-bypassing version of ensure_directory()""" if not WRITE_SUPPORT: - raise IOError('"os.mkdir" not supported on this platform.') + raise OSError('"os.mkdir" not supported on this platform.') dirname, filename = split(path) if dirname and filename and not isdir(dirname): _bypass_ensure_directory(dirname) @@ -3289,10 +3347,7 @@ def _initialize_master_working_set(): # ensure that all distributions added to the working set in the future # (e.g. by calling ``require()``) will get activated as well, # with higher priority (replace=True). 
- tuple( - dist.activate(replace=False) - for dist in working_set - ) + tuple(dist.activate(replace=False) for dist in working_set) add_activation_listener( lambda dist: dist.activate(replace=True), existing=False, diff --git a/lib/pkg_resources/_vendor/appdirs.py b/lib/pkg_resources/_vendor/appdirs.py deleted file mode 100644 index ae67001a..00000000 --- a/lib/pkg_resources/_vendor/appdirs.py +++ /dev/null @@ -1,608 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2005-2010 ActiveState Software Inc. -# Copyright (c) 2013 Eddy Petrișor - -"""Utilities for determining application-specific dirs. - -See for details and usage. -""" -# Dev Notes: -# - MSDN on where to store app data files: -# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120 -# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html -# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html - -__version_info__ = (1, 4, 3) -__version__ = '.'.join(map(str, __version_info__)) - - -import sys -import os - -PY3 = sys.version_info[0] == 3 - -if PY3: - unicode = str - -if sys.platform.startswith('java'): - import platform - os_name = platform.java_ver()[3][0] - if os_name.startswith('Windows'): # "Windows XP", "Windows 7", etc. - system = 'win32' - elif os_name.startswith('Mac'): # "Mac OS X", etc. - system = 'darwin' - else: # "Linux", "SunOS", "FreeBSD", etc. - # Setting this to "linux2" is not ideal, but only Windows or Mac - # are actually checked for and the rest of the module expects - # *sys.platform* style strings. - system = 'linux2' -else: - system = sys.platform - - - -def user_data_dir(appname=None, appauthor=None, version=None, roaming=False): - r"""Return full path to the user-specific data dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be ".". - Only applied when appname is present. - "roaming" (boolean, default False) can be set True to use the Windows - roaming appdata directory. That means that for users on a Windows - network setup for roaming profiles, this user data will be - sync'd on login. See - - for a discussion of issues. - - Typical user data directories are: - Mac OS X: ~/Library/Application Support/ - Unix: ~/.local/share/ # or in $XDG_DATA_HOME, if defined - Win XP (not roaming): C:\Documents and Settings\\Application Data\\ - Win XP (roaming): C:\Documents and Settings\\Local Settings\Application Data\\ - Win 7 (not roaming): C:\Users\\AppData\Local\\ - Win 7 (roaming): C:\Users\\AppData\Roaming\\ - - For Unix, we follow the XDG spec and support $XDG_DATA_HOME. - That means, by default "~/.local/share/". 
- """ - if system == "win32": - if appauthor is None: - appauthor = appname - const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA" - path = os.path.normpath(_get_win_folder(const)) - if appname: - if appauthor is not False: - path = os.path.join(path, appauthor, appname) - else: - path = os.path.join(path, appname) - elif system == 'darwin': - path = os.path.expanduser('~/Library/Application Support/') - if appname: - path = os.path.join(path, appname) - else: - path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share")) - if appname: - path = os.path.join(path, appname) - if appname and version: - path = os.path.join(path, version) - return path - - -def site_data_dir(appname=None, appauthor=None, version=None, multipath=False): - r"""Return full path to the user-shared data dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be ".". - Only applied when appname is present. - "multipath" is an optional parameter only applicable to *nix - which indicates that the entire list of data dirs should be - returned. By default, the first item from XDG_DATA_DIRS is - returned, or '/usr/local/share/', - if XDG_DATA_DIRS is not set - - Typical site data directories are: - Mac OS X: /Library/Application Support/ - Unix: /usr/local/share/ or /usr/share/ - Win XP: C:\Documents and Settings\All Users\Application Data\\ - Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.) - Win 7: C:\ProgramData\\ # Hidden, but writeable on Win 7. - - For Unix, this is using the $XDG_DATA_DIRS[0] default. - - WARNING: Do not use this on Windows. See the Vista-Fail note above for why. - """ - if system == "win32": - if appauthor is None: - appauthor = appname - path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA")) - if appname: - if appauthor is not False: - path = os.path.join(path, appauthor, appname) - else: - path = os.path.join(path, appname) - elif system == 'darwin': - path = os.path.expanduser('/Library/Application Support') - if appname: - path = os.path.join(path, appname) - else: - # XDG default for $XDG_DATA_DIRS - # only first, if multipath is False - path = os.getenv('XDG_DATA_DIRS', - os.pathsep.join(['/usr/local/share', '/usr/share'])) - pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)] - if appname: - if version: - appname = os.path.join(appname, version) - pathlist = [os.sep.join([x, appname]) for x in pathlist] - - if multipath: - path = os.pathsep.join(pathlist) - else: - path = pathlist[0] - return path - - if appname and version: - path = os.path.join(path, version) - return path - - -def user_config_dir(appname=None, appauthor=None, version=None, roaming=False): - r"""Return full path to the user-specific config dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. 
You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be ".". - Only applied when appname is present. - "roaming" (boolean, default False) can be set True to use the Windows - roaming appdata directory. That means that for users on a Windows - network setup for roaming profiles, this user data will be - sync'd on login. See - - for a discussion of issues. - - Typical user config directories are: - Mac OS X: same as user_data_dir - Unix: ~/.config/ # or in $XDG_CONFIG_HOME, if defined - Win *: same as user_data_dir - - For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME. - That means, by default "~/.config/". - """ - if system in ["win32", "darwin"]: - path = user_data_dir(appname, appauthor, None, roaming) - else: - path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config")) - if appname: - path = os.path.join(path, appname) - if appname and version: - path = os.path.join(path, version) - return path - - -def site_config_dir(appname=None, appauthor=None, version=None, multipath=False): - r"""Return full path to the user-shared data dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be ".". - Only applied when appname is present. - "multipath" is an optional parameter only applicable to *nix - which indicates that the entire list of config dirs should be - returned. By default, the first item from XDG_CONFIG_DIRS is - returned, or '/etc/xdg/', if XDG_CONFIG_DIRS is not set - - Typical site config directories are: - Mac OS X: same as site_data_dir - Unix: /etc/xdg/ or $XDG_CONFIG_DIRS[i]/ for each value in - $XDG_CONFIG_DIRS - Win *: same as site_data_dir - Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.) - - For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False - - WARNING: Do not use this on Windows. See the Vista-Fail note above for why. - """ - if system in ["win32", "darwin"]: - path = site_data_dir(appname, appauthor) - if appname and version: - path = os.path.join(path, version) - else: - # XDG default for $XDG_CONFIG_DIRS - # only first, if multipath is False - path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg') - pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)] - if appname: - if version: - appname = os.path.join(appname, version) - pathlist = [os.sep.join([x, appname]) for x in pathlist] - - if multipath: - path = os.pathsep.join(pathlist) - else: - path = pathlist[0] - return path - - -def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True): - r"""Return full path to the user-specific cache dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. 
You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be ".". - Only applied when appname is present. - "opinion" (boolean) can be False to disable the appending of - "Cache" to the base app data dir for Windows. See - discussion below. - - Typical user cache directories are: - Mac OS X: ~/Library/Caches/ - Unix: ~/.cache/ (XDG default) - Win XP: C:\Documents and Settings\\Local Settings\Application Data\\\Cache - Vista: C:\Users\\AppData\Local\\\Cache - - On Windows the only suggestion in the MSDN docs is that local settings go in - the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming - app data dir (the default returned by `user_data_dir` above). Apps typically - put cache data somewhere *under* the given dir here. Some examples: - ...\Mozilla\Firefox\Profiles\\Cache - ...\Acme\SuperApp\Cache\1.0 - OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value. - This can be disabled with the `opinion=False` option. - """ - if system == "win32": - if appauthor is None: - appauthor = appname - path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA")) - if appname: - if appauthor is not False: - path = os.path.join(path, appauthor, appname) - else: - path = os.path.join(path, appname) - if opinion: - path = os.path.join(path, "Cache") - elif system == 'darwin': - path = os.path.expanduser('~/Library/Caches') - if appname: - path = os.path.join(path, appname) - else: - path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache')) - if appname: - path = os.path.join(path, appname) - if appname and version: - path = os.path.join(path, version) - return path - - -def user_state_dir(appname=None, appauthor=None, version=None, roaming=False): - r"""Return full path to the user-specific state dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be ".". - Only applied when appname is present. - "roaming" (boolean, default False) can be set True to use the Windows - roaming appdata directory. That means that for users on a Windows - network setup for roaming profiles, this user data will be - sync'd on login. See - - for a discussion of issues. - - Typical user state directories are: - Mac OS X: same as user_data_dir - Unix: ~/.local/state/ # or in $XDG_STATE_HOME, if defined - Win *: same as user_data_dir - - For Unix, we follow this Debian proposal - to extend the XDG spec and support $XDG_STATE_HOME. - - That means, by default "~/.local/state/". 
- """ - if system in ["win32", "darwin"]: - path = user_data_dir(appname, appauthor, None, roaming) - else: - path = os.getenv('XDG_STATE_HOME', os.path.expanduser("~/.local/state")) - if appname: - path = os.path.join(path, appname) - if appname and version: - path = os.path.join(path, version) - return path - - -def user_log_dir(appname=None, appauthor=None, version=None, opinion=True): - r"""Return full path to the user-specific log dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be ".". - Only applied when appname is present. - "opinion" (boolean) can be False to disable the appending of - "Logs" to the base app data dir for Windows, and "log" to the - base cache dir for Unix. See discussion below. - - Typical user log directories are: - Mac OS X: ~/Library/Logs/ - Unix: ~/.cache//log # or under $XDG_CACHE_HOME if defined - Win XP: C:\Documents and Settings\\Local Settings\Application Data\\\Logs - Vista: C:\Users\\AppData\Local\\\Logs - - On Windows the only suggestion in the MSDN docs is that local settings - go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in - examples of what some windows apps use for a logs dir.) - - OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA` - value for Windows and appends "log" to the user cache dir for Unix. - This can be disabled with the `opinion=False` option. 
- """ - if system == "darwin": - path = os.path.join( - os.path.expanduser('~/Library/Logs'), - appname) - elif system == "win32": - path = user_data_dir(appname, appauthor, version) - version = False - if opinion: - path = os.path.join(path, "Logs") - else: - path = user_cache_dir(appname, appauthor, version) - version = False - if opinion: - path = os.path.join(path, "log") - if appname and version: - path = os.path.join(path, version) - return path - - -class AppDirs(object): - """Convenience wrapper for getting application dirs.""" - def __init__(self, appname=None, appauthor=None, version=None, - roaming=False, multipath=False): - self.appname = appname - self.appauthor = appauthor - self.version = version - self.roaming = roaming - self.multipath = multipath - - @property - def user_data_dir(self): - return user_data_dir(self.appname, self.appauthor, - version=self.version, roaming=self.roaming) - - @property - def site_data_dir(self): - return site_data_dir(self.appname, self.appauthor, - version=self.version, multipath=self.multipath) - - @property - def user_config_dir(self): - return user_config_dir(self.appname, self.appauthor, - version=self.version, roaming=self.roaming) - - @property - def site_config_dir(self): - return site_config_dir(self.appname, self.appauthor, - version=self.version, multipath=self.multipath) - - @property - def user_cache_dir(self): - return user_cache_dir(self.appname, self.appauthor, - version=self.version) - - @property - def user_state_dir(self): - return user_state_dir(self.appname, self.appauthor, - version=self.version) - - @property - def user_log_dir(self): - return user_log_dir(self.appname, self.appauthor, - version=self.version) - - -#---- internal support stuff - -def _get_win_folder_from_registry(csidl_name): - """This is a fallback technique at best. I'm not sure if using the - registry for this guarantees us the correct answer for all CSIDL_* - names. - """ - if PY3: - import winreg as _winreg - else: - import _winreg - - shell_folder_name = { - "CSIDL_APPDATA": "AppData", - "CSIDL_COMMON_APPDATA": "Common AppData", - "CSIDL_LOCAL_APPDATA": "Local AppData", - }[csidl_name] - - key = _winreg.OpenKey( - _winreg.HKEY_CURRENT_USER, - r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders" - ) - dir, type = _winreg.QueryValueEx(key, shell_folder_name) - return dir - - -def _get_win_folder_with_pywin32(csidl_name): - from win32com.shell import shellcon, shell - dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0) - # Try to make this a unicode path because SHGetFolderPath does - # not return unicode strings when there is unicode data in the - # path. - try: - dir = unicode(dir) - - # Downgrade to short path name if have highbit chars. See - # . - has_high_char = False - for c in dir: - if ord(c) > 255: - has_high_char = True - break - if has_high_char: - try: - import win32api - dir = win32api.GetShortPathName(dir) - except ImportError: - pass - except UnicodeError: - pass - return dir - - -def _get_win_folder_with_ctypes(csidl_name): - import ctypes - - csidl_const = { - "CSIDL_APPDATA": 26, - "CSIDL_COMMON_APPDATA": 35, - "CSIDL_LOCAL_APPDATA": 28, - }[csidl_name] - - buf = ctypes.create_unicode_buffer(1024) - ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf) - - # Downgrade to short path name if have highbit chars. See - # . 
- has_high_char = False - for c in buf: - if ord(c) > 255: - has_high_char = True - break - if has_high_char: - buf2 = ctypes.create_unicode_buffer(1024) - if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024): - buf = buf2 - - return buf.value - -def _get_win_folder_with_jna(csidl_name): - import array - from com.sun import jna - from com.sun.jna.platform import win32 - - buf_size = win32.WinDef.MAX_PATH * 2 - buf = array.zeros('c', buf_size) - shell = win32.Shell32.INSTANCE - shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf) - dir = jna.Native.toString(buf.tostring()).rstrip("\0") - - # Downgrade to short path name if have highbit chars. See - # . - has_high_char = False - for c in dir: - if ord(c) > 255: - has_high_char = True - break - if has_high_char: - buf = array.zeros('c', buf_size) - kernel = win32.Kernel32.INSTANCE - if kernel.GetShortPathName(dir, buf, buf_size): - dir = jna.Native.toString(buf.tostring()).rstrip("\0") - - return dir - -if system == "win32": - try: - import win32com.shell - _get_win_folder = _get_win_folder_with_pywin32 - except ImportError: - try: - from ctypes import windll - _get_win_folder = _get_win_folder_with_ctypes - except ImportError: - try: - import com.sun.jna - _get_win_folder = _get_win_folder_with_jna - except ImportError: - _get_win_folder = _get_win_folder_from_registry - - -#---- self test code - -if __name__ == "__main__": - appname = "MyApp" - appauthor = "MyCompany" - - props = ("user_data_dir", - "user_config_dir", - "user_cache_dir", - "user_state_dir", - "user_log_dir", - "site_data_dir", - "site_config_dir") - - print("-- app dirs %s --" % __version__) - - print("-- app dirs (with optional 'version')") - dirs = AppDirs(appname, appauthor, version="1.0") - for prop in props: - print("%s: %s" % (prop, getattr(dirs, prop))) - - print("\n-- app dirs (without optional 'version')") - dirs = AppDirs(appname, appauthor) - for prop in props: - print("%s: %s" % (prop, getattr(dirs, prop))) - - print("\n-- app dirs (without optional 'appauthor')") - dirs = AppDirs(appname) - for prop in props: - print("%s: %s" % (prop, getattr(dirs, prop))) - - print("\n-- app dirs (with disabled 'appauthor')") - dirs = AppDirs(appname, appauthor=False) - for prop in props: - print("%s: %s" % (prop, getattr(dirs, prop))) diff --git a/lib/pkg_resources/_vendor/importlib_resources-5.10.2.dist-info/top_level.txt b/lib/pkg_resources/_vendor/importlib_resources-5.10.2.dist-info/top_level.txt new file mode 100644 index 00000000..58ad1bd3 --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources-5.10.2.dist-info/top_level.txt @@ -0,0 +1 @@ +importlib_resources diff --git a/lib/pkg_resources/_vendor/importlib_resources/__init__.py b/lib/pkg_resources/_vendor/importlib_resources/__init__.py new file mode 100644 index 00000000..34e3a995 --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/__init__.py @@ -0,0 +1,36 @@ +"""Read resources contained within a package.""" + +from ._common import ( + as_file, + files, + Package, +) + +from ._legacy import ( + contents, + open_binary, + read_binary, + open_text, + read_text, + is_resource, + path, + Resource, +) + +from .abc import ResourceReader + + +__all__ = [ + 'Package', + 'Resource', + 'ResourceReader', + 'as_file', + 'contents', + 'files', + 'is_resource', + 'open_binary', + 'open_text', + 'path', + 'read_binary', + 'read_text', +] diff --git a/lib/pkg_resources/_vendor/importlib_resources/_adapters.py 
b/lib/pkg_resources/_vendor/importlib_resources/_adapters.py new file mode 100644 index 00000000..ea363d86 --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/_adapters.py @@ -0,0 +1,170 @@ +from contextlib import suppress +from io import TextIOWrapper + +from . import abc + + +class SpecLoaderAdapter: + """ + Adapt a package spec to adapt the underlying loader. + """ + + def __init__(self, spec, adapter=lambda spec: spec.loader): + self.spec = spec + self.loader = adapter(spec) + + def __getattr__(self, name): + return getattr(self.spec, name) + + +class TraversableResourcesLoader: + """ + Adapt a loader to provide TraversableResources. + """ + + def __init__(self, spec): + self.spec = spec + + def get_resource_reader(self, name): + return CompatibilityFiles(self.spec)._native() + + +def _io_wrapper(file, mode='r', *args, **kwargs): + if mode == 'r': + return TextIOWrapper(file, *args, **kwargs) + elif mode == 'rb': + return file + raise ValueError( + "Invalid mode value '{}', only 'r' and 'rb' are supported".format(mode) + ) + + +class CompatibilityFiles: + """ + Adapter for an existing or non-existent resource reader + to provide a compatibility .files(). + """ + + class SpecPath(abc.Traversable): + """ + Path tied to a module spec. + Can be read and exposes the resource reader children. + """ + + def __init__(self, spec, reader): + self._spec = spec + self._reader = reader + + def iterdir(self): + if not self._reader: + return iter(()) + return iter( + CompatibilityFiles.ChildPath(self._reader, path) + for path in self._reader.contents() + ) + + def is_file(self): + return False + + is_dir = is_file + + def joinpath(self, other): + if not self._reader: + return CompatibilityFiles.OrphanPath(other) + return CompatibilityFiles.ChildPath(self._reader, other) + + @property + def name(self): + return self._spec.name + + def open(self, mode='r', *args, **kwargs): + return _io_wrapper(self._reader.open_resource(None), mode, *args, **kwargs) + + class ChildPath(abc.Traversable): + """ + Path tied to a resource reader child. + Can be read but doesn't expose any meaningful children. + """ + + def __init__(self, reader, name): + self._reader = reader + self._name = name + + def iterdir(self): + return iter(()) + + def is_file(self): + return self._reader.is_resource(self.name) + + def is_dir(self): + return not self.is_file() + + def joinpath(self, other): + return CompatibilityFiles.OrphanPath(self.name, other) + + @property + def name(self): + return self._name + + def open(self, mode='r', *args, **kwargs): + return _io_wrapper( + self._reader.open_resource(self.name), mode, *args, **kwargs + ) + + class OrphanPath(abc.Traversable): + """ + Orphan path, not tied to a module spec or resource reader. + Can't be read and doesn't expose any meaningful children. 
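+        (Illustrative addition, not upstream text: when no reader is
+        available, CompatibilityFiles(spec).files() / 'a' / 'b' evaluates
+        to OrphanPath('a', 'b').)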
+ """ + + def __init__(self, *path_parts): + if len(path_parts) < 1: + raise ValueError('Need at least one path part to construct a path') + self._path = path_parts + + def iterdir(self): + return iter(()) + + def is_file(self): + return False + + is_dir = is_file + + def joinpath(self, other): + return CompatibilityFiles.OrphanPath(*self._path, other) + + @property + def name(self): + return self._path[-1] + + def open(self, mode='r', *args, **kwargs): + raise FileNotFoundError("Can't open orphan path") + + def __init__(self, spec): + self.spec = spec + + @property + def _reader(self): + with suppress(AttributeError): + return self.spec.loader.get_resource_reader(self.spec.name) + + def _native(self): + """ + Return the native reader if it supports files(). + """ + reader = self._reader + return reader if hasattr(reader, 'files') else self + + def __getattr__(self, attr): + return getattr(self._reader, attr) + + def files(self): + return CompatibilityFiles.SpecPath(self.spec, self._reader) + + +def wrap_spec(package): + """ + Construct a package spec with traversable compatibility + on the spec/loader/reader. + """ + return SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader) diff --git a/lib/pkg_resources/_vendor/importlib_resources/_common.py b/lib/pkg_resources/_vendor/importlib_resources/_common.py new file mode 100644 index 00000000..3c6de1cf --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/_common.py @@ -0,0 +1,207 @@ +import os +import pathlib +import tempfile +import functools +import contextlib +import types +import importlib +import inspect +import warnings +import itertools + +from typing import Union, Optional, cast +from .abc import ResourceReader, Traversable + +from ._compat import wrap_spec + +Package = Union[types.ModuleType, str] +Anchor = Package + + +def package_to_anchor(func): + """ + Replace 'package' parameter as 'anchor' and warn about the change. + + Other errors should fall through. + + >>> files('a', 'b') + Traceback (most recent call last): + TypeError: files() takes from 0 to 1 positional arguments but 2 were given + """ + undefined = object() + + @functools.wraps(func) + def wrapper(anchor=undefined, package=undefined): + if package is not undefined: + if anchor is not undefined: + return func(anchor, package) + warnings.warn( + "First parameter to files is renamed to 'anchor'", + DeprecationWarning, + stacklevel=2, + ) + return func(package) + elif anchor is undefined: + return func() + return func(anchor) + + return wrapper + + +@package_to_anchor +def files(anchor: Optional[Anchor] = None) -> Traversable: + """ + Get a Traversable resource for an anchor. + """ + return from_package(resolve(anchor)) + + +def get_resource_reader(package: types.ModuleType) -> Optional[ResourceReader]: + """ + Return the package's loader if it's a ResourceReader. + """ + # We can't use + # a issubclass() check here because apparently abc.'s __subclasscheck__() + # hook wants to create a weak reference to the object, but + # zipimport.zipimporter does not support weak references, resulting in a + # TypeError. That seems terrible. 
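+    # Illustrative sketch (an editorial addition; 'email' is just an
+    # example package): the duck-typed lookup below behaves like
+    #   importlib.import_module('email').__spec__.loader.get_resource_reader('email')
+    # when the loader defines get_resource_reader, and returns None otherwise.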
+    spec = package.__spec__
+    reader = getattr(spec.loader, 'get_resource_reader', None)  # type: ignore
+    if reader is None:
+        return None
+    return reader(spec.name)  # type: ignore
+
+
+@functools.singledispatch
+def resolve(cand: Optional[Anchor]) -> types.ModuleType:
+    return cast(types.ModuleType, cand)
+
+
+@resolve.register
+def _(cand: str) -> types.ModuleType:
+    return importlib.import_module(cand)
+
+
+@resolve.register
+def _(cand: None) -> types.ModuleType:
+    return resolve(_infer_caller().f_globals['__name__'])
+
+
+def _infer_caller():
+    """
+    Walk the stack and find the frame of the first caller not in this module.
+    """
+
+    def is_this_file(frame_info):
+        return frame_info.filename == __file__
+
+    def is_wrapper(frame_info):
+        return frame_info.function == 'wrapper'
+
+    not_this_file = itertools.filterfalse(is_this_file, inspect.stack())
+    # also exclude 'wrapper' due to singledispatch in the call stack
+    callers = itertools.filterfalse(is_wrapper, not_this_file)
+    return next(callers).frame
+
+
+def from_package(package: types.ModuleType):
+    """
+    Return a Traversable object for the given package.
+
+    """
+    spec = wrap_spec(package)
+    reader = spec.loader.get_resource_reader(spec.name)
+    return reader.files()
+
+
+@contextlib.contextmanager
+def _tempfile(
+    reader,
+    suffix='',
+    # gh-93353: Keep a reference to call os.remove() in late Python
+    # finalization.
+    *,
+    _os_remove=os.remove,
+):
+    # Not using tempfile.NamedTemporaryFile as it leads to deeper 'try'
+    # blocks due to the need to close the temporary file to work on Windows
+    # properly.
+    fd, raw_path = tempfile.mkstemp(suffix=suffix)
+    try:
+        try:
+            os.write(fd, reader())
+        finally:
+            os.close(fd)
+        del reader
+        yield pathlib.Path(raw_path)
+    finally:
+        try:
+            _os_remove(raw_path)
+        except FileNotFoundError:
+            pass
+
+
+def _temp_file(path):
+    return _tempfile(path.read_bytes, suffix=path.name)
+
+
+def _is_present_dir(path: Traversable) -> bool:
+    """
+    Some Traversables implement ``is_dir()`` to raise an
+    exception (i.e. ``FileNotFoundError``) when the
+    directory doesn't exist. This function wraps that call
+    to always return a boolean and only return True
+    if there's a dir and it exists.
+    """
+    with contextlib.suppress(FileNotFoundError):
+        return path.is_dir()
+    return False
+
+
+@functools.singledispatch
+def as_file(path):
+    """
+    Given a Traversable object, return that object as a
+    path on the local file system in a context manager.
+    """
+    return _temp_dir(path) if _is_present_dir(path) else _temp_file(path)
+
+
+@as_file.register(pathlib.Path)
+@contextlib.contextmanager
+def _(path):
+    """
+    Degenerate behavior for pathlib.Path objects.
+    """
+    yield path
+
+
+@contextlib.contextmanager
+def _temp_path(dir: tempfile.TemporaryDirectory):
+    """
+    Wrap tempfile.TemporaryDirectory to return a pathlib object.
+    """
+    with dir as result:
+        yield pathlib.Path(result)
+
+
+@contextlib.contextmanager
+def _temp_dir(path):
+    """
+    Given a traversable dir, recursively replicate the whole tree
+    to the file system in a context manager.
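+    (Editorial note, an addition: as_file() dispatches here only when
+    _is_present_dir() reports an existing directory, e.g. a package
+    directory embedded in a zip archive.)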
+ """ + assert path.is_dir() + with _temp_path(tempfile.TemporaryDirectory()) as temp_dir: + yield _write_contents(temp_dir, path) + + +def _write_contents(target, source): + child = target.joinpath(source.name) + if source.is_dir(): + child.mkdir() + for item in source.iterdir(): + _write_contents(child, item) + else: + child.write_bytes(source.read_bytes()) + return child diff --git a/lib/pkg_resources/_vendor/importlib_resources/_compat.py b/lib/pkg_resources/_vendor/importlib_resources/_compat.py new file mode 100644 index 00000000..8b5b1d28 --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/_compat.py @@ -0,0 +1,108 @@ +# flake8: noqa + +import abc +import os +import sys +import pathlib +from contextlib import suppress +from typing import Union + + +if sys.version_info >= (3, 10): + from zipfile import Path as ZipPath # type: ignore +else: + from ..zipp import Path as ZipPath # type: ignore + + +try: + from typing import runtime_checkable # type: ignore +except ImportError: + + def runtime_checkable(cls): # type: ignore + return cls + + +try: + from typing import Protocol # type: ignore +except ImportError: + Protocol = abc.ABC # type: ignore + + +class TraversableResourcesLoader: + """ + Adapt loaders to provide TraversableResources and other + compatibility. + + Used primarily for Python 3.9 and earlier where the native + loaders do not yet implement TraversableResources. + """ + + def __init__(self, spec): + self.spec = spec + + @property + def path(self): + return self.spec.origin + + def get_resource_reader(self, name): + from . import readers, _adapters + + def _zip_reader(spec): + with suppress(AttributeError): + return readers.ZipReader(spec.loader, spec.name) + + def _namespace_reader(spec): + with suppress(AttributeError, ValueError): + return readers.NamespaceReader(spec.submodule_search_locations) + + def _available_reader(spec): + with suppress(AttributeError): + return spec.loader.get_resource_reader(spec.name) + + def _native_reader(spec): + reader = _available_reader(spec) + return reader if hasattr(reader, 'files') else None + + def _file_reader(spec): + try: + path = pathlib.Path(self.path) + except TypeError: + return None + if path.exists(): + return readers.FileReader(self) + + return ( + # native reader if it supplies 'files' + _native_reader(self.spec) + or + # local ZipReader if a zip module + _zip_reader(self.spec) + or + # local NamespaceReader if a namespace module + _namespace_reader(self.spec) + or + # local FileReader + _file_reader(self.spec) + # fallback - adapt the spec ResourceReader to TraversableReader + or _adapters.CompatibilityFiles(self.spec) + ) + + +def wrap_spec(package): + """ + Construct a package spec with traversable compatibility + on the spec/loader/reader. + + Supersedes _adapters.wrap_spec to use TraversableResourcesLoader + from above for older Python compatibility (<3.10). + """ + from . 
import _adapters + + return _adapters.SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader) + + +if sys.version_info >= (3, 9): + StrPath = Union[str, os.PathLike[str]] +else: + # PathLike is only subscriptable at runtime in 3.9+ + StrPath = Union[str, "os.PathLike[str]"] diff --git a/lib/pkg_resources/_vendor/importlib_resources/_itertools.py b/lib/pkg_resources/_vendor/importlib_resources/_itertools.py new file mode 100644 index 00000000..cce05582 --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/_itertools.py @@ -0,0 +1,35 @@ +from itertools import filterfalse + +from typing import ( + Callable, + Iterable, + Iterator, + Optional, + Set, + TypeVar, + Union, +) + +# Type and type variable definitions +_T = TypeVar('_T') +_U = TypeVar('_U') + + +def unique_everseen( + iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = None +) -> Iterator[_T]: + "List unique elements, preserving order. Remember all elements ever seen." + # unique_everseen('AAAABBBCCDAABBB') --> A B C D + # unique_everseen('ABBCcAD', str.lower) --> A B C D + seen: Set[Union[_T, _U]] = set() + seen_add = seen.add + if key is None: + for element in filterfalse(seen.__contains__, iterable): + seen_add(element) + yield element + else: + for element in iterable: + k = key(element) + if k not in seen: + seen_add(k) + yield element diff --git a/lib/pkg_resources/_vendor/importlib_resources/_legacy.py b/lib/pkg_resources/_vendor/importlib_resources/_legacy.py new file mode 100644 index 00000000..b1ea8105 --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/_legacy.py @@ -0,0 +1,120 @@ +import functools +import os +import pathlib +import types +import warnings + +from typing import Union, Iterable, ContextManager, BinaryIO, TextIO, Any + +from . import _common + +Package = Union[types.ModuleType, str] +Resource = str + + +def deprecated(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + warnings.warn( + f"{func.__name__} is deprecated. Use files() instead. " + "Refer to https://importlib-resources.readthedocs.io" + "/en/latest/using.html#migrating-from-legacy for migration advice.", + DeprecationWarning, + stacklevel=2, + ) + return func(*args, **kwargs) + + return wrapper + + +def normalize_path(path: Any) -> str: + """Normalize a path by ensuring it is a string. + + If the resulting string contains path separators, an exception is raised. + """ + str_path = str(path) + parent, file_name = os.path.split(str_path) + if parent: + raise ValueError(f'{path!r} must be only a file name') + return file_name + + +@deprecated +def open_binary(package: Package, resource: Resource) -> BinaryIO: + """Return a file-like object opened for binary reading of the resource.""" + return (_common.files(package) / normalize_path(resource)).open('rb') + + +@deprecated +def read_binary(package: Package, resource: Resource) -> bytes: + """Return the binary contents of the resource.""" + return (_common.files(package) / normalize_path(resource)).read_bytes() + + +@deprecated +def open_text( + package: Package, + resource: Resource, + encoding: str = 'utf-8', + errors: str = 'strict', +) -> TextIO: + """Return a file-like object opened for text reading of the resource.""" + return (_common.files(package) / normalize_path(resource)).open( + 'r', encoding=encoding, errors=errors + ) + + +@deprecated +def read_text( + package: Package, + resource: Resource, + encoding: str = 'utf-8', + errors: str = 'strict', +) -> str: + """Return the decoded string of the resource. 
+ + The decoding-related arguments have the same semantics as those of + bytes.decode(). + """ + with open_text(package, resource, encoding, errors) as fp: + return fp.read() + + +@deprecated +def contents(package: Package) -> Iterable[str]: + """Return an iterable of entries in `package`. + + Note that not all entries are resources. Specifically, directories are + not considered resources. Use `is_resource()` on each entry returned here + to check if it is a resource or not. + """ + return [path.name for path in _common.files(package).iterdir()] + + +@deprecated +def is_resource(package: Package, name: str) -> bool: + """True if `name` is a resource inside `package`. + + Directories are *not* resources. + """ + resource = normalize_path(name) + return any( + traversable.name == resource and traversable.is_file() + for traversable in _common.files(package).iterdir() + ) + + +@deprecated +def path( + package: Package, + resource: Resource, +) -> ContextManager[pathlib.Path]: + """A context manager providing a file path object to the resource. + + If the resource does not already exist on its own on the file system, + a temporary file will be created. If the file was created, the file + will be deleted upon exiting the context manager (no exception is + raised if the file was deleted prior to the context manager + exiting). + """ + return _common.as_file(_common.files(package) / normalize_path(resource)) diff --git a/lib/pkg_resources/_vendor/importlib_resources/abc.py b/lib/pkg_resources/_vendor/importlib_resources/abc.py new file mode 100644 index 00000000..23b6aeaf --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/abc.py @@ -0,0 +1,170 @@ +import abc +import io +import itertools +import pathlib +from typing import Any, BinaryIO, Iterable, Iterator, NoReturn, Text, Optional + +from ._compat import runtime_checkable, Protocol, StrPath + + +__all__ = ["ResourceReader", "Traversable", "TraversableResources"] + + +class ResourceReader(metaclass=abc.ABCMeta): + """Abstract base class for loaders to provide resource reading support.""" + + @abc.abstractmethod + def open_resource(self, resource: Text) -> BinaryIO: + """Return an opened, file-like object for binary reading. + + The 'resource' argument is expected to represent only a file name. + If the resource cannot be found, FileNotFoundError is raised. + """ + # This deliberately raises FileNotFoundError instead of + # NotImplementedError so that if this method is accidentally called, + # it'll still do the right thing. + raise FileNotFoundError + + @abc.abstractmethod + def resource_path(self, resource: Text) -> Text: + """Return the file system path to the specified resource. + + The 'resource' argument is expected to represent only a file name. + If the resource does not exist on the file system, raise + FileNotFoundError. + """ + # This deliberately raises FileNotFoundError instead of + # NotImplementedError so that if this method is accidentally called, + # it'll still do the right thing. + raise FileNotFoundError + + @abc.abstractmethod + def is_resource(self, path: Text) -> bool: + """Return True if the named 'path' is a resource. + + Files are resources, directories are not. 
+ """ + raise FileNotFoundError + + @abc.abstractmethod + def contents(self) -> Iterable[str]: + """Return an iterable of entries in `package`.""" + raise FileNotFoundError + + +class TraversalError(Exception): + pass + + +@runtime_checkable +class Traversable(Protocol): + """ + An object with a subset of pathlib.Path methods suitable for + traversing directories and opening files. + + Any exceptions that occur when accessing the backing resource + may propagate unaltered. + """ + + @abc.abstractmethod + def iterdir(self) -> Iterator["Traversable"]: + """ + Yield Traversable objects in self + """ + + def read_bytes(self) -> bytes: + """ + Read contents of self as bytes + """ + with self.open('rb') as strm: + return strm.read() + + def read_text(self, encoding: Optional[str] = None) -> str: + """ + Read contents of self as text + """ + with self.open(encoding=encoding) as strm: + return strm.read() + + @abc.abstractmethod + def is_dir(self) -> bool: + """ + Return True if self is a directory + """ + + @abc.abstractmethod + def is_file(self) -> bool: + """ + Return True if self is a file + """ + + def joinpath(self, *descendants: StrPath) -> "Traversable": + """ + Return Traversable resolved with any descendants applied. + + Each descendant should be a path segment relative to self + and each may contain multiple levels separated by + ``posixpath.sep`` (``/``). + """ + if not descendants: + return self + names = itertools.chain.from_iterable( + path.parts for path in map(pathlib.PurePosixPath, descendants) + ) + target = next(names) + matches = ( + traversable for traversable in self.iterdir() if traversable.name == target + ) + try: + match = next(matches) + except StopIteration: + raise TraversalError( + "Target not found during traversal.", target, list(names) + ) + return match.joinpath(*names) + + def __truediv__(self, child: StrPath) -> "Traversable": + """ + Return Traversable child in self + """ + return self.joinpath(child) + + @abc.abstractmethod + def open(self, mode='r', *args, **kwargs): + """ + mode may be 'r' or 'rb' to open as text or binary. Return a handle + suitable for reading (same as pathlib.Path.open). + + When opening as text, accepts encoding parameters such as those + accepted by io.TextIOWrapper. + """ + + @property + @abc.abstractmethod + def name(self) -> str: + """ + The base name of this object without any parent references. + """ + + +class TraversableResources(ResourceReader): + """ + The required interface for providing traversable + resources. 
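+    (Editorial note, an addition: files() is the only abstract method;
+    open_resource, resource_path, is_resource and contents are all
+    derived from it below.)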
+ """ + + @abc.abstractmethod + def files(self) -> "Traversable": + """Return a Traversable object for the loaded package.""" + + def open_resource(self, resource: StrPath) -> io.BufferedReader: + return self.files().joinpath(resource).open('rb') + + def resource_path(self, resource: Any) -> NoReturn: + raise FileNotFoundError(resource) + + def is_resource(self, path: StrPath) -> bool: + return self.files().joinpath(path).is_file() + + def contents(self) -> Iterator[str]: + return (item.name for item in self.files().iterdir()) diff --git a/lib/pkg_resources/_vendor/importlib_resources/py.typed b/lib/pkg_resources/_vendor/importlib_resources/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/_vendor/importlib_resources/readers.py b/lib/pkg_resources/_vendor/importlib_resources/readers.py new file mode 100644 index 00000000..ab34db74 --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/readers.py @@ -0,0 +1,120 @@ +import collections +import pathlib +import operator + +from . import abc + +from ._itertools import unique_everseen +from ._compat import ZipPath + + +def remove_duplicates(items): + return iter(collections.OrderedDict.fromkeys(items)) + + +class FileReader(abc.TraversableResources): + def __init__(self, loader): + self.path = pathlib.Path(loader.path).parent + + def resource_path(self, resource): + """ + Return the file system path to prevent + `resources.path()` from creating a temporary + copy. + """ + return str(self.path.joinpath(resource)) + + def files(self): + return self.path + + +class ZipReader(abc.TraversableResources): + def __init__(self, loader, module): + _, _, name = module.rpartition('.') + self.prefix = loader.prefix.replace('\\', '/') + name + '/' + self.archive = loader.archive + + def open_resource(self, resource): + try: + return super().open_resource(resource) + except KeyError as exc: + raise FileNotFoundError(exc.args[0]) + + def is_resource(self, path): + # workaround for `zipfile.Path.is_file` returning true + # for non-existent paths. + target = self.files().joinpath(path) + return target.is_file() and target.exists() + + def files(self): + return ZipPath(self.archive, self.prefix) + + +class MultiplexedPath(abc.Traversable): + """ + Given a series of Traversable objects, implement a merged + version of the interface across all objects. Useful for + namespace packages which may be multihomed at a single + name. + """ + + def __init__(self, *paths): + self._paths = list(map(pathlib.Path, remove_duplicates(paths))) + if not self._paths: + message = 'MultiplexedPath must contain at least one path' + raise FileNotFoundError(message) + if not all(path.is_dir() for path in self._paths): + raise NotADirectoryError('MultiplexedPath only supports directories') + + def iterdir(self): + files = (file for path in self._paths for file in path.iterdir()) + return unique_everseen(files, key=operator.attrgetter('name')) + + def read_bytes(self): + raise FileNotFoundError(f'{self} is not a file') + + def read_text(self, *args, **kwargs): + raise FileNotFoundError(f'{self} is not a file') + + def is_dir(self): + return True + + def is_file(self): + return False + + def joinpath(self, *descendants): + try: + return super().joinpath(*descendants) + except abc.TraversalError: + # One of the paths did not resolve (a directory does not exist). + # Just return something that will not exist. 
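+            # Illustrative example (an editorial addition):
+            #   MultiplexedPath(a, b).joinpath('missing.txt')
+            # resolves against the first path only, typically yielding a
+            # non-existent child of `a`.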
+            return self._paths[0].joinpath(*descendants)
+
+    def open(self, *args, **kwargs):
+        raise FileNotFoundError(f'{self} is not a file')
+
+    @property
+    def name(self):
+        return self._paths[0].name
+
+    def __repr__(self):
+        paths = ', '.join(f"'{path}'" for path in self._paths)
+        return f'MultiplexedPath({paths})'
+
+
+class NamespaceReader(abc.TraversableResources):
+    def __init__(self, namespace_path):
+        if 'NamespacePath' not in str(namespace_path):
+            raise ValueError('Invalid path')
+        self.path = MultiplexedPath(*list(namespace_path))
+
+    def resource_path(self, resource):
+        """
+        Return the file system path to prevent
+        `resources.path()` from creating a temporary
+        copy.
+        """
+        return str(self.path.joinpath(resource))
+
+    def files(self):
+        return self.path
diff --git a/lib/pkg_resources/_vendor/importlib_resources/simple.py b/lib/pkg_resources/_vendor/importlib_resources/simple.py
new file mode 100644
index 00000000..7770c922
--- /dev/null
+++ b/lib/pkg_resources/_vendor/importlib_resources/simple.py
@@ -0,0 +1,106 @@
+"""
+Interface adapters for low-level readers.
+"""
+
+import abc
+import io
+import itertools
+from typing import BinaryIO, List
+
+from .abc import Traversable, TraversableResources
+
+
+class SimpleReader(abc.ABC):
+    """
+    The minimum, low-level interface required from a resource
+    provider.
+    """
+
+    @property
+    @abc.abstractmethod
+    def package(self) -> str:
+        """
+        The name of the package for which this reader loads resources.
+        """
+
+    @abc.abstractmethod
+    def children(self) -> List['SimpleReader']:
+        """
+        Obtain an iterable of SimpleReader for available
+        child containers (e.g. directories).
+        """
+
+    @abc.abstractmethod
+    def resources(self) -> List[str]:
+        """
+        Obtain available named resources for this virtual package.
+        """
+
+    @abc.abstractmethod
+    def open_binary(self, resource: str) -> BinaryIO:
+        """
+        Obtain a File-like for a named resource.
+        """
+
+    @property
+    def name(self):
+        return self.package.split('.')[-1]
+
+
+class ResourceContainer(Traversable):
+    """
+    Traversable container for a package's resources via its reader.
+    """
+
+    def __init__(self, reader: SimpleReader):
+        self.reader = reader
+
+    def is_dir(self):
+        return True
+
+    def is_file(self):
+        return False
+
+    def iterdir(self):
+        files = (ResourceHandle(self, name) for name in self.reader.resources)
+        dirs = map(ResourceContainer, self.reader.children())
+        return itertools.chain(files, dirs)
+
+    def open(self, *args, **kwargs):
+        raise IsADirectoryError()
+
+
+class ResourceHandle(Traversable):
+    """
+    Handle to a named resource in a ResourceReader.
+    """
+
+    def __init__(self, parent: ResourceContainer, name: str):
+        self.parent = parent
+        self.name = name  # type: ignore
+
+    def is_file(self):
+        return True
+
+    def is_dir(self):
+        return False
+
+    def open(self, mode='r', *args, **kwargs):
+        stream = self.parent.reader.open_binary(self.name)
+        if 'b' not in mode:
+            # Editorial fix: the vendored copy dropped the `stream` argument
+            # here, so any text-mode open raised TypeError.
+            stream = io.TextIOWrapper(stream, *args, **kwargs)
+        return stream
+
+    def joinpath(self, name):
+        raise RuntimeError("Cannot traverse into a resource")
+
+
+class TraversableReader(TraversableResources, SimpleReader):
+    """
+    A TraversableResources based on SimpleReader. Resource providers
+    may derive from this class to provide the TraversableResources
+    interface by supplying the SimpleReader interface.
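+    (Editorial note, an addition: a concrete subclass implements only
+    the four SimpleReader abstract members; files() then wraps the
+    reader in a ResourceContainer.)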
+ """ + + def files(self): + return ResourceContainer(self) diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/__init__.py b/lib/pkg_resources/_vendor/importlib_resources/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/_compat.py b/lib/pkg_resources/_vendor/importlib_resources/tests/_compat.py new file mode 100644 index 00000000..e7bf06dd --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/tests/_compat.py @@ -0,0 +1,32 @@ +import os + + +try: + from test.support import import_helper # type: ignore +except ImportError: + # Python 3.9 and earlier + class import_helper: # type: ignore + from test.support import ( + modules_setup, + modules_cleanup, + DirsOnSysPath, + CleanImport, + ) + + +try: + from test.support import os_helper # type: ignore +except ImportError: + # Python 3.9 compat + class os_helper: # type:ignore + from test.support import temp_dir + + +try: + # Python 3.10 + from test.support.os_helper import unlink +except ImportError: + from test.support import unlink as _unlink + + def unlink(target): + return _unlink(os.fspath(target)) diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/_path.py b/lib/pkg_resources/_vendor/importlib_resources/tests/_path.py new file mode 100644 index 00000000..c630e4d3 --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/tests/_path.py @@ -0,0 +1,50 @@ +import pathlib +import functools + + +#### +# from jaraco.path 3.4 + + +def build(spec, prefix=pathlib.Path()): + """ + Build a set of files/directories, as described by the spec. + + Each key represents a pathname, and the value represents + the content. Content may be a nested directory. + + >>> spec = { + ... 'README.txt': "A README file", + ... "foo": { + ... "__init__.py": "", + ... "bar": { + ... "__init__.py": "", + ... }, + ... "baz.py": "# Some code", + ... } + ... 
} + >>> tmpdir = getfixture('tmpdir') + >>> build(spec, tmpdir) + """ + for name, contents in spec.items(): + create(contents, pathlib.Path(prefix) / name) + + +@functools.singledispatch +def create(content, path): + path.mkdir(exist_ok=True) + build(content, prefix=path) # type: ignore + + +@create.register +def _(content: bytes, path): + path.write_bytes(content) + + +@create.register +def _(content: str, path): + path.write_text(content) + + +# end from jaraco.path +#### diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/data01/__init__.py b/lib/pkg_resources/_vendor/importlib_resources/tests/data01/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/data01/subdirectory/__init__.py b/lib/pkg_resources/_vendor/importlib_resources/tests/data01/subdirectory/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/data02/__init__.py b/lib/pkg_resources/_vendor/importlib_resources/tests/data02/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/data02/one/__init__.py b/lib/pkg_resources/_vendor/importlib_resources/tests/data02/one/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/data02/one/resource1.txt b/lib/pkg_resources/_vendor/importlib_resources/tests/data02/one/resource1.txt new file mode 100644 index 00000000..61a813e4 --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/tests/data02/one/resource1.txt @@ -0,0 +1 @@ +one resource diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/data02/two/__init__.py b/lib/pkg_resources/_vendor/importlib_resources/tests/data02/two/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/data02/two/resource2.txt b/lib/pkg_resources/_vendor/importlib_resources/tests/data02/two/resource2.txt new file mode 100644 index 00000000..a80ce46e --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/tests/data02/two/resource2.txt @@ -0,0 +1 @@ +two resource diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/test_compatibilty_files.py b/lib/pkg_resources/_vendor/importlib_resources/tests/test_compatibilty_files.py new file mode 100644 index 00000000..d92c7c56 --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/tests/test_compatibilty_files.py @@ -0,0 +1,102 @@ +import io +import unittest + +import importlib_resources as resources + +from importlib_resources._adapters import ( + CompatibilityFiles, + wrap_spec, +) + +from . 
import util + + +class CompatibilityFilesTests(unittest.TestCase): + @property + def package(self): + bytes_data = io.BytesIO(b'Hello, world!') + return util.create_package( + file=bytes_data, + path='some_path', + contents=('a', 'b', 'c'), + ) + + @property + def files(self): + return resources.files(self.package) + + def test_spec_path_iter(self): + self.assertEqual( + sorted(path.name for path in self.files.iterdir()), + ['a', 'b', 'c'], + ) + + def test_child_path_iter(self): + self.assertEqual(list((self.files / 'a').iterdir()), []) + + def test_orphan_path_iter(self): + self.assertEqual(list((self.files / 'a' / 'a').iterdir()), []) + self.assertEqual(list((self.files / 'a' / 'a' / 'a').iterdir()), []) + + def test_spec_path_is(self): + self.assertFalse(self.files.is_file()) + self.assertFalse(self.files.is_dir()) + + def test_child_path_is(self): + self.assertTrue((self.files / 'a').is_file()) + self.assertFalse((self.files / 'a').is_dir()) + + def test_orphan_path_is(self): + self.assertFalse((self.files / 'a' / 'a').is_file()) + self.assertFalse((self.files / 'a' / 'a').is_dir()) + self.assertFalse((self.files / 'a' / 'a' / 'a').is_file()) + self.assertFalse((self.files / 'a' / 'a' / 'a').is_dir()) + + def test_spec_path_name(self): + self.assertEqual(self.files.name, 'testingpackage') + + def test_child_path_name(self): + self.assertEqual((self.files / 'a').name, 'a') + + def test_orphan_path_name(self): + self.assertEqual((self.files / 'a' / 'b').name, 'b') + self.assertEqual((self.files / 'a' / 'b' / 'c').name, 'c') + + def test_spec_path_open(self): + self.assertEqual(self.files.read_bytes(), b'Hello, world!') + self.assertEqual(self.files.read_text(), 'Hello, world!') + + def test_child_path_open(self): + self.assertEqual((self.files / 'a').read_bytes(), b'Hello, world!') + self.assertEqual((self.files / 'a').read_text(), 'Hello, world!') + + def test_orphan_path_open(self): + with self.assertRaises(FileNotFoundError): + (self.files / 'a' / 'b').read_bytes() + with self.assertRaises(FileNotFoundError): + (self.files / 'a' / 'b' / 'c').read_bytes() + + def test_open_invalid_mode(self): + with self.assertRaises(ValueError): + self.files.open('0') + + def test_orphan_path_invalid(self): + with self.assertRaises(ValueError): + CompatibilityFiles.OrphanPath() + + def test_wrap_spec(self): + spec = wrap_spec(self.package) + self.assertIsInstance(spec.loader.get_resource_reader(None), CompatibilityFiles) + + +class CompatibilityFilesNoReaderTests(unittest.TestCase): + @property + def package(self): + return util.create_package_from_loader(None) + + @property + def files(self): + return resources.files(self.package) + + def test_spec_path_joinpath(self): + self.assertIsInstance(self.files / 'a', CompatibilityFiles.OrphanPath) diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/test_contents.py b/lib/pkg_resources/_vendor/importlib_resources/tests/test_contents.py new file mode 100644 index 00000000..525568e8 --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/tests/test_contents.py @@ -0,0 +1,43 @@ +import unittest +import importlib_resources as resources + +from . import data01 +from . 
import util + + +class ContentsTests: + expected = { + '__init__.py', + 'binary.file', + 'subdirectory', + 'utf-16.file', + 'utf-8.file', + } + + def test_contents(self): + contents = {path.name for path in resources.files(self.data).iterdir()} + assert self.expected <= contents + + +class ContentsDiskTests(ContentsTests, unittest.TestCase): + def setUp(self): + self.data = data01 + + +class ContentsZipTests(ContentsTests, util.ZipSetup, unittest.TestCase): + pass + + +class ContentsNamespaceTests(ContentsTests, unittest.TestCase): + expected = { + # no __init__ because of namespace design + # no subdirectory as incidental difference in fixture + 'binary.file', + 'utf-16.file', + 'utf-8.file', + } + + def setUp(self): + from . import namespacedata01 + + self.data = namespacedata01 diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/test_files.py b/lib/pkg_resources/_vendor/importlib_resources/tests/test_files.py new file mode 100644 index 00000000..d258fb5f --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/tests/test_files.py @@ -0,0 +1,112 @@ +import typing +import textwrap +import unittest +import warnings +import importlib +import contextlib + +import importlib_resources as resources +from ..abc import Traversable +from . import data01 +from . import util +from . import _path +from ._compat import os_helper, import_helper + + +@contextlib.contextmanager +def suppress_known_deprecation(): + with warnings.catch_warnings(record=True) as ctx: + warnings.simplefilter('default', category=DeprecationWarning) + yield ctx + + +class FilesTests: + def test_read_bytes(self): + files = resources.files(self.data) + actual = files.joinpath('utf-8.file').read_bytes() + assert actual == b'Hello, UTF-8 world!\n' + + def test_read_text(self): + files = resources.files(self.data) + actual = files.joinpath('utf-8.file').read_text(encoding='utf-8') + assert actual == 'Hello, UTF-8 world!\n' + + @unittest.skipUnless( + hasattr(typing, 'runtime_checkable'), + "Only suitable when typing supports runtime_checkable", + ) + def test_traversable(self): + assert isinstance(resources.files(self.data), Traversable) + + def test_old_parameter(self): + """ + Files used to take a 'package' parameter. Make sure anyone + passing by name is still supported. + """ + with suppress_known_deprecation(): + resources.files(package=self.data) + + +class OpenDiskTests(FilesTests, unittest.TestCase): + def setUp(self): + self.data = data01 + + +class OpenZipTests(FilesTests, util.ZipSetup, unittest.TestCase): + pass + + +class OpenNamespaceTests(FilesTests, unittest.TestCase): + def setUp(self): + from . import namespacedata01 + + self.data = namespacedata01 + + +class SiteDir: + def setUp(self): + self.fixtures = contextlib.ExitStack() + self.addCleanup(self.fixtures.close) + self.site_dir = self.fixtures.enter_context(os_helper.temp_dir()) + self.fixtures.enter_context(import_helper.DirsOnSysPath(self.site_dir)) + self.fixtures.enter_context(import_helper.CleanImport()) + + +class ModulesFilesTests(SiteDir, unittest.TestCase): + def test_module_resources(self): + """ + A module can have resources found adjacent to the module. 
+ """ + spec = { + 'mod.py': '', + 'res.txt': 'resources are the best', + } + _path.build(spec, self.site_dir) + import mod + + actual = resources.files(mod).joinpath('res.txt').read_text() + assert actual == spec['res.txt'] + + +class ImplicitContextFilesTests(SiteDir, unittest.TestCase): + def test_implicit_files(self): + """ + Without any parameter, files() will infer the location as the caller. + """ + spec = { + 'somepkg': { + '__init__.py': textwrap.dedent( + """ + import importlib_resources as res + val = res.files().joinpath('res.txt').read_text() + """ + ), + 'res.txt': 'resources are the best', + }, + } + _path.build(spec, self.site_dir) + assert importlib.import_module('somepkg').val == 'resources are the best' + + +if __name__ == '__main__': + unittest.main() diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/test_open.py b/lib/pkg_resources/_vendor/importlib_resources/tests/test_open.py new file mode 100644 index 00000000..87b42c3d --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/tests/test_open.py @@ -0,0 +1,81 @@ +import unittest + +import importlib_resources as resources +from . import data01 +from . import util + + +class CommonBinaryTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + target = resources.files(package).joinpath(path) + with target.open('rb'): + pass + + +class CommonTextTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + target = resources.files(package).joinpath(path) + with target.open(): + pass + + +class OpenTests: + def test_open_binary(self): + target = resources.files(self.data) / 'binary.file' + with target.open('rb') as fp: + result = fp.read() + self.assertEqual(result, b'\x00\x01\x02\x03') + + def test_open_text_default_encoding(self): + target = resources.files(self.data) / 'utf-8.file' + with target.open() as fp: + result = fp.read() + self.assertEqual(result, 'Hello, UTF-8 world!\n') + + def test_open_text_given_encoding(self): + target = resources.files(self.data) / 'utf-16.file' + with target.open(encoding='utf-16', errors='strict') as fp: + result = fp.read() + self.assertEqual(result, 'Hello, UTF-16 world!\n') + + def test_open_text_with_errors(self): + # Raises UnicodeError without the 'errors' argument. + target = resources.files(self.data) / 'utf-16.file' + with target.open(encoding='utf-8', errors='strict') as fp: + self.assertRaises(UnicodeError, fp.read) + with target.open(encoding='utf-8', errors='ignore') as fp: + result = fp.read() + self.assertEqual( + result, + 'H\x00e\x00l\x00l\x00o\x00,\x00 ' + '\x00U\x00T\x00F\x00-\x001\x006\x00 ' + '\x00w\x00o\x00r\x00l\x00d\x00!\x00\n\x00', + ) + + def test_open_binary_FileNotFoundError(self): + target = resources.files(self.data) / 'does-not-exist' + self.assertRaises(FileNotFoundError, target.open, 'rb') + + def test_open_text_FileNotFoundError(self): + target = resources.files(self.data) / 'does-not-exist' + self.assertRaises(FileNotFoundError, target.open) + + +class OpenDiskTests(OpenTests, unittest.TestCase): + def setUp(self): + self.data = data01 + + +class OpenDiskNamespaceTests(OpenTests, unittest.TestCase): + def setUp(self): + from . 
import namespacedata01 + + self.data = namespacedata01 + + +class OpenZipTests(OpenTests, util.ZipSetup, unittest.TestCase): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/test_path.py b/lib/pkg_resources/_vendor/importlib_resources/tests/test_path.py new file mode 100644 index 00000000..4f4d3943 --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/tests/test_path.py @@ -0,0 +1,64 @@ +import io +import unittest + +import importlib_resources as resources +from . import data01 +from . import util + + +class CommonTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + with resources.as_file(resources.files(package).joinpath(path)): + pass + + +class PathTests: + def test_reading(self): + # Path should be readable. + # Test also implicitly verifies the returned object is a pathlib.Path + # instance. + target = resources.files(self.data) / 'utf-8.file' + with resources.as_file(target) as path: + self.assertTrue(path.name.endswith("utf-8.file"), repr(path)) + # pathlib.Path.read_text() was introduced in Python 3.5. + with path.open('r', encoding='utf-8') as file: + text = file.read() + self.assertEqual('Hello, UTF-8 world!\n', text) + + +class PathDiskTests(PathTests, unittest.TestCase): + data = data01 + + def test_natural_path(self): + """ + Guarantee the internal implementation detail that + file-system-backed resources do not get the tempdir + treatment. + """ + target = resources.files(self.data) / 'utf-8.file' + with resources.as_file(target) as path: + assert 'data' in str(path) + + +class PathMemoryTests(PathTests, unittest.TestCase): + def setUp(self): + file = io.BytesIO(b'Hello, UTF-8 world!\n') + self.addCleanup(file.close) + self.data = util.create_package( + file=file, path=FileNotFoundError("package exists only in memory") + ) + self.data.__spec__.origin = None + self.data.__spec__.has_location = False + + +class PathZipTests(PathTests, util.ZipSetup, unittest.TestCase): + def test_remove_in_context_manager(self): + # It is not an error if the file that was temporarily stashed on the + # file system is removed inside the `with` stanza. + target = resources.files(self.data) / 'utf-8.file' + with resources.as_file(target) as path: + path.unlink() + + +if __name__ == '__main__': + unittest.main() diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/test_read.py b/lib/pkg_resources/_vendor/importlib_resources/tests/test_read.py new file mode 100644 index 00000000..41dd6db5 --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/tests/test_read.py @@ -0,0 +1,76 @@ +import unittest +import importlib_resources as resources + +from . import data01 +from . 
import util +from importlib import import_module + + +class CommonBinaryTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + resources.files(package).joinpath(path).read_bytes() + + +class CommonTextTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + resources.files(package).joinpath(path).read_text() + + +class ReadTests: + def test_read_bytes(self): + result = resources.files(self.data).joinpath('binary.file').read_bytes() + self.assertEqual(result, b'\0\1\2\3') + + def test_read_text_default_encoding(self): + result = resources.files(self.data).joinpath('utf-8.file').read_text() + self.assertEqual(result, 'Hello, UTF-8 world!\n') + + def test_read_text_given_encoding(self): + result = ( + resources.files(self.data) + .joinpath('utf-16.file') + .read_text(encoding='utf-16') + ) + self.assertEqual(result, 'Hello, UTF-16 world!\n') + + def test_read_text_with_errors(self): + # Raises UnicodeError without the 'errors' argument. + target = resources.files(self.data) / 'utf-16.file' + self.assertRaises(UnicodeError, target.read_text, encoding='utf-8') + result = target.read_text(encoding='utf-8', errors='ignore') + self.assertEqual( + result, + 'H\x00e\x00l\x00l\x00o\x00,\x00 ' + '\x00U\x00T\x00F\x00-\x001\x006\x00 ' + '\x00w\x00o\x00r\x00l\x00d\x00!\x00\n\x00', + ) + + +class ReadDiskTests(ReadTests, unittest.TestCase): + data = data01 + + +class ReadZipTests(ReadTests, util.ZipSetup, unittest.TestCase): + def test_read_submodule_resource(self): + submodule = import_module('ziptestdata.subdirectory') + result = resources.files(submodule).joinpath('binary.file').read_bytes() + self.assertEqual(result, b'\0\1\2\3') + + def test_read_submodule_resource_by_name(self): + result = ( + resources.files('ziptestdata.subdirectory') + .joinpath('binary.file') + .read_bytes() + ) + self.assertEqual(result, b'\0\1\2\3') + + +class ReadNamespaceTests(ReadTests, unittest.TestCase): + def setUp(self): + from . 
import namespacedata01 + + self.data = namespacedata01 + + +if __name__ == '__main__': + unittest.main() diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/test_reader.py b/lib/pkg_resources/_vendor/importlib_resources/tests/test_reader.py new file mode 100644 index 00000000..1c8ebeeb --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/tests/test_reader.py @@ -0,0 +1,133 @@ +import os.path +import sys +import pathlib +import unittest + +from importlib import import_module +from importlib_resources.readers import MultiplexedPath, NamespaceReader + + +class MultiplexedPathTest(unittest.TestCase): + @classmethod + def setUpClass(cls): + path = pathlib.Path(__file__).parent / 'namespacedata01' + cls.folder = str(path) + + def test_init_no_paths(self): + with self.assertRaises(FileNotFoundError): + MultiplexedPath() + + def test_init_file(self): + with self.assertRaises(NotADirectoryError): + MultiplexedPath(os.path.join(self.folder, 'binary.file')) + + def test_iterdir(self): + contents = {path.name for path in MultiplexedPath(self.folder).iterdir()} + try: + contents.remove('__pycache__') + except (KeyError, ValueError): + pass + self.assertEqual(contents, {'binary.file', 'utf-16.file', 'utf-8.file'}) + + def test_iterdir_duplicate(self): + data01 = os.path.abspath(os.path.join(__file__, '..', 'data01')) + contents = { + path.name for path in MultiplexedPath(self.folder, data01).iterdir() + } + for remove in ('__pycache__', '__init__.pyc'): + try: + contents.remove(remove) + except (KeyError, ValueError): + pass + self.assertEqual( + contents, + {'__init__.py', 'binary.file', 'subdirectory', 'utf-16.file', 'utf-8.file'}, + ) + + def test_is_dir(self): + self.assertEqual(MultiplexedPath(self.folder).is_dir(), True) + + def test_is_file(self): + self.assertEqual(MultiplexedPath(self.folder).is_file(), False) + + def test_open_file(self): + path = MultiplexedPath(self.folder) + with self.assertRaises(FileNotFoundError): + path.read_bytes() + with self.assertRaises(FileNotFoundError): + path.read_text() + with self.assertRaises(FileNotFoundError): + path.open() + + def test_join_path(self): + prefix = os.path.abspath(os.path.join(__file__, '..')) + data01 = os.path.join(prefix, 'data01') + path = MultiplexedPath(self.folder, data01) + self.assertEqual( + str(path.joinpath('binary.file'))[len(prefix) + 1 :], + os.path.join('namespacedata01', 'binary.file'), + ) + self.assertEqual( + str(path.joinpath('subdirectory'))[len(prefix) + 1 :], + os.path.join('data01', 'subdirectory'), + ) + self.assertEqual( + str(path.joinpath('imaginary'))[len(prefix) + 1 :], + os.path.join('namespacedata01', 'imaginary'), + ) + self.assertEqual(path.joinpath(), path) + + def test_join_path_compound(self): + path = MultiplexedPath(self.folder) + assert not path.joinpath('imaginary/foo.py').exists() + + def test_repr(self): + self.assertEqual( + repr(MultiplexedPath(self.folder)), + f"MultiplexedPath('{self.folder}')", + ) + + def test_name(self): + self.assertEqual( + MultiplexedPath(self.folder).name, + os.path.basename(self.folder), + ) + + +class NamespaceReaderTest(unittest.TestCase): + site_dir = str(pathlib.Path(__file__).parent) + + @classmethod + def setUpClass(cls): + sys.path.append(cls.site_dir) + + @classmethod + def tearDownClass(cls): + sys.path.remove(cls.site_dir) + + def test_init_error(self): + with self.assertRaises(ValueError): + NamespaceReader(['path1', 'path2']) + + def test_resource_path(self): + namespacedata01 = import_module('namespacedata01') + reader = 
NamespaceReader(namespacedata01.__spec__.submodule_search_locations) + + root = os.path.abspath(os.path.join(__file__, '..', 'namespacedata01')) + self.assertEqual( + reader.resource_path('binary.file'), os.path.join(root, 'binary.file') + ) + self.assertEqual( + reader.resource_path('imaginary'), os.path.join(root, 'imaginary') + ) + + def test_files(self): + namespacedata01 = import_module('namespacedata01') + reader = NamespaceReader(namespacedata01.__spec__.submodule_search_locations) + root = os.path.abspath(os.path.join(__file__, '..', 'namespacedata01')) + self.assertIsInstance(reader.files(), MultiplexedPath) + self.assertEqual(repr(reader.files()), f"MultiplexedPath('{root}')") + + +if __name__ == '__main__': + unittest.main() diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/test_resource.py b/lib/pkg_resources/_vendor/importlib_resources/tests/test_resource.py new file mode 100644 index 00000000..82390271 --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/tests/test_resource.py @@ -0,0 +1,260 @@ +import sys +import unittest +import importlib_resources as resources +import uuid +import pathlib + +from . import data01 +from . import zipdata01, zipdata02 +from . import util +from importlib import import_module +from ._compat import import_helper, unlink + + +class ResourceTests: + # Subclasses are expected to set the `data` attribute. + + def test_is_file_exists(self): + target = resources.files(self.data) / 'binary.file' + self.assertTrue(target.is_file()) + + def test_is_file_missing(self): + target = resources.files(self.data) / 'not-a-file' + self.assertFalse(target.is_file()) + + def test_is_dir(self): + target = resources.files(self.data) / 'subdirectory' + self.assertFalse(target.is_file()) + self.assertTrue(target.is_dir()) + + +class ResourceDiskTests(ResourceTests, unittest.TestCase): + def setUp(self): + self.data = data01 + + +class ResourceZipTests(ResourceTests, util.ZipSetup, unittest.TestCase): + pass + + +def names(traversable): + return {item.name for item in traversable.iterdir()} + + +class ResourceLoaderTests(unittest.TestCase): + def test_resource_contents(self): + package = util.create_package( + file=data01, path=data01.__file__, contents=['A', 'B', 'C'] + ) + self.assertEqual(names(resources.files(package)), {'A', 'B', 'C'}) + + def test_is_file(self): + package = util.create_package( + file=data01, path=data01.__file__, contents=['A', 'B', 'C', 'D/E', 'D/F'] + ) + self.assertTrue(resources.files(package).joinpath('B').is_file()) + + def test_is_dir(self): + package = util.create_package( + file=data01, path=data01.__file__, contents=['A', 'B', 'C', 'D/E', 'D/F'] + ) + self.assertTrue(resources.files(package).joinpath('D').is_dir()) + + def test_resource_missing(self): + package = util.create_package( + file=data01, path=data01.__file__, contents=['A', 'B', 'C', 'D/E', 'D/F'] + ) + self.assertFalse(resources.files(package).joinpath('Z').is_file()) + + +class ResourceCornerCaseTests(unittest.TestCase): + def test_package_has_no_reader_fallback(self): + # Test odd ball packages which: + # 1. Do not have a ResourceReader as a loader + # 2. Are not on the file system + # 3. Are not in a zip file + module = util.create_package( + file=data01, path=data01.__file__, contents=['A', 'B', 'C'] + ) + # Give the module a dummy loader. + module.__loader__ = object() + # Give the module a dummy origin. 
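+        # Editorial sketch of the expected fallback (an assumption based on
+        # the vendored adapters): with no usable reader and a non-existent
+        # origin, files() falls back to CompatibilityFiles, whose joinpath()
+        # yields OrphanPath objects, so is_file() below is False.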
+        module.__file__ = '/path/which/shall/not/be/named'
+        module.__spec__.loader = module.__loader__
+        module.__spec__.origin = module.__file__
+        self.assertFalse(resources.files(module).joinpath('A').is_file())
+
+
+class ResourceFromZipsTest01(util.ZipSetupBase, unittest.TestCase):
+    ZIP_MODULE = zipdata01  # type: ignore
+
+    def test_is_submodule_resource(self):
+        submodule = import_module('ziptestdata.subdirectory')
+        self.assertTrue(resources.files(submodule).joinpath('binary.file').is_file())
+
+    def test_read_submodule_resource_by_name(self):
+        self.assertTrue(
+            resources.files('ziptestdata.subdirectory')
+            .joinpath('binary.file')
+            .is_file()
+        )
+
+    def test_submodule_contents(self):
+        submodule = import_module('ziptestdata.subdirectory')
+        self.assertEqual(
+            names(resources.files(submodule)), {'__init__.py', 'binary.file'}
+        )
+
+    def test_submodule_contents_by_name(self):
+        self.assertEqual(
+            names(resources.files('ziptestdata.subdirectory')),
+            {'__init__.py', 'binary.file'},
+        )
+
+    def test_as_file_directory(self):
+        with resources.as_file(resources.files('ziptestdata')) as data:
+            assert data.name == 'ziptestdata'
+            assert data.is_dir()
+            assert data.joinpath('subdirectory').is_dir()
+            assert len(list(data.iterdir()))
+            assert not data.parent.exists()
+
+
+class ResourceFromZipsTest02(util.ZipSetupBase, unittest.TestCase):
+    ZIP_MODULE = zipdata02  # type: ignore
+
+    def test_unrelated_contents(self):
+        """
+        Test that a zip with two unrelated subpackages returns
+        distinct resources. Ref python/importlib_resources#44.
+        """
+        self.assertEqual(
+            names(resources.files('ziptestdata.one')),
+            {'__init__.py', 'resource1.txt'},
+        )
+        self.assertEqual(
+            names(resources.files('ziptestdata.two')),
+            {'__init__.py', 'resource2.txt'},
+        )
+
+
+class DeletingZipsTest(unittest.TestCase):
+    """Having accessed resources in a zip file should not keep an open
+    reference to the zip.
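+    (Editorial note, an addition: a lingering open handle would make the
+    unlink() calls in these tests fail on Windows, where open files
+    cannot be deleted.)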
+ """ + + ZIP_MODULE = zipdata01 + + def setUp(self): + modules = import_helper.modules_setup() + self.addCleanup(import_helper.modules_cleanup, *modules) + + data_path = pathlib.Path(self.ZIP_MODULE.__file__) + data_dir = data_path.parent + self.source_zip_path = data_dir / 'ziptestdata.zip' + self.zip_path = pathlib.Path(f'{uuid.uuid4()}.zip').absolute() + self.zip_path.write_bytes(self.source_zip_path.read_bytes()) + sys.path.append(str(self.zip_path)) + self.data = import_module('ziptestdata') + + def tearDown(self): + try: + sys.path.remove(str(self.zip_path)) + except ValueError: + pass + + try: + del sys.path_importer_cache[str(self.zip_path)] + del sys.modules[self.data.__name__] + except KeyError: + pass + + try: + unlink(self.zip_path) + except OSError: + # If the test fails, this will probably fail too + pass + + def test_iterdir_does_not_keep_open(self): + c = [item.name for item in resources.files('ziptestdata').iterdir()] + self.zip_path.unlink() + del c + + def test_is_file_does_not_keep_open(self): + c = resources.files('ziptestdata').joinpath('binary.file').is_file() + self.zip_path.unlink() + del c + + def test_is_file_failure_does_not_keep_open(self): + c = resources.files('ziptestdata').joinpath('not-present').is_file() + self.zip_path.unlink() + del c + + @unittest.skip("Desired but not supported.") + def test_as_file_does_not_keep_open(self): # pragma: no cover + c = resources.as_file(resources.files('ziptestdata') / 'binary.file') + self.zip_path.unlink() + del c + + def test_entered_path_does_not_keep_open(self): + # This is what certifi does on import to make its bundle + # available for the process duration. + c = resources.as_file( + resources.files('ziptestdata') / 'binary.file' + ).__enter__() + self.zip_path.unlink() + del c + + def test_read_binary_does_not_keep_open(self): + c = resources.files('ziptestdata').joinpath('binary.file').read_bytes() + self.zip_path.unlink() + del c + + def test_read_text_does_not_keep_open(self): + c = resources.files('ziptestdata').joinpath('utf-8.file').read_text() + self.zip_path.unlink() + del c + + +class ResourceFromNamespaceTest01(unittest.TestCase): + site_dir = str(pathlib.Path(__file__).parent) + + @classmethod + def setUpClass(cls): + sys.path.append(cls.site_dir) + + @classmethod + def tearDownClass(cls): + sys.path.remove(cls.site_dir) + + def test_is_submodule_resource(self): + self.assertTrue( + resources.files(import_module('namespacedata01')) + .joinpath('binary.file') + .is_file() + ) + + def test_read_submodule_resource_by_name(self): + self.assertTrue( + resources.files('namespacedata01').joinpath('binary.file').is_file() + ) + + def test_submodule_contents(self): + contents = names(resources.files(import_module('namespacedata01'))) + try: + contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual(contents, {'binary.file', 'utf-8.file', 'utf-16.file'}) + + def test_submodule_contents_by_name(self): + contents = names(resources.files('namespacedata01')) + try: + contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual(contents, {'binary.file', 'utf-8.file', 'utf-16.file'}) + + +if __name__ == '__main__': + unittest.main() diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/update-zips.py b/lib/pkg_resources/_vendor/importlib_resources/tests/update-zips.py new file mode 100644 index 00000000..231334aa --- /dev/null +++ b/lib/pkg_resources/_vendor/importlib_resources/tests/update-zips.py @@ -0,0 +1,53 @@ +""" +Generate the zip test data files. 
+
+Run to build the tests/zipdataNN/ziptestdata.zip files from
+files in tests/dataNN.
+
+Replaces the file with the working copy, but does not commit anything
+to the source repo.
+"""
+
+import contextlib
+import os
+import pathlib
+import zipfile
+
+
+def main():
+    """
+    >>> from unittest import mock
+    >>> monkeypatch = getfixture('monkeypatch')
+    >>> monkeypatch.setattr(zipfile, 'ZipFile', mock.MagicMock())
+    >>> print(); main()  # print workaround for bpo-32509
+    <BLANKLINE>
+    ...data01... -> ziptestdata/...
+    ...
+    ...data02... -> ziptestdata/...
+    ...
+    """
+    suffixes = '01', '02'
+    tuple(map(generate, suffixes))
+
+
+def generate(suffix):
+    root = pathlib.Path(__file__).parent.relative_to(os.getcwd())
+    zfpath = root / f'zipdata{suffix}/ziptestdata.zip'
+    with zipfile.ZipFile(zfpath, 'w') as zf:
+        for src, rel in walk(root / f'data{suffix}'):
+            dst = 'ziptestdata' / pathlib.PurePosixPath(rel.as_posix())
+            print(src, '->', dst)
+            zf.write(src, dst)
+
+
+def walk(datapath):
+    for dirpath, dirnames, filenames in os.walk(datapath):
+        with contextlib.suppress(ValueError):
+            dirnames.remove('__pycache__')
+        for filename in filenames:
+            res = pathlib.Path(dirpath) / filename
+            rel = res.relative_to(datapath)
+            yield res, rel
+
+
+__name__ == '__main__' and main()
diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/util.py b/lib/pkg_resources/_vendor/importlib_resources/tests/util.py
new file mode 100644
index 00000000..b596c0ce
--- /dev/null
+++ b/lib/pkg_resources/_vendor/importlib_resources/tests/util.py
@@ -0,0 +1,167 @@
+import abc
+import importlib
+import io
+import sys
+import types
+import pathlib
+
+from . import data01
+from . import zipdata01
+from ..abc import ResourceReader
+from ._compat import import_helper
+
+
+from importlib.machinery import ModuleSpec
+
+
+class Reader(ResourceReader):
+    def __init__(self, **kwargs):
+        vars(self).update(kwargs)
+
+    def get_resource_reader(self, package):
+        return self
+
+    def open_resource(self, path):
+        self._path = path
+        if isinstance(self.file, Exception):
+            raise self.file
+        return self.file
+
+    def resource_path(self, path_):
+        self._path = path_
+        if isinstance(self.path, Exception):
+            raise self.path
+        return self.path
+
+    def is_resource(self, path_):
+        self._path = path_
+        if isinstance(self.path, Exception):
+            raise self.path
+
+        def part(entry):
+            return entry.split('/')
+
+        return any(
+            len(parts) == 1 and parts[0] == path_ for parts in map(part, self._contents)
+        )
+
+    def contents(self):
+        if isinstance(self.path, Exception):
+            raise self.path
+        yield from self._contents
+
+
+def create_package_from_loader(loader, is_package=True):
+    name = 'testingpackage'
+    module = types.ModuleType(name)
+    spec = ModuleSpec(name, loader, origin='does-not-exist', is_package=is_package)
+    module.__spec__ = spec
+    module.__loader__ = loader
+    return module
+
+
+def create_package(file=None, path=None, is_package=True, contents=()):
+    return create_package_from_loader(
+        Reader(file=file, path=path, _contents=contents),
+        is_package,
+    )
+
+
+class CommonTests(metaclass=abc.ABCMeta):
+    """
+    Tests shared by test_open, test_path, and test_read.
+    """
+
+    @abc.abstractmethod
+    def execute(self, package, path):
+        """
+        Call the pertinent legacy API function (e.g. open_text, path)
+        on package and path.
+        """
+
+    def test_package_name(self):
+        # Passing in the package name should succeed.
+        self.execute(data01.__name__, 'utf-8.file')
+
+    def test_package_object(self):
+        # Passing in the package itself should succeed.
+ self.execute(data01, 'utf-8.file') + + def test_string_path(self): + # Passing in a string for the path should succeed. + path = 'utf-8.file' + self.execute(data01, path) + + def test_pathlib_path(self): + # Passing in a pathlib.PurePath object for the path should succeed. + path = pathlib.PurePath('utf-8.file') + self.execute(data01, path) + + def test_importing_module_as_side_effect(self): + # The anchor package can already be imported. + del sys.modules[data01.__name__] + self.execute(data01.__name__, 'utf-8.file') + + def test_missing_path(self): + # Attempting to open or read or request the path for a + # non-existent path should succeed if open_resource + # can return a viable data stream. + bytes_data = io.BytesIO(b'Hello, world!') + package = create_package(file=bytes_data, path=FileNotFoundError()) + self.execute(package, 'utf-8.file') + self.assertEqual(package.__loader__._path, 'utf-8.file') + + def test_extant_path(self): + # Attempting to open or read or request the path when the + # path does exist should still succeed. Does not assert + # anything about the result. + bytes_data = io.BytesIO(b'Hello, world!') + # any path that exists + path = __file__ + package = create_package(file=bytes_data, path=path) + self.execute(package, 'utf-8.file') + self.assertEqual(package.__loader__._path, 'utf-8.file') + + def test_useless_loader(self): + package = create_package(file=FileNotFoundError(), path=FileNotFoundError()) + with self.assertRaises(FileNotFoundError): + self.execute(package, 'utf-8.file') + + +class ZipSetupBase: + ZIP_MODULE = None + + @classmethod + def setUpClass(cls): + data_path = pathlib.Path(cls.ZIP_MODULE.__file__) + data_dir = data_path.parent + cls._zip_path = str(data_dir / 'ziptestdata.zip') + sys.path.append(cls._zip_path) + cls.data = importlib.import_module('ziptestdata') + + @classmethod + def tearDownClass(cls): + try: + sys.path.remove(cls._zip_path) + except ValueError: + pass + + try: + del sys.path_importer_cache[cls._zip_path] + del sys.modules[cls.data.__name__] + except KeyError: + pass + + try: + del cls.data + del cls._zip_path + except AttributeError: + pass + + def setUp(self): + modules = import_helper.modules_setup() + self.addCleanup(import_helper.modules_cleanup, *modules) + + +class ZipSetup(ZipSetupBase): + ZIP_MODULE = zipdata01 # type: ignore diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/zipdata01/__init__.py b/lib/pkg_resources/_vendor/importlib_resources/tests/zipdata01/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/_vendor/importlib_resources/tests/zipdata02/__init__.py b/lib/pkg_resources/_vendor/importlib_resources/tests/zipdata02/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/_vendor/jaraco.context-4.3.0.dist-info/top_level.txt b/lib/pkg_resources/_vendor/jaraco.context-4.3.0.dist-info/top_level.txt new file mode 100644 index 00000000..f6205a5f --- /dev/null +++ b/lib/pkg_resources/_vendor/jaraco.context-4.3.0.dist-info/top_level.txt @@ -0,0 +1 @@ +jaraco diff --git a/lib/pkg_resources/_vendor/jaraco.functools-3.6.0.dist-info/top_level.txt b/lib/pkg_resources/_vendor/jaraco.functools-3.6.0.dist-info/top_level.txt new file mode 100644 index 00000000..f6205a5f --- /dev/null +++ b/lib/pkg_resources/_vendor/jaraco.functools-3.6.0.dist-info/top_level.txt @@ -0,0 +1 @@ +jaraco diff --git a/lib/pkg_resources/_vendor/jaraco.text-3.7.0.dist-info/top_level.txt b/lib/pkg_resources/_vendor/jaraco.text-3.7.0.dist-info/top_level.txt new 
file mode 100644 index 00000000..f6205a5f --- /dev/null +++ b/lib/pkg_resources/_vendor/jaraco.text-3.7.0.dist-info/top_level.txt @@ -0,0 +1 @@ +jaraco diff --git a/lib/pkg_resources/_vendor/jaraco/__init__.py b/lib/pkg_resources/_vendor/jaraco/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/_vendor/jaraco/context.py b/lib/pkg_resources/_vendor/jaraco/context.py new file mode 100644 index 00000000..b0d1ef37 --- /dev/null +++ b/lib/pkg_resources/_vendor/jaraco/context.py @@ -0,0 +1,288 @@ +import os +import subprocess +import contextlib +import functools +import tempfile +import shutil +import operator +import warnings + + +@contextlib.contextmanager +def pushd(dir): + """ + >>> tmp_path = getfixture('tmp_path') + >>> with pushd(tmp_path): + ... assert os.getcwd() == os.fspath(tmp_path) + >>> assert os.getcwd() != os.fspath(tmp_path) + """ + + orig = os.getcwd() + os.chdir(dir) + try: + yield dir + finally: + os.chdir(orig) + + +@contextlib.contextmanager +def tarball_context(url, target_dir=None, runner=None, pushd=pushd): + """ + Get a tarball, extract it, change to that directory, yield, then + clean up. + `runner` is the function to invoke commands. + `pushd` is a context manager for changing the directory. + """ + if target_dir is None: + target_dir = os.path.basename(url).replace('.tar.gz', '').replace('.tgz', '') + if runner is None: + runner = functools.partial(subprocess.check_call, shell=True) + else: + warnings.warn("runner parameter is deprecated", DeprecationWarning) + # In the tar command, use --strip-components=1 to strip the first path and + # then + # use -C to cause the files to be extracted to {target_dir}. This ensures + # that we always know where the files were extracted. + runner('mkdir {target_dir}'.format(**vars())) + try: + getter = 'wget {url} -O -' + extract = 'tar x{compression} --strip-components=1 -C {target_dir}' + cmd = ' | '.join((getter, extract)) + runner(cmd.format(compression=infer_compression(url), **vars())) + with pushd(target_dir): + yield target_dir + finally: + runner('rm -Rf {target_dir}'.format(**vars())) + + +def infer_compression(url): + """ + Given a URL or filename, infer the compression code for tar. + + >>> infer_compression('http://foo/bar.tar.gz') + 'z' + >>> infer_compression('http://foo/bar.tgz') + 'z' + >>> infer_compression('file.bz') + 'j' + >>> infer_compression('file.xz') + 'J' + """ + # cheat and just assume it's the last two characters + compression_indicator = url[-2:] + mapping = dict(gz='z', bz='j', xz='J') + # Assume 'z' (gzip) if no match + return mapping.get(compression_indicator, 'z') + + +@contextlib.contextmanager +def temp_dir(remover=shutil.rmtree): + """ + Create a temporary directory context. Pass a custom remover + to override the removal behavior. + + >>> import pathlib + >>> with temp_dir() as the_dir: + ... assert os.path.isdir(the_dir) + ... _ = pathlib.Path(the_dir).joinpath('somefile').write_text('contents') + >>> assert not os.path.exists(the_dir) + """ + temp_dir = tempfile.mkdtemp() + try: + yield temp_dir + finally: + remover(temp_dir) + + +@contextlib.contextmanager +def repo_context(url, branch=None, quiet=True, dest_ctx=temp_dir): + """ + Check out the repo indicated by url. + + If dest_ctx is supplied, it should be a context manager + to yield the target directory for the check out. 
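+
+    For example (illustrative only; cloning requires network access)::
+
+        with repo_context('https://github.com/jaraco/skeleton') as dir:
+            ...  # work inside the checked-out temporary directory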
+    """
+    exe = 'git' if 'git' in url else 'hg'
+    with dest_ctx() as repo_dir:
+        cmd = [exe, 'clone', url, repo_dir]
+        if branch:
+            cmd.extend(['--branch', branch])
+        devnull = open(os.path.devnull, 'w')
+        stdout = devnull if quiet else None
+        subprocess.check_call(cmd, stdout=stdout)
+        yield repo_dir
+
+
+@contextlib.contextmanager
+def null():
+    """
+    A null context suitable to stand in for a meaningful context.
+
+    >>> with null() as value:
+    ...     assert value is None
+    """
+    yield
+
+
+class ExceptionTrap:
+    """
+    A context manager that will catch certain exceptions and provide an
+    indication they occurred.
+
+    >>> with ExceptionTrap() as trap:
+    ...     raise Exception()
+    >>> bool(trap)
+    True
+
+    >>> with ExceptionTrap() as trap:
+    ...     pass
+    >>> bool(trap)
+    False
+
+    >>> with ExceptionTrap(ValueError) as trap:
+    ...     raise ValueError("1 + 1 is not 3")
+    >>> bool(trap)
+    True
+    >>> trap.value
+    ValueError('1 + 1 is not 3')
+    >>> trap.tb
+    <traceback object at ...>
+
+    >>> with ExceptionTrap(ValueError) as trap:
+    ...     raise Exception()
+    Traceback (most recent call last):
+    ...
+    Exception
+
+    >>> bool(trap)
+    False
+    """
+
+    exc_info = None, None, None
+
+    def __init__(self, exceptions=(Exception,)):
+        self.exceptions = exceptions
+
+    def __enter__(self):
+        return self
+
+    @property
+    def type(self):
+        return self.exc_info[0]
+
+    @property
+    def value(self):
+        return self.exc_info[1]
+
+    @property
+    def tb(self):
+        return self.exc_info[2]
+
+    def __exit__(self, *exc_info):
+        type = exc_info[0]
+        matches = type and issubclass(type, self.exceptions)
+        if matches:
+            self.exc_info = exc_info
+        return matches
+
+    def __bool__(self):
+        return bool(self.type)
+
+    def raises(self, func, *, _test=bool):
+        """
+        Wrap func and replace the result with the truth
+        value of the trap (True if an exception occurred).
+
+        First, give the decorator an alias to support Python 3.8
+        syntax.
+
+        >>> raises = ExceptionTrap(ValueError).raises
+
+        Now decorate a function that always fails.
+
+        >>> @raises
+        ... def fail():
+        ...     raise ValueError('failed')
+        >>> fail()
+        True
+        """
+
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            with ExceptionTrap(self.exceptions) as trap:
+                func(*args, **kwargs)
+            return _test(trap)
+
+        return wrapper
+
+    def passes(self, func):
+        """
+        Wrap func and replace the result with the truth
+        value of the trap (True if no exception).
+
+        First, give the decorator an alias to support Python 3.8
+        syntax.
+
+        >>> passes = ExceptionTrap(ValueError).passes
+
+        Now decorate a function that always fails.
+
+        >>> @passes
+        ... def fail():
+        ...     raise ValueError('failed')
+
+        >>> fail()
+        False
+        """
+        return self.raises(func, _test=operator.not_)
+
+
+class suppress(contextlib.suppress, contextlib.ContextDecorator):
+    """
+    A version of contextlib.suppress with decorator support.
+
+    >>> @suppress(KeyError)
+    ... def key_error():
+    ...     {}['']
+    >>> key_error()
+    """
+
+
+class on_interrupt(contextlib.ContextDecorator):
+    """
+    Replace a KeyboardInterrupt with SystemExit(1)
+
+    >>> def do_interrupt():
+    ...     raise KeyboardInterrupt()
+    >>> on_interrupt('error')(do_interrupt)()
+    Traceback (most recent call last):
+    ...
+    SystemExit: 1
+    >>> on_interrupt('error', code=255)(do_interrupt)()
+    Traceback (most recent call last):
+    ...
+    SystemExit: 255
+    >>> on_interrupt('suppress')(do_interrupt)()
+    >>> with __import__('pytest').raises(KeyboardInterrupt):
+    ...
on_interrupt('ignore')(do_interrupt)() + """ + + def __init__( + self, + action='error', + # py3.7 compat + # /, + code=1, + ): + self.action = action + self.code = code + + def __enter__(self): + return self + + def __exit__(self, exctype, excinst, exctb): + if exctype is not KeyboardInterrupt or self.action == 'ignore': + return + elif self.action == 'error': + raise SystemExit(self.code) from excinst + return self.action == 'suppress' diff --git a/lib/pkg_resources/_vendor/jaraco/functools.py b/lib/pkg_resources/_vendor/jaraco/functools.py new file mode 100644 index 00000000..67aeadc3 --- /dev/null +++ b/lib/pkg_resources/_vendor/jaraco/functools.py @@ -0,0 +1,556 @@ +import functools +import time +import inspect +import collections +import types +import itertools +import warnings + +import pkg_resources.extern.more_itertools + +from typing import Callable, TypeVar + + +CallableT = TypeVar("CallableT", bound=Callable[..., object]) + + +def compose(*funcs): + """ + Compose any number of unary functions into a single unary function. + + >>> import textwrap + >>> expected = str.strip(textwrap.dedent(compose.__doc__)) + >>> strip_and_dedent = compose(str.strip, textwrap.dedent) + >>> strip_and_dedent(compose.__doc__) == expected + True + + Compose also allows the innermost function to take arbitrary arguments. + + >>> round_three = lambda x: round(x, ndigits=3) + >>> f = compose(round_three, int.__truediv__) + >>> [f(3*x, x+1) for x in range(1,10)] + [1.5, 2.0, 2.25, 2.4, 2.5, 2.571, 2.625, 2.667, 2.7] + """ + + def compose_two(f1, f2): + return lambda *args, **kwargs: f1(f2(*args, **kwargs)) + + return functools.reduce(compose_two, funcs) + + +def method_caller(method_name, *args, **kwargs): + """ + Return a function that will call a named method on the + target object with optional positional and keyword + arguments. + + >>> lower = method_caller('lower') + >>> lower('MyString') + 'mystring' + """ + + def call_method(target): + func = getattr(target, method_name) + return func(*args, **kwargs) + + return call_method + + +def once(func): + """ + Decorate func so it's only ever called the first time. + + This decorator can ensure that an expensive or non-idempotent function + will not be expensive on subsequent calls and is idempotent. + + >>> add_three = once(lambda a: a+3) + >>> add_three(3) + 6 + >>> add_three(9) + 6 + >>> add_three('12') + 6 + + To reset the stored value, simply clear the property ``saved_result``. + + >>> del add_three.saved_result + >>> add_three(9) + 12 + >>> add_three(8) + 12 + + Or invoke 'reset()' on it. + + >>> add_three.reset() + >>> add_three(-3) + 0 + >>> add_three(0) + 0 + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not hasattr(wrapper, 'saved_result'): + wrapper.saved_result = func(*args, **kwargs) + return wrapper.saved_result + + wrapper.reset = lambda: vars(wrapper).__delitem__('saved_result') + return wrapper + + +def method_cache( + method: CallableT, + cache_wrapper: Callable[ + [CallableT], CallableT + ] = functools.lru_cache(), # type: ignore[assignment] +) -> CallableT: + """ + Wrap lru_cache to support storing the cache data in the object instances. + + Abstracts the common paradigm where the method explicitly saves an + underscore-prefixed protected property on first call and returns that + subsequently. + + >>> class MyClass: + ... calls = 0 + ... + ... @method_cache + ... def method(self, value): + ... self.calls += 1 + ... return value + + >>> a = MyClass() + >>> a.method(3) + 3 + >>> for x in range(75): + ... 
res = a.method(x) + >>> a.calls + 75 + + Note that the apparent behavior will be exactly like that of lru_cache + except that the cache is stored on each instance, so values in one + instance will not flush values from another, and when an instance is + deleted, so are the cached values for that instance. + + >>> b = MyClass() + >>> for x in range(35): + ... res = b.method(x) + >>> b.calls + 35 + >>> a.method(0) + 0 + >>> a.calls + 75 + + Note that if method had been decorated with ``functools.lru_cache()``, + a.calls would have been 76 (due to the cached value of 0 having been + flushed by the 'b' instance). + + Clear the cache with ``.cache_clear()`` + + >>> a.method.cache_clear() + + Same for a method that hasn't yet been called. + + >>> c = MyClass() + >>> c.method.cache_clear() + + Another cache wrapper may be supplied: + + >>> cache = functools.lru_cache(maxsize=2) + >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache) + >>> a = MyClass() + >>> a.method2() + 3 + + Caution - do not subsequently wrap the method with another decorator, such + as ``@property``, which changes the semantics of the function. + + See also + http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/ + for another implementation and additional justification. + """ + + def wrapper(self: object, *args: object, **kwargs: object) -> object: + # it's the first call, replace the method with a cached, bound method + bound_method: CallableT = types.MethodType( # type: ignore[assignment] + method, self + ) + cached_method = cache_wrapper(bound_method) + setattr(self, method.__name__, cached_method) + return cached_method(*args, **kwargs) + + # Support cache clear even before cache has been created. + wrapper.cache_clear = lambda: None # type: ignore[attr-defined] + + return ( # type: ignore[return-value] + _special_method_cache(method, cache_wrapper) or wrapper + ) + + +def _special_method_cache(method, cache_wrapper): + """ + Because Python treats special methods differently, it's not + possible to use instance attributes to implement the cached + methods. + + Instead, install the wrapper method under a different name + and return a simple proxy to that wrapper. + + https://github.com/jaraco/jaraco.functools/issues/5 + """ + name = method.__name__ + special_names = '__getattr__', '__getitem__' + if name not in special_names: + return + + wrapper_name = '__cached' + name + + def proxy(self, *args, **kwargs): + if wrapper_name not in vars(self): + bound = types.MethodType(method, self) + cache = cache_wrapper(bound) + setattr(self, wrapper_name, cache) + else: + cache = getattr(self, wrapper_name) + return cache(*args, **kwargs) + + return proxy + + +def apply(transform): + """ + Decorate a function with a transform function that is + invoked on results returned from the decorated function. + + >>> @apply(reversed) + ... def get_numbers(start): + ... "doc for get_numbers" + ... return range(start, start+3) + >>> list(get_numbers(4)) + [6, 5, 4] + >>> get_numbers.__doc__ + 'doc for get_numbers' + """ + + def wrap(func): + return functools.wraps(func)(compose(transform, func)) + + return wrap + + +def result_invoke(action): + r""" + Decorate a function with an action function that is + invoked on the results returned from the decorated + function (for its side-effect), then return the original + result. + + >>> @result_invoke(print) + ... def add_two(a, b): + ... 
return a + b + >>> x = add_two(2, 3) + 5 + >>> x + 5 + """ + + def wrap(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + result = func(*args, **kwargs) + action(result) + return result + + return wrapper + + return wrap + + +def invoke(f, *args, **kwargs): + """ + Call a function for its side effect after initialization. + + The benefit of using the decorator instead of simply invoking a function + after defining it is that it makes explicit the author's intent for the + function to be called immediately. Whereas if one simply calls the + function immediately, it's less obvious if that was intentional or + incidental. It also avoids repeating the name - the two actions, defining + the function and calling it immediately are modeled separately, but linked + by the decorator construct. + + The benefit of having a function construct (opposed to just invoking some + behavior inline) is to serve as a scope in which the behavior occurs. It + avoids polluting the global namespace with local variables, provides an + anchor on which to attach documentation (docstring), keeps the behavior + logically separated (instead of conceptually separated or not separated at + all), and provides potential to re-use the behavior for testing or other + purposes. + + This function is named as a pithy way to communicate, "call this function + primarily for its side effect", or "while defining this function, also + take it aside and call it". It exists because there's no Python construct + for "define and call" (nor should there be, as decorators serve this need + just fine). The behavior happens immediately and synchronously. + + >>> @invoke + ... def func(): print("called") + called + >>> func() + called + + Use functools.partial to pass parameters to the initial call + + >>> @functools.partial(invoke, name='bingo') + ... def func(name): print("called with", name) + called with bingo + """ + f(*args, **kwargs) + return f + + +def call_aside(*args, **kwargs): + """ + Deprecated name for invoke. + """ + warnings.warn("call_aside is deprecated, use invoke", DeprecationWarning) + return invoke(*args, **kwargs) + + +class Throttler: + """ + Rate-limit a function (or other callable) + """ + + def __init__(self, func, max_rate=float('Inf')): + if isinstance(func, Throttler): + func = func.func + self.func = func + self.max_rate = max_rate + self.reset() + + def reset(self): + self.last_called = 0 + + def __call__(self, *args, **kwargs): + self._wait() + return self.func(*args, **kwargs) + + def _wait(self): + "ensure at least 1/max_rate seconds from last call" + elapsed = time.time() - self.last_called + must_wait = 1 / self.max_rate - elapsed + time.sleep(max(0, must_wait)) + self.last_called = time.time() + + def __get__(self, obj, type=None): + return first_invoke(self._wait, functools.partial(self.func, obj)) + + +def first_invoke(func1, func2): + """ + Return a function that when invoked will invoke func1 without + any parameters (for its side-effect) and then invoke func2 + with whatever parameters were passed, returning its result. + """ + + def wrapper(*args, **kwargs): + func1() + return func2(*args, **kwargs) + + return wrapper + + +def retry_call(func, cleanup=lambda: None, retries=0, trap=()): + """ + Given a callable func, trap the indicated exceptions + for up to 'retries' times, invoking cleanup on the + exception. On the final attempt, allow any exceptions + to propagate. 
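+
+    A sketch of typical usage (``flaky`` is a made-up example function):
+
+    >>> attempts = []
+    >>> def flaky():
+    ...     attempts.append(None)
+    ...     if len(attempts) < 3:
+    ...         raise ValueError('not ready')
+    ...     return 'ok'
+    >>> retry_call(flaky, retries=5, trap=ValueError)
+    'ok'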
+    """
+    attempts = itertools.count() if retries == float('inf') else range(retries)
+    for attempt in attempts:
+        try:
+            return func()
+        except trap:
+            cleanup()
+
+    return func()
+
+
+def retry(*r_args, **r_kwargs):
+    """
+    Decorator wrapper for retry_call. Accepts arguments to retry_call
+    except func and then returns a decorator for the decorated function.
+
+    Ex:
+
+    >>> @retry(retries=3)
+    ... def my_func(a, b):
+    ...     "this is my funk"
+    ...     print(a, b)
+    >>> my_func.__doc__
+    'this is my funk'
+    """
+
+    def decorate(func):
+        @functools.wraps(func)
+        def wrapper(*f_args, **f_kwargs):
+            bound = functools.partial(func, *f_args, **f_kwargs)
+            return retry_call(bound, *r_args, **r_kwargs)
+
+        return wrapper
+
+    return decorate
+
+
+def print_yielded(func):
+    """
+    Convert a generator into a function that prints all yielded elements
+
+    >>> @print_yielded
+    ... def x():
+    ...     yield 3; yield None
+    >>> x()
+    3
+    None
+    """
+    print_all = functools.partial(map, print)
+    print_results = compose(
+        pkg_resources.extern.more_itertools.consume, print_all, func
+    )
+    return functools.wraps(func)(print_results)
+
+
+def pass_none(func):
+    """
+    Wrap func so it's not called if its first param is None
+
+    >>> print_text = pass_none(print)
+    >>> print_text('text')
+    text
+    >>> print_text(None)
+    """
+
+    @functools.wraps(func)
+    def wrapper(param, *args, **kwargs):
+        if param is not None:
+            return func(param, *args, **kwargs)
+
+    return wrapper
+
+
+def assign_params(func, namespace):
+    """
+    Assign parameters from namespace where func solicits.
+
+    >>> def func(x, y=3):
+    ...     print(x, y)
+    >>> assigned = assign_params(func, dict(x=2, z=4))
+    >>> assigned()
+    2 3
+
+    The usual errors are raised if a function doesn't receive
+    its required parameters:
+
+    >>> assigned = assign_params(func, dict(y=3, z=4))
+    >>> assigned()
+    Traceback (most recent call last):
+    TypeError: func() ...argument...
+
+    It even works on methods:
+
+    >>> class Handler:
+    ...     def meth(self, arg):
+    ...         print(arg)
+    >>> assign_params(Handler().meth, dict(arg='crystal', foo='clear'))()
+    crystal
+    """
+    sig = inspect.signature(func)
+    params = sig.parameters.keys()
+    call_ns = {k: namespace[k] for k in params if k in namespace}
+    return functools.partial(func, **call_ns)
+
+
+def save_method_args(method):
+    """
+    Wrap a method such that when it is called, the args and kwargs are
+    saved on the method.
+
+    >>> class MyClass:
+    ...     @save_method_args
+    ...     def method(self, a, b):
+    ...         print(a, b)
+    >>> my_ob = MyClass()
+    >>> my_ob.method(1, 2)
+    1 2
+    >>> my_ob._saved_method.args
+    (1, 2)
+    >>> my_ob._saved_method.kwargs
+    {}
+    >>> my_ob.method(a=3, b='foo')
+    3 foo
+    >>> my_ob._saved_method.args
+    ()
+    >>> my_ob._saved_method.kwargs == dict(a=3, b='foo')
+    True
+
+    The arguments are stored on the instance, allowing for
+    different instances to save different args.
+
+    >>> your_ob = MyClass()
+    >>> your_ob.method({str('x'): 3}, b=[4])
+    {'x': 3} [4]
+    >>> your_ob._saved_method.args
+    ({'x': 3},)
+    >>> my_ob._saved_method.args
+    ()
+    """
+    args_and_kwargs = collections.namedtuple('args_and_kwargs', 'args kwargs')
+
+    @functools.wraps(method)
+    def wrapper(self, *args, **kwargs):
+        attr_name = '_saved_' + method.__name__
+        attr = args_and_kwargs(args, kwargs)
+        setattr(self, attr_name, attr)
+        return method(self, *args, **kwargs)
+
+    return wrapper
+
+
+def except_(*exceptions, replace=None, use=None):
+    """
+    Replace the indicated exceptions, if raised, with the indicated
+    literal replacement or evaluated expression (if present).
+ + >>> safe_int = except_(ValueError)(int) + >>> safe_int('five') + >>> safe_int('5') + 5 + + Specify a literal replacement with ``replace``. + + >>> safe_int_r = except_(ValueError, replace=0)(int) + >>> safe_int_r('five') + 0 + + Provide an expression to ``use`` to pass through particular parameters. + + >>> safe_int_pt = except_(ValueError, use='args[0]')(int) + >>> safe_int_pt('five') + 'five' + + """ + + def decorate(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except exceptions: + try: + return eval(use) + except TypeError: + return replace + + return wrapper + + return decorate diff --git a/lib/pkg_resources/_vendor/jaraco/text/Lorem ipsum.txt b/lib/pkg_resources/_vendor/jaraco/text/Lorem ipsum.txt new file mode 100644 index 00000000..986f944b --- /dev/null +++ b/lib/pkg_resources/_vendor/jaraco/text/Lorem ipsum.txt @@ -0,0 +1,2 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +Curabitur pretium tincidunt lacus. Nulla gravida orci a odio. Nullam varius, turpis et commodo pharetra, est eros bibendum elit, nec luctus magna felis sollicitudin mauris. Integer in mauris eu nibh euismod gravida. Duis ac tellus et risus vulputate vehicula. Donec lobortis risus a elit. Etiam tempor. Ut ullamcorper, ligula eu tempor congue, eros est euismod turpis, id tincidunt sapien risus a quam. Maecenas fermentum consequat mi. Donec fermentum. Pellentesque malesuada nulla a mi. Duis sapien sem, aliquet nec, commodo eget, consequat quis, neque. Aliquam faucibus, elit ut dictum aliquet, felis nisl adipiscing sapien, sed malesuada diam lacus eget erat. Cras mollis scelerisque nunc. Nullam arcu. Aliquam consequat. Curabitur augue lorem, dapibus quis, laoreet et, pretium ac, nisi. Aenean magna nisl, mollis quis, molestie eu, feugiat in, orci. In hac habitasse platea dictumst. diff --git a/lib/pkg_resources/_vendor/jaraco/text/__init__.py b/lib/pkg_resources/_vendor/jaraco/text/__init__.py new file mode 100644 index 00000000..c466378c --- /dev/null +++ b/lib/pkg_resources/_vendor/jaraco/text/__init__.py @@ -0,0 +1,599 @@ +import re +import itertools +import textwrap +import functools + +try: + from importlib.resources import files # type: ignore +except ImportError: # pragma: nocover + from pkg_resources.extern.importlib_resources import files # type: ignore + +from pkg_resources.extern.jaraco.functools import compose, method_cache +from pkg_resources.extern.jaraco.context import ExceptionTrap + + +def substitution(old, new): + """ + Return a function that will perform a substitution on a string + """ + return lambda s: s.replace(old, new) + + +def multi_substitution(*substitutions): + """ + Take a sequence of pairs specifying substitutions, and create + a function that performs those substitutions. + + >>> multi_substitution(('foo', 'bar'), ('bar', 'baz'))('foo') + 'baz' + """ + substitutions = itertools.starmap(substitution, substitutions) + # compose function applies last function first, so reverse the + # substitutions to get the expected order. 
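+    # e.g. for (('foo', 'bar'), ('bar', 'baz')), 'foo' -> 'bar' must run
+    # before 'bar' -> 'baz', matching the doctest above.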
+ substitutions = reversed(tuple(substitutions)) + return compose(*substitutions) + + +class FoldedCase(str): + """ + A case insensitive string class; behaves just like str + except compares equal when the only variation is case. + + >>> s = FoldedCase('hello world') + + >>> s == 'Hello World' + True + + >>> 'Hello World' == s + True + + >>> s != 'Hello World' + False + + >>> s.index('O') + 4 + + >>> s.split('O') + ['hell', ' w', 'rld'] + + >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta'])) + ['alpha', 'Beta', 'GAMMA'] + + Sequence membership is straightforward. + + >>> "Hello World" in [s] + True + >>> s in ["Hello World"] + True + + You may test for set inclusion, but candidate and elements + must both be folded. + + >>> FoldedCase("Hello World") in {s} + True + >>> s in {FoldedCase("Hello World")} + True + + String inclusion works as long as the FoldedCase object + is on the right. + + >>> "hello" in FoldedCase("Hello World") + True + + But not if the FoldedCase object is on the left: + + >>> FoldedCase('hello') in 'Hello World' + False + + In that case, use ``in_``: + + >>> FoldedCase('hello').in_('Hello World') + True + + >>> FoldedCase('hello') > FoldedCase('Hello') + False + """ + + def __lt__(self, other): + return self.lower() < other.lower() + + def __gt__(self, other): + return self.lower() > other.lower() + + def __eq__(self, other): + return self.lower() == other.lower() + + def __ne__(self, other): + return self.lower() != other.lower() + + def __hash__(self): + return hash(self.lower()) + + def __contains__(self, other): + return super().lower().__contains__(other.lower()) + + def in_(self, other): + "Does self appear in other?" + return self in FoldedCase(other) + + # cache lower since it's likely to be called frequently. + @method_cache + def lower(self): + return super().lower() + + def index(self, sub): + return self.lower().index(sub.lower()) + + def split(self, splitter=' ', maxsplit=0): + pattern = re.compile(re.escape(splitter), re.I) + return pattern.split(self, maxsplit) + + +# Python 3.8 compatibility +_unicode_trap = ExceptionTrap(UnicodeDecodeError) + + +@_unicode_trap.passes +def is_decodable(value): + r""" + Return True if the supplied value is decodable (using the default + encoding). + + >>> is_decodable(b'\xff') + False + >>> is_decodable(b'\x32') + True + """ + value.decode() + + +def is_binary(value): + r""" + Return True if the value appears to be binary (that is, it's a byte + string and isn't decodable). + + >>> is_binary(b'\xff') + True + >>> is_binary('\xff') + False + """ + return isinstance(value, bytes) and not is_decodable(value) + + +def trim(s): + r""" + Trim something like a docstring to remove the whitespace that + is common due to indentation and formatting. + + >>> trim("\n\tfoo = bar\n\t\tbar = baz\n") + 'foo = bar\n\tbar = baz' + """ + return textwrap.dedent(s).strip() + + +def wrap(s): + """ + Wrap lines of text, retaining existing newlines as + paragraph markers. + + >>> print(wrap(lorem_ipsum)) + Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do + eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad + minim veniam, quis nostrud exercitation ullamco laboris nisi ut + aliquip ex ea commodo consequat. Duis aute irure dolor in + reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla + pariatur. Excepteur sint occaecat cupidatat non proident, sunt in + culpa qui officia deserunt mollit anim id est laborum. + + Curabitur pretium tincidunt lacus. Nulla gravida orci a odio. 
Nullam + varius, turpis et commodo pharetra, est eros bibendum elit, nec luctus + magna felis sollicitudin mauris. Integer in mauris eu nibh euismod + gravida. Duis ac tellus et risus vulputate vehicula. Donec lobortis + risus a elit. Etiam tempor. Ut ullamcorper, ligula eu tempor congue, + eros est euismod turpis, id tincidunt sapien risus a quam. Maecenas + fermentum consequat mi. Donec fermentum. Pellentesque malesuada nulla + a mi. Duis sapien sem, aliquet nec, commodo eget, consequat quis, + neque. Aliquam faucibus, elit ut dictum aliquet, felis nisl adipiscing + sapien, sed malesuada diam lacus eget erat. Cras mollis scelerisque + nunc. Nullam arcu. Aliquam consequat. Curabitur augue lorem, dapibus + quis, laoreet et, pretium ac, nisi. Aenean magna nisl, mollis quis, + molestie eu, feugiat in, orci. In hac habitasse platea dictumst. + """ + paragraphs = s.splitlines() + wrapped = ('\n'.join(textwrap.wrap(para)) for para in paragraphs) + return '\n\n'.join(wrapped) + + +def unwrap(s): + r""" + Given a multi-line string, return an unwrapped version. + + >>> wrapped = wrap(lorem_ipsum) + >>> wrapped.count('\n') + 20 + >>> unwrapped = unwrap(wrapped) + >>> unwrapped.count('\n') + 1 + >>> print(unwrapped) + Lorem ipsum dolor sit amet, consectetur adipiscing ... + Curabitur pretium tincidunt lacus. Nulla gravida orci ... + + """ + paragraphs = re.split(r'\n\n+', s) + cleaned = (para.replace('\n', ' ') for para in paragraphs) + return '\n'.join(cleaned) + + + + +class Splitter(object): + """object that will split a string with the given arguments for each call + + >>> s = Splitter(',') + >>> s('hello, world, this is your, master calling') + ['hello', ' world', ' this is your', ' master calling'] + """ + + def __init__(self, *args): + self.args = args + + def __call__(self, s): + return s.split(*self.args) + + +def indent(string, prefix=' ' * 4): + """ + >>> indent('foo') + ' foo' + """ + return prefix + string + + +class WordSet(tuple): + """ + Given an identifier, return the words that identifier represents, + whether in camel case, underscore-separated, etc. + + >>> WordSet.parse("camelCase") + ('camel', 'Case') + + >>> WordSet.parse("under_sep") + ('under', 'sep') + + Acronyms should be retained + + >>> WordSet.parse("firstSNL") + ('first', 'SNL') + + >>> WordSet.parse("you_and_I") + ('you', 'and', 'I') + + >>> WordSet.parse("A simple test") + ('A', 'simple', 'test') + + Multiple caps should not interfere with the first cap of another word. + + >>> WordSet.parse("myABCClass") + ('my', 'ABC', 'Class') + + The result is a WordSet, so you can get the form you need. + + >>> WordSet.parse("myABCClass").underscore_separated() + 'my_ABC_Class' + + >>> WordSet.parse('a-command').camel_case() + 'ACommand' + + >>> WordSet.parse('someIdentifier').lowered().space_separated() + 'some identifier' + + Slices of the result should return another WordSet. 
+
+    >>> WordSet.parse('taken-out-of-context')[1:].underscore_separated()
+    'out_of_context'
+
+    >>> WordSet.from_class_name(WordSet()).lowered().space_separated()
+    'word set'
+
+    >>> example = WordSet.parse('figured it out')
+    >>> example.headless_camel_case()
+    'figuredItOut'
+    >>> example.dash_separated()
+    'figured-it-out'
+
+    """
+
+    _pattern = re.compile('([A-Z]?[a-z]+)|([A-Z]+(?![a-z]))')
+
+    def capitalized(self):
+        return WordSet(word.capitalize() for word in self)
+
+    def lowered(self):
+        return WordSet(word.lower() for word in self)
+
+    def camel_case(self):
+        return ''.join(self.capitalized())
+
+    def headless_camel_case(self):
+        words = iter(self)
+        first = next(words).lower()
+        new_words = itertools.chain((first,), WordSet(words).camel_case())
+        return ''.join(new_words)
+
+    def underscore_separated(self):
+        return '_'.join(self)
+
+    def dash_separated(self):
+        return '-'.join(self)
+
+    def space_separated(self):
+        return ' '.join(self)
+
+    def trim_right(self, item):
+        """
+        Remove the item from the end of the set.
+
+        >>> WordSet.parse('foo bar').trim_right('foo')
+        ('foo', 'bar')
+        >>> WordSet.parse('foo bar').trim_right('bar')
+        ('foo',)
+        >>> WordSet.parse('').trim_right('bar')
+        ()
+        """
+        return self[:-1] if self and self[-1] == item else self
+
+    def trim_left(self, item):
+        """
+        Remove the item from the beginning of the set.
+
+        >>> WordSet.parse('foo bar').trim_left('foo')
+        ('bar',)
+        >>> WordSet.parse('foo bar').trim_left('bar')
+        ('foo', 'bar')
+        >>> WordSet.parse('').trim_left('bar')
+        ()
+        """
+        return self[1:] if self and self[0] == item else self
+
+    def trim(self, item):
+        """
+        >>> WordSet.parse('foo bar').trim('foo')
+        ('bar',)
+        """
+        return self.trim_left(item).trim_right(item)
+
+    def __getitem__(self, item):
+        result = super(WordSet, self).__getitem__(item)
+        if isinstance(item, slice):
+            result = WordSet(result)
+        return result
+
+    @classmethod
+    def parse(cls, identifier):
+        matches = cls._pattern.finditer(identifier)
+        return WordSet(match.group(0) for match in matches)
+
+    @classmethod
+    def from_class_name(cls, subject):
+        return cls.parse(subject.__class__.__name__)
+
+
+# for backward compatibility
+words = WordSet.parse
+
+
+def simple_html_strip(s):
+    r"""
+    Remove HTML from the string `s`.
+
+    >>> str(simple_html_strip(''))
+    ''
+
+    >>> print(simple_html_strip('A <bold>stormy</bold> day in paradise'))
+    A stormy day in paradise
+
+    >>> print(simple_html_strip('Somebody <broken-html>tell the truth.'))
+    Somebody tell the truth.
+
+    >>> print(simple_html_strip('What about<br/>\nmultiple lines?'))
+    What about
+    multiple lines?
+    """
+    html_stripper = re.compile('(<!--.*?-->)|(<[^>]*>)|([^<]+)', re.DOTALL)
+    texts = (match.group(3) or '' for match in html_stripper.finditer(s))
+    return ''.join(texts)
+
+
+class SeparatedValues(str):
+    """
+    A string separated by a separator. Overrides __iter__ for getting
+    the values.
+
+    >>> list(SeparatedValues('a,b,c'))
+    ['a', 'b', 'c']
+
+    Whitespace is stripped and empty values are discarded.
+
+    >>> list(SeparatedValues(' a, b , c, '))
+    ['a', 'b', 'c']
+    """
+
+    separator = ','
+
+    def __iter__(self):
+        parts = self.split(self.separator)
+        return filter(None, (part.strip() for part in parts))
+
+
+class Stripper:
+    r"""
+    Given a series of lines, find the common prefix and strip it from them.
+
+    >>> lines = [
+    ...     'abcdefg\n',
+    ...     'abc\n',
+    ...     'abcde\n',
+    ... ]
+    >>> res = Stripper.strip_prefix(lines)
+    >>> res.prefix
+    'abc'
+    >>> list(res.lines)
+    ['defg\n', '\n', 'de\n']
+
+    If no prefix is common, nothing should be stripped.
+
+    >>> lines = [
+    ...     'abcd\n',
+    ...     '1234\n',
+    ... ]
+    >>> res = Stripper.strip_prefix(lines)
+    >>> res.prefix
+    ''
+    >>> list(res.lines)
+    ['abcd\n', '1234\n']
+    """
+
+    def __init__(self, prefix, lines):
+        self.prefix = prefix
+        self.lines = map(self, lines)
+
+    @classmethod
+    def strip_prefix(cls, lines):
+        prefix_lines, lines = itertools.tee(lines)
+        prefix = functools.reduce(cls.common_prefix, prefix_lines)
+        return cls(prefix, lines)
+
+    def __call__(self, line):
+        if not self.prefix:
+            return line
+        null, prefix, rest = line.partition(self.prefix)
+        return rest
+
+    @staticmethod
+    def common_prefix(s1, s2):
+        """
+        Return the common prefix of two lines.
+        """
+        index = min(len(s1), len(s2))
+        while s1[:index] != s2[:index]:
+            index -= 1
+        return s1[:index]
+
+
+def remove_prefix(text, prefix):
+    """
+    Remove the prefix from the text if it exists.
+
+    >>> remove_prefix('underwhelming performance', 'underwhelming ')
+    'performance'
+
+    >>> remove_prefix('something special', 'sample')
+    'something special'
+    """
+    null, prefix, rest = text.rpartition(prefix)
+    return rest
+
+
+def remove_suffix(text, suffix):
+    """
+    Remove the suffix from the text if it exists.
+
+    >>> remove_suffix('name.git', '.git')
+    'name'
+
+    >>> remove_suffix('something special', 'sample')
+    'something special'
+    """
+    rest, suffix, null = text.partition(suffix)
+    return rest
+
+
+def normalize_newlines(text):
+    r"""
+    Replace alternate newlines with the canonical newline.
+
+    >>> normalize_newlines('Lorem Ipsum\u2029')
+    'Lorem Ipsum\n'
+    >>> normalize_newlines('Lorem Ipsum\r\n')
+    'Lorem Ipsum\n'
+    >>> normalize_newlines('Lorem Ipsum\x85')
+    'Lorem Ipsum\n'
+    """
+    newlines = ['\r\n', '\r', '\n', '\u0085', '\u2028', '\u2029']
+    pattern = '|'.join(newlines)
+    return re.sub(pattern, '\n', text)
+
+
+def _nonblank(str):
+    return str and not str.startswith('#')
+
+
+@functools.singledispatch
+def yield_lines(iterable):
+    r"""
+    Yield valid lines of a string or iterable.
+
+    >>> list(yield_lines(''))
+    []
+    >>> list(yield_lines(['foo', 'bar']))
+    ['foo', 'bar']
+    >>> list(yield_lines('foo\nbar'))
+    ['foo', 'bar']
+    >>> list(yield_lines('\nfoo\n#bar\nbaz #comment'))
+    ['foo', 'baz #comment']
+    >>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n']))
+    ['foo', 'bar', 'baz', 'bing']
+    """
+    return itertools.chain.from_iterable(map(yield_lines, iterable))
+
+
+@yield_lines.register(str)
+def _(text):
+    return filter(_nonblank, map(str.strip, text.splitlines()))
+
+
+def drop_comment(line):
+    """
+    Drop comments.
+
+    >>> drop_comment('foo # bar')
+    'foo'
+
+    A hash without a space may be in a URL.
+
+    >>> drop_comment('http://example.com/foo#bar')
+    'http://example.com/foo#bar'
+    """
+    return line.partition(' #')[0]
+
+
+def join_continuation(lines):
+    r"""
+    Join lines continued by a trailing backslash.
+
+    >>> list(join_continuation(['foo \\', 'bar', 'baz']))
+    ['foobar', 'baz']
+    >>> list(join_continuation(['foo \\', 'bar \\', 'baz']))
+    ['foobarbaz']
+
+    Not sure why, but...
+    The character preceding the backslash is also elided.
+
+    >>> list(join_continuation(['goo\\', 'dly']))
+    ['godly']
+
+    A terrible idea, but...
+    If no line is available to continue, suppress the lines.
+
+    >>> list(join_continuation(['foo', 'bar\\', 'baz\\']))
+    ['foo']
+    """
+    lines = iter(lines)
+    for item in lines:
+        while item.endswith('\\'):
+            try:
+                item = item[:-2].strip() + next(lines)
+            except StopIteration:
+                return
+        yield item
diff --git a/lib/pkg_resources/_vendor/more_itertools/__init__.py b/lib/pkg_resources/_vendor/more_itertools/__init__.py
new file mode 100644
index 00000000..66443971
--- /dev/null
+++ b/lib/pkg_resources/_vendor/more_itertools/__init__.py
@@ -0,0 +1,6 @@
+"""More routines for operating on iterables, beyond itertools"""
+
+from .more import *  # noqa
+from .recipes import *  # noqa
+
+__version__ = '9.1.0'
diff --git a/lib/pkg_resources/_vendor/more_itertools/__init__.pyi b/lib/pkg_resources/_vendor/more_itertools/__init__.pyi
new file mode 100644
index 00000000..96f6e36c
--- /dev/null
+++ b/lib/pkg_resources/_vendor/more_itertools/__init__.pyi
@@ -0,0 +1,2 @@
+from .more import *
+from .recipes import *
diff --git a/lib/pkg_resources/_vendor/more_itertools/more.py b/lib/pkg_resources/_vendor/more_itertools/more.py
new file mode 100755
index 00000000..e0e2d3de
--- /dev/null
+++ b/lib/pkg_resources/_vendor/more_itertools/more.py
@@ -0,0 +1,4391 @@
+import warnings
+
+from collections import Counter, defaultdict, deque, abc
+from collections.abc import Sequence
+from functools import partial, reduce, wraps
+from heapq import heapify, heapreplace, heappop
+from itertools import (
+    chain,
+    compress,
+    count,
+    cycle,
+    dropwhile,
+    groupby,
+    islice,
+    repeat,
+    starmap,
+    takewhile,
+    tee,
+    zip_longest,
+)
+from math import exp, factorial, floor, log
+from queue import Empty, Queue
+from random import random, randrange, uniform
+from operator import itemgetter, mul, sub, gt, lt, ge, le
+from sys import hexversion, maxsize
+from time import monotonic
+
+from .recipes import (
+    _marker,
+    _zip_equal,
+    UnequalIterablesError,
+    consume,
+    flatten,
+    pairwise,
+    powerset,
+    take,
+    unique_everseen,
+    all_equal,
+)
+
+__all__ = [
+    'AbortThread',
+    'SequenceView',
+    'UnequalIterablesError',
+    'adjacent',
+    'all_unique',
+    'always_iterable',
+    'always_reversible',
+    'bucket',
+    'callback_iter',
+    'chunked',
+    'chunked_even',
+    'circular_shifts',
+    'collapse',
+    'combination_index',
+    'consecutive_groups',
+    'constrained_batches',
+    'consumer',
+    'count_cycle',
+    'countable',
+    'difference',
+    'distinct_combinations',
+    'distinct_permutations',
+    'distribute',
+    'divide',
+    'duplicates_everseen',
+    'duplicates_justseen',
+    'exactly_n',
+    'filter_except',
+    'first',
+    'gray_product',
+    'groupby_transform',
+    'ichunked',
+    'iequals',
+    'ilen',
+    'interleave',
+    'interleave_evenly',
+    'interleave_longest',
+    'intersperse',
+    'is_sorted',
+    'islice_extended',
+    'iterate',
+    'last',
+    'locate',
+    'longest_common_prefix',
+    'lstrip',
+    'make_decorator',
+    'map_except',
+    'map_if',
+    'map_reduce',
+    'mark_ends',
+    'minmax',
+    'nth_or_last',
+    'nth_permutation',
+    'nth_product',
+    'numeric_range',
+    'one',
+    'only',
+    'padded',
+    'partitions',
+    'peekable',
+    'permutation_index',
+    'product_index',
+    'raise_',
+    'repeat_each',
+    'repeat_last',
+    'replace',
+    'rlocate',
+    'rstrip',
+    'run_length',
+    'sample',
+    'seekable',
+    'set_partitions',
+    'side_effect',
+    'sliced',
+    'sort_together',
+    'split_after',
+    'split_at',
+    'split_before',
+    'split_into',
+    'split_when',
+    'spy',
+    'stagger',
+    'strip',
+    'strictly_n',
+    'substrings',
+    'substrings_indexes',
+    'time_limited',
+    'unique_in_window',
+    'unique_to_each',
+    'unzip',
+    'value_chain',
+    'windowed',
+    'windowed_complete',
+    'with_iter',
+    'zip_broadcast',
+    'zip_equal',
+    'zip_offset',
+]
+
+
+def chunked(iterable, n, strict=False):
+    """Break *iterable* into lists of length *n*:
+
+    >>> list(chunked([1, 2, 3, 4, 5, 6], 3))
+    [[1, 2, 3], [4, 5, 6]]
+
+    By default, the last yielded list will have fewer than *n* elements
+    if the length of *iterable* is not divisible by *n*:
+
+    >>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3))
+    [[1, 2, 3], [4, 5, 6], [7, 8]]
+
+    To use a fill-in value instead, see the :func:`grouper` recipe.
+
+    If the length of *iterable* is not divisible by *n* and *strict* is
+    ``True``, then ``ValueError`` will be raised before the last
+    list is yielded.
+
+    """
+    iterator = iter(partial(take, n, iter(iterable)), [])
+    if strict:
+        if n is None:
+            raise ValueError('n must not be None when using strict mode.')
+
+        def ret():
+            for chunk in iterator:
+                if len(chunk) != n:
+                    raise ValueError('iterable is not divisible by n.')
+                yield chunk
+
+        return iter(ret())
+    else:
+        return iterator
+
+
+def first(iterable, default=_marker):
+    """Return the first item of *iterable*, or *default* if *iterable* is
+    empty.
+
+    >>> first([0, 1, 2, 3])
+    0
+    >>> first([], 'some default')
+    'some default'
+
+    If *default* is not provided and there are no items in the iterable,
+    raise ``ValueError``.
+
+    :func:`first` is useful when you have a generator of expensive-to-retrieve
+    values and want any arbitrary one. It is marginally shorter than
+    ``next(iter(iterable), default)``.
+
+    """
+    try:
+        return next(iter(iterable))
+    except StopIteration as e:
+        if default is _marker:
+            raise ValueError(
+                'first() was called on an empty iterable, and no '
+                'default value was provided.'
+            ) from e
+        return default
+
+
+def last(iterable, default=_marker):
+    """Return the last item of *iterable*, or *default* if *iterable* is
+    empty.
+
+    >>> last([0, 1, 2, 3])
+    3
+    >>> last([], 'some default')
+    'some default'
+
+    If *default* is not provided and there are no items in the iterable,
+    raise ``ValueError``.
+    """
+    try:
+        if isinstance(iterable, Sequence):
+            return iterable[-1]
+        # Work around https://bugs.python.org/issue38525
+        elif hasattr(iterable, '__reversed__') and (hexversion != 0x030800F0):
+            return next(reversed(iterable))
+        else:
+            return deque(iterable, maxlen=1)[-1]
+    except (IndexError, TypeError, StopIteration):
+        if default is _marker:
+            raise ValueError(
+                'last() was called on an empty iterable, and no default was '
+                'provided.'
+            )
+        return default
+
+
+def nth_or_last(iterable, n, default=_marker):
+    """Return the nth or the last item of *iterable*,
+    or *default* if *iterable* is empty.
+ + >>> nth_or_last([0, 1, 2, 3], 2) + 2 + >>> nth_or_last([0, 1], 2) + 1 + >>> nth_or_last([], 0, 'some default') + 'some default' + + If *default* is not provided and there are no items in the iterable, + raise ``ValueError``. + """ + return last(islice(iterable, n + 1), default=default) + + +class peekable: + """Wrap an iterator to allow lookahead and prepending elements. + + Call :meth:`peek` on the result to get the value that will be returned + by :func:`next`. This won't advance the iterator: + + >>> p = peekable(['a', 'b']) + >>> p.peek() + 'a' + >>> next(p) + 'a' + + Pass :meth:`peek` a default value to return that instead of raising + ``StopIteration`` when the iterator is exhausted. + + >>> p = peekable([]) + >>> p.peek('hi') + 'hi' + + peekables also offer a :meth:`prepend` method, which "inserts" items + at the head of the iterable: + + >>> p = peekable([1, 2, 3]) + >>> p.prepend(10, 11, 12) + >>> next(p) + 10 + >>> p.peek() + 11 + >>> list(p) + [11, 12, 1, 2, 3] + + peekables can be indexed. Index 0 is the item that will be returned by + :func:`next`, index 1 is the item after that, and so on: + The values up to the given index will be cached. + + >>> p = peekable(['a', 'b', 'c', 'd']) + >>> p[0] + 'a' + >>> p[1] + 'b' + >>> next(p) + 'a' + + Negative indexes are supported, but be aware that they will cache the + remaining items in the source iterator, which may require significant + storage. + + To check whether a peekable is exhausted, check its truth value: + + >>> p = peekable(['a', 'b']) + >>> if p: # peekable has items + ... list(p) + ['a', 'b'] + >>> if not p: # peekable is exhausted + ... list(p) + [] + + """ + + def __init__(self, iterable): + self._it = iter(iterable) + self._cache = deque() + + def __iter__(self): + return self + + def __bool__(self): + try: + self.peek() + except StopIteration: + return False + return True + + def peek(self, default=_marker): + """Return the item that will be next returned from ``next()``. + + Return ``default`` if there are no items left. If ``default`` is not + provided, raise ``StopIteration``. + + """ + if not self._cache: + try: + self._cache.append(next(self._it)) + except StopIteration: + if default is _marker: + raise + return default + return self._cache[0] + + def prepend(self, *items): + """Stack up items to be the next ones returned from ``next()`` or + ``self.peek()``. The items will be returned in + first in, first out order:: + + >>> p = peekable([1, 2, 3]) + >>> p.prepend(10, 11, 12) + >>> next(p) + 10 + >>> list(p) + [11, 12, 1, 2, 3] + + It is possible, by prepending items, to "resurrect" a peekable that + previously raised ``StopIteration``. + + >>> p = peekable([]) + >>> next(p) + Traceback (most recent call last): + ... + StopIteration + >>> p.prepend(1) + >>> next(p) + 1 + >>> next(p) + Traceback (most recent call last): + ... 
+ StopIteration + + """ + self._cache.extendleft(reversed(items)) + + def __next__(self): + if self._cache: + return self._cache.popleft() + + return next(self._it) + + def _get_slice(self, index): + # Normalize the slice's arguments + step = 1 if (index.step is None) else index.step + if step > 0: + start = 0 if (index.start is None) else index.start + stop = maxsize if (index.stop is None) else index.stop + elif step < 0: + start = -1 if (index.start is None) else index.start + stop = (-maxsize - 1) if (index.stop is None) else index.stop + else: + raise ValueError('slice step cannot be zero') + + # If either the start or stop index is negative, we'll need to cache + # the rest of the iterable in order to slice from the right side. + if (start < 0) or (stop < 0): + self._cache.extend(self._it) + # Otherwise we'll need to find the rightmost index and cache to that + # point. + else: + n = min(max(start, stop) + 1, maxsize) + cache_len = len(self._cache) + if n >= cache_len: + self._cache.extend(islice(self._it, n - cache_len)) + + return list(self._cache)[index] + + def __getitem__(self, index): + if isinstance(index, slice): + return self._get_slice(index) + + cache_len = len(self._cache) + if index < 0: + self._cache.extend(self._it) + elif index >= cache_len: + self._cache.extend(islice(self._it, index + 1 - cache_len)) + + return self._cache[index] + + +def consumer(func): + """Decorator that automatically advances a PEP-342-style "reverse iterator" + to its first yield point so you don't have to call ``next()`` on it + manually. + + >>> @consumer + ... def tally(): + ... i = 0 + ... while True: + ... print('Thing number %s is %s.' % (i, (yield))) + ... i += 1 + ... + >>> t = tally() + >>> t.send('red') + Thing number 0 is red. + >>> t.send('fish') + Thing number 1 is fish. + + Without the decorator, you would have to call ``next(t)`` before + ``t.send()`` could be used. + + """ + + @wraps(func) + def wrapper(*args, **kwargs): + gen = func(*args, **kwargs) + next(gen) + return gen + + return wrapper + + +def ilen(iterable): + """Return the number of items in *iterable*. + + >>> ilen(x for x in range(1000000) if x % 3 == 0) + 333334 + + This consumes the iterable, so handle with care. + + """ + # This approach was selected because benchmarks showed it's likely the + # fastest of the known implementations at the time of writing. + # See GitHub tracker: #236, #230. + counter = count() + deque(zip(iterable, counter), maxlen=0) + return next(counter) + + +def iterate(func, start): + """Return ``start``, ``func(start)``, ``func(func(start))``, ... + + >>> from itertools import islice + >>> list(islice(iterate(lambda x: 2*x, 1), 10)) + [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] + + """ + while True: + yield start + start = func(start) + + +def with_iter(context_manager): + """Wrap an iterable in a ``with`` statement, so it closes once exhausted. + + For example, this will close the file when the iterator is exhausted:: + + upper_lines = (line.upper() for line in with_iter(open('foo'))) + + Any context manager which returns an iterable is a candidate for + ``with_iter``. + + """ + with context_manager as iterable: + yield from iterable + + +def one(iterable, too_short=None, too_long=None): + """Return the first item from *iterable*, which is expected to contain only + that item. Raise an exception if *iterable* is empty or has more than one + item. + + :func:`one` is useful for ensuring that an iterable contains only one item. 
+    For example, it can be used to retrieve the result of a database query
+    that is expected to return a single row.
+
+    If *iterable* is empty, ``ValueError`` will be raised. You may specify a
+    different exception with the *too_short* keyword:
+
+    >>> it = []
+    >>> one(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+    ...
+    ValueError: too few items in iterable (expected 1)
+    >>> too_short = IndexError('too few items')
+    >>> one(it, too_short=too_short)  # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+    ...
+    IndexError: too few items
+
+    Similarly, if *iterable* contains more than one item, ``ValueError`` will
+    be raised. You may specify a different exception with the *too_long*
+    keyword:
+
+    >>> it = ['too', 'many']
+    >>> one(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+    ...
+    ValueError: Expected exactly one item in iterable, but got 'too',
+    'many', and perhaps more.
+    >>> too_long = RuntimeError
+    >>> one(it, too_long=too_long)  # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+    ...
+    RuntimeError
+
+    Note that :func:`one` attempts to advance *iterable* twice to ensure there
+    is only one item. See :func:`spy` or :func:`peekable` to check iterable
+    contents less destructively.
+
+    """
+    it = iter(iterable)
+
+    try:
+        first_value = next(it)
+    except StopIteration as e:
+        raise (
+            too_short or ValueError('too few items in iterable (expected 1)')
+        ) from e
+
+    try:
+        second_value = next(it)
+    except StopIteration:
+        pass
+    else:
+        msg = (
+            'Expected exactly one item in iterable, but got {!r}, {!r}, '
+            'and perhaps more.'.format(first_value, second_value)
+        )
+        raise too_long or ValueError(msg)
+
+    return first_value
+
+
+def raise_(exception, *args):
+    raise exception(*args)
+
+
+def strictly_n(iterable, n, too_short=None, too_long=None):
+    """Validate that *iterable* has exactly *n* items and return them if
+    it does. If it has fewer than *n* items, call function *too_short*
+    with the number of items found. If it has more than *n* items, call
+    function *too_long* with ``n + 1``.
+
+    >>> iterable = ['a', 'b', 'c', 'd']
+    >>> n = 4
+    >>> list(strictly_n(iterable, n))
+    ['a', 'b', 'c', 'd']
+
+    By default, *too_short* and *too_long* are functions that raise
+    ``ValueError``.
+
+    >>> list(strictly_n('ab', 3))  # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+    ...
+    ValueError: Too few items in iterable (got 2)
+
+    >>> list(strictly_n('abc', 2))  # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+    ...
+    ValueError: Too many items in iterable (got at least 3)
+
+    You can instead supply functions that do something else.
+    *too_short* will be called with the number of items in *iterable*.
+    *too_long* will be called with `n + 1`.
+
+    >>> def too_short(item_count):
+    ...     raise RuntimeError
+    >>> it = strictly_n('abcd', 6, too_short=too_short)
+    >>> list(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+    ...
+    RuntimeError
+
+    >>> def too_long(item_count):
+    ...     print('The boss is going to hear about this')
+    >>> it = strictly_n('abcdef', 4, too_long=too_long)
+    >>> list(it)
+    The boss is going to hear about this
+    ['a', 'b', 'c', 'd']
+
+    """
+    if too_short is None:
+        too_short = lambda item_count: raise_(
+            ValueError,
+            'Too few items in iterable (got {})'.format(item_count),
+        )
+
+    if too_long is None:
+        too_long = lambda item_count: raise_(
+            ValueError,
+            'Too many items in iterable (got at least {})'.format(item_count),
+        )
+
+    it = iter(iterable)
+    for i in range(n):
+        try:
+            item = next(it)
+        except StopIteration:
+            too_short(i)
+            return
+        else:
+            yield item
+
+    try:
+        next(it)
+    except StopIteration:
+        pass
+    else:
+        too_long(n + 1)
+
+
+def distinct_permutations(iterable, r=None):
+    """Yield successive distinct permutations of the elements in *iterable*.
+
+    >>> sorted(distinct_permutations([1, 0, 1]))
+    [(0, 1, 1), (1, 0, 1), (1, 1, 0)]
+
+    Equivalent to ``set(permutations(iterable))``, except duplicates are not
+    generated and thrown away. For larger input sequences this is much more
+    efficient.
+
+    Duplicate permutations arise when there are duplicated elements in the
+    input iterable. The number of items returned is
+    `n! / (x_1! * x_2! * ... * x_n!)`, where `n` is the total number of
+    items input, and each `x_i` is the count of a distinct item in the input
+    sequence.
+
+    If *r* is given, only the *r*-length permutations are yielded.
+
+    >>> sorted(distinct_permutations([1, 0, 1], r=2))
+    [(0, 1), (1, 0), (1, 1)]
+    >>> sorted(distinct_permutations(range(3), r=2))
+    [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]
+
+    """
+
+    # Algorithm: https://w.wiki/Qai
+    def _full(A):
+        while True:
+            # Yield the permutation we have
+            yield tuple(A)
+
+            # Find the largest index i such that A[i] < A[i + 1]
+            for i in range(size - 2, -1, -1):
+                if A[i] < A[i + 1]:
+                    break
+            # If no such index exists, this permutation is the last one
+            else:
+                return
+
+            # Find the largest index j greater than i such that A[i] < A[j]
+            for j in range(size - 1, i, -1):
+                if A[i] < A[j]:
+                    break
+
+            # Swap the value of A[i] with that of A[j], then reverse the
+            # sequence from A[i + 1] to form the new permutation
+            A[i], A[j] = A[j], A[i]
+            A[i + 1 :] = A[: i - size : -1]  # A[i + 1:][::-1]
+
+    # Algorithm: modified from the above
+    def _partial(A, r):
+        # Split A into the first r items and the remaining items
+        head, tail = A[:r], A[r:]
+        right_head_indexes = range(r - 1, -1, -1)
+        left_tail_indexes = range(len(tail))
+
+        while True:
+            # Yield the permutation we have
+            yield tuple(head)
+
+            # Starting from the right, find the first index of the head with
+            # value smaller than the maximum value of the tail - call it i.
+            pivot = tail[-1]
+            for i in right_head_indexes:
+                if head[i] < pivot:
+                    break
+                pivot = head[i]
+            else:
+                return
+
+            # Starting from the left, find the first value of the tail
+            # with a value greater than head[i] and swap.
+            for j in left_tail_indexes:
+                if tail[j] > head[i]:
+                    head[i], tail[j] = tail[j], head[i]
+                    break
+            # If we didn't find one, start from the right and find the first
+            # index of the head with a value greater than head[i] and swap.
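+            # This fallback always succeeds: when no tail value exceeds
+            # head[i], the pivot that stopped the scan above must have come
+            # from the head itself.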
+ else: + for j in right_head_indexes: + if head[j] > head[i]: + head[i], head[j] = head[j], head[i] + break + + # Reverse head[i + 1:] and swap it with tail[:r - (i + 1)] + tail += head[: i - r : -1] # head[i + 1:][::-1] + i += 1 + head[i:], tail[:] = tail[: r - i], tail[r - i :] + + items = sorted(iterable) + + size = len(items) + if r is None: + r = size + + if 0 < r <= size: + return _full(items) if (r == size) else _partial(items, r) + + return iter(() if r else ((),)) + + +def intersperse(e, iterable, n=1): + """Intersperse filler element *e* among the items in *iterable*, leaving + *n* items between each filler element. + + >>> list(intersperse('!', [1, 2, 3, 4, 5])) + [1, '!', 2, '!', 3, '!', 4, '!', 5] + + >>> list(intersperse(None, [1, 2, 3, 4, 5], n=2)) + [1, 2, None, 3, 4, None, 5] + + """ + if n == 0: + raise ValueError('n must be > 0') + elif n == 1: + # interleave(repeat(e), iterable) -> e, x_0, e, x_1, e, x_2... + # islice(..., 1, None) -> x_0, e, x_1, e, x_2... + return islice(interleave(repeat(e), iterable), 1, None) + else: + # interleave(filler, chunks) -> [e], [x_0, x_1], [e], [x_2, x_3]... + # islice(..., 1, None) -> [x_0, x_1], [e], [x_2, x_3]... + # flatten(...) -> x_0, x_1, e, x_2, x_3... + filler = repeat([e]) + chunks = chunked(iterable, n) + return flatten(islice(interleave(filler, chunks), 1, None)) + + +def unique_to_each(*iterables): + """Return the elements from each of the input iterables that aren't in the + other input iterables. + + For example, suppose you have a set of packages, each with a set of + dependencies:: + + {'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}} + + If you remove one package, which dependencies can also be removed? + + If ``pkg_1`` is removed, then ``A`` is no longer necessary - it is not + associated with ``pkg_2`` or ``pkg_3``. Similarly, ``C`` is only needed for + ``pkg_2``, and ``D`` is only needed for ``pkg_3``:: + + >>> unique_to_each({'A', 'B'}, {'B', 'C'}, {'B', 'D'}) + [['A'], ['C'], ['D']] + + If there are duplicates in one input iterable that aren't in the others + they will be duplicated in the output. Input order is preserved:: + + >>> unique_to_each("mississippi", "missouri") + [['p', 'p'], ['o', 'u', 'r']] + + It is assumed that the elements of each iterable are hashable. + + """ + pool = [list(it) for it in iterables] + counts = Counter(chain.from_iterable(map(set, pool))) + uniques = {element for element in counts if counts[element] == 1} + return [list(filter(uniques.__contains__, it)) for it in pool] + + +def windowed(seq, n, fillvalue=None, step=1): + """Return a sliding window of width *n* over the given iterable. 
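+
+    Each window is yielded as a tuple: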
+ + >>> all_windows = windowed([1, 2, 3, 4, 5], 3) + >>> list(all_windows) + [(1, 2, 3), (2, 3, 4), (3, 4, 5)] + + When the window is larger than the iterable, *fillvalue* is used in place + of missing values: + + >>> list(windowed([1, 2, 3], 4)) + [(1, 2, 3, None)] + + Each window will advance in increments of *step*: + + >>> list(windowed([1, 2, 3, 4, 5, 6], 3, fillvalue='!', step=2)) + [(1, 2, 3), (3, 4, 5), (5, 6, '!')] + + To slide into the iterable's items, use :func:`chain` to add filler items + to the left: + + >>> iterable = [1, 2, 3, 4] + >>> n = 3 + >>> padding = [None] * (n - 1) + >>> list(windowed(chain(padding, iterable), 3)) + [(None, None, 1), (None, 1, 2), (1, 2, 3), (2, 3, 4)] + """ + if n < 0: + raise ValueError('n must be >= 0') + if n == 0: + yield tuple() + return + if step < 1: + raise ValueError('step must be >= 1') + + window = deque(maxlen=n) + i = n + for _ in map(window.append, seq): + i -= 1 + if not i: + i = step + yield tuple(window) + + size = len(window) + if size == 0: + return + elif size < n: + yield tuple(chain(window, repeat(fillvalue, n - size))) + elif 0 < i < min(step, n): + window += (fillvalue,) * i + yield tuple(window) + + +def substrings(iterable): + """Yield all of the substrings of *iterable*. + + >>> [''.join(s) for s in substrings('more')] + ['m', 'o', 'r', 'e', 'mo', 'or', 're', 'mor', 'ore', 'more'] + + Note that non-string iterables can also be subdivided. + + >>> list(substrings([0, 1, 2])) + [(0,), (1,), (2,), (0, 1), (1, 2), (0, 1, 2)] + + """ + # The length-1 substrings + seq = [] + for item in iter(iterable): + seq.append(item) + yield (item,) + seq = tuple(seq) + item_count = len(seq) + + # And the rest + for n in range(2, item_count + 1): + for i in range(item_count - n + 1): + yield seq[i : i + n] + + +def substrings_indexes(seq, reverse=False): + """Yield all substrings and their positions in *seq* + + The items yielded will be a tuple of the form ``(substr, i, j)``, where + ``substr == seq[i:j]``. + + This function only works for iterables that support slicing, such as + ``str`` objects. + + >>> for item in substrings_indexes('more'): + ... print(item) + ('m', 0, 1) + ('o', 1, 2) + ('r', 2, 3) + ('e', 3, 4) + ('mo', 0, 2) + ('or', 1, 3) + ('re', 2, 4) + ('mor', 0, 3) + ('ore', 1, 4) + ('more', 0, 4) + + Set *reverse* to ``True`` to yield the same items in the opposite order. + + + """ + r = range(1, len(seq) + 1) + if reverse: + r = reversed(r) + return ( + (seq[i : i + L], i, i + L) for L in r for i in range(len(seq) - L + 1) + ) + + +class bucket: + """Wrap *iterable* and return an object that buckets it iterable into + child iterables based on a *key* function. + + >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3'] + >>> s = bucket(iterable, key=lambda x: x[0]) # Bucket by 1st character + >>> sorted(list(s)) # Get the keys + ['a', 'b', 'c'] + >>> a_iterable = s['a'] + >>> next(a_iterable) + 'a1' + >>> next(a_iterable) + 'a2' + >>> list(s['b']) + ['b1', 'b2', 'b3'] + + The original iterable will be advanced and its items will be cached until + they are used by the child iterables. This may require significant storage. + + By default, attempting to select a bucket to which no items belong will + exhaust the iterable and cache all values. + If you specify a *validator* function, selected buckets will instead be + checked against it. 
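+    This makes it possible to bucket an infinite iterable, as the example
+    below illustrates: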
+ + >>> from itertools import count + >>> it = count(1, 2) # Infinite sequence of odd numbers + >>> key = lambda x: x % 10 # Bucket by last digit + >>> validator = lambda x: x in {1, 3, 5, 7, 9} # Odd digits only + >>> s = bucket(it, key=key, validator=validator) + >>> 2 in s + False + >>> list(s[2]) + [] + + """ + + def __init__(self, iterable, key, validator=None): + self._it = iter(iterable) + self._key = key + self._cache = defaultdict(deque) + self._validator = validator or (lambda x: True) + + def __contains__(self, value): + if not self._validator(value): + return False + + try: + item = next(self[value]) + except StopIteration: + return False + else: + self._cache[value].appendleft(item) + + return True + + def _get_values(self, value): + """ + Helper to yield items from the parent iterator that match *value*. + Items that don't match are stored in the local cache as they + are encountered. + """ + while True: + # If we've cached some items that match the target value, emit + # the first one and evict it from the cache. + if self._cache[value]: + yield self._cache[value].popleft() + # Otherwise we need to advance the parent iterator to search for + # a matching item, caching the rest. + else: + while True: + try: + item = next(self._it) + except StopIteration: + return + item_value = self._key(item) + if item_value == value: + yield item + break + elif self._validator(item_value): + self._cache[item_value].append(item) + + def __iter__(self): + for item in self._it: + item_value = self._key(item) + if self._validator(item_value): + self._cache[item_value].append(item) + + yield from self._cache.keys() + + def __getitem__(self, value): + if not self._validator(value): + return iter(()) + + return self._get_values(value) + + +def spy(iterable, n=1): + """Return a 2-tuple with a list containing the first *n* elements of + *iterable*, and an iterator with the same items as *iterable*. + This allows you to "look ahead" at the items in the iterable without + advancing it. + + There is one item in the list by default: + + >>> iterable = 'abcdefg' + >>> head, iterable = spy(iterable) + >>> head + ['a'] + >>> list(iterable) + ['a', 'b', 'c', 'd', 'e', 'f', 'g'] + + You may use unpacking to retrieve items instead of lists: + + >>> (head,), iterable = spy('abcdefg') + >>> head + 'a' + >>> (first, second), iterable = spy('abcdefg', 2) + >>> first + 'a' + >>> second + 'b' + + The number of items requested can be larger than the number of items in + the iterable: + + >>> iterable = [1, 2, 3, 4, 5] + >>> head, iterable = spy(iterable, 10) + >>> head + [1, 2, 3, 4, 5] + >>> list(iterable) + [1, 2, 3, 4, 5] + + """ + it = iter(iterable) + head = take(n, it) + + return head.copy(), chain(head, it) + + +def interleave(*iterables): + """Return a new iterable yielding from each iterable in turn, + until the shortest is exhausted. + + >>> list(interleave([1, 2, 3], [4, 5], [6, 7, 8])) + [1, 4, 6, 2, 5, 7] + + For a version that doesn't terminate after the shortest iterable is + exhausted, see :func:`interleave_longest`. + + """ + return chain.from_iterable(zip(*iterables)) + + +def interleave_longest(*iterables): + """Return a new iterable yielding from each iterable in turn, + skipping any that are exhausted. + + >>> list(interleave_longest([1, 2, 3], [4, 5], [6, 7, 8])) + [1, 4, 6, 2, 5, 7, 3, 8] + + This function produces the same output as :func:`roundrobin`, but may + perform better for some inputs (in particular when the number of iterables + is large). 
+ + """ + i = chain.from_iterable(zip_longest(*iterables, fillvalue=_marker)) + return (x for x in i if x is not _marker) + + +def interleave_evenly(iterables, lengths=None): + """ + Interleave multiple iterables so that their elements are evenly distributed + throughout the output sequence. + + >>> iterables = [1, 2, 3, 4, 5], ['a', 'b'] + >>> list(interleave_evenly(iterables)) + [1, 2, 'a', 3, 4, 'b', 5] + + >>> iterables = [[1, 2, 3], [4, 5], [6, 7, 8]] + >>> list(interleave_evenly(iterables)) + [1, 6, 4, 2, 7, 3, 8, 5] + + This function requires iterables of known length. Iterables without + ``__len__()`` can be used by manually specifying lengths with *lengths*: + + >>> from itertools import combinations, repeat + >>> iterables = [combinations(range(4), 2), ['a', 'b', 'c']] + >>> lengths = [4 * (4 - 1) // 2, 3] + >>> list(interleave_evenly(iterables, lengths=lengths)) + [(0, 1), (0, 2), 'a', (0, 3), (1, 2), 'b', (1, 3), (2, 3), 'c'] + + Based on Bresenham's algorithm. + """ + if lengths is None: + try: + lengths = [len(it) for it in iterables] + except TypeError: + raise ValueError( + 'Iterable lengths could not be determined automatically. ' + 'Specify them with the lengths keyword.' + ) + elif len(iterables) != len(lengths): + raise ValueError('Mismatching number of iterables and lengths.') + + dims = len(lengths) + + # sort iterables by length, descending + lengths_permute = sorted( + range(dims), key=lambda i: lengths[i], reverse=True + ) + lengths_desc = [lengths[i] for i in lengths_permute] + iters_desc = [iter(iterables[i]) for i in lengths_permute] + + # the longest iterable is the primary one (Bresenham: the longest + # distance along an axis) + delta_primary, deltas_secondary = lengths_desc[0], lengths_desc[1:] + iter_primary, iters_secondary = iters_desc[0], iters_desc[1:] + errors = [delta_primary // dims] * len(deltas_secondary) + + to_yield = sum(lengths) + while to_yield: + yield next(iter_primary) + to_yield -= 1 + # update errors for each secondary iterable + errors = [e - delta for e, delta in zip(errors, deltas_secondary)] + + # those iterables for which the error is negative are yielded + # ("diagonal step" in Bresenham) + for i, e in enumerate(errors): + if e < 0: + yield next(iters_secondary[i]) + to_yield -= 1 + errors[i] += delta_primary + + +def collapse(iterable, base_type=None, levels=None): + """Flatten an iterable with multiple levels of nesting (e.g., a list of + lists of tuples) into non-iterable types. + + >>> iterable = [(1, 2), ([3, 4], [[5], [6]])] + >>> list(collapse(iterable)) + [1, 2, 3, 4, 5, 6] + + Binary and text strings are not considered iterable and + will not be collapsed. 
+ + To avoid collapsing other types, specify *base_type*: + + >>> iterable = ['ab', ('cd', 'ef'), ['gh', 'ij']] + >>> list(collapse(iterable, base_type=tuple)) + ['ab', ('cd', 'ef'), 'gh', 'ij'] + + Specify *levels* to stop flattening after a certain level: + + >>> iterable = [('a', ['b']), ('c', ['d'])] + >>> list(collapse(iterable)) # Fully flattened + ['a', 'b', 'c', 'd'] + >>> list(collapse(iterable, levels=1)) # Only one level flattened + ['a', ['b'], 'c', ['d']] + + """ + + def walk(node, level): + if ( + ((levels is not None) and (level > levels)) + or isinstance(node, (str, bytes)) + or ((base_type is not None) and isinstance(node, base_type)) + ): + yield node + return + + try: + tree = iter(node) + except TypeError: + yield node + return + else: + for child in tree: + yield from walk(child, level + 1) + + yield from walk(iterable, 0) + + +def side_effect(func, iterable, chunk_size=None, before=None, after=None): + """Invoke *func* on each item in *iterable* (or on each *chunk_size* group + of items) before yielding the item. + + `func` must be a function that takes a single argument. Its return value + will be discarded. + + *before* and *after* are optional functions that take no arguments. They + will be executed before iteration starts and after it ends, respectively. + + `side_effect` can be used for logging, updating progress bars, or anything + that is not functionally "pure." + + Emitting a status message: + + >>> from more_itertools import consume + >>> func = lambda item: print('Received {}'.format(item)) + >>> consume(side_effect(func, range(2))) + Received 0 + Received 1 + + Operating on chunks of items: + + >>> pair_sums = [] + >>> func = lambda chunk: pair_sums.append(sum(chunk)) + >>> list(side_effect(func, [0, 1, 2, 3, 4, 5], 2)) + [0, 1, 2, 3, 4, 5] + >>> list(pair_sums) + [1, 5, 9] + + Writing to a file-like object: + + >>> from io import StringIO + >>> from more_itertools import consume + >>> f = StringIO() + >>> func = lambda x: print(x, file=f) + >>> before = lambda: print(u'HEADER', file=f) + >>> after = f.close + >>> it = [u'a', u'b', u'c'] + >>> consume(side_effect(func, it, before=before, after=after)) + >>> f.closed + True + + """ + try: + if before is not None: + before() + + if chunk_size is None: + for item in iterable: + func(item) + yield item + else: + for chunk in chunked(iterable, chunk_size): + func(chunk) + yield from chunk + finally: + if after is not None: + after() + + +def sliced(seq, n, strict=False): + """Yield slices of length *n* from the sequence *seq*. + + >>> list(sliced((1, 2, 3, 4, 5, 6), 3)) + [(1, 2, 3), (4, 5, 6)] + + By the default, the last yielded slice will have fewer than *n* elements + if the length of *seq* is not divisible by *n*: + + >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3)) + [(1, 2, 3), (4, 5, 6), (7, 8)] + + If the length of *seq* is not divisible by *n* and *strict* is + ``True``, then ``ValueError`` will be raised before the last + slice is yielded. + + This function will only work for iterables that support slicing. + For non-sliceable iterables, see :func:`chunked`. 
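+
+    For example, with *strict* set and a sequence whose length is not a
+    multiple of *n* (an illustrative failure case):
+
+    >>> list(sliced((1, 2, 3, 4), 3, strict=True))
+    Traceback (most recent call last):
+    ...
+    ValueError: seq is not divisible by n.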
+ + """ + iterator = takewhile(len, (seq[i : i + n] for i in count(0, n))) + if strict: + + def ret(): + for _slice in iterator: + if len(_slice) != n: + raise ValueError("seq is not divisible by n.") + yield _slice + + return iter(ret()) + else: + return iterator + + +def split_at(iterable, pred, maxsplit=-1, keep_separator=False): + """Yield lists of items from *iterable*, where each list is delimited by + an item where callable *pred* returns ``True``. + + >>> list(split_at('abcdcba', lambda x: x == 'b')) + [['a'], ['c', 'd', 'c'], ['a']] + + >>> list(split_at(range(10), lambda n: n % 2 == 1)) + [[0], [2], [4], [6], [8], []] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_at(range(10), lambda n: n % 2 == 1, maxsplit=2)) + [[0], [2], [4, 5, 6, 7, 8, 9]] + + By default, the delimiting items are not included in the output. + To include them, set *keep_separator* to ``True``. + + >>> list(split_at('abcdcba', lambda x: x == 'b', keep_separator=True)) + [['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']] + + """ + if maxsplit == 0: + yield list(iterable) + return + + buf = [] + it = iter(iterable) + for item in it: + if pred(item): + yield buf + if keep_separator: + yield [item] + if maxsplit == 1: + yield list(it) + return + buf = [] + maxsplit -= 1 + else: + buf.append(item) + yield buf + + +def split_before(iterable, pred, maxsplit=-1): + """Yield lists of items from *iterable*, where each list ends just before + an item for which callable *pred* returns ``True``: + + >>> list(split_before('OneTwo', lambda s: s.isupper())) + [['O', 'n', 'e'], ['T', 'w', 'o']] + + >>> list(split_before(range(10), lambda n: n % 3 == 0)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_before(range(10), lambda n: n % 3 == 0, maxsplit=2)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]] + """ + if maxsplit == 0: + yield list(iterable) + return + + buf = [] + it = iter(iterable) + for item in it: + if pred(item) and buf: + yield buf + if maxsplit == 1: + yield [item] + list(it) + return + buf = [] + maxsplit -= 1 + buf.append(item) + if buf: + yield buf + + +def split_after(iterable, pred, maxsplit=-1): + """Yield lists of items from *iterable*, where each list ends with an + item where callable *pred* returns ``True``: + + >>> list(split_after('one1two2', lambda s: s.isdigit())) + [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']] + + >>> list(split_after(range(10), lambda n: n % 3 == 0)) + [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_after(range(10), lambda n: n % 3 == 0, maxsplit=2)) + [[0], [1, 2, 3], [4, 5, 6, 7, 8, 9]] + + """ + if maxsplit == 0: + yield list(iterable) + return + + buf = [] + it = iter(iterable) + for item in it: + buf.append(item) + if pred(item) and buf: + yield buf + if maxsplit == 1: + buf = list(it) + if buf: + yield buf + return + buf = [] + maxsplit -= 1 + if buf: + yield buf + + +def split_when(iterable, pred, maxsplit=-1): + """Split *iterable* into pieces based on the output of *pred*. + *pred* should be a function that takes successive pairs of items and + returns ``True`` if the iterable should be split in between them. 
+
+    For example, to find runs of increasing numbers, split the iterable when
+    element ``i`` is larger than element ``i + 1``:
+
+    >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], lambda x, y: x > y))
+    [[1, 2, 3, 3], [2, 5], [2, 4], [2]]
+
+    At most *maxsplit* splits are done. If *maxsplit* is not specified or -1,
+    then there is no limit on the number of splits:
+
+    >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2],
+    ...                 lambda x, y: x > y, maxsplit=2))
+    [[1, 2, 3, 3], [2, 5], [2, 4, 2]]
+
+    """
+    if maxsplit == 0:
+        yield list(iterable)
+        return
+
+    it = iter(iterable)
+    try:
+        cur_item = next(it)
+    except StopIteration:
+        return
+
+    buf = [cur_item]
+    for next_item in it:
+        if pred(cur_item, next_item):
+            yield buf
+            if maxsplit == 1:
+                yield [next_item] + list(it)
+                return
+            buf = []
+            maxsplit -= 1
+
+        buf.append(next_item)
+        cur_item = next_item
+
+    yield buf
+
+
+def split_into(iterable, sizes):
+    """Yield a list of sequential items from *iterable* of length 'n' for each
+    integer 'n' in *sizes*.
+
+    >>> list(split_into([1,2,3,4,5,6], [1,2,3]))
+    [[1], [2, 3], [4, 5, 6]]
+
+    If the sum of *sizes* is smaller than the length of *iterable*, then the
+    remaining items of *iterable* will not be returned.
+
+    >>> list(split_into([1,2,3,4,5,6], [2,3]))
+    [[1, 2], [3, 4, 5]]
+
+    If the sum of *sizes* is larger than the length of *iterable*, fewer items
+    will be returned in the iteration that overruns *iterable* and further
+    lists will be empty:
+
+    >>> list(split_into([1,2,3,4], [1,2,3,4]))
+    [[1], [2, 3], [4], []]
+
+    When a ``None`` object is encountered in *sizes*, the returned list will
+    contain items up to the end of *iterable* the same way that
+    :func:`itertools.islice` does:
+
+    >>> list(split_into([1,2,3,4,5,6,7,8,9,0], [2,3,None]))
+    [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]]
+
+    :func:`split_into` can be useful for grouping a series of items where the
+    sizes of the groups are not uniform. An example would be where in a row
+    from a table, multiple columns represent elements of the same feature
+    (e.g. a point represented by x,y,z) but the format is not the same for
+    all columns.
+    """
+    # convert the iterable argument into an iterator so its contents can
+    # be consumed by islice in case it is a generator
+    it = iter(iterable)
+
+    for size in sizes:
+        if size is None:
+            yield list(it)
+            return
+        else:
+            yield list(islice(it, size))
+
+
+def padded(iterable, fillvalue=None, n=None, next_multiple=False):
+    """Yield the elements from *iterable*, followed by *fillvalue*, such that
+    at least *n* items are emitted.
+
+    >>> list(padded([1, 2, 3], '?', 5))
+    [1, 2, 3, '?', '?']
+
+    If *next_multiple* is ``True``, *fillvalue* will be emitted until the
+    number of items emitted is a multiple of *n*::
+
+        >>> list(padded([1, 2, 3, 4], n=3, next_multiple=True))
+        [1, 2, 3, 4, None, None]
+
+    If *n* is ``None``, *fillvalue* will be emitted indefinitely.
+
+    """
+    it = iter(iterable)
+    if n is None:
+        yield from chain(it, repeat(fillvalue))
+    elif n < 1:
+        raise ValueError('n must be at least 1')
+    else:
+        item_count = 0
+        for item in it:
+            yield item
+            item_count += 1
+
+        remaining = (n - item_count) % n if next_multiple else n - item_count
+        for _ in range(remaining):
+            yield fillvalue
+
+
+def repeat_each(iterable, n=2):
+    """Repeat each element in *iterable* *n* times.
+ + >>> list(repeat_each('ABC', 3)) + ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C'] + """ + return chain.from_iterable(map(repeat, iterable, repeat(n))) + + +def repeat_last(iterable, default=None): + """After the *iterable* is exhausted, keep yielding its last element. + + >>> list(islice(repeat_last(range(3)), 5)) + [0, 1, 2, 2, 2] + + If the iterable is empty, yield *default* forever:: + + >>> list(islice(repeat_last(range(0), 42), 5)) + [42, 42, 42, 42, 42] + + """ + item = _marker + for item in iterable: + yield item + final = default if item is _marker else item + yield from repeat(final) + + +def distribute(n, iterable): + """Distribute the items from *iterable* among *n* smaller iterables. + + >>> group_1, group_2 = distribute(2, [1, 2, 3, 4, 5, 6]) + >>> list(group_1) + [1, 3, 5] + >>> list(group_2) + [2, 4, 6] + + If the length of *iterable* is not evenly divisible by *n*, then the + length of the returned iterables will not be identical: + + >>> children = distribute(3, [1, 2, 3, 4, 5, 6, 7]) + >>> [list(c) for c in children] + [[1, 4, 7], [2, 5], [3, 6]] + + If the length of *iterable* is smaller than *n*, then the last returned + iterables will be empty: + + >>> children = distribute(5, [1, 2, 3]) + >>> [list(c) for c in children] + [[1], [2], [3], [], []] + + This function uses :func:`itertools.tee` and may require significant + storage. If you need the order items in the smaller iterables to match the + original iterable, see :func:`divide`. + + """ + if n < 1: + raise ValueError('n must be at least 1') + + children = tee(iterable, n) + return [islice(it, index, None, n) for index, it in enumerate(children)] + + +def stagger(iterable, offsets=(-1, 0, 1), longest=False, fillvalue=None): + """Yield tuples whose elements are offset from *iterable*. + The amount by which the `i`-th item in each tuple is offset is given by + the `i`-th item in *offsets*. + + >>> list(stagger([0, 1, 2, 3])) + [(None, 0, 1), (0, 1, 2), (1, 2, 3)] + >>> list(stagger(range(8), offsets=(0, 2, 4))) + [(0, 2, 4), (1, 3, 5), (2, 4, 6), (3, 5, 7)] + + By default, the sequence will end when the final element of a tuple is the + last item in the iterable. To continue until the first element of a tuple + is the last item in the iterable, set *longest* to ``True``:: + + >>> list(stagger([0, 1, 2, 3], longest=True)) + [(None, 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)] + + By default, ``None`` will be used to replace offsets beyond the end of the + sequence. Specify *fillvalue* to use some other value. + + """ + children = tee(iterable, len(offsets)) + + return zip_offset( + *children, offsets=offsets, longest=longest, fillvalue=fillvalue + ) + + +def zip_equal(*iterables): + """``zip`` the input *iterables* together, but raise + ``UnequalIterablesError`` if they aren't all the same length. + + >>> it_1 = range(3) + >>> it_2 = iter('abc') + >>> list(zip_equal(it_1, it_2)) + [(0, 'a'), (1, 'b'), (2, 'c')] + + >>> it_1 = range(3) + >>> it_2 = iter('abcd') + >>> list(zip_equal(it_1, it_2)) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + more_itertools.more.UnequalIterablesError: Iterables have different + lengths + + """ + if hexversion >= 0x30A00A6: + warnings.warn( + ( + 'zip_equal will be removed in a future version of ' + 'more-itertools. Use the builtin zip function with ' + 'strict=True instead.' 
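+                # This warning is emitted only on Python >= 3.10.0a6, where
+                # zip() accepts strict=True.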
+ ), + DeprecationWarning, + ) + + return _zip_equal(*iterables) + + +def zip_offset(*iterables, offsets, longest=False, fillvalue=None): + """``zip`` the input *iterables* together, but offset the `i`-th iterable + by the `i`-th item in *offsets*. + + >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1))) + [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e')] + + This can be used as a lightweight alternative to SciPy or pandas to analyze + data sets in which some series have a lead or lag relationship. + + By default, the sequence will end when the shortest iterable is exhausted. + To continue until the longest iterable is exhausted, set *longest* to + ``True``. + + >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1), longest=True)) + [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e'), (None, 'f')] + + By default, ``None`` will be used to replace offsets beyond the end of the + sequence. Specify *fillvalue* to use some other value. + + """ + if len(iterables) != len(offsets): + raise ValueError("Number of iterables and offsets didn't match") + + staggered = [] + for it, n in zip(iterables, offsets): + if n < 0: + staggered.append(chain(repeat(fillvalue, -n), it)) + elif n > 0: + staggered.append(islice(it, n, None)) + else: + staggered.append(it) + + if longest: + return zip_longest(*staggered, fillvalue=fillvalue) + + return zip(*staggered) + + +def sort_together(iterables, key_list=(0,), key=None, reverse=False): + """Return the input iterables sorted together, with *key_list* as the + priority for sorting. All iterables are trimmed to the length of the + shortest one. + + This can be used like the sorting function in a spreadsheet. If each + iterable represents a column of data, the key list determines which + columns are used for sorting. + + By default, all iterables are sorted using the ``0``-th iterable:: + + >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')] + >>> sort_together(iterables) + [(1, 2, 3, 4), ('d', 'c', 'b', 'a')] + + Set a different key list to sort according to another iterable. + Specifying multiple keys dictates how ties are broken:: + + >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')] + >>> sort_together(iterables, key_list=(1, 2)) + [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')] + + To sort by a function of the elements of the iterable, pass a *key* + function. Its arguments are the elements of the iterables corresponding to + the key list:: + + >>> names = ('a', 'b', 'c') + >>> lengths = (1, 2, 3) + >>> widths = (5, 2, 1) + >>> def area(length, width): + ... return length * width + >>> sort_together([names, lengths, widths], key_list=(1, 2), key=area) + [('c', 'b', 'a'), (3, 2, 1), (1, 2, 5)] + + Set *reverse* to ``True`` to sort in descending order. 
+ + >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True) + [(3, 2, 1), ('a', 'b', 'c')] + + """ + if key is None: + # if there is no key function, the key argument to sorted is an + # itemgetter + key_argument = itemgetter(*key_list) + else: + # if there is a key function, call it with the items at the offsets + # specified by the key function as arguments + key_list = list(key_list) + if len(key_list) == 1: + # if key_list contains a single item, pass the item at that offset + # as the only argument to the key function + key_offset = key_list[0] + key_argument = lambda zipped_items: key(zipped_items[key_offset]) + else: + # if key_list contains multiple items, use itemgetter to return a + # tuple of items, which we pass as *args to the key function + get_key_items = itemgetter(*key_list) + key_argument = lambda zipped_items: key( + *get_key_items(zipped_items) + ) + + return list( + zip(*sorted(zip(*iterables), key=key_argument, reverse=reverse)) + ) + + +def unzip(iterable): + """The inverse of :func:`zip`, this function disaggregates the elements + of the zipped *iterable*. + + The ``i``-th iterable contains the ``i``-th element from each element + of the zipped iterable. The first element is used to determine the + length of the remaining elements. + + >>> iterable = [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + >>> letters, numbers = unzip(iterable) + >>> list(letters) + ['a', 'b', 'c', 'd'] + >>> list(numbers) + [1, 2, 3, 4] + + This is similar to using ``zip(*iterable)``, but it avoids reading + *iterable* into memory. Note, however, that this function uses + :func:`itertools.tee` and thus may require significant storage. + + """ + head, iterable = spy(iter(iterable)) + if not head: + # empty iterable, e.g. zip([], [], []) + return () + # spy returns a one-length iterable as head + head = head[0] + iterables = tee(iterable, len(head)) + + def itemgetter(i): + def getter(obj): + try: + return obj[i] + except IndexError: + # basically if we have an iterable like + # iter([(1, 2, 3), (4, 5), (6,)]) + # the second unzipped iterable would fail at the third tuple + # since it would try to access tup[1] + # same with the third unzipped iterable and the second tuple + # to support these "improperly zipped" iterables, + # we create a custom itemgetter + # which just stops the unzipped iterables + # at first length mismatch + raise StopIteration + + return getter + + return tuple(map(itemgetter(i), it) for i, it in enumerate(iterables)) + + +def divide(n, iterable): + """Divide the elements from *iterable* into *n* parts, maintaining + order. + + >>> group_1, group_2 = divide(2, [1, 2, 3, 4, 5, 6]) + >>> list(group_1) + [1, 2, 3] + >>> list(group_2) + [4, 5, 6] + + If the length of *iterable* is not evenly divisible by *n*, then the + length of the returned iterables will not be identical: + + >>> children = divide(3, [1, 2, 3, 4, 5, 6, 7]) + >>> [list(c) for c in children] + [[1, 2, 3], [4, 5], [6, 7]] + + If the length of the iterable is smaller than n, then the last returned + iterables will be empty: + + >>> children = divide(5, [1, 2, 3]) + >>> [list(c) for c in children] + [[1], [2], [3], [], []] + + This function will exhaust the iterable before returning and may require + significant storage. If order is not important, see :func:`distribute`, + which does not first pull the iterable into memory. 
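+
+    If the input is already a sequence (a string, for instance), it is sliced
+    directly rather than copied into a tuple first:
+
+    >>> [list(c) for c in divide(2, 'abcdef')]
+    [['a', 'b', 'c'], ['d', 'e', 'f']]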
+ + """ + if n < 1: + raise ValueError('n must be at least 1') + + try: + iterable[:0] + except TypeError: + seq = tuple(iterable) + else: + seq = iterable + + q, r = divmod(len(seq), n) + + ret = [] + stop = 0 + for i in range(1, n + 1): + start = stop + stop += q + 1 if i <= r else q + ret.append(iter(seq[start:stop])) + + return ret + + +def always_iterable(obj, base_type=(str, bytes)): + """If *obj* is iterable, return an iterator over its items:: + + >>> obj = (1, 2, 3) + >>> list(always_iterable(obj)) + [1, 2, 3] + + If *obj* is not iterable, return a one-item iterable containing *obj*:: + + >>> obj = 1 + >>> list(always_iterable(obj)) + [1] + + If *obj* is ``None``, return an empty iterable: + + >>> obj = None + >>> list(always_iterable(None)) + [] + + By default, binary and text strings are not considered iterable:: + + >>> obj = 'foo' + >>> list(always_iterable(obj)) + ['foo'] + + If *base_type* is set, objects for which ``isinstance(obj, base_type)`` + returns ``True`` won't be considered iterable. + + >>> obj = {'a': 1} + >>> list(always_iterable(obj)) # Iterate over the dict's keys + ['a'] + >>> list(always_iterable(obj, base_type=dict)) # Treat dicts as a unit + [{'a': 1}] + + Set *base_type* to ``None`` to avoid any special handling and treat objects + Python considers iterable as iterable: + + >>> obj = 'foo' + >>> list(always_iterable(obj, base_type=None)) + ['f', 'o', 'o'] + """ + if obj is None: + return iter(()) + + if (base_type is not None) and isinstance(obj, base_type): + return iter((obj,)) + + try: + return iter(obj) + except TypeError: + return iter((obj,)) + + +def adjacent(predicate, iterable, distance=1): + """Return an iterable over `(bool, item)` tuples where the `item` is + drawn from *iterable* and the `bool` indicates whether + that item satisfies the *predicate* or is adjacent to an item that does. + + For example, to find whether items are adjacent to a ``3``:: + + >>> list(adjacent(lambda x: x == 3, range(6))) + [(False, 0), (False, 1), (True, 2), (True, 3), (True, 4), (False, 5)] + + Set *distance* to change what counts as adjacent. For example, to find + whether items are two places away from a ``3``: + + >>> list(adjacent(lambda x: x == 3, range(6), distance=2)) + [(False, 0), (True, 1), (True, 2), (True, 3), (True, 4), (True, 5)] + + This is useful for contextualizing the results of a search function. + For example, a code comparison tool might want to identify lines that + have changed, but also surrounding lines to give the viewer of the diff + context. + + The predicate function will only be called once for each item in the + iterable. + + See also :func:`groupby_transform`, which can be used with this function + to group ranges of items with the same `bool` value. + + """ + # Allow distance=0 mainly for testing that it reproduces results with map() + if distance < 0: + raise ValueError('distance must be at least 0') + + i1, i2 = tee(iterable) + padding = [False] * distance + selected = chain(padding, map(predicate, i1), padding) + adjacent_to_selected = map(any, windowed(selected, 2 * distance + 1)) + return zip(adjacent_to_selected, i2) + + +def groupby_transform(iterable, keyfunc=None, valuefunc=None, reducefunc=None): + """An extension of :func:`itertools.groupby` that can apply transformations + to the grouped data. 
+ + * *keyfunc* is a function computing a key value for each item in *iterable* + * *valuefunc* is a function that transforms the individual items from + *iterable* after grouping + * *reducefunc* is a function that transforms each group of items + + >>> iterable = 'aAAbBBcCC' + >>> keyfunc = lambda k: k.upper() + >>> valuefunc = lambda v: v.lower() + >>> reducefunc = lambda g: ''.join(g) + >>> list(groupby_transform(iterable, keyfunc, valuefunc, reducefunc)) + [('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')] + + Each optional argument defaults to an identity function if not specified. + + :func:`groupby_transform` is useful when grouping elements of an iterable + using a separate iterable as the key. To do this, :func:`zip` the iterables + and pass a *keyfunc* that extracts the first element and a *valuefunc* + that extracts the second element:: + + >>> from operator import itemgetter + >>> keys = [0, 0, 1, 1, 1, 2, 2, 2, 3] + >>> values = 'abcdefghi' + >>> iterable = zip(keys, values) + >>> grouper = groupby_transform(iterable, itemgetter(0), itemgetter(1)) + >>> [(k, ''.join(g)) for k, g in grouper] + [(0, 'ab'), (1, 'cde'), (2, 'fgh'), (3, 'i')] + + Note that the order of items in the iterable is significant. + Only adjacent items are grouped together, so if you don't want any + duplicate groups, you should sort the iterable by the key function. + + """ + ret = groupby(iterable, keyfunc) + if valuefunc: + ret = ((k, map(valuefunc, g)) for k, g in ret) + if reducefunc: + ret = ((k, reducefunc(g)) for k, g in ret) + + return ret + + +class numeric_range(abc.Sequence, abc.Hashable): + """An extension of the built-in ``range()`` function whose arguments can + be any orderable numeric type. + + With only *stop* specified, *start* defaults to ``0`` and *step* + defaults to ``1``. The output items will match the type of *stop*: + + >>> list(numeric_range(3.5)) + [0.0, 1.0, 2.0, 3.0] + + With only *start* and *stop* specified, *step* defaults to ``1``. The + output items will match the type of *start*: + + >>> from decimal import Decimal + >>> start = Decimal('2.1') + >>> stop = Decimal('5.1') + >>> list(numeric_range(start, stop)) + [Decimal('2.1'), Decimal('3.1'), Decimal('4.1')] + + With *start*, *stop*, and *step* specified the output items will match + the type of ``start + step``: + + >>> from fractions import Fraction + >>> start = Fraction(1, 2) # Start at 1/2 + >>> stop = Fraction(5, 2) # End at 5/2 + >>> step = Fraction(1, 2) # Count by 1/2 + >>> list(numeric_range(start, stop, step)) + [Fraction(1, 2), Fraction(1, 1), Fraction(3, 2), Fraction(2, 1)] + + If *step* is zero, ``ValueError`` is raised. Negative steps are supported: + + >>> list(numeric_range(3, -1, -1.0)) + [3.0, 2.0, 1.0, 0.0] + + Be aware of the limitations of floating point numbers; the representation + of the yielded numbers may be surprising. 
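+
+    For instance, with a step of ``0.1`` (a classic float-representation
+    artifact):
+
+    >>> list(numeric_range(0, 0.4, 0.1))
+    [0.0, 0.1, 0.2, 0.30000000000000004]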
+ + ``datetime.datetime`` objects can be used for *start* and *stop*, if *step* + is a ``datetime.timedelta`` object: + + >>> import datetime + >>> start = datetime.datetime(2019, 1, 1) + >>> stop = datetime.datetime(2019, 1, 3) + >>> step = datetime.timedelta(days=1) + >>> items = iter(numeric_range(start, stop, step)) + >>> next(items) + datetime.datetime(2019, 1, 1, 0, 0) + >>> next(items) + datetime.datetime(2019, 1, 2, 0, 0) + + """ + + _EMPTY_HASH = hash(range(0, 0)) + + def __init__(self, *args): + argc = len(args) + if argc == 1: + (self._stop,) = args + self._start = type(self._stop)(0) + self._step = type(self._stop - self._start)(1) + elif argc == 2: + self._start, self._stop = args + self._step = type(self._stop - self._start)(1) + elif argc == 3: + self._start, self._stop, self._step = args + elif argc == 0: + raise TypeError( + 'numeric_range expected at least ' + '1 argument, got {}'.format(argc) + ) + else: + raise TypeError( + 'numeric_range expected at most ' + '3 arguments, got {}'.format(argc) + ) + + self._zero = type(self._step)(0) + if self._step == self._zero: + raise ValueError('numeric_range() arg 3 must not be zero') + self._growing = self._step > self._zero + self._init_len() + + def __bool__(self): + if self._growing: + return self._start < self._stop + else: + return self._start > self._stop + + def __contains__(self, elem): + if self._growing: + if self._start <= elem < self._stop: + return (elem - self._start) % self._step == self._zero + else: + if self._start >= elem > self._stop: + return (self._start - elem) % (-self._step) == self._zero + + return False + + def __eq__(self, other): + if isinstance(other, numeric_range): + empty_self = not bool(self) + empty_other = not bool(other) + if empty_self or empty_other: + return empty_self and empty_other # True if both empty + else: + return ( + self._start == other._start + and self._step == other._step + and self._get_by_index(-1) == other._get_by_index(-1) + ) + else: + return False + + def __getitem__(self, key): + if isinstance(key, int): + return self._get_by_index(key) + elif isinstance(key, slice): + step = self._step if key.step is None else key.step * self._step + + if key.start is None or key.start <= -self._len: + start = self._start + elif key.start >= self._len: + start = self._stop + else: # -self._len < key.start < self._len + start = self._get_by_index(key.start) + + if key.stop is None or key.stop >= self._len: + stop = self._stop + elif key.stop <= -self._len: + stop = self._start + else: # -self._len < key.stop < self._len + stop = self._get_by_index(key.stop) + + return numeric_range(start, stop, step) + else: + raise TypeError( + 'numeric range indices must be ' + 'integers or slices, not {}'.format(type(key).__name__) + ) + + def __hash__(self): + if self: + return hash((self._start, self._get_by_index(-1), self._step)) + else: + return self._EMPTY_HASH + + def __iter__(self): + values = (self._start + (n * self._step) for n in count()) + if self._growing: + return takewhile(partial(gt, self._stop), values) + else: + return takewhile(partial(lt, self._stop), values) + + def __len__(self): + return self._len + + def _init_len(self): + if self._growing: + start = self._start + stop = self._stop + step = self._step + else: + start = self._stop + stop = self._start + step = -self._step + distance = stop - start + if distance <= self._zero: + self._len = 0 + else: # distance > 0 and step > 0: regular euclidean division + q, r = divmod(distance, step) + self._len = int(q) + int(r != 
self._zero) + + def __reduce__(self): + return numeric_range, (self._start, self._stop, self._step) + + def __repr__(self): + if self._step == 1: + return "numeric_range({}, {})".format( + repr(self._start), repr(self._stop) + ) + else: + return "numeric_range({}, {}, {})".format( + repr(self._start), repr(self._stop), repr(self._step) + ) + + def __reversed__(self): + return iter( + numeric_range( + self._get_by_index(-1), self._start - self._step, -self._step + ) + ) + + def count(self, value): + return int(value in self) + + def index(self, value): + if self._growing: + if self._start <= value < self._stop: + q, r = divmod(value - self._start, self._step) + if r == self._zero: + return int(q) + else: + if self._start >= value > self._stop: + q, r = divmod(self._start - value, -self._step) + if r == self._zero: + return int(q) + + raise ValueError("{} is not in numeric range".format(value)) + + def _get_by_index(self, i): + if i < 0: + i += self._len + if i < 0 or i >= self._len: + raise IndexError("numeric range object index out of range") + return self._start + i * self._step + + +def count_cycle(iterable, n=None): + """Cycle through the items from *iterable* up to *n* times, yielding + the number of completed cycles along with each item. If *n* is omitted the + process repeats indefinitely. + + >>> list(count_cycle('AB', 3)) + [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')] + + """ + iterable = tuple(iterable) + if not iterable: + return iter(()) + counter = count() if n is None else range(n) + return ((i, item) for i in counter for item in iterable) + + +def mark_ends(iterable): + """Yield 3-tuples of the form ``(is_first, is_last, item)``. + + >>> list(mark_ends('ABC')) + [(True, False, 'A'), (False, False, 'B'), (False, True, 'C')] + + Use this when looping over an iterable to take special action on its first + and/or last items: + + >>> iterable = ['Header', 100, 200, 'Footer'] + >>> total = 0 + >>> for is_first, is_last, item in mark_ends(iterable): + ... if is_first: + ... continue # Skip the header + ... if is_last: + ... continue # Skip the footer + ... total += item + >>> print(total) + 300 + """ + it = iter(iterable) + + try: + b = next(it) + except StopIteration: + return + + try: + for i in count(): + a = b + b = next(it) + yield i == 0, False, a + + except StopIteration: + yield i == 0, True, a + + +def locate(iterable, pred=bool, window_size=None): + """Yield the index of each item in *iterable* for which *pred* returns + ``True``. + + *pred* defaults to :func:`bool`, which will select truthy items: + + >>> list(locate([0, 1, 1, 0, 1, 0, 0])) + [1, 2, 4] + + Set *pred* to a custom function to, e.g., find the indexes for a particular + item. + + >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b')) + [1, 3] + + If *window_size* is given, then the *pred* function will be called with + that many items. 
This enables searching for sub-sequences:
+
+    >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
+    >>> pred = lambda *args: args == (1, 2, 3)
+    >>> list(locate(iterable, pred=pred, window_size=3))
+    [1, 5, 9]
+
+    Use with :func:`seekable` to find indexes and then retrieve the associated
+    items:
+
+    >>> from itertools import count
+    >>> from more_itertools import seekable
+    >>> source = (3 * n + 1 if (n % 2) else n // 2 for n in count())
+    >>> it = seekable(source)
+    >>> pred = lambda x: x > 100
+    >>> indexes = locate(it, pred=pred)
+    >>> i = next(indexes)
+    >>> it.seek(i)
+    >>> next(it)
+    106
+
+    """
+    if window_size is None:
+        return compress(count(), map(pred, iterable))
+
+    if window_size < 1:
+        raise ValueError('window size must be at least 1')
+
+    it = windowed(iterable, window_size, fillvalue=_marker)
+    return compress(count(), starmap(pred, it))
+
+
+def longest_common_prefix(iterables):
+    """Yield elements of the longest common prefix amongst given *iterables*.
+
+    >>> ''.join(longest_common_prefix(['abcd', 'abc', 'abf']))
+    'ab'
+
+    """
+    return (c[0] for c in takewhile(all_equal, zip(*iterables)))
+
+
+def lstrip(iterable, pred):
+    """Yield the items from *iterable*, but strip any from the beginning
+    for which *pred* returns ``True``.
+
+    For example, to remove a set of items from the start of an iterable:
+
+    >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
+    >>> pred = lambda x: x in {None, False, ''}
+    >>> list(lstrip(iterable, pred))
+    [1, 2, None, 3, False, None]
+
+    This function is analogous to :func:`str.lstrip`, and is essentially
+    a wrapper for :func:`itertools.dropwhile`.
+
+    """
+    return dropwhile(pred, iterable)
+
+
+def rstrip(iterable, pred):
+    """Yield the items from *iterable*, but strip any from the end
+    for which *pred* returns ``True``.
+
+    For example, to remove a set of items from the end of an iterable:
+
+    >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
+    >>> pred = lambda x: x in {None, False, ''}
+    >>> list(rstrip(iterable, pred))
+    [None, False, None, 1, 2, None, 3]
+
+    This function is analogous to :func:`str.rstrip`.
+
+    """
+    cache = []
+    cache_append = cache.append
+    cache_clear = cache.clear
+    for x in iterable:
+        if pred(x):
+            cache_append(x)
+        else:
+            yield from cache
+            cache_clear()
+            yield x
+
+
+def strip(iterable, pred):
+    """Yield the items from *iterable*, but strip any from the
+    beginning and end for which *pred* returns ``True``.
+
+    For example, to remove a set of items from both ends of an iterable:
+
+    >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
+    >>> pred = lambda x: x in {None, False, ''}
+    >>> list(strip(iterable, pred))
+    [1, 2, None, 3]
+
+    This function is analogous to :func:`str.strip`.
+
+    """
+    return rstrip(lstrip(iterable, pred), pred)
+
+
+class islice_extended:
+    """An extension of :func:`itertools.islice` that supports negative values
+    for *stop*, *start*, and *step*.
+
+    >>> iterable = iter('abcdefgh')
+    >>> list(islice_extended(iterable, -4, -1))
+    ['e', 'f', 'g']
+
+    Slices with negative values require some caching of *iterable*, but this
+    function takes care to minimize the amount of memory required.
+ + For example, you can use a negative step with an infinite iterator: + + >>> from itertools import count + >>> list(islice_extended(count(), 110, 99, -2)) + [110, 108, 106, 104, 102, 100] + + You can also use slice notation directly: + + >>> iterable = map(str, count()) + >>> it = islice_extended(iterable)[10:20:2] + >>> list(it) + ['10', '12', '14', '16', '18'] + + """ + + def __init__(self, iterable, *args): + it = iter(iterable) + if args: + self._iterable = _islice_helper(it, slice(*args)) + else: + self._iterable = it + + def __iter__(self): + return self + + def __next__(self): + return next(self._iterable) + + def __getitem__(self, key): + if isinstance(key, slice): + return islice_extended(_islice_helper(self._iterable, key)) + + raise TypeError('islice_extended.__getitem__ argument must be a slice') + + +def _islice_helper(it, s): + start = s.start + stop = s.stop + if s.step == 0: + raise ValueError('step argument must be a non-zero integer or None.') + step = s.step or 1 + + if step > 0: + start = 0 if (start is None) else start + + if start < 0: + # Consume all but the last -start items + cache = deque(enumerate(it, 1), maxlen=-start) + len_iter = cache[-1][0] if cache else 0 + + # Adjust start to be positive + i = max(len_iter + start, 0) + + # Adjust stop to be positive + if stop is None: + j = len_iter + elif stop >= 0: + j = min(stop, len_iter) + else: + j = max(len_iter + stop, 0) + + # Slice the cache + n = j - i + if n <= 0: + return + + for index, item in islice(cache, 0, n, step): + yield item + elif (stop is not None) and (stop < 0): + # Advance to the start position + next(islice(it, start, start), None) + + # When stop is negative, we have to carry -stop items while + # iterating + cache = deque(islice(it, -stop), maxlen=-stop) + + for index, item in enumerate(it): + cached_item = cache.popleft() + if index % step == 0: + yield cached_item + cache.append(item) + else: + # When both start and stop are positive we have the normal case + yield from islice(it, start, stop, step) + else: + start = -1 if (start is None) else start + + if (stop is not None) and (stop < 0): + # Consume all but the last items + n = -stop - 1 + cache = deque(enumerate(it, 1), maxlen=n) + len_iter = cache[-1][0] if cache else 0 + + # If start and stop are both negative they are comparable and + # we can just slice. Otherwise we can adjust start to be negative + # and then slice. + if start < 0: + i, j = start, stop + else: + i, j = min(start - len_iter, -1), None + + for index, item in list(cache)[i:j:step]: + yield item + else: + # Advance to the stop position + if stop is not None: + m = stop + 1 + next(islice(it, m, m), None) + + # stop is positive, so if start is negative they are not comparable + # and we need the rest of the items. + if start < 0: + i = start + n = None + # stop is None and start is positive, so we just need items up to + # the start index. + elif stop is None: + i = None + n = start + 1 + # Both stop and start are positive, so they are comparable. + else: + i = None + n = start - stop + if n <= 0: + return + + cache = list(islice(it, n)) + + yield from cache[i::step] + + +def always_reversible(iterable): + """An extension of :func:`reversed` that supports all iterables, not + just those which implement the ``Reversible`` or ``Sequence`` protocols. + + >>> print(*always_reversible(x for x in range(3))) + 2 1 0 + + If the iterable is already reversible, this function returns the + result of :func:`reversed()`. 
If the iterable is not reversible,
+    this function will cache the remaining items in the iterable and
+    yield them in reverse order, which may require significant storage.
+    """
+    try:
+        return reversed(iterable)
+    except TypeError:
+        return reversed(list(iterable))
+
+
+def consecutive_groups(iterable, ordering=lambda x: x):
+    """Yield groups of consecutive items using :func:`itertools.groupby`.
+    The *ordering* function determines whether two items are adjacent by
+    returning their position.
+
+    By default, the ordering function is the identity function. This is
+    suitable for finding runs of numbers:
+
+        >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40]
+        >>> for group in consecutive_groups(iterable):
+        ...     print(list(group))
+        [1]
+        [10, 11, 12]
+        [20]
+        [30, 31, 32, 33]
+        [40]
+
+    For finding runs of adjacent letters, try using the :meth:`index` method
+    of a string of letters:
+
+        >>> from string import ascii_lowercase
+        >>> iterable = 'abcdfgilmnop'
+        >>> ordering = ascii_lowercase.index
+        >>> for group in consecutive_groups(iterable, ordering):
+        ...     print(list(group))
+        ['a', 'b', 'c', 'd']
+        ['f', 'g']
+        ['i']
+        ['l', 'm', 'n', 'o', 'p']
+
+    Each group of consecutive items is an iterator that shares its source with
+    *iterable*. When an output group is advanced, the previous group is
+    no longer available unless its elements are copied (e.g., into a ``list``).
+
+        >>> iterable = [1, 2, 11, 12, 21, 22]
+        >>> saved_groups = []
+        >>> for group in consecutive_groups(iterable):
+        ...     saved_groups.append(list(group))  # Copy group elements
+        >>> saved_groups
+        [[1, 2], [11, 12], [21, 22]]
+
+    """
+    for k, g in groupby(
+        enumerate(iterable), key=lambda x: x[0] - ordering(x[1])
+    ):
+        yield map(itemgetter(1), g)
+
+
+def difference(iterable, func=sub, *, initial=None):
+    """This function is the inverse of :func:`itertools.accumulate`. By default
+    it will compute the first difference of *iterable* using
+    :func:`operator.sub`:
+
+        >>> from itertools import accumulate
+        >>> iterable = accumulate([0, 1, 2, 3, 4])  # produces 0, 1, 3, 6, 10
+        >>> list(difference(iterable))
+        [0, 1, 2, 3, 4]
+
+    *func* defaults to :func:`operator.sub`, but other functions can be
+    specified. They will be applied as follows::
+
+        A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ...
+
+    For example, to do progressive division:
+
+        >>> iterable = [1, 2, 6, 24, 120]
+        >>> func = lambda x, y: x // y
+        >>> list(difference(iterable, func))
+        [1, 2, 3, 4, 5]
+
+    If the *initial* keyword is set, the first element will be skipped when
+    computing successive differences.
+
+        >>> it = [10, 11, 13, 16]  # from accumulate([1, 2, 3], initial=10)
+        >>> list(difference(it, initial=10))
+        [1, 2, 3]
+
+    """
+    a, b = tee(iterable)
+    try:
+        first = [next(b)]
+    except StopIteration:
+        return iter([])
+
+    if initial is not None:
+        first = []
+
+    return chain(first, map(func, b, a))
+
+
+class SequenceView(Sequence):
+    """Return a read-only view of the sequence object *target*.
+
+    :class:`SequenceView` objects are analogous to Python's built-in
+    "dictionary view" types. They provide a dynamic view of a sequence's items,
+    meaning that when the sequence updates, so does the view.
+
+        >>> seq = ['0', '1', '2']
+        >>> view = SequenceView(seq)
+        >>> view
+        SequenceView(['0', '1', '2'])
+        >>> seq.append('3')
+        >>> view
+        SequenceView(['0', '1', '2', '3'])
+
+    Sequence views support indexing, slicing, and length queries.
They act + like the underlying sequence, except they don't allow assignment: + + >>> view[1] + '1' + >>> view[1:-1] + ['1', '2'] + >>> len(view) + 4 + + Sequence views are useful as an alternative to copying, as they don't + require (much) extra storage. + + """ + + def __init__(self, target): + if not isinstance(target, Sequence): + raise TypeError + self._target = target + + def __getitem__(self, index): + return self._target[index] + + def __len__(self): + return len(self._target) + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, repr(self._target)) + + +class seekable: + """Wrap an iterator to allow for seeking backward and forward. This + progressively caches the items in the source iterable so they can be + re-visited. + + Call :meth:`seek` with an index to seek to that position in the source + iterable. + + To "reset" an iterator, seek to ``0``: + + >>> from itertools import count + >>> it = seekable((str(n) for n in count())) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> it.seek(0) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> next(it) + '3' + + You can also seek forward: + + >>> it = seekable((str(n) for n in range(20))) + >>> it.seek(10) + >>> next(it) + '10' + >>> it.seek(20) # Seeking past the end of the source isn't a problem + >>> list(it) + [] + >>> it.seek(0) # Resetting works even after hitting the end + >>> next(it), next(it), next(it) + ('0', '1', '2') + + Call :meth:`peek` to look ahead one item without advancing the iterator: + + >>> it = seekable('1234') + >>> it.peek() + '1' + >>> list(it) + ['1', '2', '3', '4'] + >>> it.peek(default='empty') + 'empty' + + Before the iterator is at its end, calling :func:`bool` on it will return + ``True``. After it will return ``False``: + + >>> it = seekable('5678') + >>> bool(it) + True + >>> list(it) + ['5', '6', '7', '8'] + >>> bool(it) + False + + You may view the contents of the cache with the :meth:`elements` method. + That returns a :class:`SequenceView`, a view that updates automatically: + + >>> it = seekable((str(n) for n in range(10))) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> elements = it.elements() + >>> elements + SequenceView(['0', '1', '2']) + >>> next(it) + '3' + >>> elements + SequenceView(['0', '1', '2', '3']) + + By default, the cache grows as the source iterable progresses, so beware of + wrapping very large or infinite iterables. Supply *maxlen* to limit the + size of the cache (this of course limits how far back you can seek). 
+ + >>> from itertools import count + >>> it = seekable((str(n) for n in count()), maxlen=2) + >>> next(it), next(it), next(it), next(it) + ('0', '1', '2', '3') + >>> list(it.elements()) + ['2', '3'] + >>> it.seek(0) + >>> next(it), next(it), next(it), next(it) + ('2', '3', '4', '5') + >>> next(it) + '6' + + """ + + def __init__(self, iterable, maxlen=None): + self._source = iter(iterable) + if maxlen is None: + self._cache = [] + else: + self._cache = deque([], maxlen) + self._index = None + + def __iter__(self): + return self + + def __next__(self): + if self._index is not None: + try: + item = self._cache[self._index] + except IndexError: + self._index = None + else: + self._index += 1 + return item + + item = next(self._source) + self._cache.append(item) + return item + + def __bool__(self): + try: + self.peek() + except StopIteration: + return False + return True + + def peek(self, default=_marker): + try: + peeked = next(self) + except StopIteration: + if default is _marker: + raise + return default + if self._index is None: + self._index = len(self._cache) + self._index -= 1 + return peeked + + def elements(self): + return SequenceView(self._cache) + + def seek(self, index): + self._index = index + remainder = index - len(self._cache) + if remainder > 0: + consume(self, remainder) + + +class run_length: + """ + :func:`run_length.encode` compresses an iterable with run-length encoding. + It yields groups of repeated items with the count of how many times they + were repeated: + + >>> uncompressed = 'abbcccdddd' + >>> list(run_length.encode(uncompressed)) + [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + + :func:`run_length.decode` decompresses an iterable that was previously + compressed with run-length encoding. It yields the items of the + decompressed iterable: + + >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + >>> list(run_length.decode(compressed)) + ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd'] + + """ + + @staticmethod + def encode(iterable): + return ((k, ilen(g)) for k, g in groupby(iterable)) + + @staticmethod + def decode(iterable): + return chain.from_iterable(repeat(k, n) for k, n in iterable) + + +def exactly_n(iterable, n, predicate=bool): + """Return ``True`` if exactly ``n`` items in the iterable are ``True`` + according to the *predicate* function. + + >>> exactly_n([True, True, False], 2) + True + >>> exactly_n([True, True, False], 1) + False + >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3) + True + + The iterable will be advanced until ``n + 1`` truthy items are encountered, + so avoid calling it on infinite iterables. + + """ + return len(take(n + 1, filter(predicate, iterable))) == n + + +def circular_shifts(iterable): + """Return a list of circular shifts of *iterable*. + + >>> circular_shifts(range(4)) + [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)] + """ + lst = list(iterable) + return take(len(lst), windowed(cycle(lst), len(lst))) + + +def make_decorator(wrapping_func, result_index=0): + """Return a decorator version of *wrapping_func*, which is a function that + modifies an iterable. *result_index* is the position in that function's + signature where the iterable goes. + + This lets you use itertools on the "production end," i.e. at function + definition. This can augment what the function returns without changing the + function's code. + + For example, to produce a decorator version of :func:`chunked`: + + >>> from more_itertools import chunked + >>> chunker = make_decorator(chunked, result_index=0) + >>> @chunker(3) + ... 
def iter_range(n): + ... return iter(range(n)) + ... + >>> list(iter_range(9)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8]] + + To only allow truthy items to be returned: + + >>> truth_serum = make_decorator(filter, result_index=1) + >>> @truth_serum(bool) + ... def boolean_test(): + ... return [0, 1, '', ' ', False, True] + ... + >>> list(boolean_test()) + [1, ' ', True] + + The :func:`peekable` and :func:`seekable` wrappers make for practical + decorators: + + >>> from more_itertools import peekable + >>> peekable_function = make_decorator(peekable) + >>> @peekable_function() + ... def str_range(*args): + ... return (str(x) for x in range(*args)) + ... + >>> it = str_range(1, 20, 2) + >>> next(it), next(it), next(it) + ('1', '3', '5') + >>> it.peek() + '7' + >>> next(it) + '7' + + """ + + # See https://sites.google.com/site/bbayles/index/decorator_factory for + # notes on how this works. + def decorator(*wrapping_args, **wrapping_kwargs): + def outer_wrapper(f): + def inner_wrapper(*args, **kwargs): + result = f(*args, **kwargs) + wrapping_args_ = list(wrapping_args) + wrapping_args_.insert(result_index, result) + return wrapping_func(*wrapping_args_, **wrapping_kwargs) + + return inner_wrapper + + return outer_wrapper + + return decorator + + +def map_reduce(iterable, keyfunc, valuefunc=None, reducefunc=None): + """Return a dictionary that maps the items in *iterable* to categories + defined by *keyfunc*, transforms them with *valuefunc*, and + then summarizes them by category with *reducefunc*. + + *valuefunc* defaults to the identity function if it is unspecified. + If *reducefunc* is unspecified, no summarization takes place: + + >>> keyfunc = lambda x: x.upper() + >>> result = map_reduce('abbccc', keyfunc) + >>> sorted(result.items()) + [('A', ['a']), ('B', ['b', 'b']), ('C', ['c', 'c', 'c'])] + + Specifying *valuefunc* transforms the categorized items: + + >>> keyfunc = lambda x: x.upper() + >>> valuefunc = lambda x: 1 + >>> result = map_reduce('abbccc', keyfunc, valuefunc) + >>> sorted(result.items()) + [('A', [1]), ('B', [1, 1]), ('C', [1, 1, 1])] + + Specifying *reducefunc* summarizes the categorized items: + + >>> keyfunc = lambda x: x.upper() + >>> valuefunc = lambda x: 1 + >>> reducefunc = sum + >>> result = map_reduce('abbccc', keyfunc, valuefunc, reducefunc) + >>> sorted(result.items()) + [('A', 1), ('B', 2), ('C', 3)] + + You may want to filter the input iterable before applying the map/reduce + procedure: + + >>> all_items = range(30) + >>> items = [x for x in all_items if 10 <= x <= 20] # Filter + >>> keyfunc = lambda x: x % 2 # Evens map to 0; odds to 1 + >>> categories = map_reduce(items, keyfunc=keyfunc) + >>> sorted(categories.items()) + [(0, [10, 12, 14, 16, 18, 20]), (1, [11, 13, 15, 17, 19])] + >>> summaries = map_reduce(items, keyfunc=keyfunc, reducefunc=sum) + >>> sorted(summaries.items()) + [(0, 90), (1, 75)] + + Note that all items in the iterable are gathered into a list before the + summarization step, which may require significant storage. + + The returned object is a :obj:`collections.defaultdict` with the + ``default_factory`` set to ``None``, such that it behaves like a normal + dictionary. 
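+
+    Missing keys therefore raise ``KeyError`` rather than being created on
+    the fly:
+
+        >>> result = map_reduce('abbccc', keyfunc=lambda x: x.upper())
+        >>> result['D']
+        Traceback (most recent call last):
+        ...
+        KeyError: 'D'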
+
+    """
+    valuefunc = (lambda x: x) if (valuefunc is None) else valuefunc
+
+    ret = defaultdict(list)
+    for item in iterable:
+        key = keyfunc(item)
+        value = valuefunc(item)
+        ret[key].append(value)
+
+    if reducefunc is not None:
+        for key, value_list in ret.items():
+            ret[key] = reducefunc(value_list)
+
+    ret.default_factory = None
+    return ret
+
+
+def rlocate(iterable, pred=bool, window_size=None):
+    """Yield the index of each item in *iterable* for which *pred* returns
+    ``True``, starting from the right and moving left.
+
+    *pred* defaults to :func:`bool`, which will select truthy items:
+
+        >>> list(rlocate([0, 1, 1, 0, 1, 0, 0]))  # Truthy at 1, 2, and 4
+        [4, 2, 1]
+
+    Set *pred* to a custom function to, e.g., find the indexes for a
+    particular item:
+
+        >>> iterable = iter('abcb')
+        >>> pred = lambda x: x == 'b'
+        >>> list(rlocate(iterable, pred))
+        [3, 1]
+
+    If *window_size* is given, then the *pred* function will be called with
+    that many items. This enables searching for sub-sequences:
+
+        >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
+        >>> pred = lambda *args: args == (1, 2, 3)
+        >>> list(rlocate(iterable, pred=pred, window_size=3))
+        [9, 5, 1]
+
+    Beware, this function won't return anything for infinite iterables.
+    If *iterable* is reversible, ``rlocate`` will reverse it and search from
+    the right. Otherwise, it will search from the left and return the results
+    in reverse order.
+
+    See :func:`locate` for other example applications.
+
+    """
+    if window_size is None:
+        try:
+            len_iter = len(iterable)
+            return (
+                len_iter - i - 1 for i in locate(reversed(iterable), pred)
+            )
+        except TypeError:
+            pass
+
+    return reversed(list(locate(iterable, pred, window_size)))
+
+
+def replace(iterable, pred, substitutes, count=None, window_size=1):
+    """Yield the items from *iterable*, replacing the items for which *pred*
+    returns ``True`` with the items from the iterable *substitutes*.
+
+        >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1]
+        >>> pred = lambda x: x == 0
+        >>> substitutes = (2, 3)
+        >>> list(replace(iterable, pred, substitutes))
+        [1, 1, 2, 3, 1, 1, 2, 3, 1, 1]
+
+    If *count* is given, the number of replacements will be limited:
+
+        >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1, 0]
+        >>> pred = lambda x: x == 0
+        >>> substitutes = [None]
+        >>> list(replace(iterable, pred, substitutes, count=2))
+        [1, 1, None, 1, 1, None, 1, 1, 0]
+
+    Use *window_size* to control the number of items passed as arguments to
+    *pred*. This allows for locating and replacing subsequences.
+
+        >>> iterable = [0, 1, 2, 5, 0, 1, 2, 5]
+        >>> window_size = 3
+        >>> pred = lambda *args: args == (0, 1, 2)  # 3 items passed to pred
+        >>> substitutes = [3, 4]  # Splice in these items
+        >>> list(replace(iterable, pred, substitutes, window_size=window_size))
+        [3, 4, 5, 3, 4, 5]
+
+    """
+    if window_size < 1:
+        raise ValueError('window_size must be at least 1')
+
+    # Save the substitutes iterable, since it's used more than once
+    substitutes = tuple(substitutes)
+
+    # Add padding such that the number of windows matches the length of the
+    # iterable
+    it = chain(iterable, [_marker] * (window_size - 1))
+    windows = windowed(it, window_size)
+
+    n = 0
+    for w in windows:
+        # If the current window matches our predicate (and we haven't hit
+        # our maximum number of replacements), splice in the substitutes
+        # and then consume the following windows that overlap with this one.
+        # For example, if the iterable is (0, 1, 2, 3, 4...)
+        # and the window size is 2, we have (0, 1), (1, 2), (2, 3)...
+ # If the predicate matches on (0, 1), we need to zap (0, 1) and (1, 2) + if pred(*w): + if (count is None) or (n < count): + n += 1 + yield from substitutes + consume(windows, window_size - 1) + continue + + # If there was no match (or we've reached the replacement limit), + # yield the first item from the window. + if w and (w[0] is not _marker): + yield w[0] + + +def partitions(iterable): + """Yield all possible order-preserving partitions of *iterable*. + + >>> iterable = 'abc' + >>> for part in partitions(iterable): + ... print([''.join(p) for p in part]) + ['abc'] + ['a', 'bc'] + ['ab', 'c'] + ['a', 'b', 'c'] + + This is unrelated to :func:`partition`. + + """ + sequence = list(iterable) + n = len(sequence) + for i in powerset(range(1, n)): + yield [sequence[i:j] for i, j in zip((0,) + i, i + (n,))] + + +def set_partitions(iterable, k=None): + """ + Yield the set partitions of *iterable* into *k* parts. Set partitions are + not order-preserving. + + >>> iterable = 'abc' + >>> for part in set_partitions(iterable, 2): + ... print([''.join(p) for p in part]) + ['a', 'bc'] + ['ab', 'c'] + ['b', 'ac'] + + + If *k* is not given, every set partition is generated. + + >>> iterable = 'abc' + >>> for part in set_partitions(iterable): + ... print([''.join(p) for p in part]) + ['abc'] + ['a', 'bc'] + ['ab', 'c'] + ['b', 'ac'] + ['a', 'b', 'c'] + + """ + L = list(iterable) + n = len(L) + if k is not None: + if k < 1: + raise ValueError( + "Can't partition in a negative or zero number of groups" + ) + elif k > n: + return + + def set_partitions_helper(L, k): + n = len(L) + if k == 1: + yield [L] + elif n == k: + yield [[s] for s in L] + else: + e, *M = L + for p in set_partitions_helper(M, k - 1): + yield [[e], *p] + for p in set_partitions_helper(M, k): + for i in range(len(p)): + yield p[:i] + [[e] + p[i]] + p[i + 1 :] + + if k is None: + for k in range(1, n + 1): + yield from set_partitions_helper(L, k) + else: + yield from set_partitions_helper(L, k) + + +class time_limited: + """ + Yield items from *iterable* until *limit_seconds* have passed. + If the time limit expires before all items have been yielded, the + ``timed_out`` parameter will be set to ``True``. + + >>> from time import sleep + >>> def generator(): + ... yield 1 + ... yield 2 + ... sleep(0.2) + ... yield 3 + >>> iterable = time_limited(0.1, generator()) + >>> list(iterable) + [1, 2] + >>> iterable.timed_out + True + + Note that the time is checked before each item is yielded, and iteration + stops if the time elapsed is greater than *limit_seconds*. If your time + limit is 1 second, but it takes 2 seconds to generate the first item from + the iterable, the function will run for 2 seconds and not yield anything. + + """ + + def __init__(self, limit_seconds, iterable): + if limit_seconds < 0: + raise ValueError('limit_seconds must be positive') + self.limit_seconds = limit_seconds + self._iterable = iter(iterable) + self._start_time = monotonic() + self.timed_out = False + + def __iter__(self): + return self + + def __next__(self): + item = next(self._iterable) + if monotonic() - self._start_time > self.limit_seconds: + self.timed_out = True + raise StopIteration + + return item + + +def only(iterable, default=None, too_long=None): + """If *iterable* has only one item, return it. + If it has zero items, return *default*. + If it has more than one item, raise the exception given by *too_long*, + which is ``ValueError`` by default. 
+
+        >>> only([], default='missing')
+        'missing'
+        >>> only([1])
+        1
+        >>> only([1, 2])  # doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+        ...
+        ValueError: Expected exactly one item in iterable, but got 1, 2,
+        and perhaps more.
+        >>> only([1, 2], too_long=TypeError)  # doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+        ...
+        TypeError
+
+    Note that :func:`only` attempts to advance *iterable* twice to ensure
+    there is only one item. See :func:`spy` or :func:`peekable` to check
+    iterable contents less destructively.
+    """
+    it = iter(iterable)
+    first_value = next(it, default)
+
+    try:
+        second_value = next(it)
+    except StopIteration:
+        pass
+    else:
+        msg = (
+            'Expected exactly one item in iterable, but got {!r}, {!r}, '
+            'and perhaps more.'.format(first_value, second_value)
+        )
+        raise too_long or ValueError(msg)
+
+    return first_value
+
+
+class _IChunk:
+    def __init__(self, iterable, n):
+        self._it = islice(iterable, n)
+        self._cache = deque()
+
+    def fill_cache(self):
+        self._cache.extend(self._it)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        try:
+            return next(self._it)
+        except StopIteration:
+            if self._cache:
+                return self._cache.popleft()
+            else:
+                raise
+
+
+def ichunked(iterable, n):
+    """Break *iterable* into sub-iterables with *n* elements each.
+    :func:`ichunked` is like :func:`chunked`, but it yields iterables
+    instead of lists.
+
+    If the sub-iterables are read in order, the elements of *iterable*
+    won't be stored in memory.
+    If they are read out of order, the elements will be cached as necessary.
+
+        >>> from itertools import count
+        >>> all_chunks = ichunked(count(), 4)
+        >>> c_1, c_2, c_3 = next(all_chunks), next(all_chunks), next(all_chunks)
+        >>> list(c_2)  # c_1's elements have been cached; c_3's haven't been
+        [4, 5, 6, 7]
+        >>> list(c_1)
+        [0, 1, 2, 3]
+        >>> list(c_3)
+        [8, 9, 10, 11]
+
+    """
+    source = peekable(iter(iterable))
+    ichunk_marker = object()
+    while True:
+        # Check to see whether we're at the end of the source iterable
+        item = source.peek(ichunk_marker)
+        if item is ichunk_marker:
+            return
+
+        chunk = _IChunk(source, n)
+        yield chunk
+
+        # Advance the source iterable and fill previous chunk's cache
+        chunk.fill_cache()
+
+
+def iequals(*iterables):
+    """Return ``True`` if all given *iterables* are equal to each other,
+    which means that they contain the same elements in the same order.
+
+    The function is useful for comparing iterables of different data types
+    or iterables that do not support equality checks.
+
+        >>> iequals("abc", ['a', 'b', 'c'], ('a', 'b', 'c'), iter("abc"))
+        True
+
+        >>> iequals("abc", "acb")
+        False
+
+    Not to be confused with :func:`all_equal`, which checks whether all
+    elements of an iterable are equal to each other.
+
+    """
+    return all(map(all_equal, zip_longest(*iterables, fillvalue=object())))
+
+
+def distinct_combinations(iterable, r):
+    """Yield the distinct combinations of *r* items taken from *iterable*.
+
+        >>> list(distinct_combinations([0, 0, 1], 2))
+        [(0, 0), (0, 1)]
+
+    Equivalent to ``set(combinations(iterable))``, except duplicates are not
+    generated and thrown away. For larger input sequences this is much more
+    efficient.
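+
+    As an additional illustration, duplicates are recognized by value,
+    wherever they occur in the input:
+
+        >>> list(distinct_combinations('aabb', 2))
+        [('a', 'a'), ('a', 'b'), ('b', 'b')]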
+ + """ + if r < 0: + raise ValueError('r must be non-negative') + elif r == 0: + yield () + return + pool = tuple(iterable) + generators = [unique_everseen(enumerate(pool), key=itemgetter(1))] + current_combo = [None] * r + level = 0 + while generators: + try: + cur_idx, p = next(generators[-1]) + except StopIteration: + generators.pop() + level -= 1 + continue + current_combo[level] = p + if level + 1 == r: + yield tuple(current_combo) + else: + generators.append( + unique_everseen( + enumerate(pool[cur_idx + 1 :], cur_idx + 1), + key=itemgetter(1), + ) + ) + level += 1 + + +def filter_except(validator, iterable, *exceptions): + """Yield the items from *iterable* for which the *validator* function does + not raise one of the specified *exceptions*. + + *validator* is called for each item in *iterable*. + It should be a function that accepts one argument and raises an exception + if that item is not valid. + + >>> iterable = ['1', '2', 'three', '4', None] + >>> list(filter_except(int, iterable, ValueError, TypeError)) + ['1', '2', '4'] + + If an exception other than one given by *exceptions* is raised by + *validator*, it is raised like normal. + """ + for item in iterable: + try: + validator(item) + except exceptions: + pass + else: + yield item + + +def map_except(function, iterable, *exceptions): + """Transform each item from *iterable* with *function* and yield the + result, unless *function* raises one of the specified *exceptions*. + + *function* is called to transform each item in *iterable*. + It should accept one argument. + + >>> iterable = ['1', '2', 'three', '4', None] + >>> list(map_except(int, iterable, ValueError, TypeError)) + [1, 2, 4] + + If an exception other than one given by *exceptions* is raised by + *function*, it is raised like normal. + """ + for item in iterable: + try: + yield function(item) + except exceptions: + pass + + +def map_if(iterable, pred, func, func_else=lambda x: x): + """Evaluate each item from *iterable* using *pred*. If the result is + equivalent to ``True``, transform the item with *func* and yield it. + Otherwise, transform the item with *func_else* and yield it. + + *pred*, *func*, and *func_else* should each be functions that accept + one argument. By default, *func_else* is the identity function. + + >>> from math import sqrt + >>> iterable = list(range(-5, 5)) + >>> iterable + [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4] + >>> list(map_if(iterable, lambda x: x > 3, lambda x: 'toobig')) + [-5, -4, -3, -2, -1, 0, 1, 2, 3, 'toobig'] + >>> list(map_if(iterable, lambda x: x >= 0, + ... lambda x: f'{sqrt(x):.2f}', lambda x: None)) + [None, None, None, None, None, '0.00', '1.00', '1.41', '1.73', '2.00'] + """ + for item in iterable: + yield func(item) if pred(item) else func_else(item) + + +def _sample_unweighted(iterable, k): + # Implementation of "Algorithm L" from the 1994 paper by Kim-Hung Li: + # "Reservoir-Sampling Algorithms of Time Complexity O(n(1+log(N/n)))". + + # Fill up the reservoir (collection of samples) with the first `k` samples + reservoir = take(k, iterable) + + # Generate random number that's the largest in a sample of k U(0,1) numbers + # Largest order statistic: https://en.wikipedia.org/wiki/Order_statistic + W = exp(log(random()) / k) + + # The number of elements to skip before changing the reservoir is a random + # number with a geometric distribution. Sample it using random() and logs. 
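+    # Specifically, P(skip == s) = W * (1 - W) ** s, so the inverse-CDF
+    # transform floor(log(U) / log(1 - W)) with U ~ Uniform(0, 1) samples
+    # the skip count in constant time.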
+ next_index = k + floor(log(random()) / log(1 - W)) + + for index, element in enumerate(iterable, k): + if index == next_index: + reservoir[randrange(k)] = element + # The new W is the largest in a sample of k U(0, `old_W`) numbers + W *= exp(log(random()) / k) + next_index += floor(log(random()) / log(1 - W)) + 1 + + return reservoir + + +def _sample_weighted(iterable, k, weights): + # Implementation of "A-ExpJ" from the 2006 paper by Efraimidis et al. : + # "Weighted random sampling with a reservoir". + + # Log-transform for numerical stability for weights that are small/large + weight_keys = (log(random()) / weight for weight in weights) + + # Fill up the reservoir (collection of samples) with the first `k` + # weight-keys and elements, then heapify the list. + reservoir = take(k, zip(weight_keys, iterable)) + heapify(reservoir) + + # The number of jumps before changing the reservoir is a random variable + # with an exponential distribution. Sample it using random() and logs. + smallest_weight_key, _ = reservoir[0] + weights_to_skip = log(random()) / smallest_weight_key + + for weight, element in zip(weights, iterable): + if weight >= weights_to_skip: + # The notation here is consistent with the paper, but we store + # the weight-keys in log-space for better numerical stability. + smallest_weight_key, _ = reservoir[0] + t_w = exp(weight * smallest_weight_key) + r_2 = uniform(t_w, 1) # generate U(t_w, 1) + weight_key = log(r_2) / weight + heapreplace(reservoir, (weight_key, element)) + smallest_weight_key, _ = reservoir[0] + weights_to_skip = log(random()) / smallest_weight_key + else: + weights_to_skip -= weight + + # Equivalent to [element for weight_key, element in sorted(reservoir)] + return [heappop(reservoir)[1] for _ in range(k)] + + +def sample(iterable, k, weights=None): + """Return a *k*-length list of elements chosen (without replacement) + from the *iterable*. Like :func:`random.sample`, but works on iterables + of unknown length. + + >>> iterable = range(100) + >>> sample(iterable, 5) # doctest: +SKIP + [81, 60, 96, 16, 4] + + An iterable with *weights* may also be given: + + >>> iterable = range(100) + >>> weights = (i * i + 1 for i in range(100)) + >>> sampled = sample(iterable, 5, weights=weights) # doctest: +SKIP + [79, 67, 74, 66, 78] + + The algorithm can also be used to generate weighted random permutations. + The relative weight of each item determines the probability that it + appears late in the permutation. + + >>> data = "abcdefgh" + >>> weights = range(1, len(data) + 1) + >>> sample(data, k=len(data), weights=weights) # doctest: +SKIP + ['c', 'a', 'b', 'e', 'g', 'd', 'h', 'f'] + """ + if k == 0: + return [] + + iterable = iter(iterable) + if weights is None: + return _sample_unweighted(iterable, k) + else: + weights = iter(weights) + return _sample_weighted(iterable, k, weights) + + +def is_sorted(iterable, key=None, reverse=False, strict=False): + """Returns ``True`` if the items of iterable are in sorted order, and + ``False`` otherwise. *key* and *reverse* have the same meaning that they do + in the built-in :func:`sorted` function. + + >>> is_sorted(['1', '2', '3', '4', '5'], key=int) + True + >>> is_sorted([5, 4, 3, 1, 2], reverse=True) + False + + If *strict*, tests for strict sorting, that is, returns ``False`` if equal + elements are found: + + >>> is_sorted([1, 2, 2]) + True + >>> is_sorted([1, 2, 2], strict=True) + False + + The function returns ``False`` after encountering the first out-of-order + item. 
If there are no out-of-order items, the iterable is exhausted.
+    """
+
+    compare = (le if reverse else ge) if strict else (lt if reverse else gt)
+    it = iterable if key is None else map(key, iterable)
+    return not any(starmap(compare, pairwise(it)))
+
+
+class AbortThread(BaseException):
+    pass
+
+
+class callback_iter:
+    """Convert a function that uses callbacks to an iterator.
+
+    Let *func* be a function that takes a `callback` keyword argument.
+    For example:
+
+        >>> def func(callback=None):
+        ...     for i, c in [(1, 'a'), (2, 'b'), (3, 'c')]:
+        ...         if callback:
+        ...             callback(i, c)
+        ...     return 4
+
+
+    Use ``with callback_iter(func)`` to get an iterator over the parameters
+    that are delivered to the callback.
+
+        >>> with callback_iter(func) as it:
+        ...     for args, kwargs in it:
+        ...         print(args)
+        (1, 'a')
+        (2, 'b')
+        (3, 'c')
+
+    The function will be called in a background thread. The ``done`` property
+    indicates whether it has completed execution.
+
+        >>> it.done
+        True
+
+    If it completes successfully, its return value will be available
+    in the ``result`` property.
+
+        >>> it.result
+        4
+
+    Notes:
+
+    * If the function uses some keyword argument besides ``callback``, supply
+      *callback_kwd*.
+    * If it finished executing, but raised an exception, accessing the
+      ``result`` property will raise the same exception.
+    * If it hasn't finished executing, accessing the ``result``
+      property from within the ``with`` block will raise ``RuntimeError``.
+    * If it hasn't finished executing, accessing the ``result`` property from
+      outside the ``with`` block will raise a
+      ``more_itertools.AbortThread`` exception.
+    * Provide *wait_seconds* to adjust how frequently it is polled for
+      output.
+
+    """
+
+    def __init__(self, func, callback_kwd='callback', wait_seconds=0.1):
+        self._func = func
+        self._callback_kwd = callback_kwd
+        self._aborted = False
+        self._future = None
+        self._wait_seconds = wait_seconds
+        # Lazily import concurrent.futures
+        self._executor = __import__(
+            'concurrent.futures'
+        ).futures.ThreadPoolExecutor(max_workers=1)
+        self._iterator = self._reader()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self._aborted = True
+        self._executor.shutdown()
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return next(self._iterator)
+
+    @property
+    def done(self):
+        if self._future is None:
+            return False
+        return self._future.done()
+
+    @property
+    def result(self):
+        if not self.done:
+            raise RuntimeError('Function has not yet completed')
+
+        return self._future.result()
+
+    def _reader(self):
+        q = Queue()
+
+        def callback(*args, **kwargs):
+            if self._aborted:
+                raise AbortThread('canceled by user')
+
+            q.put((args, kwargs))
+
+        self._future = self._executor.submit(
+            self._func, **{self._callback_kwd: callback}
+        )
+
+        while True:
+            try:
+                item = q.get(timeout=self._wait_seconds)
+            except Empty:
+                pass
+            else:
+                q.task_done()
+                yield item
+
+            if self._future.done():
+                break
+
+        remaining = []
+        while True:
+            try:
+                item = q.get_nowait()
+            except Empty:
+                break
+            else:
+                q.task_done()
+                remaining.append(item)
+        q.join()
+        yield from remaining
+
+
+def windowed_complete(iterable, n):
+    """
+    Yield ``(beginning, middle, end)`` tuples, where:
+
+    * Each ``middle`` has *n* items from *iterable*
+    * Each ``beginning`` has the items before the ones in ``middle``
+    * Each ``end`` has the items after the ones in ``middle``
+
+        >>> iterable = range(7)
+        >>> n = 3
+        >>> for beginning, middle, end in windowed_complete(iterable, n):
+        ...     print(beginning, middle, end)
+        () (0, 1, 2) (3, 4, 5, 6)
+        (0,) (1, 2, 3) (4, 5, 6)
+        (0, 1) (2, 3, 4) (5, 6)
+        (0, 1, 2) (3, 4, 5) (6,)
+        (0, 1, 2, 3) (4, 5, 6) ()
+
+    Note that *n* must be at least 0 and at most equal to the length of
+    *iterable*.
+
+    This function will exhaust the iterable and may require significant
+    storage.
+    """
+    if n < 0:
+        raise ValueError('n must be >= 0')
+
+    seq = tuple(iterable)
+    size = len(seq)
+
+    if n > size:
+        raise ValueError('n must be <= len(seq)')
+
+    for i in range(size - n + 1):
+        beginning = seq[:i]
+        middle = seq[i : i + n]
+        end = seq[i + n :]
+        yield beginning, middle, end
+
+
+def all_unique(iterable, key=None):
+    """
+    Returns ``True`` if all the elements of *iterable* are unique (no two
+    elements are equal).
+
+        >>> all_unique('ABCB')
+        False
+
+    If a *key* function is specified, it will be used to make comparisons.
+
+        >>> all_unique('ABCb')
+        True
+        >>> all_unique('ABCb', str.lower)
+        False
+
+    The function returns as soon as the first non-unique element is
+    encountered. Iterables with a mix of hashable and unhashable items can
+    be used, but the function will be slower for unhashable items.
+    """
+    seenset = set()
+    seenset_add = seenset.add
+    seenlist = []
+    seenlist_add = seenlist.append
+    for element in map(key, iterable) if key else iterable:
+        try:
+            if element in seenset:
+                return False
+            seenset_add(element)
+        except TypeError:
+            if element in seenlist:
+                return False
+            seenlist_add(element)
+    return True
+
+
+def nth_product(index, *args):
+    """Equivalent to ``list(product(*args))[index]``.
+
+    The products of *args* can be ordered lexicographically.
+    :func:`nth_product` computes the product at sort position *index* without
+    computing the previous products.
+
+        >>> nth_product(8, range(2), range(2), range(2), range(2))
+        (1, 0, 0, 0)
+
+    ``IndexError`` will be raised if the given *index* is invalid.
+    """
+    pools = list(map(tuple, reversed(args)))
+    ns = list(map(len, pools))
+
+    c = reduce(mul, ns)
+
+    if index < 0:
+        index += c
+
+    if not 0 <= index < c:
+        raise IndexError
+
+    result = []
+    for pool, n in zip(pools, ns):
+        result.append(pool[index % n])
+        index //= n
+
+    return tuple(reversed(result))
+
+
+def nth_permutation(iterable, r, index):
+    """Equivalent to ``list(permutations(iterable, r))[index]``.
+
+    The subsequences of *iterable* that are of length *r* where order is
+    important can be ordered lexicographically. :func:`nth_permutation`
+    computes the subsequence at sort position *index* directly, without
+    computing the previous subsequences.
+
+        >>> nth_permutation('ghijk', 2, 5)
+        ('h', 'i')
+
+    ``ValueError`` will be raised if *r* is negative or greater than the
+    length of *iterable*.
+    ``IndexError`` will be raised if the given *index* is invalid.
+    """
+    pool = list(iterable)
+    n = len(pool)
+
+    if r is None or r == n:
+        r, c = n, factorial(n)
+    elif not 0 <= r < n:
+        raise ValueError
+    else:
+        c = factorial(n) // factorial(n - r)
+
+    if index < 0:
+        index += c
+
+    if not 0 <= index < c:
+        raise IndexError
+
+    if c == 0:
+        return tuple()
+
+    result = [0] * r
+    q = index * factorial(n) // c if r < n else index
+    for d in range(1, n + 1):
+        q, i = divmod(q, d)
+        if 0 <= n - d < r:
+            result[n - d] = i
+        if q == 0:
+            break
+
+    return tuple(map(pool.pop, result))
+
+
+def value_chain(*args):
+    """Yield all arguments passed to the function in the same order in which
+    they were passed. If an argument itself is iterable then iterate over its
+    values.
+ + >>> list(value_chain(1, 2, 3, [4, 5, 6])) + [1, 2, 3, 4, 5, 6] + + Binary and text strings are not considered iterable and are emitted + as-is: + + >>> list(value_chain('12', '34', ['56', '78'])) + ['12', '34', '56', '78'] + + + Multiple levels of nesting are not flattened. + + """ + for value in args: + if isinstance(value, (str, bytes)): + yield value + continue + try: + yield from value + except TypeError: + yield value + + +def product_index(element, *args): + """Equivalent to ``list(product(*args)).index(element)`` + + The products of *args* can be ordered lexicographically. + :func:`product_index` computes the first index of *element* without + computing the previous products. + + >>> product_index([8, 2], range(10), range(5)) + 42 + + ``ValueError`` will be raised if the given *element* isn't in the product + of *args*. + """ + index = 0 + + for x, pool in zip_longest(element, args, fillvalue=_marker): + if x is _marker or pool is _marker: + raise ValueError('element is not a product of args') + + pool = tuple(pool) + index = index * len(pool) + pool.index(x) + + return index + + +def combination_index(element, iterable): + """Equivalent to ``list(combinations(iterable, r)).index(element)`` + + The subsequences of *iterable* that are of length *r* can be ordered + lexicographically. :func:`combination_index` computes the index of the + first *element*, without computing the previous combinations. + + >>> combination_index('adf', 'abcdefg') + 10 + + ``ValueError`` will be raised if the given *element* isn't one of the + combinations of *iterable*. + """ + element = enumerate(element) + k, y = next(element, (None, None)) + if k is None: + return 0 + + indexes = [] + pool = enumerate(iterable) + for n, x in pool: + if x == y: + indexes.append(n) + tmp, y = next(element, (None, None)) + if tmp is None: + break + else: + k = tmp + else: + raise ValueError('element is not a combination of iterable') + + n, _ = last(pool, default=(n, None)) + + # Python versions below 3.8 don't have math.comb + index = 1 + for i, j in enumerate(reversed(indexes), start=1): + j = n - j + if i <= j: + index += factorial(j) // (factorial(i) * factorial(j - i)) + + return factorial(n + 1) // (factorial(k + 1) * factorial(n - k)) - index + + +def permutation_index(element, iterable): + """Equivalent to ``list(permutations(iterable, r)).index(element)``` + + The subsequences of *iterable* that are of length *r* where order is + important can be ordered lexicographically. :func:`permutation_index` + computes the index of the first *element* directly, without computing + the previous permutations. + + >>> permutation_index([1, 3, 2], range(5)) + 19 + + ``ValueError`` will be raised if the given *element* isn't one of the + permutations of *iterable*. + """ + index = 0 + pool = list(iterable) + for i, x in zip(range(len(pool), -1, -1), element): + r = pool.index(x) + index = index * i + r + del pool[r] + + return index + + +class countable: + """Wrap *iterable* and keep a count of how many items have been consumed. 
+ + The ``items_seen`` attribute starts at ``0`` and increments as the iterable + is consumed: + + >>> iterable = map(str, range(10)) + >>> it = countable(iterable) + >>> it.items_seen + 0 + >>> next(it), next(it) + ('0', '1') + >>> list(it) + ['2', '3', '4', '5', '6', '7', '8', '9'] + >>> it.items_seen + 10 + """ + + def __init__(self, iterable): + self._it = iter(iterable) + self.items_seen = 0 + + def __iter__(self): + return self + + def __next__(self): + item = next(self._it) + self.items_seen += 1 + + return item + + +def chunked_even(iterable, n): + """Break *iterable* into lists of approximately length *n*. + Items are distributed such the lengths of the lists differ by at most + 1 item. + + >>> iterable = [1, 2, 3, 4, 5, 6, 7] + >>> n = 3 + >>> list(chunked_even(iterable, n)) # List lengths: 3, 2, 2 + [[1, 2, 3], [4, 5], [6, 7]] + >>> list(chunked(iterable, n)) # List lengths: 3, 3, 1 + [[1, 2, 3], [4, 5, 6], [7]] + + """ + + len_method = getattr(iterable, '__len__', None) + + if len_method is None: + return _chunked_even_online(iterable, n) + else: + return _chunked_even_finite(iterable, len_method(), n) + + +def _chunked_even_online(iterable, n): + buffer = [] + maxbuf = n + (n - 2) * (n - 1) + for x in iterable: + buffer.append(x) + if len(buffer) == maxbuf: + yield buffer[:n] + buffer = buffer[n:] + yield from _chunked_even_finite(buffer, len(buffer), n) + + +def _chunked_even_finite(iterable, N, n): + if N < 1: + return + + # Lists are either size `full_size <= n` or `partial_size = full_size - 1` + q, r = divmod(N, n) + num_lists = q + (1 if r > 0 else 0) + q, r = divmod(N, num_lists) + full_size = q + (1 if r > 0 else 0) + partial_size = full_size - 1 + num_full = N - partial_size * num_lists + num_partial = num_lists - num_full + + buffer = [] + iterator = iter(iterable) + + # Yield num_full lists of full_size + for x in iterator: + buffer.append(x) + if len(buffer) == full_size: + yield buffer + buffer = [] + num_full -= 1 + if num_full <= 0: + break + + # Yield num_partial lists of partial_size + for x in iterator: + buffer.append(x) + if len(buffer) == partial_size: + yield buffer + buffer = [] + num_partial -= 1 + + +def zip_broadcast(*objects, scalar_types=(str, bytes), strict=False): + """A version of :func:`zip` that "broadcasts" any scalar + (i.e., non-iterable) items into output tuples. + + >>> iterable_1 = [1, 2, 3] + >>> iterable_2 = ['a', 'b', 'c'] + >>> scalar = '_' + >>> list(zip_broadcast(iterable_1, iterable_2, scalar)) + [(1, 'a', '_'), (2, 'b', '_'), (3, 'c', '_')] + + The *scalar_types* keyword argument determines what types are considered + scalar. It is set to ``(str, bytes)`` by default. Set it to ``None`` to + treat strings and byte strings as iterable: + + >>> list(zip_broadcast('abc', 0, 'xyz', scalar_types=None)) + [('a', 0, 'x'), ('b', 0, 'y'), ('c', 0, 'z')] + + If the *strict* keyword argument is ``True``, then + ``UnequalIterablesError`` will be raised if any of the iterables have + different lengths. 
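+
+    If every argument is a scalar, a single tuple is emitted:
+
+        >>> list(zip_broadcast(1, 2, 3))
+        [(1, 2, 3)]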
+    """
+
+    def is_scalar(obj):
+        if scalar_types and isinstance(obj, scalar_types):
+            return True
+        try:
+            iter(obj)
+        except TypeError:
+            return True
+        else:
+            return False
+
+    size = len(objects)
+    if not size:
+        return
+
+    iterables, iterable_positions = [], []
+    scalars, scalar_positions = [], []
+    for i, obj in enumerate(objects):
+        if is_scalar(obj):
+            scalars.append(obj)
+            scalar_positions.append(i)
+        else:
+            iterables.append(iter(obj))
+            iterable_positions.append(i)
+
+    if len(scalars) == size:
+        yield tuple(objects)
+        return
+
+    zipper = _zip_equal if strict else zip
+    for item in zipper(*iterables):
+        new_item = [None] * size
+
+        for i, elem in zip(iterable_positions, item):
+            new_item[i] = elem
+
+        for i, elem in zip(scalar_positions, scalars):
+            new_item[i] = elem
+
+        yield tuple(new_item)
+
+
+def unique_in_window(iterable, n, key=None):
+    """Yield the items from *iterable* that haven't been seen recently.
+    *n* is the size of the lookback window.
+
+        >>> iterable = [0, 1, 0, 2, 3, 0]
+        >>> n = 3
+        >>> list(unique_in_window(iterable, n))
+        [0, 1, 2, 3, 0]
+
+    The *key* function, if provided, will be used to determine uniqueness:
+
+        >>> list(unique_in_window('abAcda', 3, key=lambda x: x.lower()))
+        ['a', 'b', 'c', 'd', 'a']
+
+    The items in *iterable* must be hashable.
+
+    """
+    if n <= 0:
+        raise ValueError('n must be greater than 0')
+
+    window = deque(maxlen=n)
+    uniques = set()
+    use_key = key is not None
+
+    for item in iterable:
+        k = key(item) if use_key else item
+        if k in uniques:
+            continue
+
+        if len(uniques) == n:
+            uniques.discard(window[0])
+
+        uniques.add(k)
+        window.append(k)
+
+        yield item
+
+
+def duplicates_everseen(iterable, key=None):
+    """Yield duplicate elements after their first appearance.
+
+        >>> list(duplicates_everseen('mississippi'))
+        ['s', 'i', 's', 's', 'i', 'p', 'i']
+        >>> list(duplicates_everseen('AaaBbbCccAaa', str.lower))
+        ['a', 'a', 'b', 'b', 'c', 'c', 'A', 'a', 'a']
+
+    This function is analogous to :func:`unique_everseen` and is subject to
+    the same performance considerations.
+
+    """
+    seen_set = set()
+    seen_list = []
+    use_key = key is not None
+
+    for element in iterable:
+        k = key(element) if use_key else element
+        try:
+            if k not in seen_set:
+                seen_set.add(k)
+            else:
+                yield element
+        except TypeError:
+            if k not in seen_list:
+                seen_list.append(k)
+            else:
+                yield element
+
+
+def duplicates_justseen(iterable, key=None):
+    """Yields serially-duplicate elements after their first appearance.
+
+        >>> list(duplicates_justseen('mississippi'))
+        ['s', 's', 'p']
+        >>> list(duplicates_justseen('AaaBbbCccAaa', str.lower))
+        ['a', 'a', 'b', 'b', 'c', 'c', 'a', 'a']
+
+    This function is analogous to :func:`unique_justseen`.
+
+    """
+    return flatten(
+        map(
+            lambda group_tuple: islice_extended(group_tuple[1])[1:],
+            groupby(iterable, key),
+        )
+    )
+
+
+def minmax(iterable_or_value, *others, key=None, default=_marker):
+    """Returns both the smallest and largest items in an iterable
+    or the largest of two or more arguments.
+
+        >>> minmax([3, 1, 5])
+        (1, 5)
+
+        >>> minmax(4, 2, 6)
+        (2, 6)
+
+    If a *key* function is provided, it will be used to transform the input
+    items for comparison.
+
+        >>> minmax([5, 30], key=str)  # '30' sorts before '5'
+        (30, 5)
+
+    If a *default* value is provided, it will be returned if there are no
+    input items.
+
+        >>> minmax([], default=(0, 0))
+        (0, 0)
+
+    Otherwise ``ValueError`` is raised.
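+
+    The input is consumed in a single pass, so one-shot iterators work too:
+
+        >>> minmax(iter([3, 1, 5]))
+        (1, 5)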
+ + This function is based on the + `recipe `__ by + Raymond Hettinger and takes care to minimize the number of comparisons + performed. + """ + iterable = (iterable_or_value, *others) if others else iterable_or_value + + it = iter(iterable) + + try: + lo = hi = next(it) + except StopIteration as e: + if default is _marker: + raise ValueError( + '`minmax()` argument is an empty iterable. ' + 'Provide a `default` value to suppress this error.' + ) from e + return default + + # Different branches depending on the presence of key. This saves a lot + # of unimportant copies which would slow the "key=None" branch + # significantly down. + if key is None: + for x, y in zip_longest(it, it, fillvalue=lo): + if y < x: + x, y = y, x + if x < lo: + lo = x + if hi < y: + hi = y + + else: + lo_key = hi_key = key(lo) + + for x, y in zip_longest(it, it, fillvalue=lo): + x_key, y_key = key(x), key(y) + + if y_key < x_key: + x, y, x_key, y_key = y, x, y_key, x_key + if x_key < lo_key: + lo, lo_key = x, x_key + if hi_key < y_key: + hi, hi_key = y, y_key + + return lo, hi + + +def constrained_batches( + iterable, max_size, max_count=None, get_len=len, strict=True +): + """Yield batches of items from *iterable* with a combined size limited by + *max_size*. + + >>> iterable = [b'12345', b'123', b'12345678', b'1', b'1', b'12', b'1'] + >>> list(constrained_batches(iterable, 10)) + [(b'12345', b'123'), (b'12345678', b'1', b'1'), (b'12', b'1')] + + If a *max_count* is supplied, the number of items per batch is also + limited: + + >>> iterable = [b'12345', b'123', b'12345678', b'1', b'1', b'12', b'1'] + >>> list(constrained_batches(iterable, 10, max_count = 2)) + [(b'12345', b'123'), (b'12345678', b'1'), (b'1', b'12'), (b'1',)] + + If a *get_len* function is supplied, use that instead of :func:`len` to + determine item size. + + If *strict* is ``True``, raise ``ValueError`` if any single item is bigger + than *max_size*. Otherwise, allow single items to exceed *max_size*. + """ + if max_size <= 0: + raise ValueError('maximum size must be greater than zero') + + batch = [] + batch_size = 0 + batch_count = 0 + for item in iterable: + item_len = get_len(item) + if strict and item_len > max_size: + raise ValueError('item size exceeds maximum size') + + reached_count = batch_count == max_count + reached_size = item_len + batch_size > max_size + if batch_count and (reached_size or reached_count): + yield tuple(batch) + batch.clear() + batch_size = 0 + batch_count = 0 + + batch.append(item) + batch_size += item_len + batch_count += 1 + + if batch: + yield tuple(batch) + + +def gray_product(*iterables): + """Like :func:`itertools.product`, but return tuples in an order such + that only one element in the generated tuple changes from one iteration + to the next. + + >>> list(gray_product('AB','CD')) + [('A', 'C'), ('B', 'C'), ('B', 'D'), ('A', 'D')] + + This function consumes all of the input iterables before producing output. + If any of the input iterables have fewer than two items, ``ValueError`` + is raised. + + For information on the algorithm, see + `this section `__ + of Donald Knuth's *The Art of Computer Programming*. + """ + all_iterables = tuple(tuple(x) for x in iterables) + iterable_count = len(all_iterables) + for iterable in all_iterables: + if len(iterable) < 2: + raise ValueError("each iterable must have two or more items") + + # This is based on "Algorithm H" from section 7.2.1.1, page 20. 
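+    # The algorithm is "loopless": each iteration changes exactly one
+    # coordinate a[j] by +/-1, which is what gives the output its Gray-code
+    # property.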
+ # a holds the indexes of the source iterables for the n-tuple to be yielded + # f is the array of "focus pointers" + # o is the array of "directions" + a = [0] * iterable_count + f = list(range(iterable_count + 1)) + o = [1] * iterable_count + while True: + yield tuple(all_iterables[i][a[i]] for i in range(iterable_count)) + j = f[0] + f[0] = 0 + if j == iterable_count: + break + a[j] = a[j] + o[j] + if a[j] == 0 or a[j] == len(all_iterables[j]) - 1: + o[j] = -o[j] + f[j] = f[j + 1] + f[j + 1] = j + 1 diff --git a/lib/pkg_resources/_vendor/more_itertools/more.pyi b/lib/pkg_resources/_vendor/more_itertools/more.pyi new file mode 100644 index 00000000..75c5232c --- /dev/null +++ b/lib/pkg_resources/_vendor/more_itertools/more.pyi @@ -0,0 +1,666 @@ +"""Stubs for more_itertools.more""" +from __future__ import annotations + +from types import TracebackType +from typing import ( + Any, + Callable, + Container, + ContextManager, + Generic, + Hashable, + Iterable, + Iterator, + overload, + Reversible, + Sequence, + Sized, + Type, + TypeVar, + type_check_only, +) +from typing_extensions import Protocol + +# Type and type variable definitions +_T = TypeVar('_T') +_T1 = TypeVar('_T1') +_T2 = TypeVar('_T2') +_U = TypeVar('_U') +_V = TypeVar('_V') +_W = TypeVar('_W') +_T_co = TypeVar('_T_co', covariant=True) +_GenFn = TypeVar('_GenFn', bound=Callable[..., Iterator[object]]) +_Raisable = BaseException | Type[BaseException] + +@type_check_only +class _SizedIterable(Protocol[_T_co], Sized, Iterable[_T_co]): ... + +@type_check_only +class _SizedReversible(Protocol[_T_co], Sized, Reversible[_T_co]): ... + +@type_check_only +class _SupportsSlicing(Protocol[_T_co]): + def __getitem__(self, __k: slice) -> _T_co: ... + +def chunked( + iterable: Iterable[_T], n: int | None, strict: bool = ... +) -> Iterator[list[_T]]: ... +@overload +def first(iterable: Iterable[_T]) -> _T: ... +@overload +def first(iterable: Iterable[_T], default: _U) -> _T | _U: ... +@overload +def last(iterable: Iterable[_T]) -> _T: ... +@overload +def last(iterable: Iterable[_T], default: _U) -> _T | _U: ... +@overload +def nth_or_last(iterable: Iterable[_T], n: int) -> _T: ... +@overload +def nth_or_last(iterable: Iterable[_T], n: int, default: _U) -> _T | _U: ... + +class peekable(Generic[_T], Iterator[_T]): + def __init__(self, iterable: Iterable[_T]) -> None: ... + def __iter__(self) -> peekable[_T]: ... + def __bool__(self) -> bool: ... + @overload + def peek(self) -> _T: ... + @overload + def peek(self, default: _U) -> _T | _U: ... + def prepend(self, *items: _T) -> None: ... + def __next__(self) -> _T: ... + @overload + def __getitem__(self, index: int) -> _T: ... + @overload + def __getitem__(self, index: slice) -> list[_T]: ... + +def consumer(func: _GenFn) -> _GenFn: ... +def ilen(iterable: Iterable[object]) -> int: ... +def iterate(func: Callable[[_T], _T], start: _T) -> Iterator[_T]: ... +def with_iter( + context_manager: ContextManager[Iterable[_T]], +) -> Iterator[_T]: ... +def one( + iterable: Iterable[_T], + too_short: _Raisable | None = ..., + too_long: _Raisable | None = ..., +) -> _T: ... +def raise_(exception: _Raisable, *args: Any) -> None: ... +def strictly_n( + iterable: Iterable[_T], + n: int, + too_short: _GenFn | None = ..., + too_long: _GenFn | None = ..., +) -> list[_T]: ... +def distinct_permutations( + iterable: Iterable[_T], r: int | None = ... +) -> Iterator[tuple[_T, ...]]: ... +def intersperse( + e: _U, iterable: Iterable[_T], n: int = ... +) -> Iterator[_T | _U]: ... 
+def unique_to_each(*iterables: Iterable[_T]) -> list[list[_T]]: ... +@overload +def windowed( + seq: Iterable[_T], n: int, *, step: int = ... +) -> Iterator[tuple[_T | None, ...]]: ... +@overload +def windowed( + seq: Iterable[_T], n: int, fillvalue: _U, step: int = ... +) -> Iterator[tuple[_T | _U, ...]]: ... +def substrings(iterable: Iterable[_T]) -> Iterator[tuple[_T, ...]]: ... +def substrings_indexes( + seq: Sequence[_T], reverse: bool = ... +) -> Iterator[tuple[Sequence[_T], int, int]]: ... + +class bucket(Generic[_T, _U], Container[_U]): + def __init__( + self, + iterable: Iterable[_T], + key: Callable[[_T], _U], + validator: Callable[[object], object] | None = ..., + ) -> None: ... + def __contains__(self, value: object) -> bool: ... + def __iter__(self) -> Iterator[_U]: ... + def __getitem__(self, value: object) -> Iterator[_T]: ... + +def spy( + iterable: Iterable[_T], n: int = ... +) -> tuple[list[_T], Iterator[_T]]: ... +def interleave(*iterables: Iterable[_T]) -> Iterator[_T]: ... +def interleave_longest(*iterables: Iterable[_T]) -> Iterator[_T]: ... +def interleave_evenly( + iterables: list[Iterable[_T]], lengths: list[int] | None = ... +) -> Iterator[_T]: ... +def collapse( + iterable: Iterable[Any], + base_type: type | None = ..., + levels: int | None = ..., +) -> Iterator[Any]: ... +@overload +def side_effect( + func: Callable[[_T], object], + iterable: Iterable[_T], + chunk_size: None = ..., + before: Callable[[], object] | None = ..., + after: Callable[[], object] | None = ..., +) -> Iterator[_T]: ... +@overload +def side_effect( + func: Callable[[list[_T]], object], + iterable: Iterable[_T], + chunk_size: int, + before: Callable[[], object] | None = ..., + after: Callable[[], object] | None = ..., +) -> Iterator[_T]: ... +def sliced( + seq: _SupportsSlicing[_T], n: int, strict: bool = ... +) -> Iterator[_T]: ... +def split_at( + iterable: Iterable[_T], + pred: Callable[[_T], object], + maxsplit: int = ..., + keep_separator: bool = ..., +) -> Iterator[list[_T]]: ... +def split_before( + iterable: Iterable[_T], pred: Callable[[_T], object], maxsplit: int = ... +) -> Iterator[list[_T]]: ... +def split_after( + iterable: Iterable[_T], pred: Callable[[_T], object], maxsplit: int = ... +) -> Iterator[list[_T]]: ... +def split_when( + iterable: Iterable[_T], + pred: Callable[[_T, _T], object], + maxsplit: int = ..., +) -> Iterator[list[_T]]: ... +def split_into( + iterable: Iterable[_T], sizes: Iterable[int | None] +) -> Iterator[list[_T]]: ... +@overload +def padded( + iterable: Iterable[_T], + *, + n: int | None = ..., + next_multiple: bool = ..., +) -> Iterator[_T | None]: ... +@overload +def padded( + iterable: Iterable[_T], + fillvalue: _U, + n: int | None = ..., + next_multiple: bool = ..., +) -> Iterator[_T | _U]: ... +@overload +def repeat_last(iterable: Iterable[_T]) -> Iterator[_T]: ... +@overload +def repeat_last(iterable: Iterable[_T], default: _U) -> Iterator[_T | _U]: ... +def distribute(n: int, iterable: Iterable[_T]) -> list[Iterator[_T]]: ... +@overload +def stagger( + iterable: Iterable[_T], + offsets: _SizedIterable[int] = ..., + longest: bool = ..., +) -> Iterator[tuple[_T | None, ...]]: ... +@overload +def stagger( + iterable: Iterable[_T], + offsets: _SizedIterable[int] = ..., + longest: bool = ..., + fillvalue: _U = ..., +) -> Iterator[tuple[_T | _U, ...]]: ... + +class UnequalIterablesError(ValueError): + def __init__(self, details: tuple[int, int, int] | None = ...) -> None: ... 
+ +@overload +def zip_equal(__iter1: Iterable[_T1]) -> Iterator[tuple[_T1]]: ... +@overload +def zip_equal( + __iter1: Iterable[_T1], __iter2: Iterable[_T2] +) -> Iterator[tuple[_T1, _T2]]: ... +@overload +def zip_equal( + __iter1: Iterable[_T], + __iter2: Iterable[_T], + __iter3: Iterable[_T], + *iterables: Iterable[_T], +) -> Iterator[tuple[_T, ...]]: ... +@overload +def zip_offset( + __iter1: Iterable[_T1], + *, + offsets: _SizedIterable[int], + longest: bool = ..., + fillvalue: None = None, +) -> Iterator[tuple[_T1 | None]]: ... +@overload +def zip_offset( + __iter1: Iterable[_T1], + __iter2: Iterable[_T2], + *, + offsets: _SizedIterable[int], + longest: bool = ..., + fillvalue: None = None, +) -> Iterator[tuple[_T1 | None, _T2 | None]]: ... +@overload +def zip_offset( + __iter1: Iterable[_T], + __iter2: Iterable[_T], + __iter3: Iterable[_T], + *iterables: Iterable[_T], + offsets: _SizedIterable[int], + longest: bool = ..., + fillvalue: None = None, +) -> Iterator[tuple[_T | None, ...]]: ... +@overload +def zip_offset( + __iter1: Iterable[_T1], + *, + offsets: _SizedIterable[int], + longest: bool = ..., + fillvalue: _U, +) -> Iterator[tuple[_T1 | _U]]: ... +@overload +def zip_offset( + __iter1: Iterable[_T1], + __iter2: Iterable[_T2], + *, + offsets: _SizedIterable[int], + longest: bool = ..., + fillvalue: _U, +) -> Iterator[tuple[_T1 | _U, _T2 | _U]]: ... +@overload +def zip_offset( + __iter1: Iterable[_T], + __iter2: Iterable[_T], + __iter3: Iterable[_T], + *iterables: Iterable[_T], + offsets: _SizedIterable[int], + longest: bool = ..., + fillvalue: _U, +) -> Iterator[tuple[_T | _U, ...]]: ... +def sort_together( + iterables: Iterable[Iterable[_T]], + key_list: Iterable[int] = ..., + key: Callable[..., Any] | None = ..., + reverse: bool = ..., +) -> list[tuple[_T, ...]]: ... +def unzip(iterable: Iterable[Sequence[_T]]) -> tuple[Iterator[_T], ...]: ... +def divide(n: int, iterable: Iterable[_T]) -> list[Iterator[_T]]: ... +def always_iterable( + obj: object, + base_type: type | tuple[type | tuple[Any, ...], ...] | None = ..., +) -> Iterator[Any]: ... +def adjacent( + predicate: Callable[[_T], bool], + iterable: Iterable[_T], + distance: int = ..., +) -> Iterator[tuple[bool, _T]]: ... +@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: None = None, + valuefunc: None = None, + reducefunc: None = None, +) -> Iterator[tuple[_T, Iterator[_T]]]: ... +@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: None, + reducefunc: None, +) -> Iterator[tuple[_U, Iterator[_T]]]: ... +@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: None, + valuefunc: Callable[[_T], _V], + reducefunc: None, +) -> Iterable[tuple[_T, Iterable[_V]]]: ... +@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: Callable[[_T], _V], + reducefunc: None, +) -> Iterable[tuple[_U, Iterator[_V]]]: ... +@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: None, + valuefunc: None, + reducefunc: Callable[[Iterator[_T]], _W], +) -> Iterable[tuple[_T, _W]]: ... +@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: None, + reducefunc: Callable[[Iterator[_T]], _W], +) -> Iterable[tuple[_U, _W]]: ... +@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: None, + valuefunc: Callable[[_T], _V], + reducefunc: Callable[[Iterable[_V]], _W], +) -> Iterable[tuple[_T, _W]]: ... 
+@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: Callable[[_T], _V], + reducefunc: Callable[[Iterable[_V]], _W], +) -> Iterable[tuple[_U, _W]]: ... + +class numeric_range(Generic[_T, _U], Sequence[_T], Hashable, Reversible[_T]): + @overload + def __init__(self, __stop: _T) -> None: ... + @overload + def __init__(self, __start: _T, __stop: _T) -> None: ... + @overload + def __init__(self, __start: _T, __stop: _T, __step: _U) -> None: ... + def __bool__(self) -> bool: ... + def __contains__(self, elem: object) -> bool: ... + def __eq__(self, other: object) -> bool: ... + @overload + def __getitem__(self, key: int) -> _T: ... + @overload + def __getitem__(self, key: slice) -> numeric_range[_T, _U]: ... + def __hash__(self) -> int: ... + def __iter__(self) -> Iterator[_T]: ... + def __len__(self) -> int: ... + def __reduce__( + self, + ) -> tuple[Type[numeric_range[_T, _U]], tuple[_T, _T, _U]]: ... + def __repr__(self) -> str: ... + def __reversed__(self) -> Iterator[_T]: ... + def count(self, value: _T) -> int: ... + def index(self, value: _T) -> int: ... # type: ignore + +def count_cycle( + iterable: Iterable[_T], n: int | None = ... +) -> Iterable[tuple[int, _T]]: ... +def mark_ends( + iterable: Iterable[_T], +) -> Iterable[tuple[bool, bool, _T]]: ... +def locate( + iterable: Iterable[object], + pred: Callable[..., Any] = ..., + window_size: int | None = ..., +) -> Iterator[int]: ... +def lstrip( + iterable: Iterable[_T], pred: Callable[[_T], object] +) -> Iterator[_T]: ... +def rstrip( + iterable: Iterable[_T], pred: Callable[[_T], object] +) -> Iterator[_T]: ... +def strip( + iterable: Iterable[_T], pred: Callable[[_T], object] +) -> Iterator[_T]: ... + +class islice_extended(Generic[_T], Iterator[_T]): + def __init__(self, iterable: Iterable[_T], *args: int | None) -> None: ... + def __iter__(self) -> islice_extended[_T]: ... + def __next__(self) -> _T: ... + def __getitem__(self, index: slice) -> islice_extended[_T]: ... + +def always_reversible(iterable: Iterable[_T]) -> Iterator[_T]: ... +def consecutive_groups( + iterable: Iterable[_T], ordering: Callable[[_T], int] = ... +) -> Iterator[Iterator[_T]]: ... +@overload +def difference( + iterable: Iterable[_T], + func: Callable[[_T, _T], _U] = ..., + *, + initial: None = ..., +) -> Iterator[_T | _U]: ... +@overload +def difference( + iterable: Iterable[_T], func: Callable[[_T, _T], _U] = ..., *, initial: _U +) -> Iterator[_U]: ... + +class SequenceView(Generic[_T], Sequence[_T]): + def __init__(self, target: Sequence[_T]) -> None: ... + @overload + def __getitem__(self, index: int) -> _T: ... + @overload + def __getitem__(self, index: slice) -> Sequence[_T]: ... + def __len__(self) -> int: ... + +class seekable(Generic[_T], Iterator[_T]): + def __init__( + self, iterable: Iterable[_T], maxlen: int | None = ... + ) -> None: ... + def __iter__(self) -> seekable[_T]: ... + def __next__(self) -> _T: ... + def __bool__(self) -> bool: ... + @overload + def peek(self) -> _T: ... + @overload + def peek(self, default: _U) -> _T | _U: ... + def elements(self) -> SequenceView[_T]: ... + def seek(self, index: int) -> None: ... + +class run_length: + @staticmethod + def encode(iterable: Iterable[_T]) -> Iterator[tuple[_T, int]]: ... + @staticmethod + def decode(iterable: Iterable[tuple[_T, int]]) -> Iterator[_T]: ... + +def exactly_n( + iterable: Iterable[_T], n: int, predicate: Callable[[_T], object] = ... +) -> bool: ... 
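[Editor's note: two of the classes stubbed above, `run_length` and `seekable`, are easy to sanity-check at runtime; a short sketch under the same importability assumption as before:]

```python
from more_itertools import run_length, seekable

# run_length: classic run-length-encoding round-trip.
encoded = list(run_length.encode("aaabbc"))
print(encoded)                              # [('a', 3), ('b', 2), ('c', 1)]
print("".join(run_length.decode(encoded)))  # aaabbc

# seekable: wraps an iterator and caches elements so it can rewind.
s = seekable(iter("xyz"))
print(next(s), next(s))                     # x y
s.seek(0)                                   # rewind over the cached elements
print(list(s))                              # ['x', 'y', 'z']
```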
+def circular_shifts(iterable: Iterable[_T]) -> list[tuple[_T, ...]]: ... +def make_decorator( + wrapping_func: Callable[..., _U], result_index: int = ... +) -> Callable[..., Callable[[Callable[..., Any]], Callable[..., _U]]]: ... +@overload +def map_reduce( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: None = ..., + reducefunc: None = ..., +) -> dict[_U, list[_T]]: ... +@overload +def map_reduce( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: Callable[[_T], _V], + reducefunc: None = ..., +) -> dict[_U, list[_V]]: ... +@overload +def map_reduce( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: None = ..., + reducefunc: Callable[[list[_T]], _W] = ..., +) -> dict[_U, _W]: ... +@overload +def map_reduce( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: Callable[[_T], _V], + reducefunc: Callable[[list[_V]], _W], +) -> dict[_U, _W]: ... +def rlocate( + iterable: Iterable[_T], + pred: Callable[..., object] = ..., + window_size: int | None = ..., +) -> Iterator[int]: ... +def replace( + iterable: Iterable[_T], + pred: Callable[..., object], + substitutes: Iterable[_U], + count: int | None = ..., + window_size: int = ..., +) -> Iterator[_T | _U]: ... +def partitions(iterable: Iterable[_T]) -> Iterator[list[list[_T]]]: ... +def set_partitions( + iterable: Iterable[_T], k: int | None = ... +) -> Iterator[list[list[_T]]]: ... + +class time_limited(Generic[_T], Iterator[_T]): + def __init__( + self, limit_seconds: float, iterable: Iterable[_T] + ) -> None: ... + def __iter__(self) -> islice_extended[_T]: ... + def __next__(self) -> _T: ... + +@overload +def only( + iterable: Iterable[_T], *, too_long: _Raisable | None = ... +) -> _T | None: ... +@overload +def only( + iterable: Iterable[_T], default: _U, too_long: _Raisable | None = ... +) -> _T | _U: ... +def ichunked(iterable: Iterable[_T], n: int) -> Iterator[Iterator[_T]]: ... +def distinct_combinations( + iterable: Iterable[_T], r: int +) -> Iterator[tuple[_T, ...]]: ... +def filter_except( + validator: Callable[[Any], object], + iterable: Iterable[_T], + *exceptions: Type[BaseException], +) -> Iterator[_T]: ... +def map_except( + function: Callable[[Any], _U], + iterable: Iterable[_T], + *exceptions: Type[BaseException], +) -> Iterator[_U]: ... +def map_if( + iterable: Iterable[Any], + pred: Callable[[Any], bool], + func: Callable[[Any], Any], + func_else: Callable[[Any], Any] | None = ..., +) -> Iterator[Any]: ... +def sample( + iterable: Iterable[_T], + k: int, + weights: Iterable[float] | None = ..., +) -> list[_T]: ... +def is_sorted( + iterable: Iterable[_T], + key: Callable[[_T], _U] | None = ..., + reverse: bool = False, + strict: bool = False, +) -> bool: ... + +class AbortThread(BaseException): + pass + +class callback_iter(Generic[_T], Iterator[_T]): + def __init__( + self, + func: Callable[..., Any], + callback_kwd: str = ..., + wait_seconds: float = ..., + ) -> None: ... + def __enter__(self) -> callback_iter[_T]: ... + def __exit__( + self, + exc_type: Type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> bool | None: ... + def __iter__(self) -> callback_iter[_T]: ... + def __next__(self) -> _T: ... + def _reader(self) -> Iterator[_T]: ... + @property + def done(self) -> bool: ... + @property + def result(self) -> Any: ... + +def windowed_complete( + iterable: Iterable[_T], n: int +) -> Iterator[tuple[_T, ...]]: ... +def all_unique( + iterable: Iterable[_T], key: Callable[[_T], _U] | None = ... 
+) -> bool: ... +def nth_product(index: int, *args: Iterable[_T]) -> tuple[_T, ...]: ... +def nth_permutation( + iterable: Iterable[_T], r: int, index: int +) -> tuple[_T, ...]: ... +def value_chain(*args: _T | Iterable[_T]) -> Iterable[_T]: ... +def product_index(element: Iterable[_T], *args: Iterable[_T]) -> int: ... +def combination_index( + element: Iterable[_T], iterable: Iterable[_T] +) -> int: ... +def permutation_index( + element: Iterable[_T], iterable: Iterable[_T] +) -> int: ... +def repeat_each(iterable: Iterable[_T], n: int = ...) -> Iterator[_T]: ... + +class countable(Generic[_T], Iterator[_T]): + def __init__(self, iterable: Iterable[_T]) -> None: ... + def __iter__(self) -> countable[_T]: ... + def __next__(self) -> _T: ... + +def chunked_even(iterable: Iterable[_T], n: int) -> Iterator[list[_T]]: ... +def zip_broadcast( + *objects: _T | Iterable[_T], + scalar_types: type | tuple[type | tuple[Any, ...], ...] | None = ..., + strict: bool = ..., +) -> Iterable[tuple[_T, ...]]: ... +def unique_in_window( + iterable: Iterable[_T], n: int, key: Callable[[_T], _U] | None = ... +) -> Iterator[_T]: ... +def duplicates_everseen( + iterable: Iterable[_T], key: Callable[[_T], _U] | None = ... +) -> Iterator[_T]: ... +def duplicates_justseen( + iterable: Iterable[_T], key: Callable[[_T], _U] | None = ... +) -> Iterator[_T]: ... + +class _SupportsLessThan(Protocol): + def __lt__(self, __other: Any) -> bool: ... + +_SupportsLessThanT = TypeVar("_SupportsLessThanT", bound=_SupportsLessThan) + +@overload +def minmax( + iterable_or_value: Iterable[_SupportsLessThanT], *, key: None = None +) -> tuple[_SupportsLessThanT, _SupportsLessThanT]: ... +@overload +def minmax( + iterable_or_value: Iterable[_T], *, key: Callable[[_T], _SupportsLessThan] +) -> tuple[_T, _T]: ... +@overload +def minmax( + iterable_or_value: Iterable[_SupportsLessThanT], + *, + key: None = None, + default: _U, +) -> _U | tuple[_SupportsLessThanT, _SupportsLessThanT]: ... +@overload +def minmax( + iterable_or_value: Iterable[_T], + *, + key: Callable[[_T], _SupportsLessThan], + default: _U, +) -> _U | tuple[_T, _T]: ... +@overload +def minmax( + iterable_or_value: _SupportsLessThanT, + __other: _SupportsLessThanT, + *others: _SupportsLessThanT, +) -> tuple[_SupportsLessThanT, _SupportsLessThanT]: ... +@overload +def minmax( + iterable_or_value: _T, + __other: _T, + *others: _T, + key: Callable[[_T], _SupportsLessThan], +) -> tuple[_T, _T]: ... +def longest_common_prefix( + iterables: Iterable[Iterable[_T]], +) -> Iterator[_T]: ... +def iequals(*iterables: Iterable[object]) -> bool: ... +def constrained_batches( + iterable: Iterable[object], + max_size: int, + max_count: int | None = ..., + get_len: Callable[[_T], object] = ..., + strict: bool = ..., +) -> Iterator[tuple[_T]]: ... +def gray_product(*iterables: Iterable[_T]) -> Iterator[tuple[_T, ...]]: ... diff --git a/lib/pkg_resources/_vendor/more_itertools/py.typed b/lib/pkg_resources/_vendor/more_itertools/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/_vendor/more_itertools/recipes.py b/lib/pkg_resources/_vendor/more_itertools/recipes.py new file mode 100644 index 00000000..3facc2e3 --- /dev/null +++ b/lib/pkg_resources/_vendor/more_itertools/recipes.py @@ -0,0 +1,930 @@ +"""Imported from the recipes section of the itertools documentation. + +All functions taken from the recipes section of the itertools library docs +[1]_. +Some backward-compatible usability improvements have been made. + +.. 
[1] http://docs.python.org/library/itertools.html#recipes
+
+"""
+import math
+import operator
+import warnings
+
+from collections import deque
+from collections.abc import Sized
+from functools import reduce
+from itertools import (
+    chain,
+    combinations,
+    compress,
+    count,
+    cycle,
+    groupby,
+    islice,
+    product,
+    repeat,
+    starmap,
+    tee,
+    zip_longest,
+)
+from random import randrange, sample, choice
+from sys import hexversion
+
+__all__ = [
+    'all_equal',
+    'batched',
+    'before_and_after',
+    'consume',
+    'convolve',
+    'dotproduct',
+    'first_true',
+    'factor',
+    'flatten',
+    'grouper',
+    'iter_except',
+    'iter_index',
+    'matmul',
+    'ncycles',
+    'nth',
+    'nth_combination',
+    'padnone',
+    'pad_none',
+    'pairwise',
+    'partition',
+    'polynomial_from_roots',
+    'powerset',
+    'prepend',
+    'quantify',
+    'random_combination_with_replacement',
+    'random_combination',
+    'random_permutation',
+    'random_product',
+    'repeatfunc',
+    'roundrobin',
+    'sieve',
+    'sliding_window',
+    'subslices',
+    'tabulate',
+    'tail',
+    'take',
+    'transpose',
+    'triplewise',
+    'unique_everseen',
+    'unique_justseen',
+]
+
+_marker = object()
+
+
+def take(n, iterable):
+    """Return first *n* items of the iterable as a list.
+
+    >>> take(3, range(10))
+    [0, 1, 2]
+
+    If there are fewer than *n* items in the iterable, all of them are
+    returned.
+
+    >>> take(10, range(3))
+    [0, 1, 2]
+
+    """
+    return list(islice(iterable, n))
+
+
+def tabulate(function, start=0):
+    """Return an iterator over the results of ``function(start)``,
+    ``function(start + 1)``, ``function(start + 2)``...
+
+    *function* should be a function that accepts one integer argument.
+
+    If *start* is not specified it defaults to 0. It will be incremented each
+    time the iterator is advanced.
+
+    >>> square = lambda x: x ** 2
+    >>> iterator = tabulate(square, -3)
+    >>> take(4, iterator)
+    [9, 4, 1, 0]
+
+    """
+    return map(function, count(start))
+
+
+def tail(n, iterable):
+    """Return an iterator over the last *n* items of *iterable*.
+
+    >>> t = tail(3, 'ABCDEFG')
+    >>> list(t)
+    ['E', 'F', 'G']
+
+    """
+    # If the given iterable has a length, then we can use islice to get its
+    # final elements. Note that if the iterable is not actually Iterable,
+    # either islice or deque will throw a TypeError. This is why we don't
+    # check if it is Iterable.
+    if isinstance(iterable, Sized):
+        yield from islice(iterable, max(0, len(iterable) - n), None)
+    else:
+        yield from iter(deque(iterable, maxlen=n))
+
+
+def consume(iterator, n=None):
+    """Advance *iterator* by *n* steps. If *n* is ``None``, consume it
+    entirely.
+
+    Efficiently exhausts an iterator without returning values. Defaults to
+    consuming the whole iterator, but an optional second argument may be
+    provided to limit consumption.
+
+    >>> i = (x for x in range(10))
+    >>> next(i)
+    0
+    >>> consume(i, 3)
+    >>> next(i)
+    4
+    >>> consume(i)
+    >>> next(i)
+    Traceback (most recent call last):
+      File "<stdin>", line 1, in <module>
+    StopIteration
+
+    If the iterator has fewer items remaining than the provided limit, the
+    whole iterator will be consumed.
+
+    >>> i = (x for x in range(3))
+    >>> consume(i, 5)
+    >>> next(i)
+    Traceback (most recent call last):
+      File "<stdin>", line 1, in <module>
+    StopIteration
+
+    """
+    # Use functions that consume iterators at C speed.
+    if n is None:
+        # feed the entire iterator into a zero-length deque
+        deque(iterator, maxlen=0)
+    else:
+        # advance to the empty slice starting at position n
+        next(islice(iterator, n, n), None)
+
+
+def nth(iterable, n, default=None):
+    """Returns the nth item or a default value.
+
+    >>> l = range(10)
+    >>> nth(l, 3)
+    3
+    >>> nth(l, 20, "zebra")
+    'zebra'
+
+    """
+    return next(islice(iterable, n, None), default)
+
+
+def all_equal(iterable):
+    """
+    Returns ``True`` if all the elements are equal to each other.
+
+    >>> all_equal('aaaa')
+    True
+    >>> all_equal('aaab')
+    False
+
+    """
+    g = groupby(iterable)
+    return next(g, True) and not next(g, False)
+
+
+def quantify(iterable, pred=bool):
+    """Return how many times the predicate is true.
+
+    >>> quantify([True, False, True])
+    2
+
+    """
+    return sum(map(pred, iterable))
+
+
+def pad_none(iterable):
+    """Returns the sequence of elements and then returns ``None`` indefinitely.
+
+    >>> take(5, pad_none(range(3)))
+    [0, 1, 2, None, None]
+
+    Useful for emulating the behavior of the built-in :func:`map` function.
+
+    See also :func:`padded`.
+
+    """
+    return chain(iterable, repeat(None))
+
+
+padnone = pad_none
+
+
+def ncycles(iterable, n):
+    """Returns the sequence elements *n* times.
+
+    >>> list(ncycles(["a", "b"], 3))
+    ['a', 'b', 'a', 'b', 'a', 'b']
+
+    """
+    return chain.from_iterable(repeat(tuple(iterable), n))
+
+
+def dotproduct(vec1, vec2):
+    """Returns the dot product of the two iterables.
+
+    >>> dotproduct([10, 10], [20, 20])
+    400
+
+    """
+    return sum(map(operator.mul, vec1, vec2))
+
+
+def flatten(listOfLists):
+    """Return an iterator flattening one level of nesting in a list of lists.
+
+    >>> list(flatten([[0, 1], [2, 3]]))
+    [0, 1, 2, 3]
+
+    See also :func:`collapse`, which can flatten multiple levels of nesting.
+
+    """
+    return chain.from_iterable(listOfLists)
+
+
+def repeatfunc(func, times=None, *args):
+    """Call *func* with *args* repeatedly, returning an iterable over the
+    results.
+
+    If *times* is specified, the iterable will terminate after that many
+    repetitions:
+
+    >>> from operator import add
+    >>> times = 4
+    >>> args = 3, 5
+    >>> list(repeatfunc(add, times, *args))
+    [8, 8, 8, 8]
+
+    If *times* is ``None`` the iterable will not terminate:
+
+    >>> from random import randrange
+    >>> times = None
+    >>> args = 1, 11
+    >>> take(6, repeatfunc(randrange, times, *args))  # doctest:+SKIP
+    [2, 4, 8, 1, 8, 4]
+
+    """
+    if times is None:
+        return starmap(func, repeat(args))
+    return starmap(func, repeat(args, times))
+
+
+def _pairwise(iterable):
+    """Returns an iterator of paired items, overlapping, from the original.
+
+    >>> take(4, pairwise(count()))
+    [(0, 1), (1, 2), (2, 3), (3, 4)]
+
+    On Python 3.10 and above, this is an alias for :func:`itertools.pairwise`.
+
+    """
+    a, b = tee(iterable)
+    next(b, None)
+    yield from zip(a, b)
+
+
+try:
+    from itertools import pairwise as itertools_pairwise
+except ImportError:
+    pairwise = _pairwise
+else:
+
+    def pairwise(iterable):
+        yield from itertools_pairwise(iterable)
+
+    pairwise.__doc__ = _pairwise.__doc__
+
+
+class UnequalIterablesError(ValueError):
+    def __init__(self, details=None):
+        msg = 'Iterables have different lengths'
+        if details is not None:
+            msg += (': index 0 has length {}; index {} has length {}').format(
+                *details
+            )
+
+        super().__init__(msg)
+
+
+def _zip_equal_generator(iterables):
+    for combo in zip_longest(*iterables, fillvalue=_marker):
+        for val in combo:
+            if val is _marker:
+                raise UnequalIterablesError()
+        yield combo
+
+
+def _zip_equal(*iterables):
+    # Check whether the iterables are all the same size.
+    try:
+        first_size = len(iterables[0])
+        for i, it in enumerate(iterables[1:], 1):
+            size = len(it)
+            if size != first_size:
+                break
+        else:
+            # If we didn't break out, we can use the built-in zip.
+ return zip(*iterables) + + # If we did break out, there was a mismatch. + raise UnequalIterablesError(details=(first_size, i, size)) + # If any one of the iterables didn't have a length, start reading + # them until one runs out. + except TypeError: + return _zip_equal_generator(iterables) + + +def grouper(iterable, n, incomplete='fill', fillvalue=None): + """Group elements from *iterable* into fixed-length groups of length *n*. + + >>> list(grouper('ABCDEF', 3)) + [('A', 'B', 'C'), ('D', 'E', 'F')] + + The keyword arguments *incomplete* and *fillvalue* control what happens for + iterables whose length is not a multiple of *n*. + + When *incomplete* is `'fill'`, the last group will contain instances of + *fillvalue*. + + >>> list(grouper('ABCDEFG', 3, incomplete='fill', fillvalue='x')) + [('A', 'B', 'C'), ('D', 'E', 'F'), ('G', 'x', 'x')] + + When *incomplete* is `'ignore'`, the last group will not be emitted. + + >>> list(grouper('ABCDEFG', 3, incomplete='ignore', fillvalue='x')) + [('A', 'B', 'C'), ('D', 'E', 'F')] + + When *incomplete* is `'strict'`, a subclass of `ValueError` will be raised. + + >>> it = grouper('ABCDEFG', 3, incomplete='strict') + >>> list(it) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + UnequalIterablesError + + """ + args = [iter(iterable)] * n + if incomplete == 'fill': + return zip_longest(*args, fillvalue=fillvalue) + if incomplete == 'strict': + return _zip_equal(*args) + if incomplete == 'ignore': + return zip(*args) + else: + raise ValueError('Expected fill, strict, or ignore') + + +def roundrobin(*iterables): + """Yields an item from each iterable, alternating between them. + + >>> list(roundrobin('ABC', 'D', 'EF')) + ['A', 'D', 'E', 'B', 'F', 'C'] + + This function produces the same output as :func:`interleave_longest`, but + may perform better for some inputs (in particular when the number of + iterables is small). + + """ + # Recipe credited to George Sakkis + pending = len(iterables) + nexts = cycle(iter(it).__next__ for it in iterables) + while pending: + try: + for next in nexts: + yield next() + except StopIteration: + pending -= 1 + nexts = cycle(islice(nexts, pending)) + + +def partition(pred, iterable): + """ + Returns a 2-tuple of iterables derived from the input iterable. + The first yields the items that have ``pred(item) == False``. + The second yields the items that have ``pred(item) == True``. + + >>> is_odd = lambda x: x % 2 != 0 + >>> iterable = range(10) + >>> even_items, odd_items = partition(is_odd, iterable) + >>> list(even_items), list(odd_items) + ([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]) + + If *pred* is None, :func:`bool` is used. + + >>> iterable = [0, 1, False, True, '', ' '] + >>> false_items, true_items = partition(None, iterable) + >>> list(false_items), list(true_items) + ([0, False, ''], [1, True, ' ']) + + """ + if pred is None: + pred = bool + + evaluations = ((pred(x), x) for x in iterable) + t1, t2 = tee(evaluations) + return ( + (x for (cond, x) in t1 if not cond), + (x for (cond, x) in t2 if cond), + ) + + +def powerset(iterable): + """Yields all possible subsets of the iterable. + + >>> list(powerset([1, 2, 3])) + [(), (1,), (2,), (3,), (1, 2), (1, 3), (2, 3), (1, 2, 3)] + + :func:`powerset` will operate on iterables that aren't :class:`set` + instances, so repeated elements in the input will produce repeated elements + in the output. 
Use :func:`unique_everseen` on the input to avoid generating
+    duplicates:
+
+    >>> seq = [1, 1, 0]
+    >>> list(powerset(seq))
+    [(), (1,), (1,), (0,), (1, 1), (1, 0), (1, 0), (1, 1, 0)]
+    >>> from more_itertools import unique_everseen
+    >>> list(powerset(unique_everseen(seq)))
+    [(), (1,), (0,), (1, 0)]
+
+    """
+    s = list(iterable)
+    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))
+
+
+def unique_everseen(iterable, key=None):
+    """
+    Yield unique elements, preserving order.
+
+    >>> list(unique_everseen('AAAABBBCCDAABBB'))
+    ['A', 'B', 'C', 'D']
+    >>> list(unique_everseen('ABBCcAD', str.lower))
+    ['A', 'B', 'C', 'D']
+
+    Sequences with a mix of hashable and unhashable items can be used.
+    The function will be slower (i.e., `O(n^2)`) for unhashable items.
+
+    Remember that ``list`` objects are unhashable - you can use the *key*
+    parameter to transform the list to a tuple (which is hashable) to
+    avoid a slowdown.
+
+    >>> iterable = ([1, 2], [2, 3], [1, 2])
+    >>> list(unique_everseen(iterable))  # Slow
+    [[1, 2], [2, 3]]
+    >>> list(unique_everseen(iterable, key=tuple))  # Faster
+    [[1, 2], [2, 3]]
+
+    Similarly, you may want to convert unhashable ``set`` objects with
+    ``key=frozenset``. For ``dict`` objects,
+    ``key=lambda x: frozenset(x.items())`` can be used.
+
+    """
+    seenset = set()
+    seenset_add = seenset.add
+    seenlist = []
+    seenlist_add = seenlist.append
+    use_key = key is not None
+
+    for element in iterable:
+        k = key(element) if use_key else element
+        try:
+            if k not in seenset:
+                seenset_add(k)
+                yield element
+        except TypeError:
+            if k not in seenlist:
+                seenlist_add(k)
+                yield element
+
+
+def unique_justseen(iterable, key=None):
+    """Yields elements in order, ignoring serial duplicates.
+
+    >>> list(unique_justseen('AAAABBBCCDAABBB'))
+    ['A', 'B', 'C', 'D', 'A', 'B']
+    >>> list(unique_justseen('ABBCcAD', str.lower))
+    ['A', 'B', 'C', 'A', 'D']
+
+    """
+    return map(next, map(operator.itemgetter(1), groupby(iterable, key)))
+
+
+def iter_except(func, exception, first=None):
+    """Yields results from a function repeatedly until an exception is raised.
+
+    Converts a call-until-exception interface to an iterator interface.
+    Like ``iter(func, sentinel)``, but uses an exception instead of a sentinel
+    to end the loop.
+
+    >>> l = [0, 1, 2]
+    >>> list(iter_except(l.pop, IndexError))
+    [2, 1, 0]
+
+    Multiple exceptions can be specified as a stopping condition:
+
+    >>> l = [1, 2, 3, '...', 4, 5, 6]
+    >>> list(iter_except(lambda: 1 + l.pop(), (IndexError, TypeError)))
+    [7, 6, 5]
+    >>> list(iter_except(lambda: 1 + l.pop(), (IndexError, TypeError)))
+    [4, 3, 2]
+    >>> list(iter_except(lambda: 1 + l.pop(), (IndexError, TypeError)))
+    []
+
+    """
+    try:
+        if first is not None:
+            yield first()
+        while 1:
+            yield func()
+    except exception:
+        pass
+
+
+def first_true(iterable, default=None, pred=None):
+    """
+    Returns the first true value in the iterable.
+
+    If no true value is found, returns *default*.
+
+    If *pred* is not None, returns the first item for which
+    ``pred(item) == True``.
+
+    >>> first_true(range(10))
+    1
+    >>> first_true(range(10), pred=lambda x: x > 5)
+    6
+    >>> first_true(range(10), default='missing', pred=lambda x: x > 9)
+    'missing'
+
+    """
+    return next(filter(pred, iterable), default)
+
+
+def random_product(*args, repeat=1):
+    """Draw an item at random from each of the input iterables.
+
+    >>> random_product('abc', range(4), 'XYZ')  # doctest:+SKIP
+    ('c', 3, 'Z')
+
+    If *repeat* is provided as a keyword argument, that many items will be
+    drawn from each iterable.
+
+    >>> random_product('abcd', range(4), repeat=2)  # doctest:+SKIP
+    ('a', 2, 'd', 3)
+
+    This is equivalent to taking a random selection from
+    ``itertools.product(*args, **kwargs)``.
+
+    """
+    pools = [tuple(pool) for pool in args] * repeat
+    return tuple(choice(pool) for pool in pools)
+
+
+def random_permutation(iterable, r=None):
+    """Return a random *r* length permutation of the elements in *iterable*.
+
+    If *r* is not specified or is ``None``, then *r* defaults to the length of
+    *iterable*.
+
+    >>> random_permutation(range(5))  # doctest:+SKIP
+    (3, 4, 0, 1, 2)
+
+    This is equivalent to taking a random selection from
+    ``itertools.permutations(iterable, r)``.
+
+    """
+    pool = tuple(iterable)
+    r = len(pool) if r is None else r
+    return tuple(sample(pool, r))
+
+
+def random_combination(iterable, r):
+    """Return a random *r* length subsequence of the elements in *iterable*.
+
+    >>> random_combination(range(5), 3)  # doctest:+SKIP
+    (2, 3, 4)
+
+    This is equivalent to taking a random selection from
+    ``itertools.combinations(iterable, r)``.
+
+    """
+    pool = tuple(iterable)
+    n = len(pool)
+    indices = sorted(sample(range(n), r))
+    return tuple(pool[i] for i in indices)
+
+
+def random_combination_with_replacement(iterable, r):
+    """Return a random *r* length subsequence of elements in *iterable*,
+    allowing individual elements to be repeated.
+
+    >>> random_combination_with_replacement(range(3), 5)  # doctest:+SKIP
+    (0, 0, 1, 2, 2)
+
+    This is equivalent to taking a random selection from
+    ``itertools.combinations_with_replacement(iterable, r)``.
+
+    """
+    pool = tuple(iterable)
+    n = len(pool)
+    indices = sorted(randrange(n) for i in range(r))
+    return tuple(pool[i] for i in indices)
+
+
+def nth_combination(iterable, r, index):
+    """Equivalent to ``list(combinations(iterable, r))[index]``.
+
+    The subsequences of *iterable* that are of length *r* can be ordered
+    lexicographically. :func:`nth_combination` computes the subsequence at
+    sort position *index* directly, without computing the previous
+    subsequences.
+
+    >>> nth_combination(range(5), 3, 5)
+    (0, 3, 4)
+
+    ``ValueError`` will be raised if *r* is negative or greater than the length
+    of *iterable*.
+    ``IndexError`` will be raised if the given *index* is invalid.
+    """
+    pool = tuple(iterable)
+    n = len(pool)
+    if (r < 0) or (r > n):
+        raise ValueError
+
+    c = 1
+    k = min(r, n - r)
+    for i in range(1, k + 1):
+        c = c * (n - k + i) // i
+
+    if index < 0:
+        index += c
+
+    if (index < 0) or (index >= c):
+        raise IndexError
+
+    result = []
+    while r:
+        c, n, r = c * r // n, n - 1, r - 1
+        while index >= c:
+            index -= c
+            c, n = c * (n - r) // n, n - 1
+        result.append(pool[-1 - n])
+
+    return tuple(result)
+
+
+def prepend(value, iterator):
+    """Yield *value*, followed by the elements in *iterator*.
+
+    >>> value = '0'
+    >>> iterator = ['1', '2', '3']
+    >>> list(prepend(value, iterator))
+    ['0', '1', '2', '3']
+
+    To prepend multiple values, see :func:`itertools.chain`
+    or :func:`value_chain`.
+
+    """
+    return chain([value], iterator)
+
+
+def convolve(signal, kernel):
+    """Convolve the iterable *signal* with the iterable *kernel*.
+
+    >>> signal = (1, 2, 3, 4, 5)
+    >>> kernel = [3, 2, 1]
+    >>> list(convolve(signal, kernel))
+    [3, 8, 14, 20, 26, 14, 5]
+
+    Note: the input arguments are not interchangeable, as the *kernel*
+    is immediately consumed and stored.
+ + """ + kernel = tuple(kernel)[::-1] + n = len(kernel) + window = deque([0], maxlen=n) * n + for x in chain(signal, repeat(0, n - 1)): + window.append(x) + yield sum(map(operator.mul, kernel, window)) + + +def before_and_after(predicate, it): + """A variant of :func:`takewhile` that allows complete access to the + remainder of the iterator. + + >>> it = iter('ABCdEfGhI') + >>> all_upper, remainder = before_and_after(str.isupper, it) + >>> ''.join(all_upper) + 'ABC' + >>> ''.join(remainder) # takewhile() would lose the 'd' + 'dEfGhI' + + Note that the first iterator must be fully consumed before the second + iterator can generate valid results. + """ + it = iter(it) + transition = [] + + def true_iterator(): + for elem in it: + if predicate(elem): + yield elem + else: + transition.append(elem) + return + + # Note: this is different from itertools recipes to allow nesting + # before_and_after remainders into before_and_after again. See tests + # for an example. + remainder_iterator = chain(transition, it) + + return true_iterator(), remainder_iterator + + +def triplewise(iterable): + """Return overlapping triplets from *iterable*. + + >>> list(triplewise('ABCDE')) + [('A', 'B', 'C'), ('B', 'C', 'D'), ('C', 'D', 'E')] + + """ + for (a, _), (b, c) in pairwise(pairwise(iterable)): + yield a, b, c + + +def sliding_window(iterable, n): + """Return a sliding window of width *n* over *iterable*. + + >>> list(sliding_window(range(6), 4)) + [(0, 1, 2, 3), (1, 2, 3, 4), (2, 3, 4, 5)] + + If *iterable* has fewer than *n* items, then nothing is yielded: + + >>> list(sliding_window(range(3), 4)) + [] + + For a variant with more features, see :func:`windowed`. + """ + it = iter(iterable) + window = deque(islice(it, n), maxlen=n) + if len(window) == n: + yield tuple(window) + for x in it: + window.append(x) + yield tuple(window) + + +def subslices(iterable): + """Return all contiguous non-empty subslices of *iterable*. + + >>> list(subslices('ABC')) + [['A'], ['A', 'B'], ['A', 'B', 'C'], ['B'], ['B', 'C'], ['C']] + + This is similar to :func:`substrings`, but emits items in a different + order. + """ + seq = list(iterable) + slices = starmap(slice, combinations(range(len(seq) + 1), 2)) + return map(operator.getitem, repeat(seq), slices) + + +def polynomial_from_roots(roots): + """Compute a polynomial's coefficients from its roots. + + >>> roots = [5, -4, 3] # (x - 5) * (x + 4) * (x - 3) + >>> polynomial_from_roots(roots) # x^3 - 4 * x^2 - 17 * x + 60 + [1, -4, -17, 60] + """ + # Use math.prod for Python 3.8+, + prod = getattr(math, 'prod', lambda x: reduce(operator.mul, x, 1)) + roots = list(map(operator.neg, roots)) + return [ + sum(map(prod, combinations(roots, k))) for k in range(len(roots) + 1) + ] + + +def iter_index(iterable, value, start=0): + """Yield the index of each place in *iterable* that *value* occurs, + beginning with index *start*. + + See :func:`locate` for a more general means of finding the indexes + associated with particular values. + + >>> list(iter_index('AABCADEAF', 'A')) + [0, 1, 4, 7] + """ + try: + seq_index = iterable.index + except AttributeError: + # Slow path for general iterables + it = islice(iterable, start, None) + for i, element in enumerate(it, start): + if element is value or element == value: + yield i + else: + # Fast path for sequences + i = start - 1 + try: + while True: + i = seq_index(value, i + 1) + yield i + except ValueError: + pass + + +def sieve(n): + """Yield the primes less than n. 
+
+    >>> list(sieve(30))
+    [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]
+    """
+    isqrt = getattr(math, 'isqrt', lambda x: int(math.sqrt(x)))
+    data = bytearray((0, 1)) * (n // 2)
+    data[:3] = 0, 0, 0
+    limit = isqrt(n) + 1
+    for p in compress(range(limit), data):
+        data[p * p : n : p + p] = bytes(len(range(p * p, n, p + p)))
+    data[2] = 1
+    return iter_index(data, 1) if n > 2 else iter([])
+
+
+def batched(iterable, n):
+    """Batch data into lists of length *n*. The last batch may be shorter.
+
+    >>> list(batched('ABCDEFG', 3))
+    [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']]
+
+    This recipe is from the ``itertools`` docs. This library also provides
+    :func:`chunked`, which has a different implementation.
+    """
+    if hexversion >= 0x30C00A0:  # Python 3.12.0a0
+        warnings.warn(
+            (
+                'batched will be removed in a future version of '
+                'more-itertools. Use the standard library '
+                'itertools.batched function instead'
+            ),
+            DeprecationWarning,
+        )
+
+    it = iter(iterable)
+    while True:
+        batch = list(islice(it, n))
+        if not batch:
+            break
+        yield batch
+
+
+def transpose(it):
+    """Swap the rows and columns of the input.
+
+    >>> list(transpose([(1, 2, 3), (11, 22, 33)]))
+    [(1, 11), (2, 22), (3, 33)]
+
+    The caller should ensure that the dimensions of the input are compatible.
+    """
+    # TODO: when 3.9 goes end-of-life, add strict=True to this.
+    return zip(*it)
+
+
+def matmul(m1, m2):
+    """Multiply two matrices.
+    >>> list(matmul([(7, 5), (3, 5)], [(2, 5), (7, 9)]))
+    [[49, 80], [41, 60]]
+
+    The caller should ensure that the dimensions of the input matrices are
+    compatible with each other.
+    """
+    n = len(m2[0])
+    return batched(starmap(dotproduct, product(m1, transpose(m2))), n)
+
+
+def factor(n):
+    """Yield the prime factors of n.
+    >>> list(factor(360))
+    [2, 2, 2, 3, 3, 5]
+    """
+    isqrt = getattr(math, 'isqrt', lambda x: int(math.sqrt(x)))
+    for prime in sieve(isqrt(n) + 1):
+        while True:
+            quotient, remainder = divmod(n, prime)
+            if remainder:
+                break
+            yield prime
+            n = quotient
+        if n == 1:
+            return
+    if n >= 2:
+        yield n
diff --git a/lib/pkg_resources/_vendor/more_itertools/recipes.pyi b/lib/pkg_resources/_vendor/more_itertools/recipes.pyi
new file mode 100644
index 00000000..0267ed56
--- /dev/null
+++ b/lib/pkg_resources/_vendor/more_itertools/recipes.pyi
@@ -0,0 +1,119 @@
+"""Stubs for more_itertools.recipes"""
+from __future__ import annotations
+
+from typing import (
+    Any,
+    Callable,
+    Iterable,
+    Iterator,
+    overload,
+    Sequence,
+    Type,
+    TypeVar,
+)
+
+# Type and type variable definitions
+_T = TypeVar('_T')
+_U = TypeVar('_U')
+
+def take(n: int, iterable: Iterable[_T]) -> list[_T]: ...
+def tabulate(
+    function: Callable[[int], _T], start: int = ...
+) -> Iterator[_T]: ...
+def tail(n: int, iterable: Iterable[_T]) -> Iterator[_T]: ...
+def consume(iterator: Iterable[object], n: int | None = ...) -> None: ...
+@overload
+def nth(iterable: Iterable[_T], n: int) -> _T | None: ...
+@overload
+def nth(iterable: Iterable[_T], n: int, default: _U) -> _T | _U: ...
+def all_equal(iterable: Iterable[object]) -> bool: ...
+def quantify(
+    iterable: Iterable[_T], pred: Callable[[_T], bool] = ...
+) -> int: ...
+def pad_none(iterable: Iterable[_T]) -> Iterator[_T | None]: ...
+def padnone(iterable: Iterable[_T]) -> Iterator[_T | None]: ...
+def ncycles(iterable: Iterable[_T], n: int) -> Iterator[_T]: ...
+def dotproduct(vec1: Iterable[object], vec2: Iterable[object]) -> object: ...
+def flatten(listOfLists: Iterable[Iterable[_T]]) -> Iterator[_T]: ...
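[Editor's note: the number-theoretic recipes implemented above (`sieve`, `factor`) compose — every prime factor of *n* must appear in a sieve that extends past it. A quick sketch of their runtime behavior, assuming the vendored module resolves as `more_itertools.recipes` on this project's path:]

```python
# Import path is an assumption about how the vendored copy is exposed.
from more_itertools.recipes import sieve, factor

print(list(sieve(20)))    # [2, 3, 5, 7, 11, 13, 17, 19]
print(list(factor(360)))  # [2, 2, 2, 3, 3, 5] (with multiplicities)

assert set(factor(360)) <= set(sieve(361))
```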
+def repeatfunc( + func: Callable[..., _U], times: int | None = ..., *args: Any +) -> Iterator[_U]: ... +def pairwise(iterable: Iterable[_T]) -> Iterator[tuple[_T, _T]]: ... +def grouper( + iterable: Iterable[_T], + n: int, + incomplete: str = ..., + fillvalue: _U = ..., +) -> Iterator[tuple[_T | _U, ...]]: ... +def roundrobin(*iterables: Iterable[_T]) -> Iterator[_T]: ... +def partition( + pred: Callable[[_T], object] | None, iterable: Iterable[_T] +) -> tuple[Iterator[_T], Iterator[_T]]: ... +def powerset(iterable: Iterable[_T]) -> Iterator[tuple[_T, ...]]: ... +def unique_everseen( + iterable: Iterable[_T], key: Callable[[_T], _U] | None = ... +) -> Iterator[_T]: ... +def unique_justseen( + iterable: Iterable[_T], key: Callable[[_T], object] | None = ... +) -> Iterator[_T]: ... +@overload +def iter_except( + func: Callable[[], _T], + exception: Type[BaseException] | tuple[Type[BaseException], ...], + first: None = ..., +) -> Iterator[_T]: ... +@overload +def iter_except( + func: Callable[[], _T], + exception: Type[BaseException] | tuple[Type[BaseException], ...], + first: Callable[[], _U], +) -> Iterator[_T | _U]: ... +@overload +def first_true( + iterable: Iterable[_T], *, pred: Callable[[_T], object] | None = ... +) -> _T | None: ... +@overload +def first_true( + iterable: Iterable[_T], + default: _U, + pred: Callable[[_T], object] | None = ..., +) -> _T | _U: ... +def random_product( + *args: Iterable[_T], repeat: int = ... +) -> tuple[_T, ...]: ... +def random_permutation( + iterable: Iterable[_T], r: int | None = ... +) -> tuple[_T, ...]: ... +def random_combination(iterable: Iterable[_T], r: int) -> tuple[_T, ...]: ... +def random_combination_with_replacement( + iterable: Iterable[_T], r: int +) -> tuple[_T, ...]: ... +def nth_combination( + iterable: Iterable[_T], r: int, index: int +) -> tuple[_T, ...]: ... +def prepend(value: _T, iterator: Iterable[_U]) -> Iterator[_T | _U]: ... +def convolve(signal: Iterable[_T], kernel: Iterable[_T]) -> Iterator[_T]: ... +def before_and_after( + predicate: Callable[[_T], bool], it: Iterable[_T] +) -> tuple[Iterator[_T], Iterator[_T]]: ... +def triplewise(iterable: Iterable[_T]) -> Iterator[tuple[_T, _T, _T]]: ... +def sliding_window( + iterable: Iterable[_T], n: int +) -> Iterator[tuple[_T, ...]]: ... +def subslices(iterable: Iterable[_T]) -> Iterator[list[_T]]: ... +def polynomial_from_roots(roots: Sequence[int]) -> list[int]: ... +def iter_index( + iterable: Iterable[object], + value: Any, + start: int | None = ..., +) -> Iterator[int]: ... +def sieve(n: int) -> Iterator[int]: ... +def batched( + iterable: Iterable[_T], + n: int, +) -> Iterator[list[_T]]: ... +def transpose( + it: Iterable[Iterable[_T]], +) -> tuple[Iterator[_T], ...]: ... +def matmul(m1: Sequence[_T], m2: Sequence[_T]) -> Iterator[list[_T]]: ... +def factor(n: int) -> Iterator[int]: ... diff --git a/lib/pkg_resources/_vendor/packaging/LICENSE b/lib/pkg_resources/_vendor/packaging/LICENSE deleted file mode 100644 index 6f62d44e..00000000 --- a/lib/pkg_resources/_vendor/packaging/LICENSE +++ /dev/null @@ -1,3 +0,0 @@ -This software is made available under the terms of *either* of the licenses -found in LICENSE.APACHE or LICENSE.BSD. Contributions to this software is made -under the terms of *both* these licenses. 
diff --git a/lib/pkg_resources/_vendor/packaging/LICENSE.APACHE b/lib/pkg_resources/_vendor/packaging/LICENSE.APACHE deleted file mode 100644 index f433b1a5..00000000 --- a/lib/pkg_resources/_vendor/packaging/LICENSE.APACHE +++ /dev/null @@ -1,177 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS diff --git a/lib/pkg_resources/_vendor/packaging/LICENSE.BSD b/lib/pkg_resources/_vendor/packaging/LICENSE.BSD deleted file mode 100644 index 42ce7b75..00000000 --- a/lib/pkg_resources/_vendor/packaging/LICENSE.BSD +++ /dev/null @@ -1,23 +0,0 @@ -Copyright (c) Donald Stufft and individual contributors. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/lib/pkg_resources/_vendor/packaging/__about__.py b/lib/pkg_resources/_vendor/packaging/__about__.py deleted file mode 100644 index c359122f..00000000 --- a/lib/pkg_resources/_vendor/packaging/__about__.py +++ /dev/null @@ -1,26 +0,0 @@ -# This file is dual licensed under the terms of the Apache License, Version -# 2.0, and the BSD License. See the LICENSE file in the root of this repository -# for complete details. - -__all__ = [ - "__title__", - "__summary__", - "__uri__", - "__version__", - "__author__", - "__email__", - "__license__", - "__copyright__", -] - -__title__ = "packaging" -__summary__ = "Core utilities for Python packages" -__uri__ = "https://github.com/pypa/packaging" - -__version__ = "21.2" - -__author__ = "Donald Stufft and individual contributors" -__email__ = "donald@stufft.io" - -__license__ = "BSD-2-Clause or Apache-2.0" -__copyright__ = "2014-2019 %s" % __author__ diff --git a/lib/pkg_resources/_vendor/packaging/__init__.py b/lib/pkg_resources/_vendor/packaging/__init__.py index 3c50c5dc..13cadc7f 100644 --- a/lib/pkg_resources/_vendor/packaging/__init__.py +++ b/lib/pkg_resources/_vendor/packaging/__init__.py @@ -2,24 +2,14 @@ # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from .__about__ import ( - __author__, - __copyright__, - __email__, - __license__, - __summary__, - __title__, - __uri__, - __version__, -) +__title__ = "packaging" +__summary__ = "Core utilities for Python packages" +__uri__ = "https://github.com/pypa/packaging" -__all__ = [ - "__title__", - "__summary__", - "__uri__", - "__version__", - "__author__", - "__email__", - "__license__", - "__copyright__", -] +__version__ = "23.1" + +__author__ = "Donald Stufft and individual contributors" +__email__ = "donald@stufft.io" + +__license__ = "BSD-2-Clause or Apache-2.0" +__copyright__ = "2014-2019 %s" % __author__ diff --git a/lib/pkg_resources/_vendor/packaging/_elffile.py b/lib/pkg_resources/_vendor/packaging/_elffile.py new file mode 100644 index 00000000..6fb19b30 --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/_elffile.py @@ -0,0 +1,108 @@ +""" +ELF file parser. + +This provides a class ``ELFFile`` that parses an ELF executable in a similar +interface to ``ZipFile``. Only the read interface is implemented. 
+
+Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
+ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
+"""
+
+import enum
+import os
+import struct
+from typing import IO, Optional, Tuple
+
+
+class ELFInvalid(ValueError):
+    pass
+
+
+class EIClass(enum.IntEnum):
+    C32 = 1
+    C64 = 2
+
+
+class EIData(enum.IntEnum):
+    Lsb = 1
+    Msb = 2
+
+
+class EMachine(enum.IntEnum):
+    I386 = 3
+    S390 = 22
+    Arm = 40
+    X8664 = 62
+    AArc64 = 183
+
+
+class ELFFile:
+    """
+    Representation of an ELF executable.
+    """
+
+    def __init__(self, f: IO[bytes]) -> None:
+        self._f = f
+
+        try:
+            ident = self._read("16B")
+        except struct.error:
+            raise ELFInvalid("unable to parse identification")
+        magic = bytes(ident[:4])
+        if magic != b"\x7fELF":
+            raise ELFInvalid(f"invalid magic: {magic!r}")
+
+        self.capacity = ident[4]  # Format for program header (bitness).
+        self.encoding = ident[5]  # Data structure encoding (endianness).
+
+        try:
+            # e_fmt: Format for program header.
+            # p_fmt: Format for section header.
+            # p_idx: Indexes to find p_type, p_offset, and p_filesz.
+            e_fmt, self._p_fmt, self._p_idx = {
+                (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)),  # 32-bit LSB.
+                (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)),  # 32-bit MSB.
+                (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)),  # 64-bit LSB.
+                (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)),  # 64-bit MSB.
+            }[(self.capacity, self.encoding)]
+        except KeyError:
+            raise ELFInvalid(
+                f"unrecognized capacity ({self.capacity}) or "
+                f"encoding ({self.encoding})"
+            )
+
+        try:
+            (
+                _,
+                self.machine,  # Architecture type.
+                _,
+                _,
+                self._e_phoff,  # Offset of program header.
+                _,
+                self.flags,  # Processor-specific flags.
+                _,
+                self._e_phentsize,  # Size of section.
+                self._e_phnum,  # Number of sections.
+            ) = self._read(e_fmt)
+        except struct.error as e:
+            raise ELFInvalid("unable to parse machine and section information") from e
+
+    def _read(self, fmt: str) -> Tuple[int, ...]:
+        return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))
+
+    @property
+    def interpreter(self) -> Optional[str]:
+        """
+        The path recorded in the ``PT_INTERP`` section header.
+        """
+        for index in range(self._e_phnum):
+            self._f.seek(self._e_phoff + self._e_phentsize * index)
+            try:
+                data = self._read(self._p_fmt)
+            except struct.error:
+                continue
+            if data[self._p_idx[0]] != 3:  # Not PT_INTERP.
+                continue
+            self._f.seek(data[self._p_idx[1]])
+            return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
+        return None
diff --git a/lib/pkg_resources/_vendor/packaging/_manylinux.py b/lib/pkg_resources/_vendor/packaging/_manylinux.py
index 4c379aa6..449c655b 100644
--- a/lib/pkg_resources/_vendor/packaging/_manylinux.py
+++ b/lib/pkg_resources/_vendor/packaging/_manylinux.py
@@ -1,121 +1,60 @@
 import collections
+import contextlib
 import functools
 import os
 import re
-import struct
 import sys
 import warnings
-from typing import IO, Dict, Iterator, NamedTuple, Optional, Tuple
+from typing import Dict, Generator, Iterator, NamedTuple, Optional, Tuple
+
+from ._elffile import EIClass, EIData, ELFFile, EMachine
+
+EF_ARM_ABIMASK = 0xFF000000
+EF_ARM_ABI_VER5 = 0x05000000
+EF_ARM_ABI_FLOAT_HARD = 0x00000400
 
 
-# Python does not provide platform information at sufficient granularity to
-# identify the architecture of the running executable in some cases, so we
-# determine it dynamically by reading the information from the running
-# process. This only applies on Linux, which uses the ELF format.
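[Editor's note: the identification bytes that `ELFFile.__init__` unpacks above can be checked with nothing but the standard library; a standalone sketch of the same bitness/endianness probe, meaningful only on Linux where `sys.executable` is an ELF binary:]

```python
# Standalone sketch of the EI_CLASS / EI_DATA check done by ELFFile.
import struct
import sys

with open(sys.executable, "rb") as f:
    ident = struct.unpack("16B", f.read(16))

if bytes(ident[:4]) != b"\x7fELF":
    raise ValueError("not an ELF file")  # e.g. on macOS or Windows

bitness = {1: "32-bit", 2: "64-bit"}[ident[4]]                # EI_CLASS
byteorder = {1: "little-endian", 2: "big-endian"}[ident[5]]   # EI_DATA
print(bitness, byteorder)
```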
-class _ELFFileHeader: - # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header - class _InvalidELFFileHeader(ValueError): - """ - An invalid ELF file header was found. - """ - - ELF_MAGIC_NUMBER = 0x7F454C46 - ELFCLASS32 = 1 - ELFCLASS64 = 2 - ELFDATA2LSB = 1 - ELFDATA2MSB = 2 - EM_386 = 3 - EM_S390 = 22 - EM_ARM = 40 - EM_X86_64 = 62 - EF_ARM_ABIMASK = 0xFF000000 - EF_ARM_ABI_VER5 = 0x05000000 - EF_ARM_ABI_FLOAT_HARD = 0x00000400 - - def __init__(self, file: IO[bytes]) -> None: - def unpack(fmt: str) -> int: - try: - data = file.read(struct.calcsize(fmt)) - result: Tuple[int, ...] = struct.unpack(fmt, data) - except struct.error: - raise _ELFFileHeader._InvalidELFFileHeader() - return result[0] - - self.e_ident_magic = unpack(">I") - if self.e_ident_magic != self.ELF_MAGIC_NUMBER: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_class = unpack("B") - if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_data = unpack("B") - if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_version = unpack("B") - self.e_ident_osabi = unpack("B") - self.e_ident_abiversion = unpack("B") - self.e_ident_pad = file.read(7) - format_h = "H" - format_i = "I" - format_q = "Q" - format_p = format_i if self.e_ident_class == self.ELFCLASS32 else format_q - self.e_type = unpack(format_h) - self.e_machine = unpack(format_h) - self.e_version = unpack(format_i) - self.e_entry = unpack(format_p) - self.e_phoff = unpack(format_p) - self.e_shoff = unpack(format_p) - self.e_flags = unpack(format_i) - self.e_ehsize = unpack(format_h) - self.e_phentsize = unpack(format_h) - self.e_phnum = unpack(format_h) - self.e_shentsize = unpack(format_h) - self.e_shnum = unpack(format_h) - self.e_shstrndx = unpack(format_h) - - -def _get_elf_header() -> Optional[_ELFFileHeader]: +# `os.PathLike` not a generic type until Python 3.9, so sticking with `str` +# as the type for `path` until then. 
+@contextlib.contextmanager
+def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]:
     try:
-        with open(sys.executable, "rb") as f:
-            elf_header = _ELFFileHeader(f)
-    except (OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader):
-        return None
-    return elf_header
+        with open(path, "rb") as f:
+            yield ELFFile(f)
+    except (OSError, TypeError, ValueError):
+        yield None
 
 
-def _is_linux_armhf() -> bool:
+def _is_linux_armhf(executable: str) -> bool:
     # hard-float ABI can be detected from the ELF header of the running
     # process
     # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
-    elf_header = _get_elf_header()
-    if elf_header is None:
-        return False
-    result = elf_header.e_ident_class == elf_header.ELFCLASS32
-    result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB
-    result &= elf_header.e_machine == elf_header.EM_ARM
-    result &= (
-        elf_header.e_flags & elf_header.EF_ARM_ABIMASK
-    ) == elf_header.EF_ARM_ABI_VER5
-    result &= (
-        elf_header.e_flags & elf_header.EF_ARM_ABI_FLOAT_HARD
-    ) == elf_header.EF_ARM_ABI_FLOAT_HARD
-    return result
+    with _parse_elf(executable) as f:
+        return (
+            f is not None
+            and f.capacity == EIClass.C32
+            and f.encoding == EIData.Lsb
+            and f.machine == EMachine.Arm
+            and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5
+            and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD
+        )
 
 
-def _is_linux_i686() -> bool:
-    elf_header = _get_elf_header()
-    if elf_header is None:
-        return False
-    result = elf_header.e_ident_class == elf_header.ELFCLASS32
-    result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB
-    result &= elf_header.e_machine == elf_header.EM_386
-    return result
+def _is_linux_i686(executable: str) -> bool:
+    with _parse_elf(executable) as f:
+        return (
+            f is not None
+            and f.capacity == EIClass.C32
+            and f.encoding == EIData.Lsb
+            and f.machine == EMachine.I386
+        )
 
 
-def _have_compatible_abi(arch: str) -> bool:
+def _have_compatible_abi(executable: str, arch: str) -> bool:
     if arch == "armv7l":
-        return _is_linux_armhf()
+        return _is_linux_armhf(executable)
     if arch == "i686":
-        return _is_linux_i686()
+        return _is_linux_i686(executable)
     return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"}
 
 
@@ -141,10 +80,10 @@ def _glibc_version_string_confstr() -> Optional[str]:
     # platform module.
     # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
     try:
-        # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17".
-        version_string = os.confstr("CS_GNU_LIBC_VERSION")
+        # Should be a string like "glibc 2.17".
+        version_string: str = getattr(os, "confstr")("CS_GNU_LIBC_VERSION")
         assert version_string is not None
-        _, version = version_string.split()
+        _, version = version_string.rsplit()
     except (AssertionError, AttributeError, OSError, ValueError):
         # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)...
         return None
@@ -211,8 +150,8 @@ def _parse_glibc_version(version_str: str) -> Tuple[int, int]:
     m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
     if not m:
         warnings.warn(
-            "Expected glibc version with 2 components major.minor,"
-            " got: %s" % version_str,
+            f"Expected glibc version with 2 components major.minor,"
+            f" got: {version_str}",
             RuntimeWarning,
         )
         return -1, -1
@@ -265,7 +204,7 @@ _LEGACY_MANYLINUX_MAP = {
 
 
 def platform_tags(linux: str, arch: str) -> Iterator[str]:
-    if not _have_compatible_abi(arch):
+    if not _have_compatible_abi(sys.executable, arch):
         return
     # Oldest glibc to be supported regardless of architecture is (2, 17).
     too_old_glibc2 = _GLibCVersion(2, 16)
diff --git a/lib/pkg_resources/_vendor/packaging/_musllinux.py b/lib/pkg_resources/_vendor/packaging/_musllinux.py
index 85450faf..706ba600 100644
--- a/lib/pkg_resources/_vendor/packaging/_musllinux.py
+++ b/lib/pkg_resources/_vendor/packaging/_musllinux.py
@@ -4,68 +4,13 @@ This module implements logic to detect if the currently running Python is
 linked against musl, and what musl version is used.
 """
 
-import contextlib
 import functools
-import operator
-import os
 import re
-import struct
 import subprocess
 import sys
-from typing import IO, Iterator, NamedTuple, Optional, Tuple
+from typing import Iterator, NamedTuple, Optional
 
-
-def _read_unpacked(f: IO[bytes], fmt: str) -> Tuple[int, ...]:
-    return struct.unpack(fmt, f.read(struct.calcsize(fmt)))
-
-
-def _parse_ld_musl_from_elf(f: IO[bytes]) -> Optional[str]:
-    """Detect musl libc location by parsing the Python executable.
-
-    Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
-    ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
-    """
-    f.seek(0)
-    try:
-        ident = _read_unpacked(f, "16B")
-    except struct.error:
-        return None
-    if ident[:4] != tuple(b"\x7fELF"):  # Invalid magic, not ELF.
-        return None
-    f.seek(struct.calcsize("HHI"), 1)  # Skip file type, machine, and version.
-
-    try:
-        # e_fmt: Format for program header.
-        # p_fmt: Format for section header.
-        # p_idx: Indexes to find p_type, p_offset, and p_filesz.
-        e_fmt, p_fmt, p_idx = {
-            1: ("IIIIHHH", "IIIIIIII", (0, 1, 4)),  # 32-bit.
-            2: ("QQQIHHH", "IIQQQQQQ", (0, 2, 5)),  # 64-bit.
-        }[ident[4]]
-    except KeyError:
-        return None
-    else:
-        p_get = operator.itemgetter(*p_idx)
-
-    # Find the interpreter section and return its content.
-    try:
-        _, e_phoff, _, _, _, e_phentsize, e_phnum = _read_unpacked(f, e_fmt)
-    except struct.error:
-        return None
-    for i in range(e_phnum + 1):
-        f.seek(e_phoff + e_phentsize * i)
-        try:
-            p_type, p_offset, p_filesz = p_get(_read_unpacked(f, p_fmt))
-        except struct.error:
-            return None
-        if p_type != 3:  # Not PT_INTERP.
-            continue
-        f.seek(p_offset)
-        interpreter = os.fsdecode(f.read(p_filesz)).strip("\0")
-        if "musl" not in interpreter:
-            return None
-        return interpreter
-    return None
+from ._elffile import ELFFile
 
 
 class _MuslVersion(NamedTuple):
@@ -95,13 +40,12 @@ def _get_musl_version(executable: str) -> Optional[_MuslVersion]:
     Version 1.2.2
     Dynamic Program Loader
     """
-    with contextlib.ExitStack() as stack:
-        try:
-            f = stack.enter_context(open(executable, "rb"))
-        except IOError:
-            return None
-        ld = _parse_ld_musl_from_elf(f)
-        if not ld:
+    try:
+        with open(executable, "rb") as f:
+            ld = ELFFile(f).interpreter
+    except (OSError, TypeError, ValueError):
+        return None
+    if ld is None or "musl" not in ld:
         return None
     proc = subprocess.run([ld], stderr=subprocess.PIPE, universal_newlines=True)
     return _parse_musl_version(proc.stderr)
diff --git a/lib/pkg_resources/_vendor/packaging/_parser.py b/lib/pkg_resources/_vendor/packaging/_parser.py
new file mode 100644
index 00000000..5a18b758
--- /dev/null
+++ b/lib/pkg_resources/_vendor/packaging/_parser.py
@@ -0,0 +1,353 @@
+"""Handwritten parser of dependency specifiers.
+
+The docstring for each __parse_* function contains EBNF-inspired grammar representing
+the implementation.
+""" + +import ast +from typing import Any, List, NamedTuple, Optional, Tuple, Union + +from ._tokenizer import DEFAULT_RULES, Tokenizer + + +class Node: + def __init__(self, value: str) -> None: + self.value = value + + def __str__(self) -> str: + return self.value + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}('{self}')>" + + def serialize(self) -> str: + raise NotImplementedError + + +class Variable(Node): + def serialize(self) -> str: + return str(self) + + +class Value(Node): + def serialize(self) -> str: + return f'"{self}"' + + +class Op(Node): + def serialize(self) -> str: + return str(self) + + +MarkerVar = Union[Variable, Value] +MarkerItem = Tuple[MarkerVar, Op, MarkerVar] +# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]] +# MarkerList = List[Union["MarkerList", MarkerAtom, str]] +# mypy does not support recursive type definition +# https://github.com/python/mypy/issues/731 +MarkerAtom = Any +MarkerList = List[Any] + + +class ParsedRequirement(NamedTuple): + name: str + url: str + extras: List[str] + specifier: str + marker: Optional[MarkerList] + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for dependency specifier +# -------------------------------------------------------------------------------------- +def parse_requirement(source: str) -> ParsedRequirement: + return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement: + """ + requirement = WS? IDENTIFIER WS? extras WS? requirement_details + """ + tokenizer.consume("WS") + + name_token = tokenizer.expect( + "IDENTIFIER", expected="package name at the start of dependency specifier" + ) + name = name_token.text + tokenizer.consume("WS") + + extras = _parse_extras(tokenizer) + tokenizer.consume("WS") + + url, specifier, marker = _parse_requirement_details(tokenizer) + tokenizer.expect("END", expected="end of dependency specifier") + + return ParsedRequirement(name, url, extras, specifier, marker) + + +def _parse_requirement_details( + tokenizer: Tokenizer, +) -> Tuple[str, str, Optional[MarkerList]]: + """ + requirement_details = AT URL (WS requirement_marker?)? + | specifier WS? (requirement_marker)? + """ + + specifier = "" + url = "" + marker = None + + if tokenizer.check("AT"): + tokenizer.read() + tokenizer.consume("WS") + + url_start = tokenizer.position + url = tokenizer.expect("URL", expected="URL after @").text + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + tokenizer.expect("WS", expected="whitespace after URL") + + # The input might end after whitespace. + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, span_start=url_start, after="URL and whitespace" + ) + else: + specifier_start = tokenizer.position + specifier = _parse_specifier(tokenizer) + tokenizer.consume("WS") + + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, + span_start=specifier_start, + after=( + "version specifier" + if specifier + else "name and no valid version specifier" + ), + ) + + return (url, specifier, marker) + + +def _parse_requirement_marker( + tokenizer: Tokenizer, *, span_start: int, after: str +) -> MarkerList: + """ + requirement_marker = SEMICOLON marker WS? 
+ """ + + if not tokenizer.check("SEMICOLON"): + tokenizer.raise_syntax_error( + f"Expected end or semicolon (after {after})", + span_start=span_start, + ) + tokenizer.read() + + marker = _parse_marker(tokenizer) + tokenizer.consume("WS") + + return marker + + +def _parse_extras(tokenizer: Tokenizer) -> List[str]: + """ + extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)? + """ + if not tokenizer.check("LEFT_BRACKET", peek=True): + return [] + + with tokenizer.enclosing_tokens( + "LEFT_BRACKET", + "RIGHT_BRACKET", + around="extras", + ): + tokenizer.consume("WS") + extras = _parse_extras_list(tokenizer) + tokenizer.consume("WS") + + return extras + + +def _parse_extras_list(tokenizer: Tokenizer) -> List[str]: + """ + extras_list = identifier (wsp* ',' wsp* identifier)* + """ + extras: List[str] = [] + + if not tokenizer.check("IDENTIFIER"): + return extras + + extras.append(tokenizer.read().text) + + while True: + tokenizer.consume("WS") + if tokenizer.check("IDENTIFIER", peek=True): + tokenizer.raise_syntax_error("Expected comma between extra names") + elif not tokenizer.check("COMMA"): + break + + tokenizer.read() + tokenizer.consume("WS") + + extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma") + extras.append(extra_token.text) + + return extras + + +def _parse_specifier(tokenizer: Tokenizer) -> str: + """ + specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS + | WS? version_many WS? + """ + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="version specifier", + ): + tokenizer.consume("WS") + parsed_specifiers = _parse_version_many(tokenizer) + tokenizer.consume("WS") + + return parsed_specifiers + + +def _parse_version_many(tokenizer: Tokenizer) -> str: + """ + version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)? + """ + parsed_specifiers = "" + while tokenizer.check("SPECIFIER"): + span_start = tokenizer.position + parsed_specifiers += tokenizer.read().text + if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True): + tokenizer.raise_syntax_error( + ".* suffix can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position + 1, + ) + if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True): + tokenizer.raise_syntax_error( + "Local version label can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position, + ) + tokenizer.consume("WS") + if not tokenizer.check("COMMA"): + break + parsed_specifiers += tokenizer.read().text + tokenizer.consume("WS") + + return parsed_specifiers + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for marker expression +# -------------------------------------------------------------------------------------- +def parse_marker(source: str) -> MarkerList: + return _parse_marker(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_marker(tokenizer: Tokenizer) -> MarkerList: + """ + marker = marker_atom (BOOLOP marker_atom)+ + """ + expression = [_parse_marker_atom(tokenizer)] + while tokenizer.check("BOOLOP"): + token = tokenizer.read() + expr_right = _parse_marker_atom(tokenizer) + expression.extend((token.text, expr_right)) + return expression + + +def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom: + """ + marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS? + | WS? marker_item WS? 
+ """ + + tokenizer.consume("WS") + if tokenizer.check("LEFT_PARENTHESIS", peek=True): + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="marker expression", + ): + tokenizer.consume("WS") + marker: MarkerAtom = _parse_marker(tokenizer) + tokenizer.consume("WS") + else: + marker = _parse_marker_item(tokenizer) + tokenizer.consume("WS") + return marker + + +def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem: + """ + marker_item = WS? marker_var WS? marker_op WS? marker_var WS? + """ + tokenizer.consume("WS") + marker_var_left = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + marker_op = _parse_marker_op(tokenizer) + tokenizer.consume("WS") + marker_var_right = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + return (marker_var_left, marker_op, marker_var_right) + + +def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar: + """ + marker_var = VARIABLE | QUOTED_STRING + """ + if tokenizer.check("VARIABLE"): + return process_env_var(tokenizer.read().text.replace(".", "_")) + elif tokenizer.check("QUOTED_STRING"): + return process_python_str(tokenizer.read().text) + else: + tokenizer.raise_syntax_error( + message="Expected a marker variable or quoted string" + ) + + +def process_env_var(env_var: str) -> Variable: + if ( + env_var == "platform_python_implementation" + or env_var == "python_implementation" + ): + return Variable("platform_python_implementation") + else: + return Variable(env_var) + + +def process_python_str(python_str: str) -> Value: + value = ast.literal_eval(python_str) + return Value(str(value)) + + +def _parse_marker_op(tokenizer: Tokenizer) -> Op: + """ + marker_op = IN | NOT IN | OP + """ + if tokenizer.check("IN"): + tokenizer.read() + return Op("in") + elif tokenizer.check("NOT"): + tokenizer.read() + tokenizer.expect("WS", expected="whitespace after 'not'") + tokenizer.expect("IN", expected="'in' after 'not'") + return Op("not in") + elif tokenizer.check("OP"): + return Op(tokenizer.read().text) + else: + return tokenizer.raise_syntax_error( + "Expected marker operator, one of " + "<=, <, !=, ==, >=, >, ~=, ===, in, not in" + ) diff --git a/lib/pkg_resources/_vendor/packaging/_structures.py b/lib/pkg_resources/_vendor/packaging/_structures.py index 95154975..90a6465f 100644 --- a/lib/pkg_resources/_vendor/packaging/_structures.py +++ b/lib/pkg_resources/_vendor/packaging/_structures.py @@ -19,9 +19,6 @@ class InfinityType: def __eq__(self, other: object) -> bool: return isinstance(other, self.__class__) - def __ne__(self, other: object) -> bool: - return not isinstance(other, self.__class__) - def __gt__(self, other: object) -> bool: return True @@ -51,9 +48,6 @@ class NegativeInfinityType: def __eq__(self, other: object) -> bool: return isinstance(other, self.__class__) - def __ne__(self, other: object) -> bool: - return not isinstance(other, self.__class__) - def __gt__(self, other: object) -> bool: return False diff --git a/lib/pkg_resources/_vendor/packaging/_tokenizer.py b/lib/pkg_resources/_vendor/packaging/_tokenizer.py new file mode 100644 index 00000000..dd0d648d --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/_tokenizer.py @@ -0,0 +1,192 @@ +import contextlib +import re +from dataclasses import dataclass +from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union + +from .specifiers import Specifier + + +@dataclass +class Token: + name: str + text: str + position: int + + +class ParserSyntaxError(Exception): + """The provided source text could not be parsed correctly.""" + + def 
__init__( + self, + message: str, + *, + source: str, + span: Tuple[int, int], + ) -> None: + self.span = span + self.message = message + self.source = source + + super().__init__() + + def __str__(self) -> str: + marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^" + return "\n ".join([self.message, self.source, marker]) + + +DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = { + "LEFT_PARENTHESIS": r"\(", + "RIGHT_PARENTHESIS": r"\)", + "LEFT_BRACKET": r"\[", + "RIGHT_BRACKET": r"\]", + "SEMICOLON": r";", + "COMMA": r",", + "QUOTED_STRING": re.compile( + r""" + ( + ('[^']*') + | + ("[^"]*") + ) + """, + re.VERBOSE, + ), + "OP": r"(===|==|~=|!=|<=|>=|<|>)", + "BOOLOP": r"\b(or|and)\b", + "IN": r"\bin\b", + "NOT": r"\bnot\b", + "VARIABLE": re.compile( + r""" + \b( + python_version + |python_full_version + |os[._]name + |sys[._]platform + |platform_(release|system) + |platform[._](version|machine|python_implementation) + |python_implementation + |implementation_(name|version) + |extra + )\b + """, + re.VERBOSE, + ), + "SPECIFIER": re.compile( + Specifier._operator_regex_str + Specifier._version_regex_str, + re.VERBOSE | re.IGNORECASE, + ), + "AT": r"\@", + "URL": r"[^ \t]+", + "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b", + "VERSION_PREFIX_TRAIL": r"\.\*", + "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*", + "WS": r"[ \t]+", + "END": r"$", +} + + +class Tokenizer: + """Context-sensitive token parsing. + + Provides methods to examine the input stream to check whether the next token + matches. + """ + + def __init__( + self, + source: str, + *, + rules: "Dict[str, Union[str, re.Pattern[str]]]", + ) -> None: + self.source = source + self.rules: Dict[str, re.Pattern[str]] = { + name: re.compile(pattern) for name, pattern in rules.items() + } + self.next_token: Optional[Token] = None + self.position = 0 + + def consume(self, name: str) -> None: + """Move beyond provided token name, if at current position.""" + if self.check(name): + self.read() + + def check(self, name: str, *, peek: bool = False) -> bool: + """Check whether the next token has the provided name. + + By default, if the check succeeds, the token *must* be read before + another check. If `peek` is set to `True`, the token is not loaded and + would need to be checked again. + """ + assert ( + self.next_token is None + ), f"Cannot check for {name!r}, already have {self.next_token!r}" + assert name in self.rules, f"Unknown token name: {name!r}" + + expression = self.rules[name] + + match = expression.match(self.source, self.position) + if match is None: + return False + if not peek: + self.next_token = Token(name, match[0], self.position) + return True + + def expect(self, name: str, *, expected: str) -> Token: + """Expect a certain token name next, failing with a syntax error otherwise. + + The token is *not* read. 
+ """ + if not self.check(name): + raise self.raise_syntax_error(f"Expected {expected}") + return self.read() + + def read(self) -> Token: + """Consume the next token and return it.""" + token = self.next_token + assert token is not None + + self.position += len(token.text) + self.next_token = None + + return token + + def raise_syntax_error( + self, + message: str, + *, + span_start: Optional[int] = None, + span_end: Optional[int] = None, + ) -> NoReturn: + """Raise ParserSyntaxError at the given position.""" + span = ( + self.position if span_start is None else span_start, + self.position if span_end is None else span_end, + ) + raise ParserSyntaxError( + message, + source=self.source, + span=span, + ) + + @contextlib.contextmanager + def enclosing_tokens( + self, open_token: str, close_token: str, *, around: str + ) -> Iterator[None]: + if self.check(open_token): + open_position = self.position + self.read() + else: + open_position = None + + yield + + if open_position is None: + return + + if not self.check(close_token): + self.raise_syntax_error( + f"Expected matching {close_token} for {open_token}, after {around}", + span_start=open_position, + ) + + self.read() diff --git a/lib/pkg_resources/_vendor/packaging/markers.py b/lib/pkg_resources/_vendor/packaging/markers.py index 18769b09..8b98fca7 100644 --- a/lib/pkg_resources/_vendor/packaging/markers.py +++ b/lib/pkg_resources/_vendor/packaging/markers.py @@ -8,19 +8,17 @@ import platform import sys from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from pkg_resources.extern.pyparsing import ( # noqa: N817 - Forward, - Group, - Literal as L, - ParseException, - ParseResults, - QuotedString, - ZeroOrMore, - stringEnd, - stringStart, +from ._parser import ( + MarkerAtom, + MarkerList, + Op, + Value, + Variable, + parse_marker as _parse_marker, ) - +from ._tokenizer import ParserSyntaxError from .specifiers import InvalidSpecifier, Specifier +from .utils import canonicalize_name __all__ = [ "InvalidMarker", @@ -52,101 +50,24 @@ class UndefinedEnvironmentName(ValueError): """ -class Node: - def __init__(self, value: Any) -> None: - self.value = value - - def __str__(self) -> str: - return str(self.value) - - def __repr__(self) -> str: - return f"<{self.__class__.__name__}('{self}')>" - - def serialize(self) -> str: - raise NotImplementedError - - -class Variable(Node): - def serialize(self) -> str: - return str(self) - - -class Value(Node): - def serialize(self) -> str: - return f'"{self}"' - - -class Op(Node): - def serialize(self) -> str: - return str(self) - - -VARIABLE = ( - L("implementation_version") - | L("platform_python_implementation") - | L("implementation_name") - | L("python_full_version") - | L("platform_release") - | L("platform_version") - | L("platform_machine") - | L("platform_system") - | L("python_version") - | L("sys_platform") - | L("os_name") - | L("os.name") # PEP-345 - | L("sys.platform") # PEP-345 - | L("platform.version") # PEP-345 - | L("platform.machine") # PEP-345 - | L("platform.python_implementation") # PEP-345 - | L("python_implementation") # undocumented setuptools legacy - | L("extra") # PEP-508 -) -ALIASES = { - "os.name": "os_name", - "sys.platform": "sys_platform", - "platform.version": "platform_version", - "platform.machine": "platform_machine", - "platform.python_implementation": "platform_python_implementation", - "python_implementation": "platform_python_implementation", -} -VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0]))) - -VERSION_CMP = ( - L("===") 
| L("==") | L(">=") | L("<=") | L("!=") | L("~=") | L(">") | L("<") -) - -MARKER_OP = VERSION_CMP | L("not in") | L("in") -MARKER_OP.setParseAction(lambda s, l, t: Op(t[0])) - -MARKER_VALUE = QuotedString("'") | QuotedString('"') -MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0])) - -BOOLOP = L("and") | L("or") - -MARKER_VAR = VARIABLE | MARKER_VALUE - -MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR) -MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0])) - -LPAREN = L("(").suppress() -RPAREN = L(")").suppress() - -MARKER_EXPR = Forward() -MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN) -MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR) - -MARKER = stringStart + MARKER_EXPR + stringEnd - - -def _coerce_parse_result(results: Union[ParseResults, List[Any]]) -> List[Any]: - if isinstance(results, ParseResults): - return [_coerce_parse_result(i) for i in results] - else: - return results +def _normalize_extra_values(results: Any) -> Any: + """ + Normalize extra values. + """ + if isinstance(results[0], tuple): + lhs, op, rhs = results[0] + if isinstance(lhs, Variable) and lhs.value == "extra": + normalized_extra = canonicalize_name(rhs.value) + rhs = Value(normalized_extra) + elif isinstance(rhs, Variable) and rhs.value == "extra": + normalized_extra = canonicalize_name(lhs.value) + lhs = Value(normalized_extra) + results[0] = lhs, op, rhs + return results def _format_marker( - marker: Union[List[str], Tuple[Node, ...], str], first: Optional[bool] = True + marker: Union[List[str], MarkerAtom, str], first: Optional[bool] = True ) -> str: assert isinstance(marker, (list, tuple, str)) @@ -192,7 +113,7 @@ def _eval_op(lhs: str, op: Op, rhs: str) -> bool: except InvalidSpecifier: pass else: - return spec.contains(lhs) + return spec.contains(lhs, prereleases=True) oper: Optional[Operator] = _operators.get(op.serialize()) if oper is None: @@ -201,25 +122,19 @@ def _eval_op(lhs: str, op: Op, rhs: str) -> bool: return oper(lhs, rhs) -class Undefined: - pass +def _normalize(*values: str, key: str) -> Tuple[str, ...]: + # PEP 685 – Comparison of extra names for optional distribution dependencies + # https://peps.python.org/pep-0685/ + # > When comparing extra names, tools MUST normalize the names being + # > compared using the semantics outlined in PEP 503 for names + if key == "extra": + return tuple(canonicalize_name(v) for v in values) + + # other environment markers don't have such standards + return values -_undefined = Undefined() - - -def _get_env(environment: Dict[str, str], name: str) -> str: - value: Union[str, Undefined] = environment.get(name, _undefined) - - if isinstance(value, Undefined): - raise UndefinedEnvironmentName( - f"{name!r} does not exist in evaluation environment." 
-        )
-
-    return value
-
-
-def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool:
+def _evaluate_markers(markers: MarkerList, environment: Dict[str, str]) -> bool:
     groups: List[List[bool]] = [[]]
 
     for marker in markers:
@@ -231,12 +146,15 @@ def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool:
             lhs, op, rhs = marker
 
             if isinstance(lhs, Variable):
-                lhs_value = _get_env(environment, lhs.value)
+                environment_key = lhs.value
+                lhs_value = environment[environment_key]
                 rhs_value = rhs.value
             else:
                 lhs_value = lhs.value
-                rhs_value = _get_env(environment, rhs.value)
+                environment_key = rhs.value
+                rhs_value = environment[environment_key]
 
+            lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key)
             groups[-1].append(_eval_op(lhs_value, op, rhs_value))
         else:
             assert marker in ["and", "or"]
@@ -274,13 +192,29 @@ def default_environment() -> Dict[str, str]:
 class Marker:
     def __init__(self, marker: str) -> None:
+        # Note: We create a Marker object without calling this constructor in
+        # packaging.requirements.Requirement. If any additional logic is
+        # added here, make sure to mirror/adapt Requirement.
         try:
-            self._markers = _coerce_parse_result(MARKER.parseString(marker))
-        except ParseException as e:
-            raise InvalidMarker(
-                f"Invalid marker: {marker!r}, parse error at "
-                f"{marker[e.loc : e.loc + 8]!r}"
-            )
+            self._markers = _normalize_extra_values(_parse_marker(marker))
+            # The attribute `_markers` can be described in terms of a recursive type:
+            # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
+            #
+            # For example, the following expression:
+            # python_version > "3.6" or (python_version == "3.6" and os_name == "unix")
+            #
+            # is parsed into:
+            # [
+            #     (<Variable('python_version')>, <Op('>')>, <Value('3.6')>),
+            #     'and',
+            #     [
+            #         (<Variable('python_version')>, <Op('==')>, <Value('3.6')>),
+            #         'or',
+            #         (<Variable('os_name')>, <Op('==')>, <Value('unix')>)
+            #     ]
+            # ]
+        except ParserSyntaxError as e:
+            raise InvalidMarker(str(e)) from e
 
     def __str__(self) -> str:
         return _format_marker(self._markers)
@@ -288,6 +222,15 @@ class Marker:
     def __repr__(self) -> str:
         return f"<Marker('{self}')>"
 
+    def __hash__(self) -> int:
+        return hash((self.__class__.__name__, str(self)))
+
+    def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, Marker):
+            return NotImplemented
+
+        return str(self) == str(other)
+
     def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool:
         """Evaluate a marker.
 
@@ -298,7 +241,12 @@ class Marker:
         The environment is determined from the current Python process.
         """
         current_environment = default_environment()
+        current_environment["extra"] = ""
         if environment is not None:
             current_environment.update(environment)
+            # The API used to allow setting extra to None. We need to handle this
+            # case for backwards compatibility.
+            if current_environment["extra"] is None:
+                current_environment["extra"] = ""
 
         return _evaluate_markers(self._markers, current_environment)
diff --git a/lib/pkg_resources/_vendor/packaging/metadata.py b/lib/pkg_resources/_vendor/packaging/metadata.py
new file mode 100644
index 00000000..e76a60c3
--- /dev/null
+++ b/lib/pkg_resources/_vendor/packaging/metadata.py
@@ -0,0 +1,408 @@
+import email.feedparser
+import email.header
+import email.message
+import email.parser
+import email.policy
+import sys
+import typing
+from typing import Dict, List, Optional, Tuple, Union, cast
+
+if sys.version_info >= (3, 8):  # pragma: no cover
+    from typing import TypedDict
+else:  # pragma: no cover
+    if typing.TYPE_CHECKING:
+        from typing_extensions import TypedDict
+    else:
+        try:
+            from typing_extensions import TypedDict
+        except ImportError:
+
+            class TypedDict:
+                def __init_subclass__(*_args, **_kwargs):
+                    pass
+
+
+# The RawMetadata class attempts to make as few assumptions about the underlying
+# serialization formats as possible. The idea is that as long as a serialization
+# format offers some very basic primitives in *some* way, then we can support
+# serializing to and from that format.
+class RawMetadata(TypedDict, total=False):
+    """A dictionary of raw core metadata.
+
+    Each field in core metadata maps to a key of this dictionary (when data is
+    provided). The key is lower-case and underscores are used instead of dashes
+    compared to the equivalent core metadata field. Any core metadata field that
+    can be specified multiple times or can hold multiple values in a single
+    field has a key with a plural name.
+
+    Core metadata fields that can be specified multiple times are stored as a
+    list or dict depending on which is appropriate for the field. Any fields
+    which hold multiple values in a single field are stored as a list.
+
+    """
+
+    # Metadata 1.0 - PEP 241
+    metadata_version: str
+    name: str
+    version: str
+    platforms: List[str]
+    summary: str
+    description: str
+    keywords: List[str]
+    home_page: str
+    author: str
+    author_email: str
+    license: str
+
+    # Metadata 1.1 - PEP 314
+    supported_platforms: List[str]
+    download_url: str
+    classifiers: List[str]
+    requires: List[str]
+    provides: List[str]
+    obsoletes: List[str]
+
+    # Metadata 1.2 - PEP 345
+    maintainer: str
+    maintainer_email: str
+    requires_dist: List[str]
+    provides_dist: List[str]
+    obsoletes_dist: List[str]
+    requires_python: str
+    requires_external: List[str]
+    project_urls: Dict[str, str]
+
+    # Metadata 2.0
+    # PEP 426 attempted to completely revamp the metadata format
+    # but got stuck without ever being able to build consensus on
+    # it and ultimately ended up withdrawn.
+    #
+    # However, a number of tools had started emitting METADATA with
+    # `2.0` Metadata-Version, so for historical reasons, this version
+    # was skipped.
+
+    # Metadata 2.1 - PEP 566
+    description_content_type: str
+    provides_extra: List[str]
+
+    # Metadata 2.2 - PEP 643
+    dynamic: List[str]
+
+    # Metadata 2.3 - PEP 685
+    # No new fields were added in PEP 685, just some edge cases were
+    # tightened up to provide better interoperability.
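To make the shape of `RawMetadata` concrete, here is a minimal sketch of feeding METADATA text through `parse_email` (defined further down in this file). The import path assumes the vendored location, and the field values are invented for illustration:

    from pkg_resources._vendor.packaging.metadata import parse_email

    raw, unparsed = parse_email(
        "Metadata-Version: 2.1\n"
        "Name: example-project\n"
        "Version: 1.0\n"
        "Keywords: music,nzb,torrent\n"
    )
    print(raw["name"])      # example-project
    print(raw["keywords"])  # ['music', 'nzb', 'torrent'] -- split on commas
    print(unparsed)         # {} -- nothing unrecognized or malformed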
+
+
+_STRING_FIELDS = {
+    "author",
+    "author_email",
+    "description",
+    "description_content_type",
+    "download_url",
+    "home_page",
+    "license",
+    "maintainer",
+    "maintainer_email",
+    "metadata_version",
+    "name",
+    "requires_python",
+    "summary",
+    "version",
+}
+
+_LIST_STRING_FIELDS = {
+    "classifiers",
+    "dynamic",
+    "obsoletes",
+    "obsoletes_dist",
+    "platforms",
+    "provides",
+    "provides_dist",
+    "provides_extra",
+    "requires",
+    "requires_dist",
+    "requires_external",
+    "supported_platforms",
+}
+
+
+def _parse_keywords(data: str) -> List[str]:
+    """Split a string of comma-separated keywords into a list of keywords."""
+    return [k.strip() for k in data.split(",")]
+
+
+def _parse_project_urls(data: List[str]) -> Dict[str, str]:
+    """Parse a list of label/URL string pairings separated by a comma."""
+    urls = {}
+    for pair in data:
+        # Our logic is slightly tricky here as we want to try and do
+        # *something* reasonable with malformed data.
+        #
+        # The main thing that we have to worry about is data that does
+        # not have a ',' at all to split the label from the value. There
+        # isn't a singular right answer here, and we will fail validation
+        # later on (if the caller is validating) so it doesn't *really*
+        # matter, but since the missing value has to be an empty str
+        # and our return value is dict[str, str], if we let the key
+        # be the missing value, then they'd have multiple '' values that
+        # overwrite each other in an accumulating dict.
+        #
+        # The other potential issue is that it's possible to have the
+        # same label multiple times in the metadata, with no solid "right"
+        # answer with what to do in that case. As such, we'll do the only
+        # thing we can, which is treat the field as unparseable and add it
+        # to our list of unparsed fields.
+        parts = [p.strip() for p in pair.split(",", 1)]
+        parts.extend([""] * (max(0, 2 - len(parts))))  # Ensure 2 items
+
+        # TODO: The spec doesn't say anything about if the keys should be
+        #       considered case sensitive or not... logically they should
+        #       be case-preserving and case-insensitive, but doing that
+        #       would open up more cases where we might have duplicate
+        #       entries.
+        label, url = parts
+        if label in urls:
+            # The label already exists in our set of urls, so this field
+            # is unparseable, and we can just add the whole thing to our
+            # unparseable data and stop processing it.
+            raise KeyError("duplicate labels in project urls")
+        urls[label] = url
+
+    return urls
+
+
+def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str:
+    """Get the body of the message."""
+    # If our source is a str, then our caller has managed encodings for us,
+    # and we don't need to deal with it.
+    if isinstance(source, str):
+        payload: str = msg.get_payload()
+        return payload
+    # If our source is a bytes, then we're managing the encoding and we need
+    # to deal with it.
+    else:
+        bpayload: bytes = msg.get_payload(decode=True)
+        try:
+            return bpayload.decode("utf8", "strict")
+        except UnicodeDecodeError:
+            raise ValueError("payload in an invalid encoding")
+
+
+# The various parse_FORMAT functions here are intended to be as lenient as
+# possible in their parsing, while still returning a correctly typed
+# RawMetadata.
+#
+# To aid in this, we also generally want to do as little touching of the
+# data as possible, except where there are possibly some historic holdovers
+# that make valid data awkward to work with.
+# +# While this is a lower level, intermediate format than our ``Metadata`` +# class, some light touch ups can make a massive difference in usability. + +# Map METADATA fields to RawMetadata. +_EMAIL_TO_RAW_MAPPING = { + "author": "author", + "author-email": "author_email", + "classifier": "classifiers", + "description": "description", + "description-content-type": "description_content_type", + "download-url": "download_url", + "dynamic": "dynamic", + "home-page": "home_page", + "keywords": "keywords", + "license": "license", + "maintainer": "maintainer", + "maintainer-email": "maintainer_email", + "metadata-version": "metadata_version", + "name": "name", + "obsoletes": "obsoletes", + "obsoletes-dist": "obsoletes_dist", + "platform": "platforms", + "project-url": "project_urls", + "provides": "provides", + "provides-dist": "provides_dist", + "provides-extra": "provides_extra", + "requires": "requires", + "requires-dist": "requires_dist", + "requires-external": "requires_external", + "requires-python": "requires_python", + "summary": "summary", + "supported-platform": "supported_platforms", + "version": "version", +} + + +def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]: + """Parse a distribution's metadata. + + This function returns a two-item tuple of dicts. The first dict is of + recognized fields from the core metadata specification. Fields that can be + parsed and translated into Python's built-in types are converted + appropriately. All other fields are left as-is. Fields that are allowed to + appear multiple times are stored as lists. + + The second dict contains all other fields from the metadata. This includes + any unrecognized fields. It also includes any fields which are expected to + be parsed into a built-in type but were not formatted appropriately. Finally, + any fields that are expected to appear only once but are repeated are + included in this dict. + + """ + raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {} + unparsed: Dict[str, List[str]] = {} + + if isinstance(data, str): + parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data) + else: + parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data) + + # We have to wrap parsed.keys() in a set, because in the case of multiple + # values for a key (a list), the key will appear multiple times in the + # list of keys, but we're avoiding that by using get_all(). + for name in frozenset(parsed.keys()): + # Header names in RFC are case insensitive, so we'll normalize to all + # lower case to make comparisons easier. + name = name.lower() + + # We use get_all() here, even for fields that aren't multiple use, + # because otherwise someone could have e.g. two Name fields, and we + # would just silently ignore it rather than doing something about it. + headers = parsed.get_all(name) + + # The way the email module works when parsing bytes is that it + # unconditionally decodes the bytes as ascii using the surrogateescape + # handler. When you pull that data back out (such as with get_all() ), + # it looks to see if the str has any surrogate escapes, and if it does + # it wraps it in a Header object instead of returning the string. + # + # As such, we'll look for those Header objects, and fix up the encoding. + value = [] + # Flag if we have run into any issues processing the headers, thus + # signalling that the data belongs in 'unparsed'. 
+        valid_encoding = True
+        for h in headers:
+            # It's unclear if this can return more types than just a Header or
+            # a str, so we'll just assert here to make sure.
+            assert isinstance(h, (email.header.Header, str))
+
+            # If it's a header object, we need to do our little dance to get
+            # the real data out of it. In cases where there is invalid data
+            # we're going to end up with mojibake, but there's no obvious, good
+            # way around that without reimplementing parts of the Header object
+            # ourselves.
+            #
+            # That should be fine since, if mojibake happens, this key is
+            # going into the unparsed dict anyways.
+            if isinstance(h, email.header.Header):
+                # The Header object stores its data as chunks, and each chunk
+                # can be independently encoded, so we'll need to check each
+                # of them.
+                chunks: List[Tuple[bytes, Optional[str]]] = []
+                for bin, encoding in email.header.decode_header(h):
+                    try:
+                        bin.decode("utf8", "strict")
+                    except UnicodeDecodeError:
+                        # Enable mojibake.
+                        encoding = "latin1"
+                        valid_encoding = False
+                    else:
+                        encoding = "utf8"
+                    chunks.append((bin, encoding))
+
+                # Turn our chunks back into a Header object, then let that
+                # Header object do the right thing to turn them into a
+                # string for us.
+                value.append(str(email.header.make_header(chunks)))
+            # This is already a string, so just add it.
+            else:
+                value.append(h)
+
+        # We've processed all of our values to get them into a list of str,
+        # but we may have mojibake data, in which case this is an unparsed
+        # field.
+        if not valid_encoding:
+            unparsed[name] = value
+            continue
+
+        raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
+        if raw_name is None:
+            # This is a bit of a weird situation: we've encountered a key we
+            # don't recognize, so we don't know whether it's meant to be a
+            # list or not.
+            #
+            # Since we can't really tell one way or another, we'll just leave it
+            # as a list, even though it may be a single item list, because that's
+            # what makes the most sense for email headers.
+            unparsed[name] = value
+            continue
+
+        # If this is one of our string fields, then we'll check to see if our
+        # value is a list of a single item. If it is then we'll assume that
+        # it was emitted as a single string, and unwrap the str from inside
+        # the list.
+        #
+        # If it's any other kind of data, then we haven't the faintest clue
+        # what we should parse it as, and we have to just add it to our list
+        # of unparsed stuff.
+        if raw_name in _STRING_FIELDS and len(value) == 1:
+            raw[raw_name] = value[0]
+        # If this is one of our list of string fields, then we can just assign
+        # the value, since email *only* has strings, and our get_all() call
+        # above ensures that this is a list.
+        elif raw_name in _LIST_STRING_FIELDS:
+            raw[raw_name] = value
+        # Special Case: Keywords
+        # The keywords field is implemented in the metadata spec as a str,
+        # but it conceptually is a list of strings, and is serialized using
+        # ", ".join(keywords), so we'll do some light data massaging to turn
+        # this into what it logically is.
+        elif raw_name == "keywords" and len(value) == 1:
+            raw[raw_name] = _parse_keywords(value[0])
+        # Special Case: Project-URL
+        # The project urls is implemented in the metadata spec as a list of
+        # specially-formatted strings that represent a key and a value, which
+        # is fundamentally a mapping, however the email format doesn't support
+        # mappings in a sane way, so it was crammed into a list of strings
+        # instead.
+        #
+        # We will do a little light data massaging to turn this into a map as
+        # it logically should be.
+        elif raw_name == "project_urls":
+            try:
+                raw[raw_name] = _parse_project_urls(value)
+            except KeyError:
+                unparsed[name] = value
+        # Nothing that we've done has managed to parse this, so we'll just
+        # throw it in our unparseable data and move on.
+        else:
+            unparsed[name] = value
+
+    # We need to support getting the Description from the message payload in
+    # addition to getting it from the headers. This does mean, though, there
+    # is the possibility of it being set both ways, in which case we put both
+    # in 'unparsed' since we don't know which is right.
+    try:
+        payload = _get_payload(parsed, data)
+    except ValueError:
+        unparsed.setdefault("description", []).append(
+            parsed.get_payload(decode=isinstance(data, bytes))
+        )
+    else:
+        if payload:
+            # Check to see if we've already got a description, if so then both
+            # it, and this body move to unparseable.
+            if "description" in raw:
+                description_header = cast(str, raw.pop("description"))
+                unparsed.setdefault("description", []).extend(
+                    [description_header, payload]
+                )
+            elif "description" in unparsed:
+                unparsed["description"].append(payload)
+            else:
+                raw["description"] = payload
+
+    # We need to cast our `raw` to a RawMetadata, because a TypedDict only
+    # supports literal key names while we're computing ours; the way this
+    # function is implemented, our `TypedDict` can only end up with valid
+    # key names anyway.
+    return cast(RawMetadata, raw), unparsed
diff --git a/lib/pkg_resources/_vendor/packaging/requirements.py b/lib/pkg_resources/_vendor/packaging/requirements.py
index 6af14ec4..f34bfa85 100644
--- a/lib/pkg_resources/_vendor/packaging/requirements.py
+++ b/lib/pkg_resources/_vendor/packaging/requirements.py
@@ -2,26 +2,13 @@
 # 2.0, and the BSD License. See the LICENSE file in the root of this repository
 # for complete details.
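The requirements.py rewrite below swaps the pyparsing grammar for the new handwritten parser, but the public `Requirement` API is unchanged from the caller's point of view. A minimal sketch, with an invented package name and the vendored import path assumed:

    from pkg_resources._vendor.packaging.requirements import Requirement

    req = Requirement('example-pkg[extra1]>=1.0; python_version >= "3.8"')
    print(req.name)       # example-pkg
    print(req.extras)     # {'extra1'}
    print(req.specifier)  # >=1.0
    print(req.marker)     # python_version >= "3.8"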
-import re -import string import urllib.parse -from typing import List, Optional as TOptional, Set +from typing import Any, List, Optional, Set -from pkg_resources.extern.pyparsing import ( # noqa - Combine, - Literal as L, - Optional, - ParseException, - Regex, - Word, - ZeroOrMore, - originalTextFor, - stringEnd, - stringStart, -) - -from .markers import MARKER_EXPR, Marker -from .specifiers import LegacySpecifier, Specifier, SpecifierSet +from ._parser import parse_requirement as _parse_requirement +from ._tokenizer import ParserSyntaxError +from .markers import Marker, _normalize_extra_values +from .specifiers import SpecifierSet class InvalidRequirement(ValueError): @@ -30,60 +17,6 @@ class InvalidRequirement(ValueError): """ -ALPHANUM = Word(string.ascii_letters + string.digits) - -LBRACKET = L("[").suppress() -RBRACKET = L("]").suppress() -LPAREN = L("(").suppress() -RPAREN = L(")").suppress() -COMMA = L(",").suppress() -SEMICOLON = L(";").suppress() -AT = L("@").suppress() - -PUNCTUATION = Word("-_.") -IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM) -IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END)) - -NAME = IDENTIFIER("name") -EXTRA = IDENTIFIER - -URI = Regex(r"[^ ]+")("url") -URL = AT + URI - -EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA) -EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras") - -VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE) -VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE) - -VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY -VERSION_MANY = Combine( - VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE), joinString=",", adjacent=False -)("_raw_spec") -_VERSION_SPEC = Optional((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY) -_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "") - -VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier") -VERSION_SPEC.setParseAction(lambda s, l, t: t[1]) - -MARKER_EXPR = originalTextFor(MARKER_EXPR())("marker") -MARKER_EXPR.setParseAction( - lambda s, l, t: Marker(s[t._original_start : t._original_end]) -) -MARKER_SEPARATOR = SEMICOLON -MARKER = MARKER_SEPARATOR + MARKER_EXPR - -VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER) -URL_AND_MARKER = URL + Optional(MARKER) - -NAMED_REQUIREMENT = NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER) - -REQUIREMENT = stringStart + NAMED_REQUIREMENT + stringEnd -# pkg_resources.extern.pyparsing isn't thread safe during initialization, so we do it eagerly, see -# issue #104 -REQUIREMENT.parseString("x[]") - - class Requirement: """Parse a requirement. 
@@ -99,28 +32,29 @@ class Requirement:
 
     def __init__(self, requirement_string: str) -> None:
         try:
-            req = REQUIREMENT.parseString(requirement_string)
-        except ParseException as e:
-            raise InvalidRequirement(
-                f'Parse error at "{ requirement_string[e.loc : e.loc + 8]!r}": {e.msg}'
-            )
+            parsed = _parse_requirement(requirement_string)
+        except ParserSyntaxError as e:
+            raise InvalidRequirement(str(e)) from e
 
-        self.name: str = req.name
-        if req.url:
-            parsed_url = urllib.parse.urlparse(req.url)
+        self.name: str = parsed.name
+        if parsed.url:
+            parsed_url = urllib.parse.urlparse(parsed.url)
             if parsed_url.scheme == "file":
-                if urllib.parse.urlunparse(parsed_url) != req.url:
+                if urllib.parse.urlunparse(parsed_url) != parsed.url:
                     raise InvalidRequirement("Invalid URL given")
             elif not (parsed_url.scheme and parsed_url.netloc) or (
                 not parsed_url.scheme and not parsed_url.netloc
             ):
-                raise InvalidRequirement(f"Invalid URL: {req.url}")
-            self.url: TOptional[str] = req.url
+                raise InvalidRequirement(f"Invalid URL: {parsed.url}")
+            self.url: Optional[str] = parsed.url
         else:
             self.url = None
-        self.extras: Set[str] = set(req.extras.asList() if req.extras else [])
-        self.specifier: SpecifierSet = SpecifierSet(req.specifier)
-        self.marker: TOptional[Marker] = req.marker if req.marker else None
+        self.extras: Set[str] = set(parsed.extras if parsed.extras else [])
+        self.specifier: SpecifierSet = SpecifierSet(parsed.specifier)
+        self.marker: Optional[Marker] = None
+        if parsed.marker is not None:
+            self.marker = Marker.__new__(Marker)
+            self.marker._markers = _normalize_extra_values(parsed.marker)
 
     def __str__(self) -> str:
         parts: List[str] = [self.name]
@@ -144,3 +78,18 @@ class Requirement:
 
     def __repr__(self) -> str:
         return f"<Requirement('{self}')>"
+
+    def __hash__(self) -> int:
+        return hash((self.__class__.__name__, str(self)))
+
+    def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, Requirement):
+            return NotImplemented
+
+        return (
+            self.name == other.name
+            and self.extras == other.extras
+            and self.specifier == other.specifier
+            and self.url == other.url
+            and self.marker == other.marker
+        )
diff --git a/lib/pkg_resources/_vendor/packaging/specifiers.py b/lib/pkg_resources/_vendor/packaging/specifiers.py
index ce66bd4a..ba8fe37b 100644
--- a/lib/pkg_resources/_vendor/packaging/specifiers.py
+++ b/lib/pkg_resources/_vendor/packaging/specifiers.py
@@ -1,20 +1,22 @@
 # This file is dual licensed under the terms of the Apache License, Version
 # 2.0, and the BSD License. See the LICENSE file in the root of this repository
 # for complete details.
+"""
+..
testsetup:: + + from packaging.specifiers import Specifier, SpecifierSet, InvalidSpecifier + from packaging.version import Version +""" import abc -import functools import itertools import re -import warnings from typing import ( Callable, - Dict, Iterable, Iterator, List, Optional, - Pattern, Set, Tuple, TypeVar, @@ -22,17 +24,28 @@ from typing import ( ) from .utils import canonicalize_version -from .version import LegacyVersion, Version, parse +from .version import Version -ParsedVersion = Union[Version, LegacyVersion] -UnparsedVersion = Union[Version, LegacyVersion, str] -VersionTypeVar = TypeVar("VersionTypeVar", bound=UnparsedVersion) -CallableOperator = Callable[[ParsedVersion, str], bool] +UnparsedVersion = Union[Version, str] +UnparsedVersionVar = TypeVar("UnparsedVersionVar", bound=UnparsedVersion) +CallableOperator = Callable[[Version, str], bool] + + +def _coerce_version(version: UnparsedVersion) -> Version: + if not isinstance(version, Version): + version = Version(version) + return version class InvalidSpecifier(ValueError): """ - An invalid specifier was found, users should refer to PEP 440. + Raised when attempting to create a :class:`Specifier` with a specifier + string that is invalid. + + >>> Specifier("lolwat") + Traceback (most recent call last): + ... + packaging.specifiers.InvalidSpecifier: Invalid specifier: 'lolwat' """ @@ -40,42 +53,39 @@ class BaseSpecifier(metaclass=abc.ABCMeta): @abc.abstractmethod def __str__(self) -> str: """ - Returns the str representation of this Specifier like object. This + Returns the str representation of this Specifier-like object. This should be representative of the Specifier itself. """ @abc.abstractmethod def __hash__(self) -> int: """ - Returns a hash value for this Specifier like object. + Returns a hash value for this Specifier-like object. """ @abc.abstractmethod def __eq__(self, other: object) -> bool: """ - Returns a boolean representing whether or not the two Specifier like + Returns a boolean representing whether or not the two Specifier-like objects are equal. + + :param other: The other object to check against. """ + @property @abc.abstractmethod - def __ne__(self, other: object) -> bool: - """ - Returns a boolean representing whether or not the two Specifier like - objects are not equal. - """ - - @abc.abstractproperty def prereleases(self) -> Optional[bool]: - """ - Returns whether or not pre-releases as a whole are allowed by this - specifier. + """Whether or not pre-releases as a whole are allowed. + + This can be set to either ``True`` or ``False`` to explicitly enable or disable + prereleases or it can be set to ``None`` (the default) to use default semantics. """ @prereleases.setter def prereleases(self, value: bool) -> None: - """ - Sets whether or not pre-releases as a whole are allowed by this - specifier. + """Setter for :attr:`prereleases`. + + :param value: The value to set. """ @abc.abstractmethod @@ -86,238 +96,28 @@ class BaseSpecifier(metaclass=abc.ABCMeta): @abc.abstractmethod def filter( - self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None - ) -> Iterable[VersionTypeVar]: + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: """ Takes an iterable of items and filters them so that only items which are contained within this specifier are allowed in it. """ -class _IndividualSpecifier(BaseSpecifier): +class Specifier(BaseSpecifier): + """This class abstracts handling of version specifiers. 
- _operators: Dict[str, str] = {} - _regex: Pattern[str] + .. tip:: - def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: - match = self._regex.search(spec) - if not match: - raise InvalidSpecifier(f"Invalid specifier: '{spec}'") + It is generally not required to instantiate this manually. You should instead + prefer to work with :class:`SpecifierSet` instead, which can parse + comma-separated version specifiers (which is what package metadata contains). + """ - self._spec: Tuple[str, str] = ( - match.group("operator").strip(), - match.group("version").strip(), - ) - - # Store whether or not this Specifier should accept prereleases - self._prereleases = prereleases - - def __repr__(self) -> str: - pre = ( - f", prereleases={self.prereleases!r}" - if self._prereleases is not None - else "" - ) - - return "<{}({!r}{})>".format(self.__class__.__name__, str(self), pre) - - def __str__(self) -> str: - return "{}{}".format(*self._spec) - - @property - def _canonical_spec(self) -> Tuple[str, str]: - return self._spec[0], canonicalize_version(self._spec[1]) - - def __hash__(self) -> int: - return hash(self._canonical_spec) - - def __eq__(self, other: object) -> bool: - if isinstance(other, str): - try: - other = self.__class__(str(other)) - except InvalidSpecifier: - return NotImplemented - elif not isinstance(other, self.__class__): - return NotImplemented - - return self._canonical_spec == other._canonical_spec - - def __ne__(self, other: object) -> bool: - if isinstance(other, str): - try: - other = self.__class__(str(other)) - except InvalidSpecifier: - return NotImplemented - elif not isinstance(other, self.__class__): - return NotImplemented - - return self._spec != other._spec - - def _get_operator(self, op: str) -> CallableOperator: - operator_callable: CallableOperator = getattr( - self, f"_compare_{self._operators[op]}" - ) - return operator_callable - - def _coerce_version(self, version: UnparsedVersion) -> ParsedVersion: - if not isinstance(version, (LegacyVersion, Version)): - version = parse(version) - return version - - @property - def operator(self) -> str: - return self._spec[0] - - @property - def version(self) -> str: - return self._spec[1] - - @property - def prereleases(self) -> Optional[bool]: - return self._prereleases - - @prereleases.setter - def prereleases(self, value: bool) -> None: - self._prereleases = value - - def __contains__(self, item: str) -> bool: - return self.contains(item) - - def contains( - self, item: UnparsedVersion, prereleases: Optional[bool] = None - ) -> bool: - - # Determine if prereleases are to be allowed or not. - if prereleases is None: - prereleases = self.prereleases - - # Normalize item to a Version or LegacyVersion, this allows us to have - # a shortcut for ``"2.0" in Specifier(">=2") - normalized_item = self._coerce_version(item) - - # Determine if we should be supporting prereleases in this specifier - # or not, if we do not support prereleases than we can short circuit - # logic if this version is a prereleases. - if normalized_item.is_prerelease and not prereleases: - return False - - # Actually do the comparison to determine if this item is contained - # within this Specifier or not. 
- operator_callable: CallableOperator = self._get_operator(self.operator) - return operator_callable(normalized_item, self.version) - - def filter( - self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None - ) -> Iterable[VersionTypeVar]: - - yielded = False - found_prereleases = [] - - kw = {"prereleases": prereleases if prereleases is not None else True} - - # Attempt to iterate over all the values in the iterable and if any of - # them match, yield them. - for version in iterable: - parsed_version = self._coerce_version(version) - - if self.contains(parsed_version, **kw): - # If our version is a prerelease, and we were not set to allow - # prereleases, then we'll store it for later in case nothing - # else matches this specifier. - if parsed_version.is_prerelease and not ( - prereleases or self.prereleases - ): - found_prereleases.append(version) - # Either this is not a prerelease, or we should have been - # accepting prereleases from the beginning. - else: - yielded = True - yield version - - # Now that we've iterated over everything, determine if we've yielded - # any values, and if we have not and we have any prereleases stored up - # then we will go ahead and yield the prereleases. - if not yielded and found_prereleases: - for version in found_prereleases: - yield version - - -class LegacySpecifier(_IndividualSpecifier): - - _regex_str = r""" - (?P(==|!=|<=|>=|<|>)) - \s* - (?P - [^,;\s)]* # Since this is a "legacy" specifier, and the version - # string can be just about anything, we match everything - # except for whitespace, a semi-colon for marker support, - # a closing paren since versions can be enclosed in - # them, and a comma since it's a version separator. - ) - """ - - _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) - - _operators = { - "==": "equal", - "!=": "not_equal", - "<=": "less_than_equal", - ">=": "greater_than_equal", - "<": "less_than", - ">": "greater_than", - } - - def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: - super().__init__(spec, prereleases) - - warnings.warn( - "Creating a LegacyVersion has been deprecated and will be " - "removed in the next major release", - DeprecationWarning, - ) - - def _coerce_version(self, version: UnparsedVersion) -> LegacyVersion: - if not isinstance(version, LegacyVersion): - version = LegacyVersion(str(version)) - return version - - def _compare_equal(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective == self._coerce_version(spec) - - def _compare_not_equal(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective != self._coerce_version(spec) - - def _compare_less_than_equal(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective <= self._coerce_version(spec) - - def _compare_greater_than_equal( - self, prospective: LegacyVersion, spec: str - ) -> bool: - return prospective >= self._coerce_version(spec) - - def _compare_less_than(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective < self._coerce_version(spec) - - def _compare_greater_than(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective > self._coerce_version(spec) - - -def _require_version_compare( - fn: Callable[["Specifier", ParsedVersion, str], bool] -) -> Callable[["Specifier", ParsedVersion, str], bool]: - @functools.wraps(fn) - def wrapped(self: "Specifier", prospective: ParsedVersion, spec: str) -> bool: - if not isinstance(prospective, Version): - return False - return 
-
-    return wrapped
-
-
-class Specifier(_IndividualSpecifier):
-
-    _regex_str = r"""
+    _operator_regex_str = r"""
         (?P<operator>(~=|==|!=|<=|>=|<|>|===))
+        """
+    _version_regex_str = r"""
         (?P<version>
             (?:
                 # The identity operators allow for an escape hatch that will
@@ -327,8 +127,10 @@ class Specifier(_IndividualSpecifier):
                 # but included entirely as an escape hatch.
                 (?<====)  # Only match for the identity operator
                 \s*
-                [^\s]*    # We just match everything, except for whitespace
-                          # since we are only testing for strict identity.
+                [^\s;)]*  # The arbitrary version can be just about anything,
+                          # we match everything except for whitespace, a
+                          # semi-colon for marker support, and a closing paren
+                          # since versions can be enclosed in them.
             )
             |
             (?:
@@ -341,23 +143,23 @@ class Specifier(_IndividualSpecifier):
                 v?
                 (?:[0-9]+!)?          # epoch
                 [0-9]+(?:\.[0-9]+)*   # release
-                (?:                   # pre release
-                    [-_\.]?
-                    (a|b|c|rc|alpha|beta|pre|preview)
-                    [-_\.]?
-                    [0-9]*
-                )?
-                (?:                   # post release
-                    (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
-                )?
-                # You cannot use a wild card and a dev or local version
-                # together so group them with a | and make them optional.
+                # You cannot use a wild card and a pre-release, post-release, a dev or
+                # local version together so group them with a | and make them optional.
                 (?:
+                    \.\*  # Wild card syntax of .*
+                    |
+                    (?:                                  # pre release
+                        [-_\.]?
+                        (alpha|beta|preview|pre|a|b|c|rc)
+                        [-_\.]?
+                        [0-9]*
+                    )?
+                    (?:                                  # post release
+                        (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
+                    )?
                     (?:[-_\.]?dev[-_\.]?[0-9]*)?         # dev release
                     (?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local
-                    |
-                    \.\*  # Wild card syntax of .*
                 )?
             )
             |
@@ -372,7 +174,7 @@ class Specifier(_IndividualSpecifier):
                 [0-9]+(?:\.[0-9]+)+   # release  (We have a + instead of a *)
                 (?:                   # pre release
                     [-_\.]?
-                    (a|b|c|rc|alpha|beta|pre|preview)
+                    (alpha|beta|preview|pre|a|b|c|rc)
                     [-_\.]?
                     [0-9]*
                 )?
@@ -397,7 +199,7 @@ class Specifier(_IndividualSpecifier):
                 [0-9]+(?:\.[0-9]+)*   # release
                 (?:                   # pre release
                     [-_\.]?
-                    (a|b|c|rc|alpha|beta|pre|preview)
+                    (alpha|beta|preview|pre|a|b|c|rc)
                     [-_\.]?
                     [0-9]*
                 )?
@@ -409,7 +211,10 @@ class Specifier(_IndividualSpecifier):
         )
         """

-    _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE)
+    _regex = re.compile(
+        r"^\s*" + _operator_regex_str + _version_regex_str + r"\s*$",
+        re.VERBOSE | re.IGNORECASE,
+    )

     _operators = {
         "~=": "compatible",
@@ -422,8 +227,153 @@ class Specifier(_IndividualSpecifier):
         "===": "arbitrary",
     }

-    @_require_version_compare
-    def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool:
+    def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None:
+        """Initialize a Specifier instance.
+
+        :param spec:
+            The string representation of a specifier which will be parsed and
+            normalized before use.
+        :param prereleases:
+            This tells the specifier if it should accept prerelease versions if
+            applicable or not. The default of ``None`` will autodetect it from the
+            given specifiers.
+        :raises InvalidSpecifier:
+            If the given specifier is invalid (i.e. bad syntax).
+ """ + match = self._regex.search(spec) + if not match: + raise InvalidSpecifier(f"Invalid specifier: '{spec}'") + + self._spec: Tuple[str, str] = ( + match.group("operator").strip(), + match.group("version").strip(), + ) + + # Store whether or not this Specifier should accept prereleases + self._prereleases = prereleases + + # https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515 + @property # type: ignore[override] + def prereleases(self) -> bool: + # If there is an explicit prereleases set for this, then we'll just + # blindly use that. + if self._prereleases is not None: + return self._prereleases + + # Look at all of our specifiers and determine if they are inclusive + # operators, and if they are if they are including an explicit + # prerelease. + operator, version = self._spec + if operator in ["==", ">=", "<=", "~=", "==="]: + # The == specifier can include a trailing .*, if it does we + # want to remove before parsing. + if operator == "==" and version.endswith(".*"): + version = version[:-2] + + # Parse the version, and if it is a pre-release than this + # specifier allows pre-releases. + if Version(version).is_prerelease: + return True + + return False + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + @property + def operator(self) -> str: + """The operator of this specifier. + + >>> Specifier("==1.2.3").operator + '==' + """ + return self._spec[0] + + @property + def version(self) -> str: + """The version of this specifier. + + >>> Specifier("==1.2.3").version + '1.2.3' + """ + return self._spec[1] + + def __repr__(self) -> str: + """A representation of the Specifier that shows all internal state. + + >>> Specifier('>=1.0.0') + =1.0.0')> + >>> Specifier('>=1.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> Specifier('>=1.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return f"<{self.__class__.__name__}({str(self)!r}{pre})>" + + def __str__(self) -> str: + """A string representation of the Specifier that can be round-tripped. + + >>> str(Specifier('>=1.0.0')) + '>=1.0.0' + >>> str(Specifier('>=1.0.0', prereleases=False)) + '>=1.0.0' + """ + return "{}{}".format(*self._spec) + + @property + def _canonical_spec(self) -> Tuple[str, str]: + canonical_version = canonicalize_version( + self._spec[1], + strip_trailing_zero=(self._spec[0] != "~="), + ) + return self._spec[0], canonical_version + + def __hash__(self) -> int: + return hash(self._canonical_spec) + + def __eq__(self, other: object) -> bool: + """Whether or not the two Specifier-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> Specifier("==1.2.3") == Specifier("== 1.2.3.0") + True + >>> (Specifier("==1.2.3", prereleases=False) == + ... 
Specifier("==1.2.3", prereleases=True)) + True + >>> Specifier("==1.2.3") == "==1.2.3" + True + >>> Specifier("==1.2.3") == Specifier("==1.2.4") + False + >>> Specifier("==1.2.3") == Specifier("~=1.2.3") + False + """ + if isinstance(other, str): + try: + other = self.__class__(str(other)) + except InvalidSpecifier: + return NotImplemented + elif not isinstance(other, self.__class__): + return NotImplemented + + return self._canonical_spec == other._canonical_spec + + def _get_operator(self, op: str) -> CallableOperator: + operator_callable: CallableOperator = getattr( + self, f"_compare_{self._operators[op]}" + ) + return operator_callable + + def _compare_compatible(self, prospective: Version, spec: str) -> bool: # Compatible releases have an equivalent combination of >= and ==. That # is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to @@ -444,34 +394,35 @@ class Specifier(_IndividualSpecifier): prospective, prefix ) - @_require_version_compare - def _compare_equal(self, prospective: ParsedVersion, spec: str) -> bool: + def _compare_equal(self, prospective: Version, spec: str) -> bool: # We need special logic to handle prefix matching if spec.endswith(".*"): # In the case of prefix matching we want to ignore local segment. - prospective = Version(prospective.public) + normalized_prospective = canonicalize_version( + prospective.public, strip_trailing_zero=False + ) + # Get the normalized version string ignoring the trailing .* + normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False) # Split the spec out by dots, and pretend that there is an implicit # dot in between a release segment and a pre-release segment. - split_spec = _version_split(spec[:-2]) # Remove the trailing .* + split_spec = _version_split(normalized_spec) # Split the prospective version out by dots, and pretend that there # is an implicit dot in between a release segment and a pre-release # segment. - split_prospective = _version_split(str(prospective)) + split_prospective = _version_split(normalized_prospective) + + # 0-pad the prospective version before shortening it to get the correct + # shortened version. + padded_prospective, _ = _pad_version(split_prospective, split_spec) # Shorten the prospective version to be the same length as the spec # so that we can determine if the specifier is a prefix of the # prospective version or not. - shortened_prospective = split_prospective[: len(split_spec)] + shortened_prospective = padded_prospective[: len(split_spec)] - # Pad out our two sides with zeros so that they both equal the same - # length. - padded_spec, padded_prospective = _pad_version( - split_spec, shortened_prospective - ) - - return padded_prospective == padded_spec + return shortened_prospective == split_spec else: # Convert our spec string into a Version spec_version = Version(spec) @@ -484,30 +435,24 @@ class Specifier(_IndividualSpecifier): return prospective == spec_version - @_require_version_compare - def _compare_not_equal(self, prospective: ParsedVersion, spec: str) -> bool: + def _compare_not_equal(self, prospective: Version, spec: str) -> bool: return not self._compare_equal(prospective, spec) - @_require_version_compare - def _compare_less_than_equal(self, prospective: ParsedVersion, spec: str) -> bool: + def _compare_less_than_equal(self, prospective: Version, spec: str) -> bool: # NB: Local version identifiers are NOT permitted in the version # specifier, so local version labels can be universally removed from # the prospective version. 
return Version(prospective.public) <= Version(spec) - @_require_version_compare - def _compare_greater_than_equal( - self, prospective: ParsedVersion, spec: str - ) -> bool: + def _compare_greater_than_equal(self, prospective: Version, spec: str) -> bool: # NB: Local version identifiers are NOT permitted in the version # specifier, so local version labels can be universally removed from # the prospective version. return Version(prospective.public) >= Version(spec) - @_require_version_compare - def _compare_less_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + def _compare_less_than(self, prospective: Version, spec_str: str) -> bool: # Convert our spec to a Version instance, since we'll want to work with # it as a version. @@ -532,8 +477,7 @@ class Specifier(_IndividualSpecifier): # version in the spec. return True - @_require_version_compare - def _compare_greater_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + def _compare_greater_than(self, prospective: Version, spec_str: str) -> bool: # Convert our spec to a Version instance, since we'll want to work with # it as a version. @@ -567,34 +511,133 @@ class Specifier(_IndividualSpecifier): def _compare_arbitrary(self, prospective: Version, spec: str) -> bool: return str(prospective).lower() == str(spec).lower() - @property - def prereleases(self) -> bool: + def __contains__(self, item: Union[str, Version]) -> bool: + """Return whether or not the item is contained in this specifier. - # If there is an explicit prereleases set for this, then we'll just - # blindly use that. - if self._prereleases is not None: - return self._prereleases + :param item: The item to check for. - # Look at all of our specifiers and determine if they are inclusive - # operators, and if they are if they are including an explicit - # prerelease. - operator, version = self._spec - if operator in ["==", ">=", "<=", "~=", "==="]: - # The == specifier can include a trailing .*, if it does we - # want to remove before parsing. - if operator == "==" and version.endswith(".*"): - version = version[:-2] + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. - # Parse the version, and if it is a pre-release than this - # specifier allows pre-releases. - if parse(version).is_prerelease: - return True + >>> "1.2.3" in Specifier(">=1.2.3") + True + >>> Version("1.2.3") in Specifier(">=1.2.3") + True + >>> "1.0.0" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3", prereleases=True) + True + """ + return self.contains(item) - return False + def contains( + self, item: UnparsedVersion, prereleases: Optional[bool] = None + ) -> bool: + """Return whether or not the item is contained in this specifier. - @prereleases.setter - def prereleases(self, value: bool) -> None: - self._prereleases = value + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this Specifier. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. 
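For context, the comparison helpers above implement PEP 440's compatible-release and prefix-matching rules; they are easiest to understand through the public API. A minimal sketch, assuming the standalone `packaging` import path (this copy is vendored under `pkg_resources._vendor.packaging`):

```python
# Sketch: observable behaviour of the comparison operators implemented above.
from packaging.specifiers import Specifier

# ~=2.2 is equivalent to ">=2.2, ==2.*" (compatible release).
assert Specifier("~=2.2").contains("2.9")
assert not Specifier("~=2.2").contains("3.0")

# ==1.4.* is a prefix match; the local segment (+anything) is ignored.
assert Specifier("==1.4.*").contains("1.4.7+local")
assert not Specifier("==1.4.*").contains("1.5.0")
```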
+
+        >>> Specifier(">=1.2.3").contains("1.2.3")
+        True
+        >>> Specifier(">=1.2.3").contains(Version("1.2.3"))
+        True
+        >>> Specifier(">=1.2.3").contains("1.0.0")
+        False
+        >>> Specifier(">=1.2.3").contains("1.3.0a1")
+        False
+        >>> Specifier(">=1.2.3", prereleases=True).contains("1.3.0a1")
+        True
+        >>> Specifier(">=1.2.3").contains("1.3.0a1", prereleases=True)
+        True
+        """
+
+        # Determine if prereleases are to be allowed or not.
+        if prereleases is None:
+            prereleases = self.prereleases
+
+        # Normalize item to a Version, this allows us to have a shortcut for
+        # "2.0" in Specifier(">=2")
+        normalized_item = _coerce_version(item)
+
+        # Determine if we should be supporting prereleases in this specifier
+        # or not, if we do not support prereleases then we can short circuit
+        # logic if this version is a prerelease.
+        if normalized_item.is_prerelease and not prereleases:
+            return False
+
+        # Actually do the comparison to determine if this item is contained
+        # within this Specifier or not.
+        operator_callable: CallableOperator = self._get_operator(self.operator)
+        return operator_callable(normalized_item, self.version)
+
+    def filter(
+        self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None
+    ) -> Iterator[UnparsedVersionVar]:
+        """Filter items in the given iterable that match the specifier.
+
+        :param iterable:
+            An iterable that can contain version strings and :class:`Version` instances.
+            The items in the iterable will be filtered according to the specifier.
+        :param prereleases:
+            Whether or not to allow prereleases in the returned iterator. If set to
+            ``None`` (the default), it will intelligently decide whether to allow
+            prereleases or not (based on the :attr:`prereleases` attribute, and
+            whether the only versions matching are prereleases).
+
+        This method is smarter than just ``filter(Specifier().contains, [...])``
+        because it implements the rule from :pep:`440` that a prerelease item
+        SHOULD be accepted if no other versions match the given specifier.
+
+        >>> list(Specifier(">=1.2.3").filter(["1.2", "1.3", "1.5a1"]))
+        ['1.3']
+        >>> list(Specifier(">=1.2.3").filter(["1.2", "1.2.3", "1.3", Version("1.4")]))
+        ['1.2.3', '1.3', <Version('1.4')>]
+        >>> list(Specifier(">=1.2.3").filter(["1.2", "1.5a1"]))
+        ['1.5a1']
+        >>> list(Specifier(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True))
+        ['1.3', '1.5a1']
+        >>> list(Specifier(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"]))
+        ['1.3', '1.5a1']
+        """
+
+        yielded = False
+        found_prereleases = []
+
+        kw = {"prereleases": prereleases if prereleases is not None else True}
+
+        # Attempt to iterate over all the values in the iterable and if any of
+        # them match, yield them.
+        for version in iterable:
+            parsed_version = _coerce_version(version)
+
+            if self.contains(parsed_version, **kw):
+                # If our version is a prerelease, and we were not set to allow
+                # prereleases, then we'll store it for later in case nothing
+                # else matches this specifier.
+                if parsed_version.is_prerelease and not (
+                    prereleases or self.prereleases
+                ):
+                    found_prereleases.append(version)
+                # Either this is not a prerelease, or we should have been
+                # accepting prereleases from the beginning.
+                else:
+                    yielded = True
+                    yield version
+
+        # Now that we've iterated over everything, determine if we've yielded
+        # any values, and if we have not and we have any prereleases stored up
+        # then we will go ahead and yield the prereleases.
+        if not yielded and found_prereleases:
+            for version in found_prereleases:
+                yield version


 _prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$")

@@ -636,22 +679,39 @@ def _pad_version(left: List[str], right: List[str]) -> Tuple[List[str], List[str]]:

 class SpecifierSet(BaseSpecifier):
+    """This class abstracts handling of a set of version specifiers.
+
+    It can be passed a single specifier (``>=3.0``), a comma-separated list of
+    specifiers (``>=3.0,!=3.1``), or no specifier at all.
+    """
+
     def __init__(
         self, specifiers: str = "", prereleases: Optional[bool] = None
     ) -> None:
+        """Initialize a SpecifierSet instance.

-        # Split on , to break each individual specifier into its own item, and
+        :param specifiers:
+            The string representation of a specifier or a comma-separated list of
+            specifiers which will be parsed and normalized before use.
+        :param prereleases:
+            This tells the SpecifierSet if it should accept prerelease versions if
+            applicable or not. The default of ``None`` will autodetect it from the
+            given specifiers.
+
+        :raises InvalidSpecifier:
+            If the given ``specifiers`` are not parseable, then this exception will be
+            raised.
+        """
+
+        # Split on `,` to break each individual specifier into its own item, and
         # strip each item to remove leading/trailing whitespace.
         split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()]

         # Parse each individual specifier, attempting first to make it a
-        # Specifier and falling back to a LegacySpecifier.
-        parsed: Set[_IndividualSpecifier] = set()
+        # Specifier.
+        parsed: Set[Specifier] = set()
         for specifier in split_specifiers:
-            try:
-                parsed.add(Specifier(specifier))
-            except InvalidSpecifier:
-                parsed.add(LegacySpecifier(specifier))
+            parsed.add(Specifier(specifier))

         # Turn our parsed specifiers into a frozen set and save them for later.
         self._specs = frozenset(parsed)
@@ -660,22 +720,74 @@ class SpecifierSet(BaseSpecifier):
         # we accept prereleases or not.
         self._prereleases = prereleases

+    @property
+    def prereleases(self) -> Optional[bool]:
+        # If we have been given an explicit prerelease modifier, then we'll
+        # pass that through here.
+        if self._prereleases is not None:
+            return self._prereleases
+
+        # If we don't have any specifiers, and we don't have a forced value,
+        # then we'll just return None since we don't know if this should have
+        # pre-releases or not.
+        if not self._specs:
+            return None
+
+        # Otherwise we'll see if any of the given specifiers accept
+        # prereleases, if any of them do we'll return True, otherwise False.
+        return any(s.prereleases for s in self._specs)
+
+    @prereleases.setter
+    def prereleases(self, value: bool) -> None:
+        self._prereleases = value
+
     def __repr__(self) -> str:
+        """A representation of the specifier set that shows all internal state.
+
+        Note that the ordering of the individual specifiers within the set may not
+        match the input string.
+
+        >>> SpecifierSet('>=1.0.0,!=2.0.0')
+        <SpecifierSet('!=2.0.0,>=1.0.0')>
+        >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=False)
+        <SpecifierSet('!=2.0.0,>=1.0.0', prereleases=False)>
+        >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=True)
+        <SpecifierSet('!=2.0.0,>=1.0.0', prereleases=True)>
+        """
         pre = (
             f", prereleases={self.prereleases!r}"
             if self._prereleases is not None
             else ""
         )

-        return "<SpecifierSet({!r}{})>".format(str(self), pre)
+        return f"<SpecifierSet({str(self)!r}{pre})>"

     def __str__(self) -> str:
+        """A string representation of the specifier set that can be round-tripped.
+
+        Note that the ordering of the individual specifiers within the set may not
+        match the input string.
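A short sketch of the parsing and normalisation contract described above, assuming the standalone `packaging` import path:

```python
# Sketch: SpecifierSet splits on commas, strips whitespace, and serialises
# its members in sorted order (the input order is not preserved).
from packaging.specifiers import SpecifierSet

specs = SpecifierSet(">=1.0.0, !=1.0.1")
print(str(specs))        # '!=1.0.1,>=1.0.0'
print(len(specs))        # 2
print("1.2.3" in specs)  # True
```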
+
+        >>> str(SpecifierSet(">=1.0.0,!=1.0.1"))
+        '!=1.0.1,>=1.0.0'
+        >>> str(SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False))
+        '!=1.0.1,>=1.0.0'
+        """
         return ",".join(sorted(str(s) for s in self._specs))

     def __hash__(self) -> int:
         return hash(self._specs)

     def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet":
+        """Return a SpecifierSet which is a combination of the two sets.
+
+        :param other: The other object to combine with.
+
+        >>> SpecifierSet(">=1.0.0,!=1.0.1") & '<=2.0.0,!=2.0.1'
+        <SpecifierSet('!=1.0.1,!=2.0.1,<=2.0.0,>=1.0.0')>
+        >>> SpecifierSet(">=1.0.0,!=1.0.1") & SpecifierSet('<=2.0.0,!=2.0.1')
+        <SpecifierSet('!=1.0.1,!=2.0.1,<=2.0.0,>=1.0.0')>
+        """
         if isinstance(other, str):
             other = SpecifierSet(other)
         elif not isinstance(other, SpecifierSet):
@@ -699,59 +811,98 @@ class SpecifierSet(BaseSpecifier):
         return specifier

     def __eq__(self, other: object) -> bool:
-        if isinstance(other, (str, _IndividualSpecifier)):
+        """Whether or not the two SpecifierSet-like objects are equal.
+
+        :param other: The other object to check against.
+
+        The value of :attr:`prereleases` is ignored.
+
+        >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.1")
+        True
+        >>> (SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False) ==
+        ...  SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True))
+        True
+        >>> SpecifierSet(">=1.0.0,!=1.0.1") == ">=1.0.0,!=1.0.1"
+        True
+        >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0")
+        False
+        >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.2")
+        False
+        """
+        if isinstance(other, (str, Specifier)):
             other = SpecifierSet(str(other))
         elif not isinstance(other, SpecifierSet):
             return NotImplemented

         return self._specs == other._specs

-    def __ne__(self, other: object) -> bool:
-        if isinstance(other, (str, _IndividualSpecifier)):
-            other = SpecifierSet(str(other))
-        elif not isinstance(other, SpecifierSet):
-            return NotImplemented
-
-        return self._specs != other._specs
-
     def __len__(self) -> int:
+        """Returns the number of specifiers in this specifier set."""
         return len(self._specs)

-    def __iter__(self) -> Iterator[_IndividualSpecifier]:
+    def __iter__(self) -> Iterator[Specifier]:
+        """
+        Returns an iterator over all the underlying :class:`Specifier` instances
+        in this specifier set.
+
+        >>> sorted(SpecifierSet(">=1.0.0,!=1.0.1"), key=str)
+        [<Specifier('!=1.0.1')>, <Specifier('>=1.0.0')>]
+        """
         return iter(self._specs)

-    @property
-    def prereleases(self) -> Optional[bool]:
-
-        # If we have been given an explicit prerelease modifier, then we'll
-        # pass that through here.
-        if self._prereleases is not None:
-            return self._prereleases
-
-        # If we don't have any specifiers, and we don't have a forced value,
-        # then we'll just return None since we don't know if this should have
-        # pre-releases or not.
-        if not self._specs:
-            return None
-
-        # Otherwise we'll see if any of the given specifiers accept
-        # prereleases, if any of them do we'll return True, otherwise False.
-        return any(s.prereleases for s in self._specs)
-
-    @prereleases.setter
-    def prereleases(self, value: bool) -> None:
-        self._prereleases = value
-
     def __contains__(self, item: UnparsedVersion) -> bool:
+        """Return whether or not the item is contained in this specifier set.
+
+        :param item: The item to check for.
+
+        This is used for the ``in`` operator and behaves the same as
+        :meth:`contains` with no ``prereleases`` argument passed.
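The `&` overload above can also be sketched quickly, assuming the standalone `packaging` import path:

```python
# Sketch: __and__ intersects two sets; a raw specifier string on the
# right-hand side is parsed into a SpecifierSet first.
from packaging.specifiers import SpecifierSet

combined = SpecifierSet(">=1.0.0,!=1.0.1") & "<=2.0.0"
print(str(combined))      # '!=1.0.1,<=2.0.0,>=1.0.0'
print("1.5" in combined)  # True
print("2.1" in combined)  # False
```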
+
+        >>> "1.2.3" in SpecifierSet(">=1.0.0,!=1.0.1")
+        True
+        >>> Version("1.2.3") in SpecifierSet(">=1.0.0,!=1.0.1")
+        True
+        >>> "1.0.1" in SpecifierSet(">=1.0.0,!=1.0.1")
+        False
+        >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1")
+        False
+        >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True)
+        True
+        """
         return self.contains(item)

     def contains(
-        self, item: UnparsedVersion, prereleases: Optional[bool] = None
+        self,
+        item: UnparsedVersion,
+        prereleases: Optional[bool] = None,
+        installed: Optional[bool] = None,
     ) -> bool:
+        """Return whether or not the item is contained in this SpecifierSet.

-        # Ensure that our item is a Version or LegacyVersion instance.
-        if not isinstance(item, (LegacyVersion, Version)):
-            item = parse(item)
+        :param item:
+            The item to check for, which can be a version string or a
+            :class:`Version` instance.
+        :param prereleases:
+            Whether or not to match prereleases with this SpecifierSet. If set to
+            ``None`` (the default), it uses :attr:`prereleases` to determine
+            whether or not prereleases are allowed.
+
+        >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.2.3")
+        True
+        >>> SpecifierSet(">=1.0.0,!=1.0.1").contains(Version("1.2.3"))
+        True
+        >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.0.1")
+        False
+        >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1")
+        False
+        >>> SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True).contains("1.3.0a1")
+        True
+        >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1", prereleases=True)
+        True
+        """
+        # Ensure that our item is a Version instance.
+        if not isinstance(item, Version):
+            item = Version(item)

         # Determine if we're forcing a prerelease or not, if we're not forcing
         # one for this particular filter call, then we'll use whatever the
@@ -768,6 +919,9 @@ class SpecifierSet(BaseSpecifier):
         if not prereleases and item.is_prerelease:
             return False

+        if installed and item.is_prerelease:
+            item = Version(item.base_version)
+
         # We simply dispatch to the underlying specs here to make sure that the
         # given version is contained within all of them.
         # Note: This use of all() here means that an empty set of specifiers
@@ -775,9 +929,46 @@ class SpecifierSet(BaseSpecifier):
         return all(s.contains(item, prereleases=prereleases) for s in self._specs)

     def filter(
-        self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None
-    ) -> Iterable[VersionTypeVar]:
+        self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None
+    ) -> Iterator[UnparsedVersionVar]:
+        """Filter items in the given iterable that match the specifiers in this set.

+        :param iterable:
+            An iterable that can contain version strings and :class:`Version` instances.
+            The items in the iterable will be filtered according to the specifier.
+        :param prereleases:
+            Whether or not to allow prereleases in the returned iterator. If set to
+            ``None`` (the default), it will intelligently decide whether to allow
+            prereleases or not (based on the :attr:`prereleases` attribute, and
+            whether the only versions matching are prereleases).
+
+        This method is smarter than just ``filter(SpecifierSet(...).contains, [...])``
+        because it implements the rule from :pep:`440` that a prerelease item
+        SHOULD be accepted if no other versions match the given specifier.
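The new, undocumented `installed=` flag added to `contains()` above only changes the outcome when prereleases are being matched at all. A minimal sketch, assuming the standalone `packaging` import path:

```python
# Sketch: with installed=True a prerelease candidate is compared as its base
# version, mirroring how an already-installed prerelease is treated.
from packaging.specifiers import SpecifierSet

specs = SpecifierSet(">=1.1")
print(specs.contains("1.1rc1", prereleases=True))                  # False: 1.1rc1 < 1.1
print(specs.contains("1.1rc1", prereleases=True, installed=True))  # True: compared as 1.1
```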
+
+        >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", "1.5a1"]))
+        ['1.3']
+        >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", Version("1.4")]))
+        ['1.3', <Version('1.4')>]
+        >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.5a1"]))
+        []
+        >>> list(SpecifierSet(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True))
+        ['1.3', '1.5a1']
+        >>> list(SpecifierSet(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"]))
+        ['1.3', '1.5a1']
+
+        An "empty" SpecifierSet will filter items based on the presence of prerelease
+        versions in the set.
+
+        >>> list(SpecifierSet("").filter(["1.3", "1.5a1"]))
+        ['1.3']
+        >>> list(SpecifierSet("").filter(["1.5a1"]))
+        ['1.5a1']
+        >>> list(SpecifierSet("", prereleases=True).filter(["1.3", "1.5a1"]))
+        ['1.3', '1.5a1']
+        >>> list(SpecifierSet("").filter(["1.3", "1.5a1"], prereleases=True))
+        ['1.3', '1.5a1']
+        """
         # Determine if we're forcing a prerelease or not, if we're not forcing
         # one for this particular filter call, then we'll use whatever the
         # SpecifierSet thinks for whether or not we should support prereleases.
@@ -790,27 +981,16 @@ class SpecifierSet(BaseSpecifier):
         if self._specs:
             for spec in self._specs:
                 iterable = spec.filter(iterable, prereleases=bool(prereleases))
-            return iterable
+            return iter(iterable)

         # If we do not have any specifiers, then we need to have a rough filter
         # which will filter out any pre-releases, unless there are no final
-        # releases, and which will filter out LegacyVersion in general.
+        # releases.
         else:
-            filtered: List[VersionTypeVar] = []
-            found_prereleases: List[VersionTypeVar] = []
-
-            item: UnparsedVersion
-            parsed_version: Union[Version, LegacyVersion]
+            filtered: List[UnparsedVersionVar] = []
+            found_prereleases: List[UnparsedVersionVar] = []

             for item in iterable:
-                # Ensure that we have some kind of Version class for this item.
- if not isinstance(item, (LegacyVersion, Version)): - parsed_version = parse(item) - else: - parsed_version = item - - # Filter out any item which is parsed as a LegacyVersion - if isinstance(parsed_version, LegacyVersion): - continue + parsed_version = _coerce_version(item) # Store any item which is a pre-release for later unless we've # already found a final version or we are accepting prereleases @@ -823,6 +1003,6 @@ class SpecifierSet(BaseSpecifier): # If we've found no items except for pre-releases, then we'll go # ahead and use the pre-releases if not filtered and found_prereleases and prereleases is None: - return found_prereleases + return iter(found_prereleases) - return filtered + return iter(filtered) diff --git a/lib/pkg_resources/_vendor/packaging/tags.py b/lib/pkg_resources/_vendor/packaging/tags.py index e65890a9..76d24341 100644 --- a/lib/pkg_resources/_vendor/packaging/tags.py +++ b/lib/pkg_resources/_vendor/packaging/tags.py @@ -4,6 +4,7 @@ import logging import platform +import subprocess import sys import sysconfig from importlib.machinery import EXTENSION_SUFFIXES @@ -36,7 +37,7 @@ INTERPRETER_SHORT_NAMES: Dict[str, str] = { } -_32_BIT_INTERPRETER = sys.maxsize <= 2 ** 32 +_32_BIT_INTERPRETER = sys.maxsize <= 2**32 class Tag: @@ -90,7 +91,7 @@ class Tag: return f"{self._interpreter}-{self._abi}-{self._platform}" def __repr__(self) -> str: - return "<{self} @ {self_id}>".format(self=self, self_id=id(self)) + return f"<{self} @ {id(self)}>" def parse_tag(tag: str) -> FrozenSet[Tag]: @@ -110,7 +111,7 @@ def parse_tag(tag: str) -> FrozenSet[Tag]: def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: - value = sysconfig.get_config_var(name) + value: Union[int, str, None] = sysconfig.get_config_var(name) if value is None and warn: logger.debug( "Config variable '%s' is unset, Python ABI tag may be incorrect", name @@ -119,7 +120,7 @@ def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: def _normalize_string(string: str) -> str: - return string.replace(".", "_").replace("-", "_") + return string.replace(".", "_").replace("-", "_").replace(" ", "_") def _abi3_applies(python_version: PythonVersion) -> bool: @@ -192,7 +193,7 @@ def cpython_tags( if not python_version: python_version = sys.version_info[:2] - interpreter = "cp{}".format(_version_nodot(python_version[:2])) + interpreter = f"cp{_version_nodot(python_version[:2])}" if abis is None: if len(python_version) > 1: @@ -224,10 +225,45 @@ def cpython_tags( yield Tag(interpreter, "abi3", platform_) -def _generic_abi() -> Iterator[str]: - abi = sysconfig.get_config_var("SOABI") - if abi: - yield _normalize_string(abi) +def _generic_abi() -> List[str]: + """ + Return the ABI tag based on EXT_SUFFIX. + """ + # The following are examples of `EXT_SUFFIX`. 
+ # We want to keep the parts which are related to the ABI and remove the + # parts which are related to the platform: + # - linux: '.cpython-310-x86_64-linux-gnu.so' => cp310 + # - mac: '.cpython-310-darwin.so' => cp310 + # - win: '.cp310-win_amd64.pyd' => cp310 + # - win: '.pyd' => cp37 (uses _cpython_abis()) + # - pypy: '.pypy38-pp73-x86_64-linux-gnu.so' => pypy38_pp73 + # - graalpy: '.graalpy-38-native-x86_64-darwin.dylib' + # => graalpy_38_native + + ext_suffix = _get_config_var("EXT_SUFFIX", warn=True) + if not isinstance(ext_suffix, str) or ext_suffix[0] != ".": + raise SystemError("invalid sysconfig.get_config_var('EXT_SUFFIX')") + parts = ext_suffix.split(".") + if len(parts) < 3: + # CPython3.7 and earlier uses ".pyd" on Windows. + return _cpython_abis(sys.version_info[:2]) + soabi = parts[1] + if soabi.startswith("cpython"): + # non-windows + abi = "cp" + soabi.split("-")[1] + elif soabi.startswith("cp"): + # windows + abi = soabi.split("-")[0] + elif soabi.startswith("pypy"): + abi = "-".join(soabi.split("-")[:2]) + elif soabi.startswith("graalpy"): + abi = "-".join(soabi.split("-")[:3]) + elif soabi: + # pyston, ironpython, others? + abi = soabi + else: + return [] + return [_normalize_string(abi)] def generic_tags( @@ -251,8 +287,9 @@ def generic_tags( interpreter = "".join([interp_name, interp_version]) if abis is None: abis = _generic_abi() + else: + abis = list(abis) platforms = list(platforms or platform_tags()) - abis = list(abis) if "none" not in abis: abis.append("none") for abi in abis: @@ -268,11 +305,11 @@ def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]: all previous versions of that major version. """ if len(py_version) > 1: - yield "py{version}".format(version=_version_nodot(py_version[:2])) - yield "py{major}".format(major=py_version[0]) + yield f"py{_version_nodot(py_version[:2])}" + yield f"py{py_version[0]}" if len(py_version) > 1: for minor in range(py_version[1] - 1, -1, -1): - yield "py{version}".format(version=_version_nodot((py_version[0], minor))) + yield f"py{_version_nodot((py_version[0], minor))}" def compatible_tags( @@ -356,6 +393,22 @@ def mac_platforms( version_str, _, cpu_arch = platform.mac_ver() if version is None: version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + if version == (10, 16): + # When built against an older macOS SDK, Python will report macOS 10.16 + # instead of the real version. + version_str = subprocess.run( + [ + sys.executable, + "-sS", + "-c", + "import platform; print(platform.mac_ver()[0])", + ], + check=True, + env={"SYSTEM_VERSION_COMPAT": "0"}, + stdout=subprocess.PIPE, + universal_newlines=True, + ).stdout + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) else: version = version if arch is None: @@ -446,6 +499,9 @@ def platform_tags() -> Iterator[str]: def interpreter_name() -> str: """ Returns the name of the running interpreter. + + Some implementations have a reserved, two-letter abbreviation which will + be returned when appropriate. 
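As an aside, the net effect of these tag helpers is easiest to see through the public entry point. A minimal sketch, assuming the standalone `packaging` import path (this copy is vendored under `pkg_resources._vendor.packaging`); the printed tags vary per platform and interpreter:

```python
# Sketch: sys_tags() yields the tags the running interpreter supports,
# most specific (best match) first.
from packaging import tags

for tag in list(tags.sys_tags())[:3]:
    print(tag.interpreter, tag.abi, tag.platform)
```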
""" name = sys.implementation.name return INTERPRETER_SHORT_NAMES.get(name) or name @@ -481,4 +537,10 @@ def sys_tags(*, warn: bool = False) -> Iterator[Tag]: else: yield from generic_tags() - yield from compatible_tags() + if interp_name == "pp": + interp = "pp3" + elif interp_name == "cp": + interp = "cp" + interpreter_version(warn=warn) + else: + interp = None + yield from compatible_tags(interpreter=interp) diff --git a/lib/pkg_resources/_vendor/packaging/utils.py b/lib/pkg_resources/_vendor/packaging/utils.py index bab11b80..33c613b7 100644 --- a/lib/pkg_resources/_vendor/packaging/utils.py +++ b/lib/pkg_resources/_vendor/packaging/utils.py @@ -35,7 +35,9 @@ def canonicalize_name(name: str) -> NormalizedName: return cast(NormalizedName, value) -def canonicalize_version(version: Union[Version, str]) -> str: +def canonicalize_version( + version: Union[Version, str], *, strip_trailing_zero: bool = True +) -> str: """ This is very similar to Version.__str__, but has one subtle difference with the way it handles the release segment. @@ -56,8 +58,11 @@ def canonicalize_version(version: Union[Version, str]) -> str: parts.append(f"{parsed.epoch}!") # Release segment - # NB: This strips trailing '.0's to normalize - parts.append(re.sub(r"(\.0)+$", "", ".".join(str(x) for x in parsed.release))) + release_segment = ".".join(str(x) for x in parsed.release) + if strip_trailing_zero: + # NB: This strips trailing '.0's to normalize + release_segment = re.sub(r"(\.0)+$", "", release_segment) + parts.append(release_segment) # Pre-release if parsed.pre is not None: diff --git a/lib/pkg_resources/_vendor/packaging/version.py b/lib/pkg_resources/_vendor/packaging/version.py index de9a09a4..b30e8cbf 100644 --- a/lib/pkg_resources/_vendor/packaging/version.py +++ b/lib/pkg_resources/_vendor/packaging/version.py @@ -1,16 +1,20 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. +""" +.. testsetup:: + + from packaging.version import parse, Version +""" import collections import itertools import re -import warnings -from typing import Callable, Iterator, List, Optional, SupportsInt, Tuple, Union +from typing import Any, Callable, Optional, SupportsInt, Tuple, Union from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType -__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"] +__all__ = ["VERSION_PATTERN", "parse", "Version", "InvalidVersion"] InfiniteTypes = Union[InfinityType, NegativeInfinityType] PrePostDevType = Union[InfiniteTypes, Tuple[str, int]] @@ -29,36 +33,37 @@ LocalType = Union[ CmpKey = Tuple[ int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType ] -LegacyCmpKey = Tuple[int, Tuple[str, ...]] -VersionComparisonMethod = Callable[ - [Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool -] +VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool] _Version = collections.namedtuple( "_Version", ["epoch", "release", "dev", "pre", "post", "local"] ) -def parse(version: str) -> Union["LegacyVersion", "Version"]: +def parse(version: str) -> "Version": + """Parse the given version string. + + >>> parse('1.0.dev1') + + + :param version: The version string to parse. + :raises InvalidVersion: When the version string is not a valid version. 
""" - Parse the given version string and return either a :class:`Version` object - or a :class:`LegacyVersion` object depending on if the given version is - a valid PEP 440 version or a legacy version. - """ - try: - return Version(version) - except InvalidVersion: - return LegacyVersion(version) + return Version(version) class InvalidVersion(ValueError): - """ - An invalid version was found, users should refer to PEP 440. + """Raised when a version string is not a valid version. + + >>> Version("invalid") + Traceback (most recent call last): + ... + packaging.version.InvalidVersion: Invalid version: 'invalid' """ class _BaseVersion: - _key: Union[CmpKey, LegacyCmpKey] + _key: Tuple[Any, ...] def __hash__(self) -> int: return hash(self._key) @@ -103,126 +108,9 @@ class _BaseVersion: return self._key != other._key -class LegacyVersion(_BaseVersion): - def __init__(self, version: str) -> None: - self._version = str(version) - self._key = _legacy_cmpkey(self._version) - - warnings.warn( - "Creating a LegacyVersion has been deprecated and will be " - "removed in the next major release", - DeprecationWarning, - ) - - def __str__(self) -> str: - return self._version - - def __repr__(self) -> str: - return f"" - - @property - def public(self) -> str: - return self._version - - @property - def base_version(self) -> str: - return self._version - - @property - def epoch(self) -> int: - return -1 - - @property - def release(self) -> None: - return None - - @property - def pre(self) -> None: - return None - - @property - def post(self) -> None: - return None - - @property - def dev(self) -> None: - return None - - @property - def local(self) -> None: - return None - - @property - def is_prerelease(self) -> bool: - return False - - @property - def is_postrelease(self) -> bool: - return False - - @property - def is_devrelease(self) -> bool: - return False - - -_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE) - -_legacy_version_replacement_map = { - "pre": "c", - "preview": "c", - "-": "final-", - "rc": "c", - "dev": "@", -} - - -def _parse_version_parts(s: str) -> Iterator[str]: - for part in _legacy_version_component_re.split(s): - part = _legacy_version_replacement_map.get(part, part) - - if not part or part == ".": - continue - - if part[:1] in "0123456789": - # pad for numeric comparison - yield part.zfill(8) - else: - yield "*" + part - - # ensure that alpha/beta/candidate are before final - yield "*final" - - -def _legacy_cmpkey(version: str) -> LegacyCmpKey: - - # We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch - # greater than or equal to 0. This will effectively put the LegacyVersion, - # which uses the defacto standard originally implemented by setuptools, - # as before all PEP 440 versions. - epoch = -1 - - # This scheme is taken from pkg_resources.parse_version setuptools prior to - # it's adoption of the packaging library. - parts: List[str] = [] - for part in _parse_version_parts(version.lower()): - if part.startswith("*"): - # remove "-" before a prerelease tag - if part < "*final": - while parts and parts[-1] == "*final-": - parts.pop() - - # remove trailing zeros from each series of numeric parts - while parts and parts[-1] == "00000000": - parts.pop() - - parts.append(part) - - return epoch, tuple(parts) - - # Deliberately not anchored to the start and end of the string, to make it # easier for 3rd party code to reuse -VERSION_PATTERN = r""" +_VERSION_PATTERN = r""" v? (?: (?:(?P[0-9]+)!)? 
# epoch
@@ -253,12 +141,56 @@ VERSION_PATTERN = r"""
     (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
 """

+VERSION_PATTERN = _VERSION_PATTERN
+"""
+A string containing the regular expression used to match a valid version.
+
+The pattern is not anchored at either end, and is intended for embedding in larger
+expressions (for example, matching a version number as part of a file name). The
+regular expression should be compiled with the ``re.VERBOSE`` and ``re.IGNORECASE``
+flags set.
+
+:meta hide-value:
+"""
+

 class Version(_BaseVersion):
+    """This class abstracts handling of a project's versions.
+
+    A :class:`Version` instance is comparison aware and can be compared and
+    sorted using the standard Python interfaces.
+
+    >>> v1 = Version("1.0a5")
+    >>> v2 = Version("1.0")
+    >>> v1
+    <Version('1.0a5')>
+    >>> v2
+    <Version('1.0')>
+    >>> v1 < v2
+    True
+    >>> v1 == v2
+    False
+    >>> v1 > v2
+    False
+    >>> v1 >= v2
+    False
+    >>> v1 <= v2
+    True
+    """

     _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
+    _key: CmpKey

     def __init__(self, version: str) -> None:
+        """Initialize a Version object.
+
+        :param version:
+            The string representation of a version which will be parsed and normalized
+            before use.
+        :raises InvalidVersion:
+            If the ``version`` does not conform to PEP 440 in any way then this
+            exception will be raised.
+        """

         # Validate the version and parse it into pieces
         match = self._regex.search(version)
@@ -288,9 +220,19 @@ class Version(_BaseVersion):
         )

     def __repr__(self) -> str:
+        """A representation of the Version that shows all internal state.
+
+        >>> Version('1.0.0')
+        <Version('1.0.0')>
+        """
         return f"<Version('{self}')>"

     def __str__(self) -> str:
+        """A string representation of the version that can be round-tripped.
+
+        >>> str(Version("1.0a5"))
+        '1.0a5'
+        """
         parts = []

         # Epoch
@@ -320,29 +262,80 @@ class Version(_BaseVersion):

     @property
     def epoch(self) -> int:
+        """The epoch of the version.
+
+        >>> Version("2.0.0").epoch
+        0
+        >>> Version("1!2.0.0").epoch
+        1
+        """
         _epoch: int = self._version.epoch
         return _epoch

     @property
     def release(self) -> Tuple[int, ...]:
+        """The components of the "release" segment of the version.
+
+        >>> Version("1.2.3").release
+        (1, 2, 3)
+        >>> Version("2.0.0").release
+        (2, 0, 0)
+        >>> Version("1!2.0.0.post0").release
+        (2, 0, 0)
+
+        Includes trailing zeroes but not the epoch or any pre-release / development /
+        post-release suffixes.
+        """
         _release: Tuple[int, ...] = self._version.release
         return _release

     @property
     def pre(self) -> Optional[Tuple[str, int]]:
+        """The pre-release segment of the version.
+
+        >>> print(Version("1.2.3").pre)
+        None
+        >>> Version("1.2.3a1").pre
+        ('a', 1)
+        >>> Version("1.2.3b1").pre
+        ('b', 1)
+        >>> Version("1.2.3rc1").pre
+        ('rc', 1)
+        """
         _pre: Optional[Tuple[str, int]] = self._version.pre
         return _pre

     @property
     def post(self) -> Optional[int]:
+        """The post-release number of the version.
+
+        >>> print(Version("1.2.3").post)
+        None
+        >>> Version("1.2.3.post1").post
+        1
+        """
         return self._version.post[1] if self._version.post else None

     @property
     def dev(self) -> Optional[int]:
+        """The development number of the version.
+
+        >>> print(Version("1.2.3").dev)
+        None
+        >>> Version("1.2.3.dev1").dev
+        1
+        """
         return self._version.dev[1] if self._version.dev else None

     @property
     def local(self) -> Optional[str]:
+        """The local version segment of the version.
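The per-segment accessors documented above decompose a full PEP 440 version string. A minimal sketch, assuming the standalone `packaging` import path:

```python
# Sketch: decomposing a maximal PEP 440 version into its segments.
from packaging.version import Version

v = Version("1!2.3.4rc1.post2.dev3+ubuntu.1")
print(v.epoch)    # 1
print(v.release)  # (2, 3, 4)
print(v.pre)      # ('rc', 1)
print(v.post)     # 2
print(v.dev)      # 3
print(v.local)    # 'ubuntu.1'
```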
+
+        >>> print(Version("1.2.3").local)
+        None
+        >>> Version("1.2.3+abc").local
+        'abc'
+        """
         if self._version.local:
             return ".".join(str(x) for x in self._version.local)
         else:
@@ -350,10 +343,31 @@ class Version(_BaseVersion):

     @property
     def public(self) -> str:
+        """The public portion of the version.
+
+        >>> Version("1.2.3").public
+        '1.2.3'
+        >>> Version("1.2.3+abc").public
+        '1.2.3'
+        >>> Version("1.2.3+abc.dev1").public
+        '1.2.3'
+        """
         return str(self).split("+", 1)[0]

     @property
     def base_version(self) -> str:
+        """The "base version" of the version.
+
+        >>> Version("1.2.3").base_version
+        '1.2.3'
+        >>> Version("1.2.3+abc").base_version
+        '1.2.3'
+        >>> Version("1!1.2.3+abc.dev1").base_version
+        '1!1.2.3'
+
+        The "base version" is the public version of the project without any pre or post
+        release markers.
+        """
         parts = []

         # Epoch
@@ -367,26 +381,72 @@ class Version(_BaseVersion):

     @property
     def is_prerelease(self) -> bool:
+        """Whether this version is a pre-release.
+
+        >>> Version("1.2.3").is_prerelease
+        False
+        >>> Version("1.2.3a1").is_prerelease
+        True
+        >>> Version("1.2.3b1").is_prerelease
+        True
+        >>> Version("1.2.3rc1").is_prerelease
+        True
+        >>> Version("1.2.3dev1").is_prerelease
+        True
+        """
         return self.dev is not None or self.pre is not None

     @property
     def is_postrelease(self) -> bool:
+        """Whether this version is a post-release.
+
+        >>> Version("1.2.3").is_postrelease
+        False
+        >>> Version("1.2.3.post1").is_postrelease
+        True
+        """
         return self.post is not None

     @property
     def is_devrelease(self) -> bool:
+        """Whether this version is a development release.
+
+        >>> Version("1.2.3").is_devrelease
+        False
+        >>> Version("1.2.3.dev1").is_devrelease
+        True
+        """
         return self.dev is not None

     @property
     def major(self) -> int:
+        """The first item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").major
+        1
+        """
         return self.release[0] if len(self.release) >= 1 else 0

     @property
     def minor(self) -> int:
+        """The second item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").minor
+        2
+        >>> Version("1").minor
+        0
+        """
         return self.release[1] if len(self.release) >= 2 else 0

     @property
     def micro(self) -> int:
+        """The third item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").micro
+        3
+        >>> Version("1").micro
+        0
+        """
         return self.release[2] if len(self.release) >= 3 else 0
diff --git a/lib/pkg_resources/_vendor/platformdirs/__init__.py b/lib/pkg_resources/_vendor/platformdirs/__init__.py
new file mode 100644
index 00000000..aef2821b
--- /dev/null
+++ b/lib/pkg_resources/_vendor/platformdirs/__init__.py
@@ -0,0 +1,342 @@
+"""
+Utilities for determining application-specific dirs. See <https://github.com/platformdirs/platformdirs> for details and
+usage.
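The module-level helpers defined below are thin convenience wrappers that build a `PlatformDirs` instance per call. A minimal sketch, assuming the standalone `platformdirs` import path (this copy is vendored under `pkg_resources._vendor.platformdirs`); the app name is purely illustrative:

```python
# Sketch: module-level convenience wrappers. Actual paths differ per OS.
from platformdirs import user_cache_dir, user_data_dir

print(user_data_dir("MyApp", appauthor=False))   # e.g. ~/.local/share/MyApp on Linux
print(user_cache_dir("MyApp", appauthor=False))  # e.g. ~/.cache/MyApp on Linux
```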
+""" +from __future__ import annotations + +import os +import sys +from pathlib import Path + +if sys.version_info >= (3, 8): # pragma: no cover (py38+) + from typing import Literal +else: # pragma: no cover (py38+) + from ..typing_extensions import Literal + +from .api import PlatformDirsABC +from .version import __version__ +from .version import __version_tuple__ as __version_info__ + + +def _set_platform_dir_class() -> type[PlatformDirsABC]: + if sys.platform == "win32": + from .windows import Windows as Result + elif sys.platform == "darwin": + from .macos import MacOS as Result + else: + from .unix import Unix as Result + + if os.getenv("ANDROID_DATA") == "/data" and os.getenv("ANDROID_ROOT") == "/system": + + if os.getenv("SHELL") or os.getenv("PREFIX"): + return Result + + from .android import _android_folder + + if _android_folder() is not None: + from .android import Android + + return Android # return to avoid redefinition of result + + return Result + + +PlatformDirs = _set_platform_dir_class() #: Currently active platform +AppDirs = PlatformDirs #: Backwards compatibility with appdirs + + +def user_data_dir( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + roaming: bool = False, +) -> str: + """ + :param appname: See `appname `. + :param appauthor: See `appauthor `. + :param version: See `version `. + :param roaming: See `roaming `. + :returns: data directory tied to the user + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, roaming=roaming).user_data_dir + + +def site_data_dir( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + multipath: bool = False, +) -> str: + """ + :param appname: See `appname `. + :param appauthor: See `appauthor `. + :param version: See `version `. + :param multipath: See `roaming `. + :returns: data directory shared by users + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, multipath=multipath).site_data_dir + + +def user_config_dir( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + roaming: bool = False, +) -> str: + """ + :param appname: See `appname `. + :param appauthor: See `appauthor `. + :param version: See `version `. + :param roaming: See `roaming `. + :returns: config directory tied to the user + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, roaming=roaming).user_config_dir + + +def site_config_dir( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + multipath: bool = False, +) -> str: + """ + :param appname: See `appname `. + :param appauthor: See `appauthor `. + :param version: See `version `. + :param multipath: See `roaming `. + :returns: config directory shared by the users + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, multipath=multipath).site_config_dir + + +def user_cache_dir( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + opinion: bool = True, +) -> str: + """ + :param appname: See `appname `. + :param appauthor: See `appauthor `. + :param version: See `version `. + :param opinion: See `roaming `. 
+    :returns: cache directory tied to the user
+    """
+    return PlatformDirs(appname=appname, appauthor=appauthor, version=version, opinion=opinion).user_cache_dir
+
+
+def user_state_dir(
+    appname: str | None = None,
+    appauthor: str | None | Literal[False] = None,
+    version: str | None = None,
+    roaming: bool = False,
+) -> str:
+    """
+    :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
+    :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
+    :param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
+    :param roaming: See `roaming <platformdirs.api.PlatformDirsABC.roaming>`.
+    :returns: state directory tied to the user
+    """
+    return PlatformDirs(appname=appname, appauthor=appauthor, version=version, roaming=roaming).user_state_dir
+
+
+def user_log_dir(
+    appname: str | None = None,
+    appauthor: str | None | Literal[False] = None,
+    version: str | None = None,
+    opinion: bool = True,
+) -> str:
+    """
+    :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
+    :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
+    :param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
+    :param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`.
+    :returns: log directory tied to the user
+    """
+    return PlatformDirs(appname=appname, appauthor=appauthor, version=version, opinion=opinion).user_log_dir
+
+
+def user_documents_dir() -> str:
+    """
+    :returns: documents directory tied to the user
+    """
+    return PlatformDirs().user_documents_dir
+
+
+def user_runtime_dir(
+    appname: str | None = None,
+    appauthor: str | None | Literal[False] = None,
+    version: str | None = None,
+    opinion: bool = True,
+) -> str:
+    """
+    :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
+    :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
+    :param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
+    :param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`.
+    :returns: runtime directory tied to the user
+    """
+    return PlatformDirs(appname=appname, appauthor=appauthor, version=version, opinion=opinion).user_runtime_dir
+
+
+def user_data_path(
+    appname: str | None = None,
+    appauthor: str | None | Literal[False] = None,
+    version: str | None = None,
+    roaming: bool = False,
+) -> Path:
+    """
+    :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
+    :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
+    :param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
+    :param roaming: See `roaming <platformdirs.api.PlatformDirsABC.roaming>`.
+    :returns: data path tied to the user
+    """
+    return PlatformDirs(appname=appname, appauthor=appauthor, version=version, roaming=roaming).user_data_path
+
+
+def site_data_path(
+    appname: str | None = None,
+    appauthor: str | None | Literal[False] = None,
+    version: str | None = None,
+    multipath: bool = False,
+) -> Path:
+    """
+    :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
+    :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
+    :param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
+    :param multipath: See `multipath <platformdirs.api.PlatformDirsABC.multipath>`.
+    :returns: data path shared by users
+    """
+    return PlatformDirs(appname=appname, appauthor=appauthor, version=version, multipath=multipath).site_data_path
+
+
+def user_config_path(
+    appname: str | None = None,
+    appauthor: str | None | Literal[False] = None,
+    version: str | None = None,
+    roaming: bool = False,
+) -> Path:
+    """
+    :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
+    :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
+    :param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
+    :param roaming: See `roaming <platformdirs.api.PlatformDirsABC.roaming>`.
+    :returns: config path tied to the user
+    """
+    return PlatformDirs(appname=appname, appauthor=appauthor, version=version, roaming=roaming).user_config_path
+
+
+def site_config_path(
+    appname: str | None = None,
+    appauthor: str | None | Literal[False] = None,
+    version: str | None = None,
+    multipath: bool = False,
+) -> Path:
+    """
+    :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
+    :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
+    :param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
+    :param multipath: See `multipath <platformdirs.api.PlatformDirsABC.multipath>`.
+    :returns: config path shared by the users
+    """
+    return PlatformDirs(appname=appname, appauthor=appauthor, version=version, multipath=multipath).site_config_path
+
+
+def user_cache_path(
+    appname: str | None = None,
+    appauthor: str | None | Literal[False] = None,
+    version: str | None = None,
+    opinion: bool = True,
+) -> Path:
+    """
+    :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
+    :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
+    :param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
+    :param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`.
+    :returns: cache path tied to the user
+    """
+    return PlatformDirs(appname=appname, appauthor=appauthor, version=version, opinion=opinion).user_cache_path
+
+
+def user_state_path(
+    appname: str | None = None,
+    appauthor: str | None | Literal[False] = None,
+    version: str | None = None,
+    roaming: bool = False,
+) -> Path:
+    """
+    :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
+    :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
+    :param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
+    :param roaming: See `roaming <platformdirs.api.PlatformDirsABC.roaming>`.
+    :returns: state path tied to the user
+    """
+    return PlatformDirs(appname=appname, appauthor=appauthor, version=version, roaming=roaming).user_state_path
+
+
+def user_log_path(
+    appname: str | None = None,
+    appauthor: str | None | Literal[False] = None,
+    version: str | None = None,
+    opinion: bool = True,
+) -> Path:
+    """
+    :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
+    :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
+    :param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
+    :param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`.
+    :returns: log path tied to the user
+    """
+    return PlatformDirs(appname=appname, appauthor=appauthor, version=version, opinion=opinion).user_log_path
+
+
+def user_documents_path() -> Path:
+    """
+    :returns: documents path tied to the user
+    """
+    return PlatformDirs().user_documents_path
+
+
+def user_runtime_path(
+    appname: str | None = None,
+    appauthor: str | None | Literal[False] = None,
+    version: str | None = None,
+    opinion: bool = True,
+) -> Path:
+    """
+    :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
+    :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
+    :param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
+    :param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`.
+    :returns: runtime path tied to the user
+    """
+    return PlatformDirs(appname=appname, appauthor=appauthor, version=version, opinion=opinion).user_runtime_path
+
+
+__all__ = [
+    "__version__",
+    "__version_info__",
+    "PlatformDirs",
+    "AppDirs",
+    "PlatformDirsABC",
+    "user_data_dir",
+    "user_config_dir",
+    "user_cache_dir",
+    "user_state_dir",
+    "user_log_dir",
+    "user_documents_dir",
+    "user_runtime_dir",
+    "site_data_dir",
+    "site_config_dir",
+    "user_data_path",
+    "user_config_path",
+    "user_cache_path",
+    "user_state_path",
+    "user_log_path",
+    "user_documents_path",
+    "user_runtime_path",
+    "site_data_path",
+    "site_config_path",
+]
diff --git a/lib/pkg_resources/_vendor/platformdirs/__main__.py b/lib/pkg_resources/_vendor/platformdirs/__main__.py
new file mode 100644
index 00000000..0fc1edd5
--- /dev/null
+++ b/lib/pkg_resources/_vendor/platformdirs/__main__.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+from platformdirs import PlatformDirs, __version__
+
+PROPS = (
+    "user_data_dir",
+    "user_config_dir",
+    "user_cache_dir",
+    "user_state_dir",
+    "user_log_dir",
+    "user_documents_dir",
+    "user_runtime_dir",
+    "site_data_dir",
+    "site_config_dir",
+)
+
+
+def main() -> None:
+    app_name = "MyApp"
+    app_author = "MyCompany"
+
+    print(f"-- platformdirs {__version__} --")
+
+    print("-- app dirs (with optional 'version')")
+    dirs = PlatformDirs(app_name, app_author, version="1.0")
+    for prop in PROPS:
+        print(f"{prop}: {getattr(dirs, prop)}")
+
+    print("\n-- app dirs (without optional 'version')")
+    dirs = PlatformDirs(app_name, app_author)
+    for prop in PROPS:
+        print(f"{prop}: {getattr(dirs, prop)}")
+
+    print("\n-- app dirs (without optional 'appauthor')")
+    dirs = PlatformDirs(app_name)
+    for prop in PROPS:
+        print(f"{prop}: {getattr(dirs, prop)}")
+
+    print("\n-- app dirs (with disabled 'appauthor')")
+    dirs = PlatformDirs(app_name, appauthor=False)
+    for prop in PROPS:
+        print(f"{prop}: {getattr(dirs, prop)}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lib/pkg_resources/_vendor/platformdirs/android.py b/lib/pkg_resources/_vendor/platformdirs/android.py
new file mode 100644
index 00000000..eda80935
--- /dev/null
+++ b/lib/pkg_resources/_vendor/platformdirs/android.py
@@ -0,0 +1,120 @@
+from __future__ import annotations
+
+import os
+import re
+import sys
+from functools import lru_cache
+from typing import cast
+
+from .api import PlatformDirsABC
+
+
+class Android(PlatformDirsABC):
+    """
+    Follows the guidance `from here <https://android.stackexchange.com/a/216132>`_. Makes use of the
+    `appname <platformdirs.api.PlatformDirsABC.appname>` and
+    `version <platformdirs.api.PlatformDirsABC.version>`.
+    """
+
+    @property
+    def user_data_dir(self) -> str:
+        """:return: data directory tied to the user, e.g. ``/data/user/<userid>/<packagename>/files/<AppName>``"""
+        return self._append_app_name_and_version(cast(str, _android_folder()), "files")
+
+    @property
+    def site_data_dir(self) -> str:
+        """:return: data directory shared by users, same as `user_data_dir`"""
+        return self.user_data_dir
+
+    @property
+    def user_config_dir(self) -> str:
+        """
+        :return: config directory tied to the user, e.g. ``/data/user/<userid>/<packagename>/shared_prefs/<AppName>``
+        """
+        return self._append_app_name_and_version(cast(str, _android_folder()), "shared_prefs")
+
+    @property
+    def site_config_dir(self) -> str:
+        """:return: config directory shared by the users, same as `user_config_dir`"""
+        return self.user_config_dir
+
+    @property
+    def user_cache_dir(self) -> str:
+        """:return: cache directory tied to the user, e.g.
``/data/user///cache/``""" + return self._append_app_name_and_version(cast(str, _android_folder()), "cache") + + @property + def user_state_dir(self) -> str: + """:return: state directory tied to the user, same as `user_data_dir`""" + return self.user_data_dir + + @property + def user_log_dir(self) -> str: + """ + :return: log directory tied to the user, same as `user_cache_dir` if not opinionated else ``log`` in it, + e.g. ``/data/user///cache//log`` + """ + path = self.user_cache_dir + if self.opinion: + path = os.path.join(path, "log") + return path + + @property + def user_documents_dir(self) -> str: + """ + :return: documents directory tied to the user e.g. ``/storage/emulated/0/Documents`` + """ + return _android_documents_folder() + + @property + def user_runtime_dir(self) -> str: + """ + :return: runtime directory tied to the user, same as `user_cache_dir` if not opinionated else ``tmp`` in it, + e.g. ``/data/user///cache//tmp`` + """ + path = self.user_cache_dir + if self.opinion: + path = os.path.join(path, "tmp") + return path + + +@lru_cache(maxsize=1) +def _android_folder() -> str | None: + """:return: base folder for the Android OS or None if cannot be found""" + try: + # First try to get path to android app via pyjnius + from jnius import autoclass + + Context = autoclass("android.content.Context") # noqa: N806 + result: str | None = Context.getFilesDir().getParentFile().getAbsolutePath() + except Exception: + # if fails find an android folder looking path on the sys.path + pattern = re.compile(r"/data/(data|user/\d+)/(.+)/files") + for path in sys.path: + if pattern.match(path): + result = path.split("/files")[0] + break + else: + result = None + return result + + +@lru_cache(maxsize=1) +def _android_documents_folder() -> str: + """:return: documents folder for the Android OS""" + # Get directories with pyjnius + try: + from jnius import autoclass + + Context = autoclass("android.content.Context") # noqa: N806 + Environment = autoclass("android.os.Environment") # noqa: N806 + documents_dir: str = Context.getExternalFilesDir(Environment.DIRECTORY_DOCUMENTS).getAbsolutePath() + except Exception: + documents_dir = "/storage/emulated/0/Documents" + + return documents_dir + + +__all__ = [ + "Android", +] diff --git a/lib/pkg_resources/_vendor/platformdirs/api.py b/lib/pkg_resources/_vendor/platformdirs/api.py new file mode 100644 index 00000000..6f6e2c2c --- /dev/null +++ b/lib/pkg_resources/_vendor/platformdirs/api.py @@ -0,0 +1,156 @@ +from __future__ import annotations + +import os +import sys +from abc import ABC, abstractmethod +from pathlib import Path + +if sys.version_info >= (3, 8): # pragma: no branch + from typing import Literal # pragma: no cover + + +class PlatformDirsABC(ABC): + """ + Abstract base class for platform directories. + """ + + def __init__( + self, + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + roaming: bool = False, + multipath: bool = False, + opinion: bool = True, + ): + """ + Create a new platform directory. + + :param appname: See `appname`. + :param appauthor: See `appauthor`. + :param version: See `version`. + :param roaming: See `roaming`. + :param multipath: See `multipath`. + :param opinion: See `opinion`. + """ + self.appname = appname #: The name of application. + self.appauthor = appauthor + """ + The name of the app author or distributing body for this application. Typically, it is the owning company name. + Defaults to `appname`. You may pass ``False`` to disable it. 
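Reviewer note: the constructor parameters documented in this hunk (`appname`, `appauthor`, `version`, `roaming`, `multipath`, `opinion`) drive every directory property the new classes expose. A minimal sketch of the vendored API in use, assuming it is imported under its upstream name `platformdirs`; the app name and author below are only illustrative:

```python
from platformdirs import PlatformDirs, user_cache_path

# appname/appauthor/version become path components on most platforms
dirs = PlatformDirs(appname="Headphones", appauthor="rembo10", version="1.0")
print(dirs.user_data_dir)   # e.g. ~/.local/share/Headphones/1.0 on Linux

# the module-level helpers construct a PlatformDirs instance per call
print(user_cache_path(appname="Headphones", appauthor="rembo10"))
```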
+ """ + self.version = version + """ + An optional version path element to append to the path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this would typically be ``.``. + """ + self.roaming = roaming + """ + Whether to use the roaming appdata directory on Windows. That means that for users on a Windows network setup + for roaming profiles, this user data will be synced on login (see + `here `_). + """ + self.multipath = multipath + """ + An optional parameter only applicable to Unix/Linux which indicates that the entire list of data dirs should be + returned. By default, the first item would only be returned. + """ + self.opinion = opinion #: A flag to indicating to use opinionated values. + + def _append_app_name_and_version(self, *base: str) -> str: + params = list(base[1:]) + if self.appname: + params.append(self.appname) + if self.version: + params.append(self.version) + return os.path.join(base[0], *params) + + @property + @abstractmethod + def user_data_dir(self) -> str: + """:return: data directory tied to the user""" + + @property + @abstractmethod + def site_data_dir(self) -> str: + """:return: data directory shared by users""" + + @property + @abstractmethod + def user_config_dir(self) -> str: + """:return: config directory tied to the user""" + + @property + @abstractmethod + def site_config_dir(self) -> str: + """:return: config directory shared by the users""" + + @property + @abstractmethod + def user_cache_dir(self) -> str: + """:return: cache directory tied to the user""" + + @property + @abstractmethod + def user_state_dir(self) -> str: + """:return: state directory tied to the user""" + + @property + @abstractmethod + def user_log_dir(self) -> str: + """:return: log directory tied to the user""" + + @property + @abstractmethod + def user_documents_dir(self) -> str: + """:return: documents directory tied to the user""" + + @property + @abstractmethod + def user_runtime_dir(self) -> str: + """:return: runtime directory tied to the user""" + + @property + def user_data_path(self) -> Path: + """:return: data path tied to the user""" + return Path(self.user_data_dir) + + @property + def site_data_path(self) -> Path: + """:return: data path shared by users""" + return Path(self.site_data_dir) + + @property + def user_config_path(self) -> Path: + """:return: config path tied to the user""" + return Path(self.user_config_dir) + + @property + def site_config_path(self) -> Path: + """:return: config path shared by the users""" + return Path(self.site_config_dir) + + @property + def user_cache_path(self) -> Path: + """:return: cache path tied to the user""" + return Path(self.user_cache_dir) + + @property + def user_state_path(self) -> Path: + """:return: state path tied to the user""" + return Path(self.user_state_dir) + + @property + def user_log_path(self) -> Path: + """:return: log path tied to the user""" + return Path(self.user_log_dir) + + @property + def user_documents_path(self) -> Path: + """:return: documents path tied to the user""" + return Path(self.user_documents_dir) + + @property + def user_runtime_path(self) -> Path: + """:return: runtime path tied to the user""" + return Path(self.user_runtime_dir) diff --git a/lib/pkg_resources/_vendor/platformdirs/macos.py b/lib/pkg_resources/_vendor/platformdirs/macos.py new file mode 100644 index 00000000..a01337c7 --- /dev/null +++ b/lib/pkg_resources/_vendor/platformdirs/macos.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import os + +from 
.api import PlatformDirsABC + + +class MacOS(PlatformDirsABC): + """ + Platform directories for the macOS operating system. Follows the guidance from `Apple documentation + `_. + Makes use of the `appname ` and + `version `. + """ + + @property + def user_data_dir(self) -> str: + """:return: data directory tied to the user, e.g. ``~/Library/Application Support/$appname/$version``""" + return self._append_app_name_and_version(os.path.expanduser("~/Library/Application Support/")) + + @property + def site_data_dir(self) -> str: + """:return: data directory shared by users, e.g. ``/Library/Application Support/$appname/$version``""" + return self._append_app_name_and_version("/Library/Application Support") + + @property + def user_config_dir(self) -> str: + """:return: config directory tied to the user, e.g. ``~/Library/Preferences/$appname/$version``""" + return self._append_app_name_and_version(os.path.expanduser("~/Library/Preferences/")) + + @property + def site_config_dir(self) -> str: + """:return: config directory shared by the users, e.g. ``/Library/Preferences/$appname``""" + return self._append_app_name_and_version("/Library/Preferences") + + @property + def user_cache_dir(self) -> str: + """:return: cache directory tied to the user, e.g. ``~/Library/Caches/$appname/$version``""" + return self._append_app_name_and_version(os.path.expanduser("~/Library/Caches")) + + @property + def user_state_dir(self) -> str: + """:return: state directory tied to the user, same as `user_data_dir`""" + return self.user_data_dir + + @property + def user_log_dir(self) -> str: + """:return: log directory tied to the user, e.g. ``~/Library/Logs/$appname/$version``""" + return self._append_app_name_and_version(os.path.expanduser("~/Library/Logs")) + + @property + def user_documents_dir(self) -> str: + """:return: documents directory tied to the user, e.g. ``~/Documents``""" + return os.path.expanduser("~/Documents") + + @property + def user_runtime_dir(self) -> str: + """:return: runtime directory tied to the user, e.g. ``~/Library/Caches/TemporaryItems/$appname/$version``""" + return self._append_app_name_and_version(os.path.expanduser("~/Library/Caches/TemporaryItems")) + + +__all__ = [ + "MacOS", +] diff --git a/lib/pkg_resources/_vendor/platformdirs/py.typed b/lib/pkg_resources/_vendor/platformdirs/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/_vendor/platformdirs/unix.py b/lib/pkg_resources/_vendor/platformdirs/unix.py new file mode 100644 index 00000000..9aca5a03 --- /dev/null +++ b/lib/pkg_resources/_vendor/platformdirs/unix.py @@ -0,0 +1,181 @@ +from __future__ import annotations + +import os +import sys +from configparser import ConfigParser +from pathlib import Path + +from .api import PlatformDirsABC + +if sys.platform.startswith("linux"): # pragma: no branch # no op check, only to please the type checker + from os import getuid +else: + + def getuid() -> int: + raise RuntimeError("should only be used on Linux") + + +class Unix(PlatformDirsABC): + """ + On Unix/Linux, we follow the + `XDG Basedir Spec `_. The spec allows + overriding directories with environment variables. The examples show are the default values, alongside the name of + the environment variable that overrides them. Makes use of the + `appname `, + `version `, + `multipath `, + `opinion `. + """ + + @property + def user_data_dir(self) -> str: + """ + :return: data directory tied to the user, e.g. 
``~/.local/share/$appname/$version`` or + ``$XDG_DATA_HOME/$appname/$version`` + """ + path = os.environ.get("XDG_DATA_HOME", "") + if not path.strip(): + path = os.path.expanduser("~/.local/share") + return self._append_app_name_and_version(path) + + @property + def site_data_dir(self) -> str: + """ + :return: data directories shared by users (if `multipath ` is + enabled and ``XDG_DATA_DIR`` is set and a multi path the response is also a multi path separated by the OS + path separator), e.g. ``/usr/local/share/$appname/$version`` or ``/usr/share/$appname/$version`` + """ + # XDG default for $XDG_DATA_DIRS; only first, if multipath is False + path = os.environ.get("XDG_DATA_DIRS", "") + if not path.strip(): + path = f"/usr/local/share{os.pathsep}/usr/share" + return self._with_multi_path(path) + + def _with_multi_path(self, path: str) -> str: + path_list = path.split(os.pathsep) + if not self.multipath: + path_list = path_list[0:1] + path_list = [self._append_app_name_and_version(os.path.expanduser(p)) for p in path_list] + return os.pathsep.join(path_list) + + @property + def user_config_dir(self) -> str: + """ + :return: config directory tied to the user, e.g. ``~/.config/$appname/$version`` or + ``$XDG_CONFIG_HOME/$appname/$version`` + """ + path = os.environ.get("XDG_CONFIG_HOME", "") + if not path.strip(): + path = os.path.expanduser("~/.config") + return self._append_app_name_and_version(path) + + @property + def site_config_dir(self) -> str: + """ + :return: config directories shared by users (if `multipath ` + is enabled and ``XDG_DATA_DIR`` is set and a multi path the response is also a multi path separated by the OS + path separator), e.g. ``/etc/xdg/$appname/$version`` + """ + # XDG default for $XDG_CONFIG_DIRS only first, if multipath is False + path = os.environ.get("XDG_CONFIG_DIRS", "") + if not path.strip(): + path = "/etc/xdg" + return self._with_multi_path(path) + + @property + def user_cache_dir(self) -> str: + """ + :return: cache directory tied to the user, e.g. ``~/.cache/$appname/$version`` or + ``~/$XDG_CACHE_HOME/$appname/$version`` + """ + path = os.environ.get("XDG_CACHE_HOME", "") + if not path.strip(): + path = os.path.expanduser("~/.cache") + return self._append_app_name_and_version(path) + + @property + def user_state_dir(self) -> str: + """ + :return: state directory tied to the user, e.g. ``~/.local/state/$appname/$version`` or + ``$XDG_STATE_HOME/$appname/$version`` + """ + path = os.environ.get("XDG_STATE_HOME", "") + if not path.strip(): + path = os.path.expanduser("~/.local/state") + return self._append_app_name_and_version(path) + + @property + def user_log_dir(self) -> str: + """ + :return: log directory tied to the user, same as `user_state_dir` if not opinionated else ``log`` in it + """ + path = self.user_state_dir + if self.opinion: + path = os.path.join(path, "log") + return path + + @property + def user_documents_dir(self) -> str: + """ + :return: documents directory tied to the user, e.g. ``~/Documents`` + """ + documents_dir = _get_user_dirs_folder("XDG_DOCUMENTS_DIR") + if documents_dir is None: + documents_dir = os.environ.get("XDG_DOCUMENTS_DIR", "").strip() + if not documents_dir: + documents_dir = os.path.expanduser("~/Documents") + + return documents_dir + + @property + def user_runtime_dir(self) -> str: + """ + :return: runtime directory tied to the user, e.g. 
``/run/user/$(id -u)/$appname/$version`` or + ``$XDG_RUNTIME_DIR/$appname/$version`` + """ + path = os.environ.get("XDG_RUNTIME_DIR", "") + if not path.strip(): + path = f"/run/user/{getuid()}" + return self._append_app_name_and_version(path) + + @property + def site_data_path(self) -> Path: + """:return: data path shared by users. Only return first item, even if ``multipath`` is set to ``True``""" + return self._first_item_as_path_if_multipath(self.site_data_dir) + + @property + def site_config_path(self) -> Path: + """:return: config path shared by the users. Only return first item, even if ``multipath`` is set to ``True``""" + return self._first_item_as_path_if_multipath(self.site_config_dir) + + def _first_item_as_path_if_multipath(self, directory: str) -> Path: + if self.multipath: + # If multipath is True, the first path is returned. + directory = directory.split(os.pathsep)[0] + return Path(directory) + + +def _get_user_dirs_folder(key: str) -> str | None: + """Return directory from user-dirs.dirs config file. See https://freedesktop.org/wiki/Software/xdg-user-dirs/""" + user_dirs_config_path = os.path.join(Unix().user_config_dir, "user-dirs.dirs") + if os.path.exists(user_dirs_config_path): + parser = ConfigParser() + + with open(user_dirs_config_path) as stream: + # Add fake section header, so ConfigParser doesn't complain + parser.read_string(f"[top]\n{stream.read()}") + + if key not in parser["top"]: + return None + + path = parser["top"][key].strip('"') + # Handle relative home paths + path = path.replace("$HOME", os.path.expanduser("~")) + return path + + return None + + +__all__ = [ + "Unix", +] diff --git a/lib/pkg_resources/_vendor/platformdirs/version.py b/lib/pkg_resources/_vendor/platformdirs/version.py new file mode 100644 index 00000000..9f6eb98e --- /dev/null +++ b/lib/pkg_resources/_vendor/platformdirs/version.py @@ -0,0 +1,4 @@ +# file generated by setuptools_scm +# don't change, don't track in version control +__version__ = version = '2.6.2' +__version_tuple__ = version_tuple = (2, 6, 2) diff --git a/lib/pkg_resources/_vendor/platformdirs/windows.py b/lib/pkg_resources/_vendor/platformdirs/windows.py new file mode 100644 index 00000000..d5c27b34 --- /dev/null +++ b/lib/pkg_resources/_vendor/platformdirs/windows.py @@ -0,0 +1,184 @@ +from __future__ import annotations + +import ctypes +import os +import sys +from functools import lru_cache +from typing import Callable + +from .api import PlatformDirsABC + + +class Windows(PlatformDirsABC): + """`MSDN on where to store app data files + `_. + Makes use of the + `appname `, + `appauthor `, + `version `, + `roaming `, + `opinion `.""" + + @property + def user_data_dir(self) -> str: + """ + :return: data directory tied to the user, e.g. 
+ ``%USERPROFILE%\\AppData\\Local\\$appauthor\\$appname`` (not roaming) or + ``%USERPROFILE%\\AppData\\Roaming\\$appauthor\\$appname`` (roaming) + """ + const = "CSIDL_APPDATA" if self.roaming else "CSIDL_LOCAL_APPDATA" + path = os.path.normpath(get_win_folder(const)) + return self._append_parts(path) + + def _append_parts(self, path: str, *, opinion_value: str | None = None) -> str: + params = [] + if self.appname: + if self.appauthor is not False: + author = self.appauthor or self.appname + params.append(author) + params.append(self.appname) + if opinion_value is not None and self.opinion: + params.append(opinion_value) + if self.version: + params.append(self.version) + return os.path.join(path, *params) + + @property + def site_data_dir(self) -> str: + """:return: data directory shared by users, e.g. ``C:\\ProgramData\\$appauthor\\$appname``""" + path = os.path.normpath(get_win_folder("CSIDL_COMMON_APPDATA")) + return self._append_parts(path) + + @property + def user_config_dir(self) -> str: + """:return: config directory tied to the user, same as `user_data_dir`""" + return self.user_data_dir + + @property + def site_config_dir(self) -> str: + """:return: config directory shared by the users, same as `site_data_dir`""" + return self.site_data_dir + + @property + def user_cache_dir(self) -> str: + """ + :return: cache directory tied to the user (if opinionated with ``Cache`` folder within ``$appname``) e.g. + ``%USERPROFILE%\\AppData\\Local\\$appauthor\\$appname\\Cache\\$version`` + """ + path = os.path.normpath(get_win_folder("CSIDL_LOCAL_APPDATA")) + return self._append_parts(path, opinion_value="Cache") + + @property + def user_state_dir(self) -> str: + """:return: state directory tied to the user, same as `user_data_dir`""" + return self.user_data_dir + + @property + def user_log_dir(self) -> str: + """ + :return: log directory tied to the user, same as `user_data_dir` if not opinionated else ``Logs`` in it + """ + path = self.user_data_dir + if self.opinion: + path = os.path.join(path, "Logs") + return path + + @property + def user_documents_dir(self) -> str: + """ + :return: documents directory tied to the user e.g. ``%USERPROFILE%\\Documents`` + """ + return os.path.normpath(get_win_folder("CSIDL_PERSONAL")) + + @property + def user_runtime_dir(self) -> str: + """ + :return: runtime directory tied to the user, e.g. + ``%USERPROFILE%\\AppData\\Local\\Temp\\$appauthor\\$appname`` + """ + path = os.path.normpath(os.path.join(get_win_folder("CSIDL_LOCAL_APPDATA"), "Temp")) + return self._append_parts(path) + + +def get_win_folder_from_env_vars(csidl_name: str) -> str: + """Get folder from environment variables.""" + if csidl_name == "CSIDL_PERSONAL": # does not have an environment name + return os.path.join(os.path.normpath(os.environ["USERPROFILE"]), "Documents") + + env_var_name = { + "CSIDL_APPDATA": "APPDATA", + "CSIDL_COMMON_APPDATA": "ALLUSERSPROFILE", + "CSIDL_LOCAL_APPDATA": "LOCALAPPDATA", + }.get(csidl_name) + if env_var_name is None: + raise ValueError(f"Unknown CSIDL name: {csidl_name}") + result = os.environ.get(env_var_name) + if result is None: + raise ValueError(f"Unset environment variable: {env_var_name}") + return result + + +def get_win_folder_from_registry(csidl_name: str) -> str: + """Get folder from the registry. + + This is a fallback technique at best. I'm not sure if using the + registry for this guarantees us the correct answer for all CSIDL_* + names. 
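The `roaming` flag documented just above is the main knob on Windows: it switches between the Local and Roaming ``AppData`` trees. A small illustration (Windows-only; the printed paths show the expected shape, not guaranteed output, and the app name and author are only illustrative):

```python
from platformdirs.windows import Windows

local = Windows(appname="Headphones", appauthor="rembo10")
roaming = Windows(appname="Headphones", appauthor="rembo10", roaming=True)

print(local.user_data_dir)    # e.g. C:\Users\me\AppData\Local\rembo10\Headphones
print(roaming.user_data_dir)  # e.g. C:\Users\me\AppData\Roaming\rembo10\Headphones
```

Resolution of the base folder itself falls back from ctypes to the registry to environment variables, mirroring the `get_win_folder_*` helpers in this hunk.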
+ """ + shell_folder_name = { + "CSIDL_APPDATA": "AppData", + "CSIDL_COMMON_APPDATA": "Common AppData", + "CSIDL_LOCAL_APPDATA": "Local AppData", + "CSIDL_PERSONAL": "Personal", + }.get(csidl_name) + if shell_folder_name is None: + raise ValueError(f"Unknown CSIDL name: {csidl_name}") + if sys.platform != "win32": # only needed for mypy type checker to know that this code runs only on Windows + raise NotImplementedError + import winreg + + key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders") + directory, _ = winreg.QueryValueEx(key, shell_folder_name) + return str(directory) + + +def get_win_folder_via_ctypes(csidl_name: str) -> str: + """Get folder with ctypes.""" + csidl_const = { + "CSIDL_APPDATA": 26, + "CSIDL_COMMON_APPDATA": 35, + "CSIDL_LOCAL_APPDATA": 28, + "CSIDL_PERSONAL": 5, + }.get(csidl_name) + if csidl_const is None: + raise ValueError(f"Unknown CSIDL name: {csidl_name}") + + buf = ctypes.create_unicode_buffer(1024) + windll = getattr(ctypes, "windll") # noqa: B009 # using getattr to avoid false positive with mypy type checker + windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf) + + # Downgrade to short path name if it has highbit chars. + if any(ord(c) > 255 for c in buf): + buf2 = ctypes.create_unicode_buffer(1024) + if windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024): + buf = buf2 + + return buf.value + + +def _pick_get_win_folder() -> Callable[[str], str]: + if hasattr(ctypes, "windll"): + return get_win_folder_via_ctypes + try: + import winreg # noqa: F401 + except ImportError: + return get_win_folder_from_env_vars + else: + return get_win_folder_from_registry + + +get_win_folder = lru_cache(maxsize=None)(_pick_get_win_folder()) + +__all__ = [ + "Windows", +] diff --git a/lib/pkg_resources/_vendor/pyparsing.LICENSE.txt b/lib/pkg_resources/_vendor/pyparsing.LICENSE.txt deleted file mode 100644 index 1bf98523..00000000 --- a/lib/pkg_resources/_vendor/pyparsing.LICENSE.txt +++ /dev/null @@ -1,18 +0,0 @@ -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/lib/pkg_resources/_vendor/pyparsing.py b/lib/pkg_resources/_vendor/pyparsing.py deleted file mode 100644 index cf75e1e5..00000000 --- a/lib/pkg_resources/_vendor/pyparsing.py +++ /dev/null @@ -1,5742 +0,0 @@ -# module pyparsing.py -# -# Copyright (c) 2003-2018 Paul T. 
McGuire -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# - -__doc__ = \ -""" -pyparsing module - Classes and methods to define and execute parsing grammars -============================================================================= - -The pyparsing module is an alternative approach to creating and executing simple grammars, -vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you -don't need to learn a new syntax for defining grammars or matching expressions - the parsing module -provides a library of classes that you use to construct the grammar directly in Python. - -Here is a program to parse "Hello, World!" (or any greeting of the form -C{", !"}), built up using L{Word}, L{Literal}, and L{And} elements -(L{'+'} operator gives L{And} expressions, strings are auto-converted to -L{Literal} expressions):: - - from pyparsing import Word, alphas - - # define grammar of a greeting - greet = Word(alphas) + "," + Word(alphas) + "!" - - hello = "Hello, World!" - print (hello, "->", greet.parseString(hello)) - -The program outputs the following:: - - Hello, World! -> ['Hello', ',', 'World', '!'] - -The Python representation of the grammar is quite readable, owing to the self-explanatory -class names, and the use of '+', '|' and '^' operators. - -The L{ParseResults} object returned from L{ParserElement.parseString} can be accessed as a nested list, a dictionary, or an -object with named attributes. - -The pyparsing module handles some of the problems that are typically vexing when writing text parsers: - - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) - - quoted strings - - embedded comments - - -Getting Started - ------------------ -Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing -classes inherit from. 
Use the docstrings for examples of how to: - - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes - - construct character word-group expressions using the L{Word} class - - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes - - use L{'+'}, L{'|'}, L{'^'}, and L{'&'} operators to combine simple expressions into more complex ones - - associate names with your parsed results using L{ParserElement.setResultsName} - - find some helpful expression short-cuts like L{delimitedList} and L{oneOf} - - find more useful common expressions in the L{pyparsing_common} namespace class -""" - -__version__ = "2.2.1" -__versionTime__ = "18 Sep 2018 00:49 UTC" -__author__ = "Paul McGuire " - -import string -from weakref import ref as wkref -import copy -import sys -import warnings -import re -import sre_constants -import collections -import pprint -import traceback -import types -from datetime import datetime - -try: - from _thread import RLock -except ImportError: - from threading import RLock - -try: - # Python 3 - from collections.abc import Iterable - from collections.abc import MutableMapping -except ImportError: - # Python 2.7 - from collections import Iterable - from collections import MutableMapping - -try: - from collections import OrderedDict as _OrderedDict -except ImportError: - try: - from ordereddict import OrderedDict as _OrderedDict - except ImportError: - _OrderedDict = None - -#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) - -__all__ = [ -'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', -'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', -'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', -'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', -'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', -'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', -'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', -'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', -'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', -'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', -'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', -'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', -'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', -'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', -'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', -'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', -'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', -'CloseMatch', 'tokenMap', 'pyparsing_common', -] - -system_version = tuple(sys.version_info)[:3] -PY_3 = system_version[0] == 3 -if PY_3: - _MAX_INT = sys.maxsize - basestring = str - unichr = chr - _ustr = str - - # build list of single arg builtins, that can be used as parse actions - singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] - -else: - _MAX_INT = sys.maxint - range = xrange - - def _ustr(obj): - """Drop-in replacement 
for str(obj) that tries to be Unicode friendly. It first tries - str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It - then < returns the unicode object | encodes it with the default encoding | ... >. - """ - if isinstance(obj,unicode): - return obj - - try: - # If this works, then _ustr(obj) has the same behaviour as str(obj), so - # it won't break any existing code. - return str(obj) - - except UnicodeEncodeError: - # Else encode it - ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') - xmlcharref = Regex(r'&#\d+;') - xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) - return xmlcharref.transformString(ret) - - # build list of single arg builtins, tolerant of Python version, that can be used as parse actions - singleArgBuiltins = [] - import __builtin__ - for fname in "sum len sorted reversed list tuple set any all min max".split(): - try: - singleArgBuiltins.append(getattr(__builtin__,fname)) - except AttributeError: - continue - -_generatorType = type((y for y in range(1))) - -def _xml_escape(data): - """Escape &, <, >, ", ', etc. in a string of data.""" - - # ampersand must be replaced first - from_symbols = '&><"\'' - to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split()) - for from_,to_ in zip(from_symbols, to_symbols): - data = data.replace(from_, to_) - return data - -class _Constants(object): - pass - -alphas = string.ascii_uppercase + string.ascii_lowercase -nums = "0123456789" -hexnums = nums + "ABCDEFabcdef" -alphanums = alphas + nums -_bslash = chr(92) -printables = "".join(c for c in string.printable if c not in string.whitespace) - -class ParseBaseException(Exception): - """base exception class for all parsing runtime exceptions""" - # Performance tuning: we construct a *lot* of these, so keep this - # constructor as small and fast as possible - def __init__( self, pstr, loc=0, msg=None, elem=None ): - self.loc = loc - if msg is None: - self.msg = pstr - self.pstr = "" - else: - self.msg = msg - self.pstr = pstr - self.parserElement = elem - self.args = (pstr, loc, msg) - - @classmethod - def _from_exception(cls, pe): - """ - internal factory method to simplify creating one type of ParseException - from another - avoids having __init__ signature conflicts among subclasses - """ - return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement) - - def __getattr__( self, aname ): - """supported attributes by name are: - - lineno - returns the line number of the exception text - - col - returns the column number of the exception text - - line - returns the line containing the exception text - """ - if( aname == "lineno" ): - return lineno( self.loc, self.pstr ) - elif( aname in ("col", "column") ): - return col( self.loc, self.pstr ) - elif( aname == "line" ): - return line( self.loc, self.pstr ) - else: - raise AttributeError(aname) - - def __str__( self ): - return "%s (at char %d), (line:%d, col:%d)" % \ - ( self.msg, self.loc, self.lineno, self.column ) - def __repr__( self ): - return _ustr(self) - def markInputline( self, markerString = ">!<" ): - """Extracts the exception line from the input string, and marks - the location of the exception with a special symbol. 
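Since this removed vendored copy is what older releases actually ran, a quick illustration of the `markInputline` behavior described above may help when reading the deletion; runnable against any installed pyparsing 2.x:

```python
from pyparsing import Word, nums, ParseException

try:
    Word(nums).parseString("ABC 123")
except ParseException as pe:
    print(pe.markInputline())  # -> '>!<ABC 123'
```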
- """ - line_str = self.line - line_column = self.column - 1 - if markerString: - line_str = "".join((line_str[:line_column], - markerString, line_str[line_column:])) - return line_str.strip() - def __dir__(self): - return "lineno col line".split() + dir(type(self)) - -class ParseException(ParseBaseException): - """ - Exception thrown when parse expressions don't match class; - supported attributes by name are: - - lineno - returns the line number of the exception text - - col - returns the column number of the exception text - - line - returns the line containing the exception text - - Example:: - try: - Word(nums).setName("integer").parseString("ABC") - except ParseException as pe: - print(pe) - print("column: {}".format(pe.col)) - - prints:: - Expected integer (at char 0), (line:1, col:1) - column: 1 - """ - pass - -class ParseFatalException(ParseBaseException): - """user-throwable exception thrown when inconsistent parse content - is found; stops all parsing immediately""" - pass - -class ParseSyntaxException(ParseFatalException): - """just like L{ParseFatalException}, but thrown internally when an - L{ErrorStop} ('-' operator) indicates that parsing is to stop - immediately because an unbacktrackable syntax error has been found""" - pass - -#~ class ReparseException(ParseBaseException): - #~ """Experimental class - parse actions can raise this exception to cause - #~ pyparsing to reparse the input string: - #~ - with a modified input string, and/or - #~ - with a modified start location - #~ Set the values of the ReparseException in the constructor, and raise the - #~ exception in a parse action to cause pyparsing to use the new string/location. - #~ Setting the values as None causes no change to be made. - #~ """ - #~ def __init_( self, newstring, restartLoc ): - #~ self.newParseText = newstring - #~ self.reparseLoc = restartLoc - -class RecursiveGrammarException(Exception): - """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive""" - def __init__( self, parseElementList ): - self.parseElementTrace = parseElementList - - def __str__( self ): - return "RecursiveGrammarException: %s" % self.parseElementTrace - -class _ParseResultsWithOffset(object): - def __init__(self,p1,p2): - self.tup = (p1,p2) - def __getitem__(self,i): - return self.tup[i] - def __repr__(self): - return repr(self.tup[0]) - def setOffset(self,i): - self.tup = (self.tup[0],i) - -class ParseResults(object): - """ - Structured parse results, to provide multiple means of access to the parsed data: - - as a list (C{len(results)}) - - by list index (C{results[0], results[1]}, etc.) 
- - by attribute (C{results.} - see L{ParserElement.setResultsName}) - - Example:: - integer = Word(nums) - date_str = (integer.setResultsName("year") + '/' - + integer.setResultsName("month") + '/' - + integer.setResultsName("day")) - # equivalent form: - # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - # parseString returns a ParseResults object - result = date_str.parseString("1999/12/31") - - def test(s, fn=repr): - print("%s -> %s" % (s, fn(eval(s)))) - test("list(result)") - test("result[0]") - test("result['month']") - test("result.day") - test("'month' in result") - test("'minutes' in result") - test("result.dump()", str) - prints:: - list(result) -> ['1999', '/', '12', '/', '31'] - result[0] -> '1999' - result['month'] -> '12' - result.day -> '31' - 'month' in result -> True - 'minutes' in result -> False - result.dump() -> ['1999', '/', '12', '/', '31'] - - day: 31 - - month: 12 - - year: 1999 - """ - def __new__(cls, toklist=None, name=None, asList=True, modal=True ): - if isinstance(toklist, cls): - return toklist - retobj = object.__new__(cls) - retobj.__doinit = True - return retobj - - # Performance tuning: we construct a *lot* of these, so keep this - # constructor as small and fast as possible - def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ): - if self.__doinit: - self.__doinit = False - self.__name = None - self.__parent = None - self.__accumNames = {} - self.__asList = asList - self.__modal = modal - if toklist is None: - toklist = [] - if isinstance(toklist, list): - self.__toklist = toklist[:] - elif isinstance(toklist, _generatorType): - self.__toklist = list(toklist) - else: - self.__toklist = [toklist] - self.__tokdict = dict() - - if name is not None and name: - if not modal: - self.__accumNames[name] = 0 - if isinstance(name,int): - name = _ustr(name) # will always return a str, but use _ustr for consistency - self.__name = name - if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])): - if isinstance(toklist,basestring): - toklist = [ toklist ] - if asList: - if isinstance(toklist,ParseResults): - self[name] = _ParseResultsWithOffset(toklist.copy(),0) - else: - self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) - self[name].__name = name - else: - try: - self[name] = toklist[0] - except (KeyError,TypeError,IndexError): - self[name] = toklist - - def __getitem__( self, i ): - if isinstance( i, (int,slice) ): - return self.__toklist[i] - else: - if i not in self.__accumNames: - return self.__tokdict[i][-1][0] - else: - return ParseResults([ v[0] for v in self.__tokdict[i] ]) - - def __setitem__( self, k, v, isinstance=isinstance ): - if isinstance(v,_ParseResultsWithOffset): - self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] - sub = v[0] - elif isinstance(k,(int,slice)): - self.__toklist[k] = v - sub = v - else: - self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] - sub = v - if isinstance(sub,ParseResults): - sub.__parent = wkref(self) - - def __delitem__( self, i ): - if isinstance(i,(int,slice)): - mylen = len( self.__toklist ) - del self.__toklist[i] - - # convert int to slice - if isinstance(i, int): - if i < 0: - i += mylen - i = slice(i, i+1) - # get removed indices - removed = list(range(*i.indices(mylen))) - removed.reverse() - # fixup indices in token dictionary - for name,occurrences in self.__tokdict.items(): - for j in removed: - for k, (value, position) in enumerate(occurrences): - 
occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) - else: - del self.__tokdict[i] - - def __contains__( self, k ): - return k in self.__tokdict - - def __len__( self ): return len( self.__toklist ) - def __bool__(self): return ( not not self.__toklist ) - __nonzero__ = __bool__ - def __iter__( self ): return iter( self.__toklist ) - def __reversed__( self ): return iter( self.__toklist[::-1] ) - def _iterkeys( self ): - if hasattr(self.__tokdict, "iterkeys"): - return self.__tokdict.iterkeys() - else: - return iter(self.__tokdict) - - def _itervalues( self ): - return (self[k] for k in self._iterkeys()) - - def _iteritems( self ): - return ((k, self[k]) for k in self._iterkeys()) - - if PY_3: - keys = _iterkeys - """Returns an iterator of all named result keys (Python 3.x only).""" - - values = _itervalues - """Returns an iterator of all named result values (Python 3.x only).""" - - items = _iteritems - """Returns an iterator of all named result key-value tuples (Python 3.x only).""" - - else: - iterkeys = _iterkeys - """Returns an iterator of all named result keys (Python 2.x only).""" - - itervalues = _itervalues - """Returns an iterator of all named result values (Python 2.x only).""" - - iteritems = _iteritems - """Returns an iterator of all named result key-value tuples (Python 2.x only).""" - - def keys( self ): - """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x).""" - return list(self.iterkeys()) - - def values( self ): - """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x).""" - return list(self.itervalues()) - - def items( self ): - """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x).""" - return list(self.iteritems()) - - def haskeys( self ): - """Since keys() returns an iterator, this method is helpful in bypassing - code that looks for the existence of any defined results names.""" - return bool(self.__tokdict) - - def pop( self, *args, **kwargs): - """ - Removes and returns item at specified index (default=C{last}). - Supports both C{list} and C{dict} semantics for C{pop()}. If passed no - argument or an integer argument, it will use C{list} semantics - and pop tokens from the list of parsed tokens. If passed a - non-integer argument (most likely a string), it will use C{dict} - semantics and pop the corresponding value from any defined - results names. A second default return value argument is - supported, just as in C{dict.pop()}. 
- - Example:: - def remove_first(tokens): - tokens.pop(0) - print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] - print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321'] - - label = Word(alphas) - patt = label("LABEL") + OneOrMore(Word(nums)) - print(patt.parseString("AAB 123 321").dump()) - - # Use pop() in a parse action to remove named result (note that corresponding value is not - # removed from list form of results) - def remove_LABEL(tokens): - tokens.pop("LABEL") - return tokens - patt.addParseAction(remove_LABEL) - print(patt.parseString("AAB 123 321").dump()) - prints:: - ['AAB', '123', '321'] - - LABEL: AAB - - ['AAB', '123', '321'] - """ - if not args: - args = [-1] - for k,v in kwargs.items(): - if k == 'default': - args = (args[0], v) - else: - raise TypeError("pop() got an unexpected keyword argument '%s'" % k) - if (isinstance(args[0], int) or - len(args) == 1 or - args[0] in self): - index = args[0] - ret = self[index] - del self[index] - return ret - else: - defaultvalue = args[1] - return defaultvalue - - def get(self, key, defaultValue=None): - """ - Returns named result matching the given key, or if there is no - such name, then returns the given C{defaultValue} or C{None} if no - C{defaultValue} is specified. - - Similar to C{dict.get()}. - - Example:: - integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - result = date_str.parseString("1999/12/31") - print(result.get("year")) # -> '1999' - print(result.get("hour", "not specified")) # -> 'not specified' - print(result.get("hour")) # -> None - """ - if key in self: - return self[key] - else: - return defaultValue - - def insert( self, index, insStr ): - """ - Inserts new element at location index in the list of parsed tokens. - - Similar to C{list.insert()}. - - Example:: - print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] - - # use a parse action to insert the parse location in the front of the parsed results - def insert_locn(locn, tokens): - tokens.insert(0, locn) - print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] - """ - self.__toklist.insert(index, insStr) - # fixup indices in token dictionary - for name,occurrences in self.__tokdict.items(): - for k, (value, position) in enumerate(occurrences): - occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) - - def append( self, item ): - """ - Add single element to end of ParseResults list of elements. - - Example:: - print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] - - # use a parse action to compute the sum of the parsed integers, and add it to the end - def append_sum(tokens): - tokens.append(sum(map(int, tokens))) - print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444] - """ - self.__toklist.append(item) - - def extend( self, itemseq ): - """ - Add sequence of elements to end of ParseResults list of elements. 
- - Example:: - patt = OneOrMore(Word(alphas)) - - # use a parse action to append the reverse of the matched strings, to make a palindrome - def make_palindrome(tokens): - tokens.extend(reversed([t[::-1] for t in tokens])) - return ''.join(tokens) - print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' - """ - if isinstance(itemseq, ParseResults): - self += itemseq - else: - self.__toklist.extend(itemseq) - - def clear( self ): - """ - Clear all elements and results names. - """ - del self.__toklist[:] - self.__tokdict.clear() - - def __getattr__( self, name ): - try: - return self[name] - except KeyError: - return "" - - if name in self.__tokdict: - if name not in self.__accumNames: - return self.__tokdict[name][-1][0] - else: - return ParseResults([ v[0] for v in self.__tokdict[name] ]) - else: - return "" - - def __add__( self, other ): - ret = self.copy() - ret += other - return ret - - def __iadd__( self, other ): - if other.__tokdict: - offset = len(self.__toklist) - addoffset = lambda a: offset if a<0 else a+offset - otheritems = other.__tokdict.items() - otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) - for (k,vlist) in otheritems for v in vlist] - for k,v in otherdictitems: - self[k] = v - if isinstance(v[0],ParseResults): - v[0].__parent = wkref(self) - - self.__toklist += other.__toklist - self.__accumNames.update( other.__accumNames ) - return self - - def __radd__(self, other): - if isinstance(other,int) and other == 0: - # useful for merging many ParseResults using sum() builtin - return self.copy() - else: - # this may raise a TypeError - so be it - return other + self - - def __repr__( self ): - return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) - - def __str__( self ): - return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']' - - def _asStringList( self, sep='' ): - out = [] - for item in self.__toklist: - if out and sep: - out.append(sep) - if isinstance( item, ParseResults ): - out += item._asStringList() - else: - out.append( _ustr(item) ) - return out - - def asList( self ): - """ - Returns the parse results as a nested list of matching tokens, all converted to strings. - - Example:: - patt = OneOrMore(Word(alphas)) - result = patt.parseString("sldkj lsdkj sldkj") - # even though the result prints in string-like form, it is actually a pyparsing ParseResults - print(type(result), result) # -> ['sldkj', 'lsdkj', 'sldkj'] - - # Use asList() to create an actual list - result_list = result.asList() - print(type(result_list), result_list) # -> ['sldkj', 'lsdkj', 'sldkj'] - """ - return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist] - - def asDict( self ): - """ - Returns the named parse results as a nested dictionary. - - Example:: - integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - result = date_str.parseString('12/31/1999') - print(type(result), repr(result)) # -> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) - - result_dict = result.asDict() - print(type(result_dict), repr(result_dict)) # -> {'day': '1999', 'year': '12', 'month': '31'} - - # even though a ParseResults supports dict-like access, sometime you just need to have a dict - import json - print(json.dumps(result)) # -> Exception: TypeError: ... 
is not JSON serializable - print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"} - """ - if PY_3: - item_fn = self.items - else: - item_fn = self.iteritems - - def toItem(obj): - if isinstance(obj, ParseResults): - if obj.haskeys(): - return obj.asDict() - else: - return [toItem(v) for v in obj] - else: - return obj - - return dict((k,toItem(v)) for k,v in item_fn()) - - def copy( self ): - """ - Returns a new copy of a C{ParseResults} object. - """ - ret = ParseResults( self.__toklist ) - ret.__tokdict = self.__tokdict.copy() - ret.__parent = self.__parent - ret.__accumNames.update( self.__accumNames ) - ret.__name = self.__name - return ret - - def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): - """ - (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names. - """ - nl = "\n" - out = [] - namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items() - for v in vlist) - nextLevelIndent = indent + " " - - # collapse out indents if formatting is not desired - if not formatted: - indent = "" - nextLevelIndent = "" - nl = "" - - selfTag = None - if doctag is not None: - selfTag = doctag - else: - if self.__name: - selfTag = self.__name - - if not selfTag: - if namedItemsOnly: - return "" - else: - selfTag = "ITEM" - - out += [ nl, indent, "<", selfTag, ">" ] - - for i,res in enumerate(self.__toklist): - if isinstance(res,ParseResults): - if i in namedItems: - out += [ res.asXML(namedItems[i], - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted)] - else: - out += [ res.asXML(None, - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted)] - else: - # individual token, see if there is a name for it - resTag = None - if i in namedItems: - resTag = namedItems[i] - if not resTag: - if namedItemsOnly: - continue - else: - resTag = "ITEM" - xmlBodyText = _xml_escape(_ustr(res)) - out += [ nl, nextLevelIndent, "<", resTag, ">", - xmlBodyText, - "" ] - - out += [ nl, indent, "" ] - return "".join(out) - - def __lookup(self,sub): - for k,vlist in self.__tokdict.items(): - for v,loc in vlist: - if sub is v: - return k - return None - - def getName(self): - r""" - Returns the results name for this token expression. Useful when several - different expressions might match at a particular location. - - Example:: - integer = Word(nums) - ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") - house_number_expr = Suppress('#') + Word(nums, alphanums) - user_data = (Group(house_number_expr)("house_number") - | Group(ssn_expr)("ssn") - | Group(integer)("age")) - user_info = OneOrMore(user_data) - - result = user_info.parseString("22 111-22-3333 #221B") - for item in result: - print(item.getName(), ':', item[0]) - prints:: - age : 22 - ssn : 111-22-3333 - house_number : 221B - """ - if self.__name: - return self.__name - elif self.__parent: - par = self.__parent() - if par: - return par.__lookup(self) - else: - return None - elif (len(self) == 1 and - len(self.__tokdict) == 1 and - next(iter(self.__tokdict.values()))[0][1] in (0,-1)): - return next(iter(self.__tokdict.keys())) - else: - return None - - def dump(self, indent='', depth=0, full=True): - """ - Diagnostic method for listing out the contents of a C{ParseResults}. - Accepts an optional C{indent} argument so that this string can be embedded - in a nested display of other data. 
-
-        Example::
-            integer = Word(nums)
-            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
-
-            result = date_str.parseString('12/31/1999')
-            print(result.dump())
-        prints::
-            ['12', '/', '31', '/', '1999']
-            - day: 1999
-            - month: 31
-            - year: 12
-        """
-        out = []
-        NL = '\n'
-        out.append( indent+_ustr(self.asList()) )
-        if full:
-            if self.haskeys():
-                items = sorted((str(k), v) for k,v in self.items())
-                for k,v in items:
-                    if out:
-                        out.append(NL)
-                    out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
-                    if isinstance(v,ParseResults):
-                        if v:
-                            out.append( v.dump(indent,depth+1) )
-                        else:
-                            out.append(_ustr(v))
-                    else:
-                        out.append(repr(v))
-            elif any(isinstance(vv,ParseResults) for vv in self):
-                v = self
-                for i,vv in enumerate(v):
-                    if isinstance(vv,ParseResults):
-                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),vv.dump(indent,depth+1) ))
-                    else:
-                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),_ustr(vv)))
-
-        return "".join(out)
-
-    def pprint(self, *args, **kwargs):
-        """
-        Pretty-printer for parsed results as a list, using the C{pprint} module.
-        Accepts additional positional or keyword args as defined for the
-        C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})
-
-        Example::
-            ident = Word(alphas, alphanums)
-            num = Word(nums)
-            func = Forward()
-            term = ident | num | Group('(' + func + ')')
-            func <<= ident + Group(Optional(delimitedList(term)))
-            result = func.parseString("fna a,b,(fnb c,d,200),100")
-            result.pprint(width=40)
-        prints::
-            ['fna',
-             ['a',
-              'b',
-              ['(', 'fnb', ['c', 'd', '200'], ')'],
-              '100']]
-        """
-        pprint.pprint(self.asList(), *args, **kwargs)
-
-    # add support for pickle protocol
-    def __getstate__(self):
-        return ( self.__toklist,
-                 ( self.__tokdict.copy(),
-                   self.__parent is not None and self.__parent() or None,
-                   self.__accumNames,
-                   self.__name ) )
-
-    def __setstate__(self,state):
-        self.__toklist = state[0]
-        (self.__tokdict,
-         par,
-         inAccumNames,
-         self.__name) = state[1]
-        self.__accumNames = {}
-        self.__accumNames.update(inAccumNames)
-        if par is not None:
-            self.__parent = wkref(par)
-        else:
-            self.__parent = None
-
-    def __getnewargs__(self):
-        return self.__toklist, self.__name, self.__asList, self.__modal
-
-    def __dir__(self):
-        return (dir(type(self)) + list(self.keys()))
-
-MutableMapping.register(ParseResults)
-
-def col (loc,strg):
-    """Returns current column within a string, counting newlines as line separators.
-   The first column is number 1.
-
-   Note: the default parsing behavior is to expand tabs in the input string
-   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
-   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
-   consistent view of the parsed string, the parse location, and line and column
-   positions within the parsed string.
-   """
-    s = strg
-    return 1 if 0<loc<len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc)
-
-def lineno(loc,strg):
-    """Returns current line number within a string, counting newlines as line separators.
-   The first line is number 1.
-
-   Note: the default parsing behavior is to expand tabs in the input string
-   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
-   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
-   consistent view of the parsed string, the parse location, and line and column
-   positions within the parsed string.
-   """
-    return strg.count("\n",0,loc) + 1
-
-def line( loc, strg ):
-    """Returns the line of text containing loc within a string, counting newlines as line separators.
- """ - lastCR = strg.rfind("\n", 0, loc) - nextCR = strg.find("\n", loc) - if nextCR >= 0: - return strg[lastCR+1:nextCR] - else: - return strg[lastCR+1:] - -def _defaultStartDebugAction( instring, loc, expr ): - print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))) - -def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): - print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) - -def _defaultExceptionDebugAction( instring, loc, expr, exc ): - print ("Exception raised:" + _ustr(exc)) - -def nullDebugAction(*args): - """'Do-nothing' debug action, to suppress debugging output during parsing.""" - pass - -# Only works on Python 3.x - nonlocal is toxic to Python 2 installs -#~ 'decorator to trim function calls to match the arity of the target' -#~ def _trim_arity(func, maxargs=3): - #~ if func in singleArgBuiltins: - #~ return lambda s,l,t: func(t) - #~ limit = 0 - #~ foundArity = False - #~ def wrapper(*args): - #~ nonlocal limit,foundArity - #~ while 1: - #~ try: - #~ ret = func(*args[limit:]) - #~ foundArity = True - #~ return ret - #~ except TypeError: - #~ if limit == maxargs or foundArity: - #~ raise - #~ limit += 1 - #~ continue - #~ return wrapper - -# this version is Python 2.x-3.x cross-compatible -'decorator to trim function calls to match the arity of the target' -def _trim_arity(func, maxargs=2): - if func in singleArgBuiltins: - return lambda s,l,t: func(t) - limit = [0] - foundArity = [False] - - # traceback return data structure changed in Py3.5 - normalize back to plain tuples - if system_version[:2] >= (3,5): - def extract_stack(limit=0): - # special handling for Python 3.5.0 - extra deep call stack by 1 - offset = -3 if system_version == (3,5,0) else -2 - frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset] - return [frame_summary[:2]] - def extract_tb(tb, limit=0): - frames = traceback.extract_tb(tb, limit=limit) - frame_summary = frames[-1] - return [frame_summary[:2]] - else: - extract_stack = traceback.extract_stack - extract_tb = traceback.extract_tb - - # synthesize what would be returned by traceback.extract_stack at the call to - # user's parse action 'func', so that we don't incur call penalty at parse time - - LINE_DIFF = 6 - # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND - # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!! 
- this_line = extract_stack(limit=2)[-1] - pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF) - - def wrapper(*args): - while 1: - try: - ret = func(*args[limit[0]:]) - foundArity[0] = True - return ret - except TypeError: - # re-raise TypeErrors if they did not come from our arity testing - if foundArity[0]: - raise - else: - try: - tb = sys.exc_info()[-1] - if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth: - raise - finally: - del tb - - if limit[0] <= maxargs: - limit[0] += 1 - continue - raise - - # copy func name to wrapper for sensible debug output - func_name = "" - try: - func_name = getattr(func, '__name__', - getattr(func, '__class__').__name__) - except Exception: - func_name = str(func) - wrapper.__name__ = func_name - - return wrapper - -class ParserElement(object): - """Abstract base level parser element class.""" - DEFAULT_WHITE_CHARS = " \n\t\r" - verbose_stacktrace = False - - @staticmethod - def setDefaultWhitespaceChars( chars ): - r""" - Overrides the default whitespace chars - - Example:: - # default whitespace chars are space, and newline - OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] - - # change to just treat newline as significant - ParserElement.setDefaultWhitespaceChars(" \t") - OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def'] - """ - ParserElement.DEFAULT_WHITE_CHARS = chars - - @staticmethod - def inlineLiteralsUsing(cls): - """ - Set class to be used for inclusion of string literals into a parser. - - Example:: - # default literal class used is Literal - integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31'] - - - # change to Suppress - ParserElement.inlineLiteralsUsing(Suppress) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - date_str.parseString("1999/12/31") # -> ['1999', '12', '31'] - """ - ParserElement._literalStringClass = cls - - def __init__( self, savelist=False ): - self.parseAction = list() - self.failAction = None - #~ self.name = "" # don't define self.name, let subclasses try/except upcall - self.strRepr = None - self.resultsName = None - self.saveAsList = savelist - self.skipWhitespace = True - self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS - self.copyDefaultWhiteChars = True - self.mayReturnEmpty = False # used when checking for left-recursion - self.keepTabs = False - self.ignoreExprs = list() - self.debug = False - self.streamlined = False - self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index - self.errmsg = "" - self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) - self.debugActions = ( None, None, None ) #custom debug actions - self.re = None - self.callPreparse = True # used to avoid redundant calls to preParse - self.callDuringTry = False - - def copy( self ): - """ - Make a copy of this C{ParserElement}. Useful for defining different parse actions - for the same parsing pattern, using copies of the original parse element. 
- - Example:: - integer = Word(nums).setParseAction(lambda toks: int(toks[0])) - integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K") - integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") - - print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M")) - prints:: - [5120, 100, 655360, 268435456] - Equivalent form of C{expr.copy()} is just C{expr()}:: - integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") - """ - cpy = copy.copy( self ) - cpy.parseAction = self.parseAction[:] - cpy.ignoreExprs = self.ignoreExprs[:] - if self.copyDefaultWhiteChars: - cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS - return cpy - - def setName( self, name ): - """ - Define name for this expression, makes debugging and exception messages clearer. - - Example:: - Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1) - Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) - """ - self.name = name - self.errmsg = "Expected " + self.name - if hasattr(self,"exception"): - self.exception.msg = self.errmsg - return self - - def setResultsName( self, name, listAllMatches=False ): - """ - Define name for referencing matching tokens as a nested attribute - of the returned parse results. - NOTE: this returns a *copy* of the original C{ParserElement} object; - this is so that the client can define a basic element, such as an - integer, and reference it in multiple places with different names. - - You can also set results names using the abbreviated syntax, - C{expr("name")} in place of C{expr.setResultsName("name")} - - see L{I{__call__}<__call__>}. - - Example:: - date_str = (integer.setResultsName("year") + '/' - + integer.setResultsName("month") + '/' - + integer.setResultsName("day")) - - # equivalent form: - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - """ - newself = self.copy() - if name.endswith("*"): - name = name[:-1] - listAllMatches=True - newself.resultsName = name - newself.modalResults = not listAllMatches - return newself - - def setBreak(self,breakFlag = True): - """Method to invoke the Python pdb debugger when this element is - about to be parsed. Set C{breakFlag} to True to enable, False to - disable. - """ - if breakFlag: - _parseMethod = self._parse - def breaker(instring, loc, doActions=True, callPreParse=True): - import pdb - pdb.set_trace() - return _parseMethod( instring, loc, doActions, callPreParse ) - breaker._originalParseMethod = _parseMethod - self._parse = breaker - else: - if hasattr(self._parse,"_originalParseMethod"): - self._parse = self._parse._originalParseMethod - return self - - def setParseAction( self, *fns, **kwargs ): - """ - Define one or more actions to perform when successfully matching parse element definition. - Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, - C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: - - s = the original string being parsed (see note below) - - loc = the location of the matching substring - - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object - If the functions in fns modify the tokens, they can return them as the return - value from fn, and the modified list of tokens will replace the original. - Otherwise, fn does not need to return any value. 
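-
-        An illustrative sketch using the full C{(s,loc,toks)} signature
-        (C{report_token} is a hypothetical helper, not part of pyparsing)::
-            def report_token(s, loc, toks):
-                # loc indexes into s; toks holds the matched tokens
-                print("matched %r at column %d" % (toks[0], col(loc, s)))
-            Word(nums).setParseAction(report_token).parseString("42")
-            # prints: matched '42' at column 1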
-
-        Optional keyword arguments:
-         - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing
-
-        Note: the default parsing behavior is to expand tabs in the input string
-        before starting the parsing process.  See L{I{parseString}<parseString>} for more information
-        on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
-        consistent view of the parsed string, the parse location, and line and column
-        positions within the parsed string.
-
-        Example::
-            integer = Word(nums)
-            date_str = integer + '/' + integer + '/' + integer
-
-            date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
-
-            # use parse action to convert to ints at parse time
-            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
-            date_str = integer + '/' + integer + '/' + integer
-
-            # note that integer fields are now ints, not strings
-            date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]
-        """
-        self.parseAction = list(map(_trim_arity, list(fns)))
-        self.callDuringTry = kwargs.get("callDuringTry", False)
-        return self
-
-    def addParseAction( self, *fns, **kwargs ):
-        """
-        Add one or more parse actions to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.
-
-        See examples in L{I{copy}<copy>}.
-        """
-        self.parseAction += list(map(_trim_arity, list(fns)))
-        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
-        return self
-
-    def addCondition(self, *fns, **kwargs):
-        """Add a boolean predicate function to expression's list of parse actions. See
-        L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
-        functions passed to C{addCondition} need to return boolean success/fail of the condition.
-
-        Optional keyword arguments:
-         - message = define a custom message to be used in the raised exception
-         - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
-
-        Example::
-            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
-            year_int = integer.copy()
-            year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
-            date_str = year_int + '/' + integer + '/' + integer
-
-            result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
-        """
-        msg = kwargs.get("message", "failed user-defined condition")
-        exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
-        for fn in fns:
-            def pa(s,l,t):
-                if not bool(_trim_arity(fn)(s,l,t)):
-                    raise exc_type(s,l,msg)
-            self.parseAction.append(pa)
-        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
-        return self
-
-    def setFailAction( self, fn ):
-        """Define action to perform if parsing fails at this expression.
-           Fail action fn is a callable function that takes the arguments
-           C{fn(s,loc,expr,err)} where:
-            - s = string being parsed
-            - loc = location where expression match was attempted and failed
-            - expr = the parse expression that failed
-            - err = the exception thrown
-           The function returns no value.
It may throw C{L{ParseFatalException}} - if it is desired to stop parsing immediately.""" - self.failAction = fn - return self - - def _skipIgnorables( self, instring, loc ): - exprsFound = True - while exprsFound: - exprsFound = False - for e in self.ignoreExprs: - try: - while 1: - loc,dummy = e._parse( instring, loc ) - exprsFound = True - except ParseException: - pass - return loc - - def preParse( self, instring, loc ): - if self.ignoreExprs: - loc = self._skipIgnorables( instring, loc ) - - if self.skipWhitespace: - wt = self.whiteChars - instrlen = len(instring) - while loc < instrlen and instring[loc] in wt: - loc += 1 - - return loc - - def parseImpl( self, instring, loc, doActions=True ): - return loc, [] - - def postParse( self, instring, loc, tokenlist ): - return tokenlist - - #~ @profile - def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): - debugging = ( self.debug ) #and doActions ) - - if debugging or self.failAction: - #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) - if (self.debugActions[0] ): - self.debugActions[0]( instring, loc, self ) - if callPreParse and self.callPreparse: - preloc = self.preParse( instring, loc ) - else: - preloc = loc - tokensStart = preloc - try: - try: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - except IndexError: - raise ParseException( instring, len(instring), self.errmsg, self ) - except ParseBaseException as err: - #~ print ("Exception raised:", err) - if self.debugActions[2]: - self.debugActions[2]( instring, tokensStart, self, err ) - if self.failAction: - self.failAction( instring, tokensStart, self, err ) - raise - else: - if callPreParse and self.callPreparse: - preloc = self.preParse( instring, loc ) - else: - preloc = loc - tokensStart = preloc - if self.mayIndexError or preloc >= len(instring): - try: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - except IndexError: - raise ParseException( instring, len(instring), self.errmsg, self ) - else: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - - tokens = self.postParse( instring, loc, tokens ) - - retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) - if self.parseAction and (doActions or self.callDuringTry): - if debugging: - try: - for fn in self.parseAction: - tokens = fn( instring, tokensStart, retTokens ) - if tokens is not None: - retTokens = ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), - modal=self.modalResults ) - except ParseBaseException as err: - #~ print "Exception raised in user parse action:", err - if (self.debugActions[2] ): - self.debugActions[2]( instring, tokensStart, self, err ) - raise - else: - for fn in self.parseAction: - tokens = fn( instring, tokensStart, retTokens ) - if tokens is not None: - retTokens = ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), - modal=self.modalResults ) - if debugging: - #~ print ("Matched",self,"->",retTokens.asList()) - if (self.debugActions[1] ): - self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) - - return loc, retTokens - - def tryParse( self, instring, loc ): - try: - return self._parse( instring, loc, doActions=False )[0] - except ParseFatalException: - raise ParseException( instring, loc, self.errmsg, self) - - def canParseNext(self, instring, loc): - try: - self.tryParse(instring, loc) - except (ParseException, 
IndexError): - return False - else: - return True - - class _UnboundedCache(object): - def __init__(self): - cache = {} - self.not_in_cache = not_in_cache = object() - - def get(self, key): - return cache.get(key, not_in_cache) - - def set(self, key, value): - cache[key] = value - - def clear(self): - cache.clear() - - def cache_len(self): - return len(cache) - - self.get = types.MethodType(get, self) - self.set = types.MethodType(set, self) - self.clear = types.MethodType(clear, self) - self.__len__ = types.MethodType(cache_len, self) - - if _OrderedDict is not None: - class _FifoCache(object): - def __init__(self, size): - self.not_in_cache = not_in_cache = object() - - cache = _OrderedDict() - - def get(self, key): - return cache.get(key, not_in_cache) - - def set(self, key, value): - cache[key] = value - while len(cache) > size: - try: - cache.popitem(False) - except KeyError: - pass - - def clear(self): - cache.clear() - - def cache_len(self): - return len(cache) - - self.get = types.MethodType(get, self) - self.set = types.MethodType(set, self) - self.clear = types.MethodType(clear, self) - self.__len__ = types.MethodType(cache_len, self) - - else: - class _FifoCache(object): - def __init__(self, size): - self.not_in_cache = not_in_cache = object() - - cache = {} - key_fifo = collections.deque([], size) - - def get(self, key): - return cache.get(key, not_in_cache) - - def set(self, key, value): - cache[key] = value - while len(key_fifo) > size: - cache.pop(key_fifo.popleft(), None) - key_fifo.append(key) - - def clear(self): - cache.clear() - key_fifo.clear() - - def cache_len(self): - return len(cache) - - self.get = types.MethodType(get, self) - self.set = types.MethodType(set, self) - self.clear = types.MethodType(clear, self) - self.__len__ = types.MethodType(cache_len, self) - - # argument cache for optimizing repeated calls when backtracking through recursive expressions - packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail - packrat_cache_lock = RLock() - packrat_cache_stats = [0, 0] - - # this method gets repeatedly called during backtracking with the same arguments - - # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression - def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): - HIT, MISS = 0, 1 - lookup = (self, instring, loc, callPreParse, doActions) - with ParserElement.packrat_cache_lock: - cache = ParserElement.packrat_cache - value = cache.get(lookup) - if value is cache.not_in_cache: - ParserElement.packrat_cache_stats[MISS] += 1 - try: - value = self._parseNoCache(instring, loc, doActions, callPreParse) - except ParseBaseException as pe: - # cache a copy of the exception, without the traceback - cache.set(lookup, pe.__class__(*pe.args)) - raise - else: - cache.set(lookup, (value[0], value[1].copy())) - return value - else: - ParserElement.packrat_cache_stats[HIT] += 1 - if isinstance(value, Exception): - raise value - return (value[0], value[1].copy()) - - _parse = _parseNoCache - - @staticmethod - def resetCache(): - ParserElement.packrat_cache.clear() - ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats) - - _packratEnabled = False - @staticmethod - def enablePackrat(cache_size_limit=128): - """Enables "packrat" parsing, which adds memoizing to the parsing logic. 
-
-        Repeated parse attempts at the same string location (which happens
-        often in many complex grammars) can immediately return a cached value,
-        instead of re-executing parsing/validating code.  Memoizing is done of
-        both valid results and parsing exceptions.
-
-        Parameters:
-         - cache_size_limit - (default=C{128}) - if an integer value is provided
-           will limit the size of the packrat cache; if None is passed, then
-           the cache size will be unbounded; if 0 is passed, the cache will
-           be effectively disabled.
-
-        This speedup may break existing programs that use parse actions that
-        have side-effects.  For this reason, packrat parsing is disabled when
-        you first import pyparsing.  To activate the packrat feature, your
-        program must call the class method C{ParserElement.enablePackrat()}.  If
-        your program uses C{psyco} to "compile as you go", you must call
-        C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
-        Python will crash.  For best results, call C{enablePackrat()} immediately
-        after importing pyparsing.
-
-        Example::
-            import pyparsing
-            pyparsing.ParserElement.enablePackrat()
-        """
-        if not ParserElement._packratEnabled:
-            ParserElement._packratEnabled = True
-            if cache_size_limit is None:
-                ParserElement.packrat_cache = ParserElement._UnboundedCache()
-            else:
-                ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
-            ParserElement._parse = ParserElement._parseCache
-
-    def parseString( self, instring, parseAll=False ):
-        """
-        Execute the parse expression with the given string.
-        This is the main interface to the client code, once the complete
-        expression has been built.
-
-        If you want the grammar to require that the entire input string be
-        successfully parsed, then set C{parseAll} to True (equivalent to ending
-        the grammar with C{L{StringEnd()}}).
-
-        Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
-        in order to report proper column numbers in parse actions.
-        If the input string contains tabs and
-        the grammar uses parse actions that use the C{loc} argument to index into the
-        string being parsed, you can ensure you have a consistent view of the input
-        string by:
-         - calling C{parseWithTabs} on your grammar before calling C{parseString}
-           (see L{I{parseWithTabs}<parseWithTabs>})
-         - define your parse action using the full C{(s,loc,toks)} signature, and
-           reference the input string using the parse action's C{s} argument
-         - explicitly expand the tabs in your input string before calling
-           C{parseString}
-
-        Example::
-            Word('a').parseString('aaaaabaaa') # -> ['aaaaa']
-            Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text
-        """
-        ParserElement.resetCache()
-        if not self.streamlined:
-            self.streamline()
-            #~ self.saveAsList = True
-        for e in self.ignoreExprs:
-            e.streamline()
-        if not self.keepTabs:
-            instring = instring.expandtabs()
-        try:
-            loc, tokens = self._parse( instring, 0 )
-            if parseAll:
-                loc = self.preParse( instring, loc )
-                se = Empty() + StringEnd()
-                se._parse( instring, loc )
-        except ParseBaseException as exc:
-            if ParserElement.verbose_stacktrace:
-                raise
-            else:
-                # catch and re-raise exception from here, clears out pyparsing internal stack trace
-                raise exc
-        else:
-            return tokens
-
-    def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
-        """
-        Scan the input string for expression matches.  Each match will return the
-        matching tokens, start location, and end location.
May be called with optional - C{maxMatches} argument, to clip scanning after 'n' matches are found. If - C{overlap} is specified, then overlapping matches will be reported. - - Note that the start and end locations are reported relative to the string - being parsed. See L{I{parseString}} for more information on parsing - strings with embedded tabs. - - Example:: - source = "sldjf123lsdjjkf345sldkjf879lkjsfd987" - print(source) - for tokens,start,end in Word(alphas).scanString(source): - print(' '*start + '^'*(end-start)) - print(' '*start + tokens[0]) - - prints:: - - sldjf123lsdjjkf345sldkjf879lkjsfd987 - ^^^^^ - sldjf - ^^^^^^^ - lsdjjkf - ^^^^^^ - sldkjf - ^^^^^^ - lkjsfd - """ - if not self.streamlined: - self.streamline() - for e in self.ignoreExprs: - e.streamline() - - if not self.keepTabs: - instring = _ustr(instring).expandtabs() - instrlen = len(instring) - loc = 0 - preparseFn = self.preParse - parseFn = self._parse - ParserElement.resetCache() - matches = 0 - try: - while loc <= instrlen and matches < maxMatches: - try: - preloc = preparseFn( instring, loc ) - nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) - except ParseException: - loc = preloc+1 - else: - if nextLoc > loc: - matches += 1 - yield tokens, preloc, nextLoc - if overlap: - nextloc = preparseFn( instring, loc ) - if nextloc > loc: - loc = nextLoc - else: - loc += 1 - else: - loc = nextLoc - else: - loc = preloc+1 - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc - - def transformString( self, instring ): - """ - Extension to C{L{scanString}}, to modify matching text with modified tokens that may - be returned from a parse action. To use C{transformString}, define a grammar and - attach a parse action to it that modifies the returned token list. - Invoking C{transformString()} on a target string will then scan for matches, - and replace the matched text patterns according to the logic in the parse - action. C{transformString()} returns the resulting transformed string. - - Example:: - wd = Word(alphas) - wd.setParseAction(lambda toks: toks[0].title()) - - print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york.")) - Prints:: - Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York. - """ - out = [] - lastE = 0 - # force preservation of s, to minimize unwanted transformation of string, and to - # keep string locs straight between transformString and scanString - self.keepTabs = True - try: - for t,s,e in self.scanString( instring ): - out.append( instring[lastE:s] ) - if t: - if isinstance(t,ParseResults): - out += t.asList() - elif isinstance(t,list): - out += t - else: - out.append(t) - lastE = e - out.append(instring[lastE:]) - out = [o for o in out if o] - return "".join(map(_ustr,_flatten(out))) - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc - - def searchString( self, instring, maxMatches=_MAX_INT ): - """ - Another extension to C{L{scanString}}, simplifying the access to the tokens found - to match the given parse expression. May be called with optional - C{maxMatches} argument, to clip searching after 'n' matches are found. 
- - Example:: - # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters - cap_word = Word(alphas.upper(), alphas.lower()) - - print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")) - - # the sum() builtin can be used to merge results into a single ParseResults object - print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))) - prints:: - [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']] - ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity'] - """ - try: - return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc - - def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False): - """ - Generator method to split a string using the given expression as a separator. - May be called with optional C{maxsplit} argument, to limit the number of splits; - and the optional C{includeSeparators} argument (default=C{False}), if the separating - matching text should be included in the split results. - - Example:: - punc = oneOf(list(".,;:/-!?")) - print(list(punc.split("This, this?, this sentence, is badly punctuated!"))) - prints:: - ['This', ' this', '', ' this sentence', ' is badly punctuated', ''] - """ - splits = 0 - last = 0 - for t,s,e in self.scanString(instring, maxMatches=maxsplit): - yield instring[last:s] - if includeSeparators: - yield t[0] - last = e - yield instring[last:] - - def __add__(self, other ): - """ - Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement - converts them to L{Literal}s by default. - - Example:: - greet = Word(alphas) + "," + Word(alphas) + "!" - hello = "Hello, World!" - print (hello, "->", greet.parseString(hello)) - Prints:: - Hello, World! 
-> ['Hello', ',', 'World', '!']
-        """
-        if isinstance( other, basestring ):
-            other = ParserElement._literalStringClass( other )
-        if not isinstance( other, ParserElement ):
-            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
-                    SyntaxWarning, stacklevel=2)
-            return None
-        return And( [ self, other ] )
-
-    def __radd__(self, other ):
-        """
-        Implementation of + operator when left operand is not a C{L{ParserElement}}
-        """
-        if isinstance( other, basestring ):
-            other = ParserElement._literalStringClass( other )
-        if not isinstance( other, ParserElement ):
-            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
-                    SyntaxWarning, stacklevel=2)
-            return None
-        return other + self
-
-    def __sub__(self, other):
-        """
-        Implementation of - operator, returns C{L{And}} with error stop
-        """
-        if isinstance( other, basestring ):
-            other = ParserElement._literalStringClass( other )
-        if not isinstance( other, ParserElement ):
-            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
-                    SyntaxWarning, stacklevel=2)
-            return None
-        return self + And._ErrorStop() + other
-
-    def __rsub__(self, other ):
-        """
-        Implementation of - operator when left operand is not a C{L{ParserElement}}
-        """
-        if isinstance( other, basestring ):
-            other = ParserElement._literalStringClass( other )
-        if not isinstance( other, ParserElement ):
-            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
-                    SyntaxWarning, stacklevel=2)
-            return None
-        return other - self
-
-    def __mul__(self,other):
-        """
-        Implementation of * operator, allows use of C{expr * 3} in place of
-        C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
-        tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
-        may also include C{None} as in:
-         - C{expr*(n,None)} or C{expr*(n,)} is equivalent
-              to C{expr*n + L{ZeroOrMore}(expr)}
-              (read as "at least n instances of C{expr}")
-         - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
-              (read as "0 to n instances of C{expr}")
-         - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
-         - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
-
-        Note that C{expr*(None,n)} does not raise an exception if
-        more than n exprs exist in the input stream; that is,
-        C{expr*(None,n)} does not enforce a maximum number of expr
-        occurrences.
If this behavior is desired, then write - C{expr*(None,n) + ~expr} - """ - if isinstance(other,int): - minElements, optElements = other,0 - elif isinstance(other,tuple): - other = (other + (None, None))[:2] - if other[0] is None: - other = (0, other[1]) - if isinstance(other[0],int) and other[1] is None: - if other[0] == 0: - return ZeroOrMore(self) - if other[0] == 1: - return OneOrMore(self) - else: - return self*other[0] + ZeroOrMore(self) - elif isinstance(other[0],int) and isinstance(other[1],int): - minElements, optElements = other - optElements -= minElements - else: - raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) - else: - raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) - - if minElements < 0: - raise ValueError("cannot multiply ParserElement by negative value") - if optElements < 0: - raise ValueError("second tuple value must be greater or equal to first tuple value") - if minElements == optElements == 0: - raise ValueError("cannot multiply ParserElement by 0 or (0,0)") - - if (optElements): - def makeOptionalList(n): - if n>1: - return Optional(self + makeOptionalList(n-1)) - else: - return Optional(self) - if minElements: - if minElements == 1: - ret = self + makeOptionalList(optElements) - else: - ret = And([self]*minElements) + makeOptionalList(optElements) - else: - ret = makeOptionalList(optElements) - else: - if minElements == 1: - ret = self - else: - ret = And([self]*minElements) - return ret - - def __rmul__(self, other): - return self.__mul__(other) - - def __or__(self, other ): - """ - Implementation of | operator - returns C{L{MatchFirst}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return MatchFirst( [ self, other ] ) - - def __ror__(self, other ): - """ - Implementation of | operator when left operand is not a C{L{ParserElement}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other | self - - def __xor__(self, other ): - """ - Implementation of ^ operator - returns C{L{Or}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return Or( [ self, other ] ) - - def __rxor__(self, other ): - """ - Implementation of ^ operator when left operand is not a C{L{ParserElement}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other ^ self - - def __and__(self, other ): - """ - Implementation of & operator - returns C{L{Each}} - """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - 
return None
-        return Each( [ self, other ] )
-
-    def __rand__(self, other ):
-        """
-        Implementation of & operator when left operand is not a C{L{ParserElement}}
-        """
-        if isinstance( other, basestring ):
-            other = ParserElement._literalStringClass( other )
-        if not isinstance( other, ParserElement ):
-            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
-                    SyntaxWarning, stacklevel=2)
-            return None
-        return other & self
-
-    def __invert__( self ):
-        """
-        Implementation of ~ operator - returns C{L{NotAny}}
-        """
-        return NotAny( self )
-
-    def __call__(self, name=None):
-        """
-        Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.
-
-        If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
-        passed as C{True}.
-
-        If C{name} is omitted, same as calling C{L{copy}}.
-
-        Example::
-            # these are equivalent
-            userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
-            userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
-        """
-        if name is not None:
-            return self.setResultsName(name)
-        else:
-            return self.copy()
-
-    def suppress( self ):
-        """
-        Suppresses the output of this C{ParserElement}; useful to keep punctuation from
-        cluttering up returned output.
-        """
-        return Suppress( self )
-
-    def leaveWhitespace( self ):
-        """
-        Disables the skipping of whitespace before matching the characters in the
-        C{ParserElement}'s defined pattern.  This is normally only used internally by
-        the pyparsing module, but may be needed in some whitespace-sensitive grammars.
-        """
-        self.skipWhitespace = False
-        return self
-
-    def setWhitespaceChars( self, chars ):
-        """
-        Overrides the default whitespace chars
-        """
-        self.skipWhitespace = True
-        self.whiteChars = chars
-        self.copyDefaultWhiteChars = False
-        return self
-
-    def parseWithTabs( self ):
-        """
-        Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
-        Must be called before C{parseString} when the input grammar contains elements that
-        match C{<TAB>} characters.
-        """
-        self.keepTabs = True
-        return self
-
-    def ignore( self, other ):
-        """
-        Define expression to be ignored (e.g., comments) while doing pattern
-        matching; may be called repeatedly, to define multiple comment or other
-        ignorable patterns.
-
-        Example::
-            patt = OneOrMore(Word(alphas))
-            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
-
-            patt.ignore(cStyleComment)
-            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
-        """
-        if isinstance(other, basestring):
-            other = Suppress(other)
-
-        if isinstance( other, Suppress ):
-            if other not in self.ignoreExprs:
-                self.ignoreExprs.append(other)
-        else:
-            self.ignoreExprs.append( Suppress( other.copy() ) )
-        return self
-
-    def setDebugActions( self, startAction, successAction, exceptionAction ):
-        """
-        Enable display of debugging messages while doing pattern matching.
-        """
-        self.debugActions = (startAction or _defaultStartDebugAction,
-                             successAction or _defaultSuccessDebugAction,
-                             exceptionAction or _defaultExceptionDebugAction)
-        self.debug = True
-        return self
-
-    def setDebug( self, flag=True ):
-        """
-        Enable display of debugging messages while doing pattern matching.
-        Set C{flag} to True to enable, False to disable.
-
-        Example::
-            wd = Word(alphas).setName("alphaword")
-            integer = Word(nums).setName("numword")
-            term = wd | integer
-
-            # turn on debugging for wd
-            wd.setDebug()
-
-            OneOrMore(term).parseString("abc 123 xyz 890")
-
-        prints::
-            Match alphaword at loc 0(1,1)
-            Matched alphaword -> ['abc']
-            Match alphaword at loc 3(1,4)
-            Exception raised:Expected alphaword (at char 4), (line:1, col:5)
-            Match alphaword at loc 7(1,8)
-            Matched alphaword -> ['xyz']
-            Match alphaword at loc 11(1,12)
-            Exception raised:Expected alphaword (at char 12), (line:1, col:13)
-            Match alphaword at loc 15(1,16)
-            Exception raised:Expected alphaword (at char 15), (line:1, col:16)
-
-        The output shown is that produced by the default debug actions - custom debug actions can be
-        specified using L{setDebugActions}. Prior to attempting
-        to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
-        is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
-        message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
-        which makes debugging and exception messages easier to understand - for instance, the default
-        name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
-        """
-        if flag:
-            self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
-        else:
-            self.debug = False
-        return self
-
-    def __str__( self ):
-        return self.name
-
-    def __repr__( self ):
-        return _ustr(self)
-
-    def streamline( self ):
-        self.streamlined = True
-        self.strRepr = None
-        return self
-
-    def checkRecursion( self, parseElementList ):
-        pass
-
-    def validate( self, validateTrace=[] ):
-        """
-        Check defined expressions for valid structure, check for infinite recursive definitions.
-        """
-        self.checkRecursion( [] )
-
-    def parseFile( self, file_or_filename, parseAll=False ):
-        """
-        Execute the parse expression on the given file or filename.
-        If a filename is specified (instead of a file object),
-        the entire file is opened, read, and closed before parsing.
-        """
-        try:
-            file_contents = file_or_filename.read()
-        except AttributeError:
-            with open(file_or_filename, "r") as f:
-                file_contents = f.read()
-        try:
-            return self.parseString(file_contents, parseAll)
-        except ParseBaseException as exc:
-            if ParserElement.verbose_stacktrace:
-                raise
-            else:
-                # catch and re-raise exception from here, clears out pyparsing internal stack trace
-                raise exc
-
-    def __eq__(self,other):
-        if isinstance(other, ParserElement):
-            return self is other or vars(self) == vars(other)
-        elif isinstance(other, basestring):
-            return self.matches(other)
-        else:
-            return super(ParserElement,self)==other
-
-    def __ne__(self,other):
-        return not (self == other)
-
-    def __hash__(self):
-        return hash(id(self))
-
-    def __req__(self,other):
-        return self == other
-
-    def __rne__(self,other):
-        return not (self == other)
-
-    def matches(self, testString, parseAll=True):
-        """
-        Method for quick testing of a parser against a test string. Good for simple
-        inline microtests of sub expressions while building up larger parser.
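-
-        For instance (an illustrative sketch; C{parseAll} defaults to C{True})::
-            Word(nums).matches("100")      # -> True
-            Word(nums).matches("100 200")  # -> False, trailing text is rejected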
- - Parameters: - - testString - to test against this expression for a match - - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests - - Example:: - expr = Word(nums) - assert expr.matches("100") - """ - try: - self.parseString(_ustr(testString), parseAll=parseAll) - return True - except ParseBaseException: - return False - - def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False): - """ - Execute the parse expression on a series of test strings, showing each - test, the parsed results or where the parse failed. Quick and easy way to - run a parse expression against a list of sample strings. - - Parameters: - - tests - a list of separate test strings, or a multiline string of test strings - - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests - - comment - (default=C{'#'}) - expression for indicating embedded comments in the test - string; pass None to disable comment filtering - - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline; - if False, only dump nested list - - printResults - (default=C{True}) prints test output to stdout - - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing - - Returns: a (success, results) tuple, where success indicates that all tests succeeded - (or failed if C{failureTests} is True), and the results contain a list of lines of each - test's output - - Example:: - number_expr = pyparsing_common.number.copy() - - result = number_expr.runTests(''' - # unsigned integer - 100 - # negative integer - -100 - # float with scientific notation - 6.02e23 - # integer with scientific notation - 1e-12 - ''') - print("Success" if result[0] else "Failed!") - - result = number_expr.runTests(''' - # stray character - 100Z - # missing leading digit before '.' - -.100 - # too many '.' - 3.14.159 - ''', failureTests=True) - print("Success" if result[0] else "Failed!") - prints:: - # unsigned integer - 100 - [100] - - # negative integer - -100 - [-100] - - # float with scientific notation - 6.02e23 - [6.02e+23] - - # integer with scientific notation - 1e-12 - [1e-12] - - Success - - # stray character - 100Z - ^ - FAIL: Expected end of text (at char 3), (line:1, col:4) - - # missing leading digit before '.' - -.100 - ^ - FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1) - - # too many '.' - 3.14.159 - ^ - FAIL: Expected end of text (at char 4), (line:1, col:5) - - Success - - Each test string must be on a single line. If you want to test a string that spans multiple - lines, create a test like this:: - - expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines") - - (Note that this is a raw string literal, you must include the leading 'r'.) 
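-
-        A minimal sketch of checking the returned (success, results) tuple::
-            success, _ = Word(nums).runTests(["100", "200"], printResults=False)
-            assert success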
- """ - if isinstance(tests, basestring): - tests = list(map(str.strip, tests.rstrip().splitlines())) - if isinstance(comment, basestring): - comment = Literal(comment) - allResults = [] - comments = [] - success = True - for t in tests: - if comment is not None and comment.matches(t, False) or comments and not t: - comments.append(t) - continue - if not t: - continue - out = ['\n'.join(comments), t] - comments = [] - try: - t = t.replace(r'\n','\n') - result = self.parseString(t, parseAll=parseAll) - out.append(result.dump(full=fullDump)) - success = success and not failureTests - except ParseBaseException as pe: - fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else "" - if '\n' in t: - out.append(line(pe.loc, t)) - out.append(' '*(col(pe.loc,t)-1) + '^' + fatal) - else: - out.append(' '*pe.loc + '^' + fatal) - out.append("FAIL: " + str(pe)) - success = success and failureTests - result = pe - except Exception as exc: - out.append("FAIL-EXCEPTION: " + str(exc)) - success = success and failureTests - result = exc - - if printResults: - if fullDump: - out.append('') - print('\n'.join(out)) - - allResults.append((t, result)) - - return success, allResults - - -class Token(ParserElement): - """ - Abstract C{ParserElement} subclass, for defining atomic matching patterns. - """ - def __init__( self ): - super(Token,self).__init__( savelist=False ) - - -class Empty(Token): - """ - An empty token, will always match. - """ - def __init__( self ): - super(Empty,self).__init__() - self.name = "Empty" - self.mayReturnEmpty = True - self.mayIndexError = False - - -class NoMatch(Token): - """ - A token that will never match. - """ - def __init__( self ): - super(NoMatch,self).__init__() - self.name = "NoMatch" - self.mayReturnEmpty = True - self.mayIndexError = False - self.errmsg = "Unmatchable token" - - def parseImpl( self, instring, loc, doActions=True ): - raise ParseException(instring, loc, self.errmsg, self) - - -class Literal(Token): - """ - Token to exactly match a specified string. - - Example:: - Literal('blah').parseString('blah') # -> ['blah'] - Literal('blah').parseString('blahfooblah') # -> ['blah'] - Literal('blah').parseString('bla') # -> Exception: Expected "blah" - - For case-insensitive matching, use L{CaselessLiteral}. - - For keyword matching (force word break before and after the matched string), - use L{Keyword} or L{CaselessKeyword}. 
- """ - def __init__( self, matchString ): - super(Literal,self).__init__() - self.match = matchString - self.matchLen = len(matchString) - try: - self.firstMatchChar = matchString[0] - except IndexError: - warnings.warn("null string passed to Literal; use Empty() instead", - SyntaxWarning, stacklevel=2) - self.__class__ = Empty - self.name = '"%s"' % _ustr(self.match) - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = False - self.mayIndexError = False - - # Performance tuning: this routine gets called a *lot* - # if this is a single character match string and the first character matches, - # short-circuit as quickly as possible, and avoid calling startswith - #~ @profile - def parseImpl( self, instring, loc, doActions=True ): - if (instring[loc] == self.firstMatchChar and - (self.matchLen==1 or instring.startswith(self.match,loc)) ): - return loc+self.matchLen, self.match - raise ParseException(instring, loc, self.errmsg, self) -_L = Literal -ParserElement._literalStringClass = Literal - -class Keyword(Token): - """ - Token to exactly match a specified string as a keyword, that is, it must be - immediately followed by a non-keyword character. Compare with C{L{Literal}}: - - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}. - - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} - Accepts two optional constructor arguments in addition to the keyword string: - - C{identChars} is a string of characters that would be valid identifier characters, - defaulting to all alphanumerics + "_" and "$" - - C{caseless} allows case-insensitive matching, default is C{False}. - - Example:: - Keyword("start").parseString("start") # -> ['start'] - Keyword("start").parseString("starting") # -> Exception - - For case-insensitive matching, use L{CaselessKeyword}. 
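-
-    A caseless sketch (illustrative; the result keeps the case of the defining string)::
-        Keyword("start", caseless=True).parseString("START now")  # -> ['start']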
- """ - DEFAULT_KEYWORD_CHARS = alphanums+"_$" - - def __init__( self, matchString, identChars=None, caseless=False ): - super(Keyword,self).__init__() - if identChars is None: - identChars = Keyword.DEFAULT_KEYWORD_CHARS - self.match = matchString - self.matchLen = len(matchString) - try: - self.firstMatchChar = matchString[0] - except IndexError: - warnings.warn("null string passed to Keyword; use Empty() instead", - SyntaxWarning, stacklevel=2) - self.name = '"%s"' % self.match - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = False - self.mayIndexError = False - self.caseless = caseless - if caseless: - self.caselessmatch = matchString.upper() - identChars = identChars.upper() - self.identChars = set(identChars) - - def parseImpl( self, instring, loc, doActions=True ): - if self.caseless: - if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and - (loc == 0 or instring[loc-1].upper() not in self.identChars) ): - return loc+self.matchLen, self.match - else: - if (instring[loc] == self.firstMatchChar and - (self.matchLen==1 or instring.startswith(self.match,loc)) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and - (loc == 0 or instring[loc-1] not in self.identChars) ): - return loc+self.matchLen, self.match - raise ParseException(instring, loc, self.errmsg, self) - - def copy(self): - c = super(Keyword,self).copy() - c.identChars = Keyword.DEFAULT_KEYWORD_CHARS - return c - - @staticmethod - def setDefaultKeywordChars( chars ): - """Overrides the default Keyword chars - """ - Keyword.DEFAULT_KEYWORD_CHARS = chars - -class CaselessLiteral(Literal): - """ - Token to match a specified string, ignoring case of letters. - Note: the matched results will always be in the case of the given - match string, NOT the case of the input text. - - Example:: - OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD'] - - (Contrast with example for L{CaselessKeyword}.) - """ - def __init__( self, matchString ): - super(CaselessLiteral,self).__init__( matchString.upper() ) - # Preserve the defining literal. - self.returnString = matchString - self.name = "'%s'" % self.returnString - self.errmsg = "Expected " + self.name - - def parseImpl( self, instring, loc, doActions=True ): - if instring[ loc:loc+self.matchLen ].upper() == self.match: - return loc+self.matchLen, self.returnString - raise ParseException(instring, loc, self.errmsg, self) - -class CaselessKeyword(Keyword): - """ - Caseless version of L{Keyword}. - - Example:: - OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD'] - - (Contrast with example for L{CaselessLiteral}.) - """ - def __init__( self, matchString, identChars=None ): - super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True ) - - def parseImpl( self, instring, loc, doActions=True ): - if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): - return loc+self.matchLen, self.match - raise ParseException(instring, loc, self.errmsg, self) - -class CloseMatch(Token): - """ - A variation on L{Literal} which matches "close" matches, that is, - strings with at most 'n' mismatching characters. 
C{CloseMatch} takes parameters: - - C{match_string} - string to be matched - - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match - - The results from a successful parse will contain the matched text from the input string and the following named results: - - C{mismatches} - a list of the positions within the match_string where mismatches were found - - C{original} - the original match_string used to compare against the input string - - If C{mismatches} is an empty list, then the match was an exact match. - - Example:: - patt = CloseMatch("ATCATCGAATGGA") - patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']}) - patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1) - - # exact match - patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']}) - - # close match allowing up to 2 mismatches - patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2) - patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']}) - """ - def __init__(self, match_string, maxMismatches=1): - super(CloseMatch,self).__init__() - self.name = match_string - self.match_string = match_string - self.maxMismatches = maxMismatches - self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches) - self.mayIndexError = False - self.mayReturnEmpty = False - - def parseImpl( self, instring, loc, doActions=True ): - start = loc - instrlen = len(instring) - maxloc = start + len(self.match_string) - - if maxloc <= instrlen: - match_string = self.match_string - match_stringloc = 0 - mismatches = [] - maxMismatches = self.maxMismatches - - for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)): - src,mat = s_m - if src != mat: - mismatches.append(match_stringloc) - if len(mismatches) > maxMismatches: - break - else: - loc = match_stringloc + 1 - results = ParseResults([instring[start:loc]]) - results['original'] = self.match_string - results['mismatches'] = mismatches - return loc, results - - raise ParseException(instring, loc, self.errmsg, self) - - -class Word(Token): - """ - Token for matching words composed of allowed character sets. - Defined with string containing all allowed initial characters, - an optional string containing allowed body characters (if omitted, - defaults to the initial character set), and an optional minimum, - maximum, and/or exact length. The default value for C{min} is 1 (a - minimum value < 1 is not valid); the default values for C{max} and C{exact} - are 0, meaning no maximum or exact length restriction. An optional - C{excludeChars} parameter can list characters that might be found in - the input C{bodyChars} string; useful to define a word of all printables - except for one or two characters, for instance. - - L{srange} is useful for defining custom character set strings for defining - C{Word} expressions, using range notation from regular expression character sets. - - A common mistake is to use C{Word} to match a specific literal string, as in - C{Word("Address")}. Remember that C{Word} uses the string argument to define - I{sets} of matchable characters. This expression would match "Add", "AAA", - "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'. - To match an exact literal string, use L{Literal} or L{Keyword}. 
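-
-    For instance (an illustrative sketch of the mistake described above)::
-        Word("Address").parseString("Add dAred")  # -> ['Add'], not 'Address'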
-
-    pyparsing includes helper strings for building Words:
-     - L{alphas}
-     - L{nums}
-     - L{alphanums}
-     - L{hexnums}
-     - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
-     - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
-     - L{printables} (any non-whitespace character)
-
-    Example::
-        # a word composed of digits
-        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
-
-        # a word with a leading capital, and zero or more lowercase
-        capital_word = Word(alphas.upper(), alphas.lower())
-
-        # hostnames are alphanumeric, with leading alpha, and '-'
-        hostname = Word(alphas, alphanums+'-')
-
-        # roman numeral (not a strict parser, accepts invalid mix of characters)
-        roman = Word("IVXLCDM")
-
-        # any string of non-whitespace characters, except for ','
-        csv_value = Word(printables, excludeChars=",")
-    """
-    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
-        super(Word,self).__init__()
-        if excludeChars:
-            initChars = ''.join(c for c in initChars if c not in excludeChars)
-            if bodyChars:
-                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
-        self.initCharsOrig = initChars
-        self.initChars = set(initChars)
-        if bodyChars :
-            self.bodyCharsOrig = bodyChars
-            self.bodyChars = set(bodyChars)
-        else:
-            self.bodyCharsOrig = initChars
-            self.bodyChars = set(initChars)
-
-        self.maxSpecified = max > 0
-
-        if min < 1:
-            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")
-
-        self.minLen = min
-
-        if max > 0:
-            self.maxLen = max
-        else:
-            self.maxLen = _MAX_INT
-
-        if exact > 0:
-            self.maxLen = exact
-            self.minLen = exact
-
-        self.name = _ustr(self)
-        self.errmsg = "Expected " + self.name
-        self.mayIndexError = False
-        self.asKeyword = asKeyword
-
-        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
-            if self.bodyCharsOrig == self.initCharsOrig:
-                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
-            elif len(self.initCharsOrig) == 1:
-                self.reString = "%s[%s]*" % \
-                                      (re.escape(self.initCharsOrig),
-                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
-            else:
-                self.reString = "[%s][%s]*" % \
-                                      (_escapeRegexRangeChars(self.initCharsOrig),
-                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
-            if self.asKeyword:
-                self.reString = r"\b"+self.reString+r"\b"
-            try:
-                self.re = re.compile( self.reString )
-            except Exception:
-                self.re = None
-
-    def parseImpl( self, instring, loc, doActions=True ):
-        if self.re:
-            result = self.re.match(instring,loc)
-            if not result:
-                raise ParseException(instring, loc, self.errmsg, self)
-
-            loc = result.end()
-            return loc, result.group()
-
-        if not(instring[ loc ] in self.initChars):
-            raise ParseException(instring, loc, self.errmsg, self)
-
-        start = loc
-        loc += 1
-        instrlen = len(instring)
-        bodychars = self.bodyChars
-        maxloc = start + self.maxLen
-        maxloc = min( maxloc, instrlen )
-        while loc < maxloc and instring[loc] in bodychars:
-            loc += 1
-
-        throwException = False
-        if loc - start < self.minLen:
-            throwException = True
-        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
-            throwException = True
-        if self.asKeyword:
-            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
-                throwException = True
-
-        if throwException:
-            raise ParseException( instring, loc, self.errmsg, self )
-
-        return loc, instring[start:loc]
-
-    def __str__( self ):
-        try:
-            return super(Word,self).__str__()
-        except Exception:
-            pass
-
-        if self.strRepr is None:
-
-            def charsAsStr(s):
-                if len(s)>4:
-                    return s[:4]+"..."
-                else:
-                    return s
-
-            if ( self.initCharsOrig != self.bodyCharsOrig ):
-                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
-            else:
-                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
-
-        return self.strRepr
-
-
-class Regex(Token):
-    r"""
-    Token for matching strings that match a given regular expression.
-    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
-    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
-    named parse results.
-
-    Example::
-        realnum = Regex(r"[+-]?\d+\.\d*")
-        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
-        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
-        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
-    """
-    compiledREtype = type(re.compile("[A-Z]"))
-    def __init__( self, pattern, flags=0):
-        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
-        super(Regex,self).__init__()
-
-        if isinstance(pattern, basestring):
-            if not pattern:
-                warnings.warn("null string passed to Regex; use Empty() instead",
-                        SyntaxWarning, stacklevel=2)
-
-            self.pattern = pattern
-            self.flags = flags
-
-            try:
-                self.re = re.compile(self.pattern, self.flags)
-                self.reString = self.pattern
-            except sre_constants.error:
-                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
-                    SyntaxWarning, stacklevel=2)
-                raise
-
-        elif isinstance(pattern, Regex.compiledREtype):
-            self.re = pattern
-            self.pattern = \
-                self.reString = str(pattern)
-            self.flags = flags
-
-        else:
-            raise ValueError("Regex may only be constructed with a string or a compiled RE object")
-
-        self.name = _ustr(self)
-        self.errmsg = "Expected " + self.name
-        self.mayIndexError = False
-        self.mayReturnEmpty = True
-
-    def parseImpl( self, instring, loc, doActions=True ):
-        result = self.re.match(instring,loc)
-        if not result:
-            raise ParseException(instring, loc, self.errmsg, self)
-
-        loc = result.end()
-        d = result.groupdict()
-        ret = ParseResults(result.group())
-        if d:
-            for k in d:
-                ret[k] = d[k]
-        return loc,ret
-
-    def __str__( self ):
-        try:
-            return super(Regex,self).__str__()
-        except Exception:
-            pass
-
-        if self.strRepr is None:
-            self.strRepr = "Re:(%s)" % repr(self.pattern)
-
-        return self.strRepr
-
-
-class QuotedString(Token):
-    r"""
-    Token for matching strings that are delimited by quoting characters.
-
-    Defined with the following parameters:
-        - quoteChar - string of one or more characters defining the quote delimiting string
-        - escChar - character to escape quotes, typically backslash (default=C{None})
-        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
-        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
-        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
-        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
-        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.)
to actual whitespace (default=C{True}) - - Example:: - qs = QuotedString('"') - print(qs.searchString('lsjdf "This is the quote" sldjf')) - complex_qs = QuotedString('{{', endQuoteChar='}}') - print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf')) - sql_qs = QuotedString('"', escQuote='""') - print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf')) - prints:: - [['This is the quote']] - [['This is the "quote"']] - [['This is the quote with "embedded" quotes']] - """ - def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True): - super(QuotedString,self).__init__() - - # remove white space from quote chars - wont work anyway - quoteChar = quoteChar.strip() - if not quoteChar: - warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) - raise SyntaxError() - - if endQuoteChar is None: - endQuoteChar = quoteChar - else: - endQuoteChar = endQuoteChar.strip() - if not endQuoteChar: - warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) - raise SyntaxError() - - self.quoteChar = quoteChar - self.quoteCharLen = len(quoteChar) - self.firstQuoteChar = quoteChar[0] - self.endQuoteChar = endQuoteChar - self.endQuoteCharLen = len(endQuoteChar) - self.escChar = escChar - self.escQuote = escQuote - self.unquoteResults = unquoteResults - self.convertWhitespaceEscapes = convertWhitespaceEscapes - - if multiline: - self.flags = re.MULTILINE | re.DOTALL - self.pattern = r'%s(?:[^%s%s]' % \ - ( re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) - else: - self.flags = 0 - self.pattern = r'%s(?:[^%s\n\r%s]' % \ - ( re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) - if len(self.endQuoteChar) > 1: - self.pattern += ( - '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]), - _escapeRegexRangeChars(self.endQuoteChar[i])) - for i in range(len(self.endQuoteChar)-1,0,-1)) + ')' - ) - if escQuote: - self.pattern += (r'|(?:%s)' % re.escape(escQuote)) - if escChar: - self.pattern += (r'|(?:%s.)' % re.escape(escChar)) - self.escCharReplacePattern = re.escape(self.escChar)+"(.)" - self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) - - try: - self.re = re.compile(self.pattern, self.flags) - self.reString = self.pattern - except sre_constants.error: - warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, - SyntaxWarning, stacklevel=2) - raise - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayIndexError = False - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None - if not result: - raise ParseException(instring, loc, self.errmsg, self) - - loc = result.end() - ret = result.group() - - if self.unquoteResults: - - # strip off quotes - ret = ret[self.quoteCharLen:-self.endQuoteCharLen] - - if isinstance(ret,basestring): - # replace escaped whitespace - if '\\' in ret and self.convertWhitespaceEscapes: - ws_map = { - r'\t' : '\t', - r'\n' : '\n', - r'\f' : '\f', - r'\r' : '\r', - } - for wslit,wschar in ws_map.items(): - ret = ret.replace(wslit, wschar) - - # replace escaped characters - if self.escChar: - ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret) - - # replace 
escaped quotes - if self.escQuote: - ret = ret.replace(self.escQuote, self.endQuoteChar) - - return loc, ret - - def __str__( self ): - try: - return super(QuotedString,self).__str__() - except Exception: - pass - - if self.strRepr is None: - self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) - - return self.strRepr - - -class CharsNotIn(Token): - """ - Token for matching words composed of characters I{not} in a given set (will - include whitespace in matched characters if not listed in the provided exclusion set - see example). - Defined with string containing all disallowed characters, and an optional - minimum, maximum, and/or exact length. The default value for C{min} is 1 (a - minimum value < 1 is not valid); the default values for C{max} and C{exact} - are 0, meaning no maximum or exact length restriction. - - Example:: - # define a comma-separated-value as anything that is not a ',' - csv_value = CharsNotIn(',') - print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213")) - prints:: - ['dkls', 'lsdkjf', 's12 34', '@!#', '213'] - """ - def __init__( self, notChars, min=1, max=0, exact=0 ): - super(CharsNotIn,self).__init__() - self.skipWhitespace = False - self.notChars = notChars - - if min < 1: - raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = ( self.minLen == 0 ) - self.mayIndexError = False - - def parseImpl( self, instring, loc, doActions=True ): - if instring[loc] in self.notChars: - raise ParseException(instring, loc, self.errmsg, self) - - start = loc - loc += 1 - notchars = self.notChars - maxlen = min( start+self.maxLen, len(instring) ) - while loc < maxlen and \ - (instring[loc] not in notchars): - loc += 1 - - if loc - start < self.minLen: - raise ParseException(instring, loc, self.errmsg, self) - - return loc, instring[start:loc] - - def __str__( self ): - try: - return super(CharsNotIn, self).__str__() - except Exception: - pass - - if self.strRepr is None: - if len(self.notChars) > 4: - self.strRepr = "!W:(%s...)" % self.notChars[:4] - else: - self.strRepr = "!W:(%s)" % self.notChars - - return self.strRepr - -class White(Token): - """ - Special matching class for matching whitespace. Normally, whitespace is ignored - by pyparsing grammars. This class is included when some whitespace structures - are significant. Define with a string containing the whitespace characters to be - matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments, - as defined for the C{L{Word}} class. 
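For reference, the `White` docstring above ships without a usage example. A minimal sketch against the standalone pyparsing 2.x API (assuming `pyparsing` installed from PyPI rather than this vendored copy), making a normally-skipped tab significant:

```python
from pyparsing import White, Word, alphas

# a tab-separated pair in which the tab itself must be present:
# White("\t") matches the tab instead of letting it be skipped
pair = Word(alphas) + White("\t").suppress() + Word(alphas)
print(pair.parseString("hello\tworld"))  # -> ['hello', 'world']
```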
- """ - whiteStrs = { - " " : "", - "\t": "", - "\n": "", - "\r": "", - "\f": "", - } - def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): - super(White,self).__init__() - self.matchWhite = ws - self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) ) - #~ self.leaveWhitespace() - self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite)) - self.mayReturnEmpty = True - self.errmsg = "Expected " + self.name - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - def parseImpl( self, instring, loc, doActions=True ): - if not(instring[ loc ] in self.matchWhite): - raise ParseException(instring, loc, self.errmsg, self) - start = loc - loc += 1 - maxloc = start + self.maxLen - maxloc = min( maxloc, len(instring) ) - while loc < maxloc and instring[loc] in self.matchWhite: - loc += 1 - - if loc - start < self.minLen: - raise ParseException(instring, loc, self.errmsg, self) - - return loc, instring[start:loc] - - -class _PositionToken(Token): - def __init__( self ): - super(_PositionToken,self).__init__() - self.name=self.__class__.__name__ - self.mayReturnEmpty = True - self.mayIndexError = False - -class GoToColumn(_PositionToken): - """ - Token to advance to a specific column of input text; useful for tabular report scraping. - """ - def __init__( self, colno ): - super(GoToColumn,self).__init__() - self.col = colno - - def preParse( self, instring, loc ): - if col(loc,instring) != self.col: - instrlen = len(instring) - if self.ignoreExprs: - loc = self._skipIgnorables( instring, loc ) - while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : - loc += 1 - return loc - - def parseImpl( self, instring, loc, doActions=True ): - thiscol = col( loc, instring ) - if thiscol > self.col: - raise ParseException( instring, loc, "Text not in expected column", self ) - newloc = loc + self.col - thiscol - ret = instring[ loc: newloc ] - return newloc, ret - - -class LineStart(_PositionToken): - """ - Matches if current position is at the beginning of a line within the parse string - - Example:: - - test = '''\ - AAA this line - AAA and this line - AAA but not this one - B AAA and definitely not this one - ''' - - for t in (LineStart() + 'AAA' + restOfLine).searchString(test): - print(t) - - Prints:: - ['AAA', ' this line'] - ['AAA', ' and this line'] - - """ - def __init__( self ): - super(LineStart,self).__init__() - self.errmsg = "Expected start of line" - - def parseImpl( self, instring, loc, doActions=True ): - if col(loc, instring) == 1: - return loc, [] - raise ParseException(instring, loc, self.errmsg, self) - -class LineEnd(_PositionToken): - """ - Matches if current position is at the end of a line within the parse string - """ - def __init__( self ): - super(LineEnd,self).__init__() - self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) - self.errmsg = "Expected end of line" - - def parseImpl( self, instring, loc, doActions=True ): - if loc len(instring): - return loc, [] - else: - raise ParseException(instring, loc, self.errmsg, self) - -class WordStart(_PositionToken): - """ - Matches if the current position is at the beginning of a Word, and - is not preceded by any character in a given set of C{wordChars} - (default=C{printables}). To emulate the C{\b} behavior of regular expressions, - use C{WordStart(alphanums)}. 
-    C{WordStart} will also match at the beginning of
-    the string being parsed, or at the beginning of a line.
-    """
-    def __init__(self, wordChars = printables):
-        super(WordStart,self).__init__()
-        self.wordChars = set(wordChars)
-        self.errmsg = "Not at the start of a word"
-
-    def parseImpl(self, instring, loc, doActions=True ):
-        if loc != 0:
-            if (instring[loc-1] in self.wordChars or
-                    instring[loc] not in self.wordChars):
-                raise ParseException(instring, loc, self.errmsg, self)
-        return loc, []
-
-class WordEnd(_PositionToken):
-    """
-    Matches if the current position is at the end of a Word, and
-    is not followed by any character in a given set of C{wordChars}
-    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
-    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
-    the string being parsed, or at the end of a line.
-    """
-    def __init__(self, wordChars = printables):
-        super(WordEnd,self).__init__()
-        self.wordChars = set(wordChars)
-        self.skipWhitespace = False
-        self.errmsg = "Not at the end of a word"
-
-    def parseImpl(self, instring, loc, doActions=True ):
-        instrlen = len(instring)
-        if instrlen>0 and loc<instrlen:
-            if (instring[loc] in self.wordChars or
-                    instring[loc-1] not in self.wordChars):
-                raise ParseException(instring, loc, self.errmsg, self)
-        return loc, []
-
-
-class Or(ParseExpression):
-    """
-    Requires that at least one C{ParseExpression} is found.
-    If two expressions match, the expression that matches the longest string will be used.
-    May be constructed using the C{'^'} operator.
-
-    Example::
-        # construct Or using '^' operator
-
-        number = Regex(r'\d+(\.\d*)?') ^ Regex(r'\d+\.\d+')
-        print(number.searchString("123 3.1416 789"))
-    prints::
-        [['123'], ['3.1416'], ['789']]
-    """
-    def __init__( self, exprs, savelist = False ):
-        super(Or,self).__init__(exprs, savelist)
-        if self.exprs:
-            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
-        else:
-            self.mayReturnEmpty = True
-
-    def parseImpl( self, instring, loc, doActions=True ):
-        maxExcLoc = -1
-        maxException = None
-        matches = []
-        for e in self.exprs:
-            try:
-                loc2 = e.tryParse( instring, loc )
-            except ParseException as err:
-                err.__traceback__ = None
-                if err.loc > maxExcLoc:
-                    maxException = err
-                    maxExcLoc = err.loc
-            except IndexError:
-                if len(instring) > maxExcLoc:
-                    maxException = ParseException(instring,len(instring),e.errmsg,self)
-                    maxExcLoc = len(instring)
-            else:
-                # save match among all matches, to retry longest to shortest
-                matches.append((loc2, e))
-
-        if matches:
-            matches.sort(key=lambda x: -x[0])
-            for _,e in matches:
-                try:
-                    return e._parse( instring, loc, doActions )
-                except ParseException as err:
-                    err.__traceback__ = None
-                    if err.loc > maxExcLoc:
-                        maxException = err
-                        maxExcLoc = err.loc
-
-        if maxException is not None:
-            maxException.msg = self.errmsg
-            raise maxException
-        else:
-            raise ParseException(instring, loc, "no defined alternatives to match", self)
-
-
-    def __ixor__(self, other ):
-        if isinstance( other, basestring ):
-            other = ParserElement._literalStringClass( other )
-        return self.append( other ) #Or( [ self, other ] )
-
-    def __str__( self ):
-        if hasattr(self,"name"):
-            return self.name
-
-        if self.strRepr is None:
-            self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
-
-        return self.strRepr
-
-    def checkRecursion( self, parseElementList ):
-        subRecCheckList = parseElementList[:] + [ self ]
-        for e in self.exprs:
-            e.checkRecursion( subRecCheckList )
-
-
-class MatchFirst(ParseExpression):
-    """
-    Requires that at least one C{ParseExpression} is found.
-    If two expressions match, the first one listed is the one that will match.
-    May be constructed using the C{'|'} operator.
-
-    Example::
-        # construct MatchFirst using '|' operator
-
-        # watch the order of expressions to match
-        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
-        print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]
-
-        # put more selective expression first
-        number = Combine(Word(nums) + '.'
+ Word(nums)) | Word(nums) - print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] - """ - def __init__( self, exprs, savelist = False ): - super(MatchFirst,self).__init__(exprs, savelist) - if self.exprs: - self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) - else: - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - maxExcLoc = -1 - maxException = None - for e in self.exprs: - try: - ret = e._parse( instring, loc, doActions ) - return ret - except ParseException as err: - if err.loc > maxExcLoc: - maxException = err - maxExcLoc = err.loc - except IndexError: - if len(instring) > maxExcLoc: - maxException = ParseException(instring,len(instring),e.errmsg,self) - maxExcLoc = len(instring) - - # only got here if no expression matched, raise exception for match that made it the furthest - else: - if maxException is not None: - maxException.msg = self.errmsg - raise maxException - else: - raise ParseException(instring, loc, "no defined alternatives to match", self) - - def __ior__(self, other ): - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - return self.append( other ) #MatchFirst( [ self, other ] ) - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}" - - return self.strRepr - - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] - for e in self.exprs: - e.checkRecursion( subRecCheckList ) - - -class Each(ParseExpression): - """ - Requires all given C{ParseExpression}s to be found, but in any order. - Expressions may be separated by whitespace. - May be constructed using the C{'&'} operator. 
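As a quick illustration of the `'&'` operator described above, a minimal sketch against the standalone pyparsing 2.x API (the `width`/`height` grammar is invented for this note, not taken from the file):

```python
from pyparsing import CaselessKeyword, Optional, Word, nums

# "width" and "height" clauses may appear in either order;
# height is optional ('&' builds an Each under the hood)
width = CaselessKeyword("width") + "=" + Word(nums)("w")
height = CaselessKeyword("height") + "=" + Word(nums)("h")
opts = width & Optional(height)
print(opts.parseString("height=7 width=5")["w"])  # -> 5
```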
- - Example:: - color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") - shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") - integer = Word(nums) - shape_attr = "shape:" + shape_type("shape") - posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") - color_attr = "color:" + color("color") - size_attr = "size:" + integer("size") - - # use Each (using operator '&') to accept attributes in any order - # (shape and posn are required, color and size are optional) - shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr) - - shape_spec.runTests(''' - shape: SQUARE color: BLACK posn: 100, 120 - shape: CIRCLE size: 50 color: BLUE posn: 50,80 - color:GREEN size:20 shape:TRIANGLE posn:20,40 - ''' - ) - prints:: - shape: SQUARE color: BLACK posn: 100, 120 - ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] - - color: BLACK - - posn: ['100', ',', '120'] - - x: 100 - - y: 120 - - shape: SQUARE - - - shape: CIRCLE size: 50 color: BLUE posn: 50,80 - ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] - - color: BLUE - - posn: ['50', ',', '80'] - - x: 50 - - y: 80 - - shape: CIRCLE - - size: 50 - - - color: GREEN size: 20 shape: TRIANGLE posn: 20,40 - ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] - - color: GREEN - - posn: ['20', ',', '40'] - - x: 20 - - y: 40 - - shape: TRIANGLE - - size: 20 - """ - def __init__( self, exprs, savelist = True ): - super(Each,self).__init__(exprs, savelist) - self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) - self.skipWhitespace = True - self.initExprGroups = True - - def parseImpl( self, instring, loc, doActions=True ): - if self.initExprGroups: - self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional)) - opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] - opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)] - self.optionals = opt1 + opt2 - self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] - self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] - self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] - self.required += self.multirequired - self.initExprGroups = False - tmpLoc = loc - tmpReqd = self.required[:] - tmpOpt = self.optionals[:] - matchOrder = [] - - keepMatching = True - while keepMatching: - tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired - failed = [] - for e in tmpExprs: - try: - tmpLoc = e.tryParse( instring, tmpLoc ) - except ParseException: - failed.append(e) - else: - matchOrder.append(self.opt1map.get(id(e),e)) - if e in tmpReqd: - tmpReqd.remove(e) - elif e in tmpOpt: - tmpOpt.remove(e) - if len(failed) == len(tmpExprs): - keepMatching = False - - if tmpReqd: - missing = ", ".join(_ustr(e) for e in tmpReqd) - raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) - - # add any unmatched Optionals, in case they have default values defined - matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] - - resultlist = [] - for e in matchOrder: - loc,results = e._parse(instring,loc,doActions) - resultlist.append(results) - - finalResults = sum(resultlist, ParseResults([])) - return loc, finalResults - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " & 
".join(_ustr(e) for e in self.exprs) + "}" - - return self.strRepr - - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] - for e in self.exprs: - e.checkRecursion( subRecCheckList ) - - -class ParseElementEnhance(ParserElement): - """ - Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens. - """ - def __init__( self, expr, savelist=False ): - super(ParseElementEnhance,self).__init__(savelist) - if isinstance( expr, basestring ): - if issubclass(ParserElement._literalStringClass, Token): - expr = ParserElement._literalStringClass(expr) - else: - expr = ParserElement._literalStringClass(Literal(expr)) - self.expr = expr - self.strRepr = None - if expr is not None: - self.mayIndexError = expr.mayIndexError - self.mayReturnEmpty = expr.mayReturnEmpty - self.setWhitespaceChars( expr.whiteChars ) - self.skipWhitespace = expr.skipWhitespace - self.saveAsList = expr.saveAsList - self.callPreparse = expr.callPreparse - self.ignoreExprs.extend(expr.ignoreExprs) - - def parseImpl( self, instring, loc, doActions=True ): - if self.expr is not None: - return self.expr._parse( instring, loc, doActions, callPreParse=False ) - else: - raise ParseException("",loc,self.errmsg,self) - - def leaveWhitespace( self ): - self.skipWhitespace = False - self.expr = self.expr.copy() - if self.expr is not None: - self.expr.leaveWhitespace() - return self - - def ignore( self, other ): - if isinstance( other, Suppress ): - if other not in self.ignoreExprs: - super( ParseElementEnhance, self).ignore( other ) - if self.expr is not None: - self.expr.ignore( self.ignoreExprs[-1] ) - else: - super( ParseElementEnhance, self).ignore( other ) - if self.expr is not None: - self.expr.ignore( self.ignoreExprs[-1] ) - return self - - def streamline( self ): - super(ParseElementEnhance,self).streamline() - if self.expr is not None: - self.expr.streamline() - return self - - def checkRecursion( self, parseElementList ): - if self in parseElementList: - raise RecursiveGrammarException( parseElementList+[self] ) - subRecCheckList = parseElementList[:] + [ self ] - if self.expr is not None: - self.expr.checkRecursion( subRecCheckList ) - - def validate( self, validateTrace=[] ): - tmp = validateTrace[:]+[self] - if self.expr is not None: - self.expr.validate(tmp) - self.checkRecursion( [] ) - - def __str__( self ): - try: - return super(ParseElementEnhance,self).__str__() - except Exception: - pass - - if self.strRepr is None and self.expr is not None: - self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) - return self.strRepr - - -class FollowedBy(ParseElementEnhance): - """ - Lookahead matching of the given parse expression. C{FollowedBy} - does I{not} advance the parsing position within the input string, it only - verifies that the specified parse expression matches at the current - position. C{FollowedBy} always returns a null token list. 
- - Example:: - # use FollowedBy to match a label only if it is followed by a ':' - data_word = Word(alphas) - label = data_word + FollowedBy(':') - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - - OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint() - prints:: - [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']] - """ - def __init__( self, expr ): - super(FollowedBy,self).__init__(expr) - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - self.expr.tryParse( instring, loc ) - return loc, [] - - -class NotAny(ParseElementEnhance): - """ - Lookahead to disallow matching with the given parse expression. C{NotAny} - does I{not} advance the parsing position within the input string, it only - verifies that the specified parse expression does I{not} match at the current - position. Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny} - always returns a null token list. May be constructed using the '~' operator. - - Example:: - - """ - def __init__( self, expr ): - super(NotAny,self).__init__(expr) - #~ self.leaveWhitespace() - self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs - self.mayReturnEmpty = True - self.errmsg = "Found unwanted token, "+_ustr(self.expr) - - def parseImpl( self, instring, loc, doActions=True ): - if self.expr.canParseNext(instring, loc): - raise ParseException(instring, loc, self.errmsg, self) - return loc, [] - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "~{" + _ustr(self.expr) + "}" - - return self.strRepr - -class _MultipleMatch(ParseElementEnhance): - def __init__( self, expr, stopOn=None): - super(_MultipleMatch, self).__init__(expr) - self.saveAsList = True - ender = stopOn - if isinstance(ender, basestring): - ender = ParserElement._literalStringClass(ender) - self.not_ender = ~ender if ender is not None else None - - def parseImpl( self, instring, loc, doActions=True ): - self_expr_parse = self.expr._parse - self_skip_ignorables = self._skipIgnorables - check_ender = self.not_ender is not None - if check_ender: - try_not_ender = self.not_ender.tryParse - - # must be at least one (but first see if we are the stopOn sentinel; - # if so, fail) - if check_ender: - try_not_ender(instring, loc) - loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False ) - try: - hasIgnoreExprs = (not not self.ignoreExprs) - while 1: - if check_ender: - try_not_ender(instring, loc) - if hasIgnoreExprs: - preloc = self_skip_ignorables( instring, loc ) - else: - preloc = loc - loc, tmptokens = self_expr_parse( instring, preloc, doActions ) - if tmptokens or tmptokens.haskeys(): - tokens += tmptokens - except (ParseException,IndexError): - pass - - return loc, tokens - -class OneOrMore(_MultipleMatch): - """ - Repetition of one or more of the given expression. - - Parameters: - - expr - expression that must match one or more times - - stopOn - (default=C{None}) - expression for a terminating sentinel - (only required if the sentinel would ordinarily match the repetition - expression) - - Example:: - data_word = Word(alphas) - label = data_word + FollowedBy(':') - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) - - text = "shape: SQUARE posn: upper left color: BLACK" - OneOrMore(attr_expr).parseString(text).pprint() # Fail! 
read 'color' as data instead of next label -> [['shape', 'SQUARE color']] - - # use stopOn attribute for OneOrMore to avoid reading label string as part of the data - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']] - - # could also be written as - (attr_expr * (1,)).parseString(text).pprint() - """ - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + _ustr(self.expr) + "}..." - - return self.strRepr - -class ZeroOrMore(_MultipleMatch): - """ - Optional repetition of zero or more of the given expression. - - Parameters: - - expr - expression that must match zero or more times - - stopOn - (default=C{None}) - expression for a terminating sentinel - (only required if the sentinel would ordinarily match the repetition - expression) - - Example: similar to L{OneOrMore} - """ - def __init__( self, expr, stopOn=None): - super(ZeroOrMore,self).__init__(expr, stopOn=stopOn) - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - try: - return super(ZeroOrMore, self).parseImpl(instring, loc, doActions) - except (ParseException,IndexError): - return loc, [] - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "[" + _ustr(self.expr) + "]..." - - return self.strRepr - -class _NullToken(object): - def __bool__(self): - return False - __nonzero__ = __bool__ - def __str__(self): - return "" - -_optionalNotMatched = _NullToken() -class Optional(ParseElementEnhance): - """ - Optional matching of the given expression. - - Parameters: - - expr - expression that must match zero or more times - - default (optional) - value to be returned if the optional expression is not found. - - Example:: - # US postal code can be a 5-digit zip, plus optional 4-digit qualifier - zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4))) - zip.runTests(''' - # traditional ZIP code - 12345 - - # ZIP+4 form - 12101-0001 - - # invalid ZIP - 98765- - ''') - prints:: - # traditional ZIP code - 12345 - ['12345'] - - # ZIP+4 form - 12101-0001 - ['12101-0001'] - - # invalid ZIP - 98765- - ^ - FAIL: Expected end of text (at char 5), (line:1, col:6) - """ - def __init__( self, expr, default=_optionalNotMatched ): - super(Optional,self).__init__( expr, savelist=False ) - self.saveAsList = self.expr.saveAsList - self.defaultValue = default - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - try: - loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) - except (ParseException,IndexError): - if self.defaultValue is not _optionalNotMatched: - if self.expr.resultsName: - tokens = ParseResults([ self.defaultValue ]) - tokens[self.expr.resultsName] = self.defaultValue - else: - tokens = [ self.defaultValue ] - else: - tokens = [] - return loc, tokens - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "[" + _ustr(self.expr) + "]" - - return self.strRepr - -class SkipTo(ParseElementEnhance): - """ - Token for skipping over all undefined text until the matched expression is found. 
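Before the full ticket-report example below, a minimal `SkipTo` sketch against the standalone pyparsing 2.x API (the bracketed-text grammar is invented for this note):

```python
from pyparsing import SkipTo, Suppress

# grab everything between the brackets without describing it
bracketed = Suppress("[") + SkipTo("]")("text") + Suppress("]")
print(bracketed.parseString("[any text here]")["text"])  # -> any text here
```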
- - Parameters: - - expr - target expression marking the end of the data to be skipped - - include - (default=C{False}) if True, the target expression is also parsed - (the skipped text and target expression are returned as a 2-element list). - - ignore - (default=C{None}) used to define grammars (typically quoted strings and - comments) that might contain false matches to the target expression - - failOn - (default=C{None}) define expressions that are not allowed to be - included in the skipped test; if found before the target expression is found, - the SkipTo is not a match - - Example:: - report = ''' - Outstanding Issues Report - 1 Jan 2000 - - # | Severity | Description | Days Open - -----+----------+-------------------------------------------+----------- - 101 | Critical | Intermittent system crash | 6 - 94 | Cosmetic | Spelling error on Login ('log|n') | 14 - 79 | Minor | System slow when running too many reports | 47 - ''' - integer = Word(nums) - SEP = Suppress('|') - # use SkipTo to simply match everything up until the next SEP - # - ignore quoted strings, so that a '|' character inside a quoted string does not match - # - parse action will call token.strip() for each matched token, i.e., the description body - string_data = SkipTo(SEP, ignore=quotedString) - string_data.setParseAction(tokenMap(str.strip)) - ticket_expr = (integer("issue_num") + SEP - + string_data("sev") + SEP - + string_data("desc") + SEP - + integer("days_open")) - - for tkt in ticket_expr.searchString(report): - print tkt.dump() - prints:: - ['101', 'Critical', 'Intermittent system crash', '6'] - - days_open: 6 - - desc: Intermittent system crash - - issue_num: 101 - - sev: Critical - ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14'] - - days_open: 14 - - desc: Spelling error on Login ('log|n') - - issue_num: 94 - - sev: Cosmetic - ['79', 'Minor', 'System slow when running too many reports', '47'] - - days_open: 47 - - desc: System slow when running too many reports - - issue_num: 79 - - sev: Minor - """ - def __init__( self, other, include=False, ignore=None, failOn=None ): - super( SkipTo, self ).__init__( other ) - self.ignoreExpr = ignore - self.mayReturnEmpty = True - self.mayIndexError = False - self.includeMatch = include - self.asList = False - if isinstance(failOn, basestring): - self.failOn = ParserElement._literalStringClass(failOn) - else: - self.failOn = failOn - self.errmsg = "No match found for "+_ustr(self.expr) - - def parseImpl( self, instring, loc, doActions=True ): - startloc = loc - instrlen = len(instring) - expr = self.expr - expr_parse = self.expr._parse - self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None - self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None - - tmploc = loc - while tmploc <= instrlen: - if self_failOn_canParseNext is not None: - # break if failOn expression matches - if self_failOn_canParseNext(instring, tmploc): - break - - if self_ignoreExpr_tryParse is not None: - # advance past ignore expressions - while 1: - try: - tmploc = self_ignoreExpr_tryParse(instring, tmploc) - except ParseBaseException: - break - - try: - expr_parse(instring, tmploc, doActions=False, callPreParse=False) - except (ParseException, IndexError): - # no match, advance loc in string - tmploc += 1 - else: - # matched skipto expr, done - break - - else: - # ran off the end of the input string without matching skipto expr, fail - raise ParseException(instring, loc, self.errmsg, self) - - # build up return 
values - loc = tmploc - skiptext = instring[startloc:loc] - skipresult = ParseResults(skiptext) - - if self.includeMatch: - loc, mat = expr_parse(instring,loc,doActions,callPreParse=False) - skipresult += mat - - return loc, skipresult - -class Forward(ParseElementEnhance): - """ - Forward declaration of an expression to be defined later - - used for recursive grammars, such as algebraic infix notation. - When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. - - Note: take care when assigning to C{Forward} not to overlook precedence of operators. - Specifically, '|' has a lower precedence than '<<', so that:: - fwdExpr << a | b | c - will actually be evaluated as:: - (fwdExpr << a) | b | c - thereby leaving b and c out as parseable alternatives. It is recommended that you - explicitly group the values inserted into the C{Forward}:: - fwdExpr << (a | b | c) - Converting to use the '<<=' operator instead will avoid this problem. - - See L{ParseResults.pprint} for an example of a recursive parser created using - C{Forward}. - """ - def __init__( self, other=None ): - super(Forward,self).__init__( other, savelist=False ) - - def __lshift__( self, other ): - if isinstance( other, basestring ): - other = ParserElement._literalStringClass(other) - self.expr = other - self.strRepr = None - self.mayIndexError = self.expr.mayIndexError - self.mayReturnEmpty = self.expr.mayReturnEmpty - self.setWhitespaceChars( self.expr.whiteChars ) - self.skipWhitespace = self.expr.skipWhitespace - self.saveAsList = self.expr.saveAsList - self.ignoreExprs.extend(self.expr.ignoreExprs) - return self - - def __ilshift__(self, other): - return self << other - - def leaveWhitespace( self ): - self.skipWhitespace = False - return self - - def streamline( self ): - if not self.streamlined: - self.streamlined = True - if self.expr is not None: - self.expr.streamline() - return self - - def validate( self, validateTrace=[] ): - if self not in validateTrace: - tmp = validateTrace[:]+[self] - if self.expr is not None: - self.expr.validate(tmp) - self.checkRecursion([]) - - def __str__( self ): - if hasattr(self,"name"): - return self.name - return self.__class__.__name__ + ": ..." - - # stubbed out for now - creates awful memory and perf issues - self._revertClass = self.__class__ - self.__class__ = _ForwardNoRecurse - try: - if self.expr is not None: - retString = _ustr(self.expr) - else: - retString = "None" - finally: - self.__class__ = self._revertClass - return self.__class__.__name__ + ": " + retString - - def copy(self): - if self.expr is not None: - return super(Forward,self).copy() - else: - ret = Forward() - ret <<= self - return ret - -class _ForwardNoRecurse(Forward): - def __str__( self ): - return "..." - -class TokenConverter(ParseElementEnhance): - """ - Abstract subclass of C{ParseExpression}, for converting parsed results. - """ - def __init__( self, expr, savelist=False ): - super(TokenConverter,self).__init__( expr )#, savelist ) - self.saveAsList = False - -class Combine(TokenConverter): - """ - Converter to concatenate all matching tokens to a single string. - By default, the matching patterns must also be contiguous in the input string; - this can be disabled by specifying C{'adjacent=False'} in the constructor. - - Example:: - real = Word(nums) + '.' + Word(nums) - print(real.parseString('3.1416')) # -> ['3', '.', '1416'] - # will also erroneously match the following - print(real.parseString('3. 
1416')) # -> ['3', '.', '1416'] - - real = Combine(Word(nums) + '.' + Word(nums)) - print(real.parseString('3.1416')) # -> ['3.1416'] - # no match when there are internal spaces - print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...) - """ - def __init__( self, expr, joinString="", adjacent=True ): - super(Combine,self).__init__( expr ) - # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself - if adjacent: - self.leaveWhitespace() - self.adjacent = adjacent - self.skipWhitespace = True - self.joinString = joinString - self.callPreparse = True - - def ignore( self, other ): - if self.adjacent: - ParserElement.ignore(self, other) - else: - super( Combine, self).ignore( other ) - return self - - def postParse( self, instring, loc, tokenlist ): - retToks = tokenlist.copy() - del retToks[:] - retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) - - if self.resultsName and retToks.haskeys(): - return [ retToks ] - else: - return retToks - -class Group(TokenConverter): - """ - Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions. - - Example:: - ident = Word(alphas) - num = Word(nums) - term = ident | num - func = ident + Optional(delimitedList(term)) - print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100'] - - func = ident + Group(Optional(delimitedList(term))) - print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']] - """ - def __init__( self, expr ): - super(Group,self).__init__( expr ) - self.saveAsList = True - - def postParse( self, instring, loc, tokenlist ): - return [ tokenlist ] - -class Dict(TokenConverter): - """ - Converter to return a repetitive expression as a list, but also as a dictionary. - Each element can also be referenced using the first token in the expression as its key. - Useful for tabular report scraping when the first column can be used as a item key. - - Example:: - data_word = Word(alphas) - label = data_word + FollowedBy(':') - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) - - text = "shape: SQUARE posn: upper left color: light blue texture: burlap" - attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - - # print attributes as plain groups - print(OneOrMore(attr_expr).parseString(text).dump()) - - # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names - result = Dict(OneOrMore(Group(attr_expr))).parseString(text) - print(result.dump()) - - # access named fields as dict entries, or output as dict - print(result['shape']) - print(result.asDict()) - prints:: - ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap'] - - [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] - - color: light blue - - posn: upper left - - shape: SQUARE - - texture: burlap - SQUARE - {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'} - See more examples at L{ParseResults} of accessing fields by results name. 
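Alongside the report-scraping example above, the core `Dict` mechanism in its smallest form, sketched against the standalone pyparsing 2.x API (the `x=1 y=2` grammar is a made-up example for this note):

```python
from pyparsing import Dict, Group, OneOrMore, Suppress, Word, alphas, nums

# Dict turns the first token of each Group into a results name
entry = Group(Word(alphas) + Suppress("=") + Word(nums))
table = Dict(OneOrMore(entry))
print(table.parseString("x=1 y=2")["y"])  # -> 2
```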
- """ - def __init__( self, expr ): - super(Dict,self).__init__( expr ) - self.saveAsList = True - - def postParse( self, instring, loc, tokenlist ): - for i,tok in enumerate(tokenlist): - if len(tok) == 0: - continue - ikey = tok[0] - if isinstance(ikey,int): - ikey = _ustr(tok[0]).strip() - if len(tok)==1: - tokenlist[ikey] = _ParseResultsWithOffset("",i) - elif len(tok)==2 and not isinstance(tok[1],ParseResults): - tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) - else: - dictvalue = tok.copy() #ParseResults(i) - del dictvalue[0] - if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()): - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) - else: - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) - - if self.resultsName: - return [ tokenlist ] - else: - return tokenlist - - -class Suppress(TokenConverter): - """ - Converter for ignoring the results of a parsed expression. - - Example:: - source = "a, b, c,d" - wd = Word(alphas) - wd_list1 = wd + ZeroOrMore(',' + wd) - print(wd_list1.parseString(source)) - - # often, delimiters that are useful during parsing are just in the - # way afterward - use Suppress to keep them out of the parsed output - wd_list2 = wd + ZeroOrMore(Suppress(',') + wd) - print(wd_list2.parseString(source)) - prints:: - ['a', ',', 'b', ',', 'c', ',', 'd'] - ['a', 'b', 'c', 'd'] - (See also L{delimitedList}.) - """ - def postParse( self, instring, loc, tokenlist ): - return [] - - def suppress( self ): - return self - - -class OnlyOnce(object): - """ - Wrapper for parse actions, to ensure they are only called once. - """ - def __init__(self, methodCall): - self.callable = _trim_arity(methodCall) - self.called = False - def __call__(self,s,l,t): - if not self.called: - results = self.callable(s,l,t) - self.called = True - return results - raise ParseException(s,l,"") - def reset(self): - self.called = False - -def traceParseAction(f): - """ - Decorator for debugging parse actions. - - When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".} - When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised. - - Example:: - wd = Word(alphas) - - @traceParseAction - def remove_duplicate_chars(tokens): - return ''.join(sorted(set(''.join(tokens)))) - - wds = OneOrMore(wd).setParseAction(remove_duplicate_chars) - print(wds.parseString("slkdjs sld sldd sdlf sdljf")) - prints:: - >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {})) - <3: - thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc - sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) ) - try: - ret = f(*paArgs) - except Exception as exc: - sys.stderr.write( "< ['aa', 'bb', 'cc'] - delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] - """ - dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." - if combine: - return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName) - else: - return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName) - -def countedArray( expr, intExpr=None ): - """ - Helper to define a counted list of expressions. - This helper defines a pattern of the form:: - integer expr expr expr... - where the leading integer tells how many expr expressions follow. 
- The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed. - - If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value. - - Example:: - countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd'] - - # in this parser, the leading integer value is given in binary, - # '10' indicating that 2 values are in the array - binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2)) - countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd'] - """ - arrayExpr = Forward() - def countFieldParseAction(s,l,t): - n = t[0] - arrayExpr << (n and Group(And([expr]*n)) or Group(empty)) - return [] - if intExpr is None: - intExpr = Word(nums).setParseAction(lambda t:int(t[0])) - else: - intExpr = intExpr.copy() - intExpr.setName("arrayLen") - intExpr.addParseAction(countFieldParseAction, callDuringTry=True) - return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...') - -def _flatten(L): - ret = [] - for i in L: - if isinstance(i,list): - ret.extend(_flatten(i)) - else: - ret.append(i) - return ret - -def matchPreviousLiteral(expr): - """ - Helper to define an expression that is indirectly defined from - the tokens matched in a previous expression, that is, it looks - for a 'repeat' of a previous expression. For example:: - first = Word(nums) - second = matchPreviousLiteral(first) - matchExpr = first + ":" + second - will match C{"1:1"}, but not C{"1:2"}. Because this matches a - previous literal, will also match the leading C{"1:1"} in C{"1:10"}. - If this is not desired, use C{matchPreviousExpr}. - Do I{not} use with packrat parsing enabled. - """ - rep = Forward() - def copyTokenToRepeater(s,l,t): - if t: - if len(t) == 1: - rep << t[0] - else: - # flatten t tokens - tflat = _flatten(t.asList()) - rep << And(Literal(tt) for tt in tflat) - else: - rep << Empty() - expr.addParseAction(copyTokenToRepeater, callDuringTry=True) - rep.setName('(prev) ' + _ustr(expr)) - return rep - -def matchPreviousExpr(expr): - """ - Helper to define an expression that is indirectly defined from - the tokens matched in a previous expression, that is, it looks - for a 'repeat' of a previous expression. For example:: - first = Word(nums) - second = matchPreviousExpr(first) - matchExpr = first + ":" + second - will match C{"1:1"}, but not C{"1:2"}. Because this matches by - expressions, will I{not} match the leading C{"1:1"} in C{"1:10"}; - the expressions are evaluated first, and then compared, so - C{"1"} is compared with C{"10"}. - Do I{not} use with packrat parsing enabled. - """ - rep = Forward() - e2 = expr.copy() - rep <<= e2 - def copyTokenToRepeater(s,l,t): - matchTokens = _flatten(t.asList()) - def mustMatchTheseTokens(s,l,t): - theseTokens = _flatten(t.asList()) - if theseTokens != matchTokens: - raise ParseException("",0,"") - rep.setParseAction( mustMatchTheseTokens, callDuringTry=True ) - expr.addParseAction(copyTokenToRepeater, callDuringTry=True) - rep.setName('(prev) ' + _ustr(expr)) - return rep - -def _escapeRegexRangeChars(s): - #~ escape these chars: ^-] - for c in r"\^-]": - s = s.replace(c,_bslash+c) - s = s.replace("\n",r"\n") - s = s.replace("\t",r"\t") - return _ustr(s) - -def oneOf( strs, caseless=False, useRegex=True ): - """ - Helper to quickly define a set of alternative Literals, and makes sure to do - longest-first testing when there is a conflict, regardless of the input order, - but returns a C{L{MatchFirst}} for best performance. 
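The longest-first reordering described above, shown in a minimal sketch against the standalone pyparsing 2.x API:

```python
from pyparsing import oneOf

# '<=' is tested before '<' even though it is declared later
op = oneOf("< > = <= >= !=")
print(op.parseString("<="))  # -> ['<=']
```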
- - Parameters: - - strs - a string of space-delimited literals, or a collection of string literals - - caseless - (default=C{False}) - treat all literals as caseless - - useRegex - (default=C{True}) - as an optimization, will generate a Regex - object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or - if creating a C{Regex} raises an exception) - - Example:: - comp_oper = oneOf("< = > <= >= !=") - var = Word(alphas) - number = Word(nums) - term = var | number - comparison_expr = term + comp_oper + term - print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12")) - prints:: - [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']] - """ - if caseless: - isequal = ( lambda a,b: a.upper() == b.upper() ) - masks = ( lambda a,b: b.upper().startswith(a.upper()) ) - parseElementClass = CaselessLiteral - else: - isequal = ( lambda a,b: a == b ) - masks = ( lambda a,b: b.startswith(a) ) - parseElementClass = Literal - - symbols = [] - if isinstance(strs,basestring): - symbols = strs.split() - elif isinstance(strs, Iterable): - symbols = list(strs) - else: - warnings.warn("Invalid argument to oneOf, expected string or iterable", - SyntaxWarning, stacklevel=2) - if not symbols: - return NoMatch() - - i = 0 - while i < len(symbols)-1: - cur = symbols[i] - for j,other in enumerate(symbols[i+1:]): - if ( isequal(other, cur) ): - del symbols[i+j+1] - break - elif ( masks(cur, other) ): - del symbols[i+j+1] - symbols.insert(i,other) - cur = other - break - else: - i += 1 - - if not caseless and useRegex: - #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) - try: - if len(symbols)==len("".join(symbols)): - return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols)) - else: - return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols)) - except Exception: - warnings.warn("Exception creating Regex for oneOf, building MatchFirst", - SyntaxWarning, stacklevel=2) - - - # last resort, just use MatchFirst - return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols)) - -def dictOf( key, value ): - """ - Helper to easily and clearly define a dictionary by specifying the respective patterns - for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens - in the proper order. The key pattern can include delimiting markers or punctuation, - as long as they are suppressed, thereby leaving the significant key text. The value - pattern can include named results, so that the C{Dict} results can include named token - fields. 
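A compact companion to the `dictOf` description above, sketched against the standalone pyparsing 2.x API (the `width`/`height` input is invented for this note):

```python
from pyparsing import Suppress, Word, alphas, nums, dictOf

# dictOf wires up Dict(ZeroOrMore(Group(key + value))) for you
attrs = dictOf(Word(alphas) + Suppress(":"), Word(nums))
print(attrs.parseString("width: 5 height: 10")["height"])  # -> 10
```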
-
-    Example::
-        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
-        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
-        print(OneOrMore(attr_expr).parseString(text).dump())
-
-        attr_label = label
-        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
-
-        # similar to Dict, but simpler call format
-        result = dictOf(attr_label, attr_value).parseString(text)
-        print(result.dump())
-        print(result['shape'])
-        print(result.shape)  # object attribute access works too
-        print(result.asDict())
-    prints::
-        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
-        - color: light blue
-        - posn: upper left
-        - shape: SQUARE
-        - texture: burlap
-        SQUARE
-        SQUARE
-        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
-    """
-    return Dict( ZeroOrMore( Group ( key + value ) ) )
-
-def originalTextFor(expr, asString=True):
-    """
-    Helper to return the original, untokenized text for a given expression. Useful to
-    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
-    revert separate tokens with intervening whitespace back to the original matching
-    input text. By default, returns astring containing the original parsed text.
-
-    If the optional C{asString} argument is passed as C{False}, then the return value is a
-    C{L{ParseResults}} containing any results names that were originally matched, and a
-    single token containing the original matched text from the input string. So if
-    the expression passed to C{L{originalTextFor}} contains expressions with defined
-    results names, you must set C{asString} to C{False} if you want to preserve those
-    results name values.
-
-    Example::
-        src = "this is test <b> bold <i>text</i> </b> normal text "
-        for tag in ("b","i"):
-            opener,closer = makeHTMLTags(tag)
-            patt = originalTextFor(opener + SkipTo(closer) + closer)
-            print(patt.searchString(src)[0])
-    prints::
-        ['<b> bold <i>text</i> </b>']
-        ['<i>text</i>']
-    """
-    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
-    endlocMarker = locMarker.copy()
-    endlocMarker.callPreparse = False
-    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
-    if asString:
-        extractText = lambda s,l,t: s[t._original_start:t._original_end]
-    else:
-        def extractText(s,l,t):
-            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
-    matchExpr.setParseAction(extractText)
-    matchExpr.ignoreExprs = expr.ignoreExprs
-    return matchExpr
-
-def ungroup(expr):
-    """
-    Helper to undo pyparsing's default grouping of And expressions, even
-    if all but one are non-empty.
-    """
-    return TokenConverter(expr).setParseAction(lambda t:t[0])
-
-def locatedExpr(expr):
-    """
-    Helper to decorate a returned token with its starting and ending locations in the input string.
-    This helper adds the following results names:
-     - locn_start = location where matched expression begins
-     - locn_end = location where matched expression ends
-     - value = the actual parsed results
-
-    Be careful if the input text contains C{<TAB>} characters, you may want to call
-    C{L{ParserElement.parseWithTabs}}
-
-    Example::
-        wd = Word(alphas)
-        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
-            print(match)
-    prints::
-        [[0, 'ljsdf', 5]]
-        [[8, 'lksdjjf', 15]]
-        [[18, 'lkkjj', 23]]
-    """
-    locator = Empty().setParseAction(lambda s,l,t: l)
-    return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
-
-
-# convenience constants for positional expressions
-empty       = Empty().setName("empty")
-lineStart   = LineStart().setName("lineStart")
-lineEnd     = LineEnd().setName("lineEnd")
-stringStart = StringStart().setName("stringStart")
-stringEnd   = StringEnd().setName("stringEnd")
-
-_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
-_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
-_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
-_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
-_charRange = Group(_singleChar + Suppress("-") + _singleChar)
-_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
-
-def srange(s):
-    r"""
-    Helper to easily define string ranges for use in Word construction. Borrows
-    syntax from regexp '[]' string range definitions::
-        srange("[0-9]")   -> "0123456789"
-        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
-        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
-    The input string must be enclosed in []'s, and the returned string is the expanded
-    character set joined into a single string.
-    The values enclosed in the []'s may be:
-     - a single character
-     - an escaped character with a leading backslash (such as C{\-} or C{\]})
-     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
-         (C{\0x##} is also supported for backwards compatibility)
-     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
-     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
-     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
-    """
-    _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
-    try:
-        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
-    except Exception:
-        return ""
-
-def matchOnlyAtCol(n):
-    """
-    Helper method for defining parse actions that require matching at a specific
-    column in the input text.
-    """
-    def verifyCol(strg,locn,toks):
-        if col(locn,strg) != n:
-            raise ParseException(strg,locn,"matched token not at column %d" % n)
-    return verifyCol
-
-def replaceWith(replStr):
-    """
-    Helper method for common parse actions that simply return a literal value. Especially
-    useful when used with C{L{transformString}()}.
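Complementing the docstring example that follows (which uses `parseString`), a minimal `transformString` sketch against the standalone pyparsing 2.x API (the "N/A" normalization is invented for this note):

```python
from pyparsing import oneOf, replaceWith

# normalize both spellings of "not available" during transformString
na = oneOf("N/A NA").setParseAction(replaceWith("NULL"))
print(na.transformString("a=1, b=N/A, c=NA"))  # -> a=1, b=NULL, c=NULL
```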
- - Example:: - num = Word(nums).setParseAction(lambda toks: int(toks[0])) - na = oneOf("N/A NA").setParseAction(replaceWith(math.nan)) - term = na | num - - OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234] - """ - return lambda s,l,t: [replStr] - -def removeQuotes(s,l,t): - """ - Helper parse action for removing quotation marks from parsed quoted strings. - - Example:: - # by default, quotation marks are included in parsed results - quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"] - - # use removeQuotes to strip quotation marks from parsed results - quotedString.setParseAction(removeQuotes) - quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"] - """ - return t[0][1:-1] - -def tokenMap(func, *args): - """ - Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional - args are passed, they are forwarded to the given function as additional arguments after - the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the - parsed data to an integer using base 16. - - Example (compare the last to example in L{ParserElement.transformString}:: - hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16)) - hex_ints.runTests(''' - 00 11 22 aa FF 0a 0d 1a - ''') - - upperword = Word(alphas).setParseAction(tokenMap(str.upper)) - OneOrMore(upperword).runTests(''' - my kingdom for a horse - ''') - - wd = Word(alphas).setParseAction(tokenMap(str.title)) - OneOrMore(wd).setParseAction(' '.join).runTests(''' - now is the winter of our discontent made glorious summer by this sun of york - ''') - prints:: - 00 11 22 aa FF 0a 0d 1a - [0, 17, 34, 170, 255, 10, 13, 26] - - my kingdom for a horse - ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE'] - - now is the winter of our discontent made glorious summer by this sun of york - ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York'] - """ - def pa(s,l,t): - return [func(tokn, *args) for tokn in t] - - try: - func_name = getattr(func, '__name__', - getattr(func, '__class__').__name__) - except Exception: - func_name = str(func) - pa.__name__ = func_name - - return pa - -upcaseTokens = tokenMap(lambda t: _ustr(t).upper()) -"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}""" - -downcaseTokens = tokenMap(lambda t: _ustr(t).lower()) -"""(Deprecated) Helper parse action to convert tokens to lower case. 
-Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
-
-def _makeTags(tagStr, xml):
-    """Internal helper to construct opening and closing tag expressions, given a tag name"""
-    if isinstance(tagStr,basestring):
-        resname = tagStr
-        tagStr = Keyword(tagStr, caseless=not xml)
-    else:
-        resname = tagStr.name
-
-    tagAttrName = Word(alphas,alphanums+"_-:")
-    if (xml):
-        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
-        openTag = Suppress("<") + tagStr("tag") + \
-                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
-                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
-    else:
-        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
-        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
-        openTag = Suppress("<") + tagStr("tag") + \
-                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
-                Optional( Suppress("=") + tagAttrValue ) ))) + \
-                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
-    closeTag = Combine(_L("</") + tagStr + ">")
-
-    openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname)
-    closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % resname)
-    openTag.tag = resname
-    closeTag.tag = resname
-    return openTag, closeTag
-
-def makeHTMLTags(tagStr):
-    """
-    Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
-    tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.
-
-    Example::
-        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
-        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
-        a,a_end = makeHTMLTags("A")
-        link_expr = a + SkipTo(a_end)("link_text") + a_end
-
-        for link in link_expr.searchString(text):
-            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
-            print(link.link_text, '->', link.href)
-    prints::
-        pyparsing -> http://pyparsing.wikispaces.com
-    """
-    return _makeTags( tagStr, False )
-
-def makeXMLTags(tagStr):
-    """
-    Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
-    tags only in the given upper/lower case.
-
-    Example: similar to L{makeHTMLTags}
-    """
-    return _makeTags( tagStr, True )
-
-def withAttribute(*args,**attrDict):
-    """
-    Helper to create a validating parse action to be used with start tags created
-    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
-    with a required attribute value, to avoid false matches on common tags such as
-    C{<TD>} or C{<DIV>}.
-
-    Call C{withAttribute} with a series of attribute names and values. Specify the list
-    of filter attributes names and values as:
-     - keyword arguments, as in C{(align="right")}, or
-     - as an explicit dict with C{**} operator, when an attribute name is also a Python
-          reserved word, as in C{**{"class":"Customer", "align":"right"}}
-     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
-    For attribute names with a namespace prefix, you must use the second form. Attribute
-    names are matched insensitive to upper/lower case.
-
-    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
-
-    To verify that the attribute exists, but without specifying a value, pass
-    C{withAttribute.ANY_VALUE} as the value.
-
-    Example::
-        html = '''
-            <div>
-            Some text
-            <div type="grid">1 4 0 1 0 </div>
-            <div type="graph">1,3 2,3 1,1 </div>
-            <div>this has no type</div>
-            </div>
- - ''' - div,div_end = makeHTMLTags("div") - - # only match div tag having a type attribute with value "grid" - div_grid = div().setParseAction(withAttribute(type="grid")) - grid_expr = div_grid + SkipTo(div | div_end)("body") - for grid_header in grid_expr.searchString(html): - print(grid_header.body) - - # construct a match with any div tag having a type attribute, regardless of the value - div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE)) - div_expr = div_any_type + SkipTo(div | div_end)("body") - for div_header in div_expr.searchString(html): - print(div_header.body) - prints:: - 1 4 0 1 0 - - 1 4 0 1 0 - 1,3 2,3 1,1 - """ - if args: - attrs = args[:] - else: - attrs = attrDict.items() - attrs = [(k,v) for k,v in attrs] - def pa(s,l,tokens): - for attrName,attrValue in attrs: - if attrName not in tokens: - raise ParseException(s,l,"no matching attribute " + attrName) - if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: - raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % - (attrName, tokens[attrName], attrValue)) - return pa -withAttribute.ANY_VALUE = object() - -def withClass(classname, namespace=''): - """ - Simplified version of C{L{withAttribute}} when matching on a div class - made - difficult because C{class} is a reserved word in Python. - - Example:: - html = ''' -
-            <div>
-            Some text
-            <div class="grid">1 4 0 1 0</div>
-            <div class="graph">1,3 2,3 1,1</div>
-            <div>this &lt;div&gt; has no class</div>
-            </div>
- - ''' - div,div_end = makeHTMLTags("div") - div_grid = div().setParseAction(withClass("grid")) - - grid_expr = div_grid + SkipTo(div | div_end)("body") - for grid_header in grid_expr.searchString(html): - print(grid_header.body) - - div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE)) - div_expr = div_any_type + SkipTo(div | div_end)("body") - for div_header in div_expr.searchString(html): - print(div_header.body) - prints:: - 1 4 0 1 0 - - 1 4 0 1 0 - 1,3 2,3 1,1 - """ - classattr = "%s:class" % namespace if namespace else "class" - return withAttribute(**{classattr : classname}) - -opAssoc = _Constants() -opAssoc.LEFT = object() -opAssoc.RIGHT = object() - -def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ): - """ - Helper method for constructing grammars of expressions made up of - operators working in a precedence hierarchy. Operators may be unary or - binary, left- or right-associative. Parse actions can also be attached - to operator expressions. The generated parser will also recognize the use - of parentheses to override operator precedences (see example below). - - Note: if you define a deep operator list, you may see performance issues - when using infixNotation. See L{ParserElement.enablePackrat} for a - mechanism to potentially improve your parser performance. - - Parameters: - - baseExpr - expression representing the most basic element for the nested - - opList - list of tuples, one for each operator precedence level in the - expression grammar; each tuple is of the form - (opExpr, numTerms, rightLeftAssoc, parseAction), where: - - opExpr is the pyparsing expression for the operator; - may also be a string, which will be converted to a Literal; - if numTerms is 3, opExpr is a tuple of two expressions, for the - two operators separating the 3 terms - - numTerms is the number of terms for this operator (must - be 1, 2, or 3) - - rightLeftAssoc is the indicator whether the operator is - right or left associative, using the pyparsing-defined - constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}. 
- - parseAction is the parse action to be associated with - expressions matching this operator expression (the - parse action tuple member may be omitted); if the parse action - is passed a tuple or list of functions, this is equivalent to - calling C{setParseAction(*fn)} (L{ParserElement.setParseAction}) - - lpar - expression for matching left-parentheses (default=C{Suppress('(')}) - - rpar - expression for matching right-parentheses (default=C{Suppress(')')}) - - Example:: - # simple example of four-function arithmetic with ints and variable names - integer = pyparsing_common.signed_integer - varname = pyparsing_common.identifier - - arith_expr = infixNotation(integer | varname, - [ - ('-', 1, opAssoc.RIGHT), - (oneOf('* /'), 2, opAssoc.LEFT), - (oneOf('+ -'), 2, opAssoc.LEFT), - ]) - - arith_expr.runTests(''' - 5+3*6 - (5+3)*6 - -2--11 - ''', fullDump=False) - prints:: - 5+3*6 - [[5, '+', [3, '*', 6]]] - - (5+3)*6 - [[[5, '+', 3], '*', 6]] - - -2--11 - [[['-', 2], '-', ['-', 11]]] - """ - ret = Forward() - lastExpr = baseExpr | ( lpar + ret + rpar ) - for i,operDef in enumerate(opList): - opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] - termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr - if arity == 3: - if opExpr is None or len(opExpr) != 2: - raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") - opExpr1, opExpr2 = opExpr - thisExpr = Forward().setName(termName) - if rightLeftAssoc == opAssoc.LEFT: - if arity == 1: - matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) - elif arity == 2: - if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) - else: - matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) - elif arity == 3: - matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ - Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) - else: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") - elif rightLeftAssoc == opAssoc.RIGHT: - if arity == 1: - # try to avoid LR with this extra test - if not isinstance(opExpr, Optional): - opExpr = Optional(opExpr) - matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) - elif arity == 2: - if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) - else: - matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) - elif arity == 3: - matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ - Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) - else: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") - else: - raise ValueError("operator must indicate right or left associativity") - if pa: - if isinstance(pa, (tuple, list)): - matchExpr.setParseAction(*pa) - else: - matchExpr.setParseAction(pa) - thisExpr <<= ( matchExpr.setName(termName) | lastExpr ) - lastExpr = thisExpr - ret <<= lastExpr - return ret - -operatorPrecedence = infixNotation -"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release.""" - -dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes") -sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes") 
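A quick illustration of the precedence machinery documented above: a minimal boolean-expression grammar built with infixNotation. This is an illustrative sketch only, assuming pyparsing is importable; the names (operand, bool_expr) are invented for the example:

    from pyparsing import Keyword, Word, alphas, infixNotation, opAssoc

    # Operands: the keywords True/False, or a bare identifier.
    operand = Keyword("True") | Keyword("False") | Word(alphas)

    # Precedence levels, tightest binding first: not > and > or.
    bool_expr = infixNotation(operand, [
        ("not", 1, opAssoc.RIGHT),
        ("and", 2, opAssoc.LEFT),
        ("or",  2, opAssoc.LEFT),
    ])

    # Parentheses override precedence, as described above.
    print(bool_expr.parseString("p and not (q or r)"))
    # roughly: [['p', 'and', ['not', ['q', 'or', 'r']]]]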
-quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'| - Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes") -unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal") - -def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()): - """ - Helper method for defining nested lists enclosed in opening and closing - delimiters ("(" and ")" are the default). - - Parameters: - - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression - - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression - - content - expression for items within the nested lists (default=C{None}) - - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString}) - - If an expression is not provided for the content argument, the nested - expression will capture all whitespace-delimited content between delimiters - as a list of separate values. - - Use the C{ignoreExpr} argument to define expressions that may contain - opening or closing characters that should not be treated as opening - or closing characters for nesting, such as quotedString or a comment - expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. - The default is L{quotedString}, but if no expressions are to be ignored, - then pass C{None} for this argument. - - Example:: - data_type = oneOf("void int short long char float double") - decl_data_type = Combine(data_type + Optional(Word('*'))) - ident = Word(alphas+'_', alphanums+'_') - number = pyparsing_common.number - arg = Group(decl_data_type + ident) - LPAR,RPAR = map(Suppress, "()") - - code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment)) - - c_function = (decl_data_type("type") - + ident("name") - + LPAR + Optional(delimitedList(arg), [])("args") + RPAR - + code_body("body")) - c_function.ignore(cStyleComment) - - source_code = ''' - int is_odd(int x) { - return (x%2); - } - - int dec_to_hex(char hchar) { - if (hchar >= '0' && hchar <= '9') { - return (ord(hchar)-ord('0')); - } else { - return (10+ord(hchar)-ord('A')); - } - } - ''' - for func in c_function.searchString(source_code): - print("%(name)s (%(type)s) args: %(args)s" % func) - - prints:: - is_odd (int) args: [['int', 'x']] - dec_to_hex (int) args: [['char', 'hchar']] - """ - if opener == closer: - raise ValueError("opening and closing strings cannot be the same") - if content is None: - if isinstance(opener,basestring) and isinstance(closer,basestring): - if len(opener) == 1 and len(closer)==1: - if ignoreExpr is not None: - content = (Combine(OneOrMore(~ignoreExpr + - CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) - else: - content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS - ).setParseAction(lambda t:t[0].strip())) - else: - if ignoreExpr is not None: - content = (Combine(OneOrMore(~ignoreExpr + - ~Literal(opener) + ~Literal(closer) + - CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) - else: - content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + - CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) - else: - raise ValueError("opening and closing arguments must be strings if no content expression is given") - ret = 
Forward() - if ignoreExpr is not None: - ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) - else: - ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) - ret.setName('nested %s%s expression' % (opener,closer)) - return ret - -def indentedBlock(blockStatementExpr, indentStack, indent=True): - """ - Helper method for defining space-delimited indentation blocks, such as - those used to define block statements in Python source code. - - Parameters: - - blockStatementExpr - expression defining syntax of statement that - is repeated within the indented block - - indentStack - list created by caller to manage indentation stack - (multiple statementWithIndentedBlock expressions within a single grammar - should share a common indentStack) - - indent - boolean indicating whether block must be indented beyond the - the current level; set to False for block of left-most statements - (default=C{True}) - - A valid block must contain at least one C{blockStatement}. - - Example:: - data = ''' - def A(z): - A1 - B = 100 - G = A2 - A2 - A3 - B - def BB(a,b,c): - BB1 - def BBA(): - bba1 - bba2 - bba3 - C - D - def spam(x,y): - def eggs(z): - pass - ''' - - - indentStack = [1] - stmt = Forward() - - identifier = Word(alphas, alphanums) - funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":") - func_body = indentedBlock(stmt, indentStack) - funcDef = Group( funcDecl + func_body ) - - rvalue = Forward() - funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")") - rvalue << (funcCall | identifier | Word(nums)) - assignment = Group(identifier + "=" + rvalue) - stmt << ( funcDef | assignment | identifier ) - - module_body = OneOrMore(stmt) - - parseTree = module_body.parseString(data) - parseTree.pprint() - prints:: - [['def', - 'A', - ['(', 'z', ')'], - ':', - [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]], - 'B', - ['def', - 'BB', - ['(', 'a', 'b', 'c', ')'], - ':', - [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]], - 'C', - 'D', - ['def', - 'spam', - ['(', 'x', 'y', ')'], - ':', - [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] - """ - def checkPeerIndent(s,l,t): - if l >= len(s): return - curCol = col(l,s) - if curCol != indentStack[-1]: - if curCol > indentStack[-1]: - raise ParseFatalException(s,l,"illegal nesting") - raise ParseException(s,l,"not a peer entry") - - def checkSubIndent(s,l,t): - curCol = col(l,s) - if curCol > indentStack[-1]: - indentStack.append( curCol ) - else: - raise ParseException(s,l,"not a subentry") - - def checkUnindent(s,l,t): - if l >= len(s): return - curCol = col(l,s) - if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): - raise ParseException(s,l,"not an unindent") - indentStack.pop() - - NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) - INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT') - PEER = Empty().setParseAction(checkPeerIndent).setName('') - UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT') - if indent: - smExpr = Group( Optional(NL) + - #~ FollowedBy(blockStatementExpr) + - INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) - else: - smExpr = Group( Optional(NL) + - (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) - blockStatementExpr.ignore(_bslash + LineEnd()) - return smExpr.setName('indented block') - -alphas8bit = 
srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") -punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") - -anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag')) -_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\'')) -commonHTMLEntity = Regex('&(?P' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity") -def replaceHTMLEntity(t): - """Helper parser action to replace common HTML entities with their special characters""" - return _htmlEntityMap.get(t.entity) - -# it's easy to get these comment structures wrong - they're very common, so may as well make them available -cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment") -"Comment of the form C{/* ... */}" - -htmlComment = Regex(r"").setName("HTML comment") -"Comment of the form C{}" - -restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line") -dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment") -"Comment of the form C{// ... (to end of line)}" - -cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment") -"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}" - -javaStyleComment = cppStyleComment -"Same as C{L{cppStyleComment}}" - -pythonStyleComment = Regex(r"#.*").setName("Python style comment") -"Comment of the form C{# ... (to end of line)}" - -_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') + - Optional( Word(" \t") + - ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem") -commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList") -"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas. 
- This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}.""" - -# some other useful expressions - using lower-case class name since we are really using this as a namespace -class pyparsing_common: - """ - Here are some common low-level expressions that may be useful in jump-starting parser development: - - numeric forms (L{integers}, L{reals}, L{scientific notation}) - - common L{programming identifiers} - - network addresses (L{MAC}, L{IPv4}, L{IPv6}) - - ISO8601 L{dates} and L{datetime} - - L{UUID} - - L{comma-separated list} - Parse actions: - - C{L{convertToInteger}} - - C{L{convertToFloat}} - - C{L{convertToDate}} - - C{L{convertToDatetime}} - - C{L{stripHTMLTags}} - - C{L{upcaseTokens}} - - C{L{downcaseTokens}} - - Example:: - pyparsing_common.number.runTests(''' - # any int or real number, returned as the appropriate type - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - ''') - - pyparsing_common.fnumber.runTests(''' - # any int or real number, returned as float - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - ''') - - pyparsing_common.hex_integer.runTests(''' - # hex numbers - 100 - FF - ''') - - pyparsing_common.fraction.runTests(''' - # fractions - 1/2 - -3/4 - ''') - - pyparsing_common.mixed_integer.runTests(''' - # mixed fractions - 1 - 1/2 - -3/4 - 1-3/4 - ''') - - import uuid - pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) - pyparsing_common.uuid.runTests(''' - # uuid - 12345678-1234-5678-1234-567812345678 - ''') - prints:: - # any int or real number, returned as the appropriate type - 100 - [100] - - -100 - [-100] - - +100 - [100] - - 3.14159 - [3.14159] - - 6.02e23 - [6.02e+23] - - 1e-12 - [1e-12] - - # any int or real number, returned as float - 100 - [100.0] - - -100 - [-100.0] - - +100 - [100.0] - - 3.14159 - [3.14159] - - 6.02e23 - [6.02e+23] - - 1e-12 - [1e-12] - - # hex numbers - 100 - [256] - - FF - [255] - - # fractions - 1/2 - [0.5] - - -3/4 - [-0.75] - - # mixed fractions - 1 - [1] - - 1/2 - [0.5] - - -3/4 - [-0.75] - - 1-3/4 - [1.75] - - # uuid - 12345678-1234-5678-1234-567812345678 - [UUID('12345678-1234-5678-1234-567812345678')] - """ - - convertToInteger = tokenMap(int) - """ - Parse action for converting parsed integers to Python int - """ - - convertToFloat = tokenMap(float) - """ - Parse action for converting parsed numbers to Python float - """ - - integer = Word(nums).setName("integer").setParseAction(convertToInteger) - """expression that parses an unsigned integer, returns an int""" - - hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16)) - """expression that parses a hexadecimal integer, returns an int""" - - signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) - """expression that parses an integer with optional leading sign, returns an int""" - - fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") - """fractional expression of an integer divided by an integer, returns a float""" - fraction.addParseAction(lambda t: t[0]/t[-1]) - - mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") - """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" - mixed_integer.addParseAction(sum) - - real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat) - """expression that parses a floating point number and returns a 
float""" - - sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat) - """expression that parses a floating point number with optional scientific notation and returns a float""" - - # streamlining this expression makes the docs nicer-looking - number = (sci_real | real | signed_integer).streamline() - """any numeric expression, returns the corresponding Python type""" - - fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) - """any int or real number, returned as float""" - - identifier = Word(alphas+'_', alphanums+'_').setName("identifier") - """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" - - ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") - "IPv4 address (C{0.0.0.0 - 255.255.255.255})" - - _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") - _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address") - _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address") - _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) - _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") - ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") - "IPv6 address (long, short, or mixed form)" - - mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") - "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' 
delimiters)"
-
-    @staticmethod
-    def convertToDate(fmt="%Y-%m-%d"):
-        """
-        Helper to create a parse action for converting parsed date string to Python datetime.date
-
-        Params -
-         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})
-
-        Example::
-            date_expr = pyparsing_common.iso8601_date.copy()
-            date_expr.setParseAction(pyparsing_common.convertToDate())
-            print(date_expr.parseString("1999-12-31"))
-        prints::
-            [datetime.date(1999, 12, 31)]
-        """
-        def cvt_fn(s,l,t):
-            try:
-                return datetime.strptime(t[0], fmt).date()
-            except ValueError as ve:
-                raise ParseException(s, l, str(ve))
-        return cvt_fn
-
-    @staticmethod
-    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
-        """
-        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime
-
-        Params -
-         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})
-
-        Example::
-            dt_expr = pyparsing_common.iso8601_datetime.copy()
-            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
-            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
-        prints::
-            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
-        """
-        def cvt_fn(s,l,t):
-            try:
-                return datetime.strptime(t[0], fmt)
-            except ValueError as ve:
-                raise ParseException(s, l, str(ve))
-        return cvt_fn
-
-    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
-    "ISO8601 date (C{yyyy-mm-dd})"
-
-    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
-    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"
-
-    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
-    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"
-
-    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
-    @staticmethod
-    def stripHTMLTags(s, l, tokens):
-        """
-        Parse action to remove HTML tags from web page HTML source
-
-        Example::
-            # strip HTML links from normal text
-            text = 'More info at the
pyparsing wiki page' - td,td_end = makeHTMLTags("TD") - table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end - - print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page' - """ - return pyparsing_common._html_stripper.transformString(tokens[0]) - - _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',') - + Optional( White(" \t") ) ) ).streamline().setName("commaItem") - comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list") - """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" - - upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper())) - """Parse action to convert tokens to upper case.""" - - downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower())) - """Parse action to convert tokens to lower case.""" - - -if __name__ == "__main__": - - selectToken = CaselessLiteral("select") - fromToken = CaselessLiteral("from") - - ident = Word(alphas, alphanums + "_$") - - columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) - columnNameList = Group(delimitedList(columnName)).setName("columns") - columnSpec = ('*' | columnNameList) - - tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) - tableNameList = Group(delimitedList(tableName)).setName("tables") - - simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables") - - # demo runTests method, including embedded comments in test string - simpleSQL.runTests(""" - # '*' as column list and dotted table name - select * from SYS.XYZZY - - # caseless match on "SELECT", and casts back to "select" - SELECT * from XYZZY, ABC - - # list of column names, and mixed case SELECT keyword - Select AA,BB,CC from Sys.dual - - # multiple tables - Select A, B, C from Sys.dual, Table2 - - # invalid SELECT keyword - should fail - Xelect A, B, C from Sys.dual - - # incomplete command - should fail - Select - - # invalid column name - should fail - Select ^^^ frox Sys.dual - - """) - - pyparsing_common.number.runTests(""" - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - """) - - # any int or real number, returned as float - pyparsing_common.fnumber.runTests(""" - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - """) - - pyparsing_common.hex_integer.runTests(""" - 100 - FF - """) - - import uuid - pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) - pyparsing_common.uuid.runTests(""" - 12345678-1234-5678-1234-567812345678 - """) diff --git a/lib/pkg_resources/_vendor/typing_extensions.py b/lib/pkg_resources/_vendor/typing_extensions.py new file mode 100644 index 00000000..ef42417c --- /dev/null +++ b/lib/pkg_resources/_vendor/typing_extensions.py @@ -0,0 +1,2209 @@ +import abc +import collections +import collections.abc +import functools +import operator +import sys +import types as _types +import typing + + +__all__ = [ + # Super-special typing primitives. + 'Any', + 'ClassVar', + 'Concatenate', + 'Final', + 'LiteralString', + 'ParamSpec', + 'ParamSpecArgs', + 'ParamSpecKwargs', + 'Self', + 'Type', + 'TypeVar', + 'TypeVarTuple', + 'Unpack', + + # ABCs (from collections.abc). + 'Awaitable', + 'AsyncIterator', + 'AsyncIterable', + 'Coroutine', + 'AsyncGenerator', + 'AsyncContextManager', + 'ChainMap', + + # Concrete collection types. 
+ 'ContextManager', + 'Counter', + 'Deque', + 'DefaultDict', + 'NamedTuple', + 'OrderedDict', + 'TypedDict', + + # Structural checks, a.k.a. protocols. + 'SupportsIndex', + + # One-off things. + 'Annotated', + 'assert_never', + 'assert_type', + 'clear_overloads', + 'dataclass_transform', + 'get_overloads', + 'final', + 'get_args', + 'get_origin', + 'get_type_hints', + 'IntVar', + 'is_typeddict', + 'Literal', + 'NewType', + 'overload', + 'override', + 'Protocol', + 'reveal_type', + 'runtime', + 'runtime_checkable', + 'Text', + 'TypeAlias', + 'TypeGuard', + 'TYPE_CHECKING', + 'Never', + 'NoReturn', + 'Required', + 'NotRequired', +] + +# for backward compatibility +PEP_560 = True +GenericMeta = type + +# The functions below are modified copies of typing internal helpers. +# They are needed by _ProtocolMeta and they provide support for PEP 646. + +_marker = object() + + +def _check_generic(cls, parameters, elen=_marker): + """Check correct count for parameters of a generic cls (internal helper). + This gives a nice error message in case of count mismatch. + """ + if not elen: + raise TypeError(f"{cls} is not a generic class") + if elen is _marker: + if not hasattr(cls, "__parameters__") or not cls.__parameters__: + raise TypeError(f"{cls} is not a generic class") + elen = len(cls.__parameters__) + alen = len(parameters) + if alen != elen: + if hasattr(cls, "__parameters__"): + parameters = [p for p in cls.__parameters__ if not _is_unpack(p)] + num_tv_tuples = sum(isinstance(p, TypeVarTuple) for p in parameters) + if (num_tv_tuples > 0) and (alen >= elen - num_tv_tuples): + return + raise TypeError(f"Too {'many' if alen > elen else 'few'} parameters for {cls};" + f" actual {alen}, expected {elen}") + + +if sys.version_info >= (3, 10): + def _should_collect_from_parameters(t): + return isinstance( + t, (typing._GenericAlias, _types.GenericAlias, _types.UnionType) + ) +elif sys.version_info >= (3, 9): + def _should_collect_from_parameters(t): + return isinstance(t, (typing._GenericAlias, _types.GenericAlias)) +else: + def _should_collect_from_parameters(t): + return isinstance(t, typing._GenericAlias) and not t._special + + +def _collect_type_vars(types, typevar_types=None): + """Collect all type variable contained in types in order of + first appearance (lexicographic order). For example:: + + _collect_type_vars((T, List[S, T])) == (T, S) + """ + if typevar_types is None: + typevar_types = typing.TypeVar + tvars = [] + for t in types: + if ( + isinstance(t, typevar_types) and + t not in tvars and + not _is_unpack(t) + ): + tvars.append(t) + if _should_collect_from_parameters(t): + tvars.extend([t for t in t.__parameters__ if t not in tvars]) + return tuple(tvars) + + +NoReturn = typing.NoReturn + +# Some unconstrained type variables. These are used by the container types. +# (These are not for export.) +T = typing.TypeVar('T') # Any type. +KT = typing.TypeVar('KT') # Key type. +VT = typing.TypeVar('VT') # Value type. +T_co = typing.TypeVar('T_co', covariant=True) # Any type covariant containers. +T_contra = typing.TypeVar('T_contra', contravariant=True) # Ditto contravariant. 
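As a small check on the first-appearance ordering promised by _collect_type_vars above, here is a minimal sketch. It is illustrative only: it assumes this vendored file is importable as typing_extensions, and it pokes at a private helper that is not part of the module's API:

    import typing
    import typing_extensions as te  # the vendored module above

    T = typing.TypeVar('T')
    S = typing.TypeVar('S')

    # T is collected first as a bare TypeVar; S is then pulled out of
    # Dict[S, T].__parameters__, and the duplicate T is skipped.
    assert te._collect_type_vars((T, typing.Dict[S, T])) == (T, S)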
+ + +if sys.version_info >= (3, 11): + from typing import Any +else: + + class _AnyMeta(type): + def __instancecheck__(self, obj): + if self is Any: + raise TypeError("typing_extensions.Any cannot be used with isinstance()") + return super().__instancecheck__(obj) + + def __repr__(self): + if self is Any: + return "typing_extensions.Any" + return super().__repr__() + + class Any(metaclass=_AnyMeta): + """Special type indicating an unconstrained type. + - Any is compatible with every type. + - Any assumed to have all methods. + - All values assumed to be instances of Any. + Note that all the above statements are true from the point of view of + static type checkers. At runtime, Any should not be used with instance + checks. + """ + def __new__(cls, *args, **kwargs): + if cls is Any: + raise TypeError("Any cannot be instantiated") + return super().__new__(cls, *args, **kwargs) + + +ClassVar = typing.ClassVar + +# On older versions of typing there is an internal class named "Final". +# 3.8+ +if hasattr(typing, 'Final') and sys.version_info[:2] >= (3, 7): + Final = typing.Final +# 3.7 +else: + class _FinalForm(typing._SpecialForm, _root=True): + + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + item = typing._type_check(parameters, + f'{self._name} accepts only a single type.') + return typing._GenericAlias(self, (item,)) + + Final = _FinalForm('Final', + doc="""A special typing construct to indicate that a name + cannot be re-assigned or overridden in a subclass. + For example: + + MAX_SIZE: Final = 9000 + MAX_SIZE += 1 # Error reported by type checker + + class Connection: + TIMEOUT: Final[int] = 10 + class FastConnector(Connection): + TIMEOUT = 1 # Error reported by type checker + + There is no runtime checking of these properties.""") + +if sys.version_info >= (3, 11): + final = typing.final +else: + # @final exists in 3.8+, but we backport it for all versions + # before 3.11 to keep support for the __final__ attribute. + # See https://bugs.python.org/issue46342 + def final(f): + """This decorator can be used to indicate to type checkers that + the decorated method cannot be overridden, and decorated class + cannot be subclassed. For example: + + class Base: + @final + def done(self) -> None: + ... + class Sub(Base): + def done(self) -> None: # Error reported by type checker + ... + @final + class Leaf: + ... + class Other(Leaf): # Error reported by type checker + ... + + There is no runtime checking of these properties. The decorator + sets the ``__final__`` attribute to ``True`` on the decorated object + to allow runtime introspection. + """ + try: + f.__final__ = True + except (AttributeError, TypeError): + # Skip the attribute silently if it is not writable. + # AttributeError happens if the object has __slots__ or a + # read-only property, TypeError if it's a builtin class. + pass + return f + + +def IntVar(name): + return typing.TypeVar(name) + + +# 3.8+: +if hasattr(typing, 'Literal'): + Literal = typing.Literal +# 3.7: +else: + class _LiteralForm(typing._SpecialForm, _root=True): + + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + return typing._GenericAlias(self, parameters) + + Literal = _LiteralForm('Literal', + doc="""A type that can be used to indicate to type checkers + that the corresponding value has a value literally equivalent + to the provided parameter. 
For example: + + var: Literal[4] = 4 + + The type checker understands that 'var' is literally equal to + the value 4 and no other value. + + Literal[...] cannot be subclassed. There is no runtime + checking verifying that the parameter is actually a value + instead of a type.""") + + +_overload_dummy = typing._overload_dummy # noqa + + +if hasattr(typing, "get_overloads"): # 3.11+ + overload = typing.overload + get_overloads = typing.get_overloads + clear_overloads = typing.clear_overloads +else: + # {module: {qualname: {firstlineno: func}}} + _overload_registry = collections.defaultdict( + functools.partial(collections.defaultdict, dict) + ) + + def overload(func): + """Decorator for overloaded functions/methods. + + In a stub file, place two or more stub definitions for the same + function in a row, each decorated with @overload. For example: + + @overload + def utf8(value: None) -> None: ... + @overload + def utf8(value: bytes) -> bytes: ... + @overload + def utf8(value: str) -> bytes: ... + + In a non-stub file (i.e. a regular .py file), do the same but + follow it with an implementation. The implementation should *not* + be decorated with @overload. For example: + + @overload + def utf8(value: None) -> None: ... + @overload + def utf8(value: bytes) -> bytes: ... + @overload + def utf8(value: str) -> bytes: ... + def utf8(value): + # implementation goes here + + The overloads for a function can be retrieved at runtime using the + get_overloads() function. + """ + # classmethod and staticmethod + f = getattr(func, "__func__", func) + try: + _overload_registry[f.__module__][f.__qualname__][ + f.__code__.co_firstlineno + ] = func + except AttributeError: + # Not a normal function; ignore. + pass + return _overload_dummy + + def get_overloads(func): + """Return all defined overloads for *func* as a sequence.""" + # classmethod and staticmethod + f = getattr(func, "__func__", func) + if f.__module__ not in _overload_registry: + return [] + mod_dict = _overload_registry[f.__module__] + if f.__qualname__ not in mod_dict: + return [] + return list(mod_dict[f.__qualname__].values()) + + def clear_overloads(): + """Clear all overloads in the registry.""" + _overload_registry.clear() + + +# This is not a real generic class. Don't use outside annotations. +Type = typing.Type + +# Various ABCs mimicking those in collections.abc. +# A few are simply re-exported for completeness. 
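The registry-backed fallback above keys each stub by module, qualified name, and first line number, so get_overloads can recover every stub recorded under an implementation's name. A minimal usage sketch (assuming the module is importable as typing_extensions; the function name norm is invented for the example):

    from typing_extensions import overload, get_overloads

    @overload
    def norm(value: None) -> None: ...
    @overload
    def norm(value: str) -> str: ...
    def norm(value):
        # Implementation: strip strings, pass None through unchanged.
        return value.strip() if isinstance(value, str) else None

    # Both stub definitions were recorded under norm's qualified name.
    assert len(get_overloads(norm)) == 2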
+ + +Awaitable = typing.Awaitable +Coroutine = typing.Coroutine +AsyncIterable = typing.AsyncIterable +AsyncIterator = typing.AsyncIterator +Deque = typing.Deque +ContextManager = typing.ContextManager +AsyncContextManager = typing.AsyncContextManager +DefaultDict = typing.DefaultDict + +# 3.7.2+ +if hasattr(typing, 'OrderedDict'): + OrderedDict = typing.OrderedDict +# 3.7.0-3.7.2 +else: + OrderedDict = typing._alias(collections.OrderedDict, (KT, VT)) + +Counter = typing.Counter +ChainMap = typing.ChainMap +AsyncGenerator = typing.AsyncGenerator +NewType = typing.NewType +Text = typing.Text +TYPE_CHECKING = typing.TYPE_CHECKING + + +_PROTO_WHITELIST = ['Callable', 'Awaitable', + 'Iterable', 'Iterator', 'AsyncIterable', 'AsyncIterator', + 'Hashable', 'Sized', 'Container', 'Collection', 'Reversible', + 'ContextManager', 'AsyncContextManager'] + + +def _get_protocol_attrs(cls): + attrs = set() + for base in cls.__mro__[:-1]: # without object + if base.__name__ in ('Protocol', 'Generic'): + continue + annotations = getattr(base, '__annotations__', {}) + for attr in list(base.__dict__.keys()) + list(annotations.keys()): + if (not attr.startswith('_abc_') and attr not in ( + '__abstractmethods__', '__annotations__', '__weakref__', + '_is_protocol', '_is_runtime_protocol', '__dict__', + '__args__', '__slots__', + '__next_in_mro__', '__parameters__', '__origin__', + '__orig_bases__', '__extra__', '__tree_hash__', + '__doc__', '__subclasshook__', '__init__', '__new__', + '__module__', '_MutableMapping__marker', '_gorg')): + attrs.add(attr) + return attrs + + +def _is_callable_members_only(cls): + return all(callable(getattr(cls, attr, None)) for attr in _get_protocol_attrs(cls)) + + +def _maybe_adjust_parameters(cls): + """Helper function used in Protocol.__init_subclass__ and _TypedDictMeta.__new__. + + The contents of this function are very similar + to logic found in typing.Generic.__init_subclass__ + on the CPython main branch. + """ + tvars = [] + if '__orig_bases__' in cls.__dict__: + tvars = typing._collect_type_vars(cls.__orig_bases__) + # Look for Generic[T1, ..., Tn] or Protocol[T1, ..., Tn]. + # If found, tvars must be a subset of it. + # If not found, tvars is it. + # Also check for and reject plain Generic, + # and reject multiple Generic[...] and/or Protocol[...]. + gvars = None + for base in cls.__orig_bases__: + if (isinstance(base, typing._GenericAlias) and + base.__origin__ in (typing.Generic, Protocol)): + # for error messages + the_base = base.__origin__.__name__ + if gvars is not None: + raise TypeError( + "Cannot inherit from Generic[...]" + " and/or Protocol[...] multiple types.") + gvars = base.__parameters__ + if gvars is None: + gvars = tvars + else: + tvarset = set(tvars) + gvarset = set(gvars) + if not tvarset <= gvarset: + s_vars = ', '.join(str(t) for t in tvars if t not in gvarset) + s_args = ', '.join(str(g) for g in gvars) + raise TypeError(f"Some type variables ({s_vars}) are" + f" not listed in {the_base}[{s_args}]") + tvars = gvars + cls.__parameters__ = tuple(tvars) + + +# 3.8+ +if hasattr(typing, 'Protocol'): + Protocol = typing.Protocol +# 3.7 +else: + + def _no_init(self, *args, **kwargs): + if type(self)._is_protocol: + raise TypeError('Protocols cannot be instantiated') + + class _ProtocolMeta(abc.ABCMeta): # noqa: B024 + # This metaclass is a bit unfortunate and exists only because of the lack + # of __instancehook__. + def __instancecheck__(cls, instance): + # We need this method for situations where attributes are + # assigned in __init__. 
+ if ((not getattr(cls, '_is_protocol', False) or + _is_callable_members_only(cls)) and + issubclass(instance.__class__, cls)): + return True + if cls._is_protocol: + if all(hasattr(instance, attr) and + (not callable(getattr(cls, attr, None)) or + getattr(instance, attr) is not None) + for attr in _get_protocol_attrs(cls)): + return True + return super().__instancecheck__(instance) + + class Protocol(metaclass=_ProtocolMeta): + # There is quite a lot of overlapping code with typing.Generic. + # Unfortunately it is hard to avoid this while these live in two different + # modules. The duplicated code will be removed when Protocol is moved to typing. + """Base class for protocol classes. Protocol classes are defined as:: + + class Proto(Protocol): + def meth(self) -> int: + ... + + Such classes are primarily used with static type checkers that recognize + structural subtyping (static duck-typing), for example:: + + class C: + def meth(self) -> int: + return 0 + + def func(x: Proto) -> int: + return x.meth() + + func(C()) # Passes static type check + + See PEP 544 for details. Protocol classes decorated with + @typing_extensions.runtime act as simple-minded runtime protocol that checks + only the presence of given attributes, ignoring their type signatures. + + Protocol classes can be generic, they are defined as:: + + class GenProto(Protocol[T]): + def meth(self) -> T: + ... + """ + __slots__ = () + _is_protocol = True + + def __new__(cls, *args, **kwds): + if cls is Protocol: + raise TypeError("Type Protocol cannot be instantiated; " + "it can only be used as a base class") + return super().__new__(cls) + + @typing._tp_cache + def __class_getitem__(cls, params): + if not isinstance(params, tuple): + params = (params,) + if not params and cls is not typing.Tuple: + raise TypeError( + f"Parameter list to {cls.__qualname__}[...] cannot be empty") + msg = "Parameters to generic types must be types." + params = tuple(typing._type_check(p, msg) for p in params) # noqa + if cls is Protocol: + # Generic can only be subscripted with unique type variables. + if not all(isinstance(p, typing.TypeVar) for p in params): + i = 0 + while isinstance(params[i], typing.TypeVar): + i += 1 + raise TypeError( + "Parameters to Protocol[...] must all be type variables." + f" Parameter {i + 1} is {params[i]}") + if len(set(params)) != len(params): + raise TypeError( + "Parameters to Protocol[...] must all be unique") + else: + # Subscripting a regular Generic subclass. + _check_generic(cls, params, len(cls.__parameters__)) + return typing._GenericAlias(cls, params) + + def __init_subclass__(cls, *args, **kwargs): + if '__orig_bases__' in cls.__dict__: + error = typing.Generic in cls.__orig_bases__ + else: + error = typing.Generic in cls.__bases__ + if error: + raise TypeError("Cannot inherit from plain Generic") + _maybe_adjust_parameters(cls) + + # Determine if this is a protocol or a concrete subclass. + if not cls.__dict__.get('_is_protocol', None): + cls._is_protocol = any(b is Protocol for b in cls.__bases__) + + # Set (or override) the protocol subclass hook. 
+ def _proto_hook(other): + if not cls.__dict__.get('_is_protocol', None): + return NotImplemented + if not getattr(cls, '_is_runtime_protocol', False): + if sys._getframe(2).f_globals['__name__'] in ['abc', 'functools']: + return NotImplemented + raise TypeError("Instance and class checks can only be used with" + " @runtime protocols") + if not _is_callable_members_only(cls): + if sys._getframe(2).f_globals['__name__'] in ['abc', 'functools']: + return NotImplemented + raise TypeError("Protocols with non-method members" + " don't support issubclass()") + if not isinstance(other, type): + # Same error as for issubclass(1, int) + raise TypeError('issubclass() arg 1 must be a class') + for attr in _get_protocol_attrs(cls): + for base in other.__mro__: + if attr in base.__dict__: + if base.__dict__[attr] is None: + return NotImplemented + break + annotations = getattr(base, '__annotations__', {}) + if (isinstance(annotations, typing.Mapping) and + attr in annotations and + isinstance(other, _ProtocolMeta) and + other._is_protocol): + break + else: + return NotImplemented + return True + if '__subclasshook__' not in cls.__dict__: + cls.__subclasshook__ = _proto_hook + + # We have nothing more to do for non-protocols. + if not cls._is_protocol: + return + + # Check consistency of bases. + for base in cls.__bases__: + if not (base in (object, typing.Generic) or + base.__module__ == 'collections.abc' and + base.__name__ in _PROTO_WHITELIST or + isinstance(base, _ProtocolMeta) and base._is_protocol): + raise TypeError('Protocols can only inherit from other' + f' protocols, got {repr(base)}') + cls.__init__ = _no_init + + +# 3.8+ +if hasattr(typing, 'runtime_checkable'): + runtime_checkable = typing.runtime_checkable +# 3.7 +else: + def runtime_checkable(cls): + """Mark a protocol class as a runtime protocol, so that it + can be used with isinstance() and issubclass(). Raise TypeError + if applied to a non-protocol class. + + This allows a simple-minded structural check very similar to the + one-offs in collections.abc such as Hashable. + """ + if not isinstance(cls, _ProtocolMeta) or not cls._is_protocol: + raise TypeError('@runtime_checkable can be only applied to protocol classes,' + f' got {cls!r}') + cls._is_runtime_protocol = True + return cls + + +# Exists for backwards compatibility. +runtime = runtime_checkable + + +# 3.8+ +if hasattr(typing, 'SupportsIndex'): + SupportsIndex = typing.SupportsIndex +# 3.7 +else: + @runtime_checkable + class SupportsIndex(Protocol): + __slots__ = () + + @abc.abstractmethod + def __index__(self) -> int: + pass + + +if hasattr(typing, "Required"): + # The standard library TypedDict in Python 3.8 does not store runtime information + # about which (if any) keys are optional. See https://bugs.python.org/issue38834 + # The standard library TypedDict in Python 3.9.0/1 does not honour the "total" + # keyword with old-style TypedDict(). See https://bugs.python.org/issue42059 + # The standard library TypedDict below Python 3.11 does not store runtime + # information about optional and required keys when using Required or NotRequired. + # Generic TypedDicts are also impossible using typing.TypedDict on Python <3.11. + TypedDict = typing.TypedDict + _TypedDictMeta = typing._TypedDictMeta + is_typeddict = typing.is_typeddict +else: + def _check_fails(cls, other): + try: + if sys._getframe(1).f_globals['__name__'] not in ['abc', + 'functools', + 'typing']: + # Typed dicts are only for static structural subtyping. 
+ raise TypeError('TypedDict does not support instance and class checks') + except (AttributeError, ValueError): + pass + return False + + def _dict_new(*args, **kwargs): + if not args: + raise TypeError('TypedDict.__new__(): not enough arguments') + _, args = args[0], args[1:] # allow the "cls" keyword be passed + return dict(*args, **kwargs) + + _dict_new.__text_signature__ = '($cls, _typename, _fields=None, /, **kwargs)' + + def _typeddict_new(*args, total=True, **kwargs): + if not args: + raise TypeError('TypedDict.__new__(): not enough arguments') + _, args = args[0], args[1:] # allow the "cls" keyword be passed + if args: + typename, args = args[0], args[1:] # allow the "_typename" keyword be passed + elif '_typename' in kwargs: + typename = kwargs.pop('_typename') + import warnings + warnings.warn("Passing '_typename' as keyword argument is deprecated", + DeprecationWarning, stacklevel=2) + else: + raise TypeError("TypedDict.__new__() missing 1 required positional " + "argument: '_typename'") + if args: + try: + fields, = args # allow the "_fields" keyword be passed + except ValueError: + raise TypeError('TypedDict.__new__() takes from 2 to 3 ' + f'positional arguments but {len(args) + 2} ' + 'were given') + elif '_fields' in kwargs and len(kwargs) == 1: + fields = kwargs.pop('_fields') + import warnings + warnings.warn("Passing '_fields' as keyword argument is deprecated", + DeprecationWarning, stacklevel=2) + else: + fields = None + + if fields is None: + fields = kwargs + elif kwargs: + raise TypeError("TypedDict takes either a dict or keyword arguments," + " but not both") + + ns = {'__annotations__': dict(fields)} + try: + # Setting correct module is necessary to make typed dict classes pickleable. + ns['__module__'] = sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + pass + + return _TypedDictMeta(typename, (), ns, total=total) + + _typeddict_new.__text_signature__ = ('($cls, _typename, _fields=None,' + ' /, *, total=True, **kwargs)') + + class _TypedDictMeta(type): + def __init__(cls, name, bases, ns, total=True): + super().__init__(name, bases, ns) + + def __new__(cls, name, bases, ns, total=True): + # Create new typed dict class object. + # This method is called directly when TypedDict is subclassed, + # or via _typeddict_new when TypedDict is instantiated. This way + # TypedDict supports all three syntaxes described in its docstring. + # Subclasses and instances of TypedDict return actual dictionaries + # via _dict_new. + ns['__new__'] = _typeddict_new if name == 'TypedDict' else _dict_new + # Don't insert typing.Generic into __bases__ here, + # or Generic.__init_subclass__ will raise TypeError + # in the super().__new__() call. + # Instead, monkey-patch __bases__ onto the class after it's been created. 
+ tp_dict = super().__new__(cls, name, (dict,), ns) + + if any(issubclass(base, typing.Generic) for base in bases): + tp_dict.__bases__ = (typing.Generic, dict) + _maybe_adjust_parameters(tp_dict) + + annotations = {} + own_annotations = ns.get('__annotations__', {}) + msg = "TypedDict('Name', {f0: t0, f1: t1, ...}); each t must be a type" + own_annotations = { + n: typing._type_check(tp, msg) for n, tp in own_annotations.items() + } + required_keys = set() + optional_keys = set() + + for base in bases: + annotations.update(base.__dict__.get('__annotations__', {})) + required_keys.update(base.__dict__.get('__required_keys__', ())) + optional_keys.update(base.__dict__.get('__optional_keys__', ())) + + annotations.update(own_annotations) + for annotation_key, annotation_type in own_annotations.items(): + annotation_origin = get_origin(annotation_type) + if annotation_origin is Annotated: + annotation_args = get_args(annotation_type) + if annotation_args: + annotation_type = annotation_args[0] + annotation_origin = get_origin(annotation_type) + + if annotation_origin is Required: + required_keys.add(annotation_key) + elif annotation_origin is NotRequired: + optional_keys.add(annotation_key) + elif total: + required_keys.add(annotation_key) + else: + optional_keys.add(annotation_key) + + tp_dict.__annotations__ = annotations + tp_dict.__required_keys__ = frozenset(required_keys) + tp_dict.__optional_keys__ = frozenset(optional_keys) + if not hasattr(tp_dict, '__total__'): + tp_dict.__total__ = total + return tp_dict + + __instancecheck__ = __subclasscheck__ = _check_fails + + TypedDict = _TypedDictMeta('TypedDict', (dict,), {}) + TypedDict.__module__ = __name__ + TypedDict.__doc__ = \ + """A simple typed name space. At runtime it is equivalent to a plain dict. + + TypedDict creates a dictionary type that expects all of its + instances to have a certain set of keys, with each key + associated with a value of a consistent type. This expectation + is not checked at runtime but is only enforced by type checkers. + Usage:: + + class Point2D(TypedDict): + x: int + y: int + label: str + + a: Point2D = {'x': 1, 'y': 2, 'label': 'good'} # OK + b: Point2D = {'z': 3, 'label': 'bad'} # Fails type check + + assert Point2D(x=1, y=2, label='first') == dict(x=1, y=2, label='first') + + The type info can be accessed via the Point2D.__annotations__ dict, and + the Point2D.__required_keys__ and Point2D.__optional_keys__ frozensets. + TypedDict supports two additional equivalent forms:: + + Point2D = TypedDict('Point2D', x=int, y=int, label=str) + Point2D = TypedDict('Point2D', {'x': int, 'y': int, 'label': str}) + + The class syntax is only supported in Python 3.6+, while two other + syntax forms work for Python 2.7 and 3.2+ + """ + + if hasattr(typing, "_TypedDictMeta"): + _TYPEDDICT_TYPES = (typing._TypedDictMeta, _TypedDictMeta) + else: + _TYPEDDICT_TYPES = (_TypedDictMeta,) + + def is_typeddict(tp): + """Check if an annotation is a TypedDict class + + For example:: + class Film(TypedDict): + title: str + year: int + + is_typeddict(Film) # => True + is_typeddict(Union[list, str]) # => False + """ + return isinstance(tp, tuple(_TYPEDDICT_TYPES)) + + +if hasattr(typing, "assert_type"): + assert_type = typing.assert_type + +else: + def assert_type(__val, __typ): + """Assert (to the type checker) that the value is of the given type. 
+ + When the type checker encounters a call to assert_type(), it + emits an error if the value is not of the specified type:: + + def greet(name: str) -> None: + assert_type(name, str) # ok + assert_type(name, int) # type checker error + + At runtime this returns the first argument unchanged and otherwise + does nothing. + """ + return __val + + +if hasattr(typing, "Required"): + get_type_hints = typing.get_type_hints +else: + import functools + import types + + # replaces _strip_annotations() + def _strip_extras(t): + """Strips Annotated, Required and NotRequired from a given type.""" + if isinstance(t, _AnnotatedAlias): + return _strip_extras(t.__origin__) + if hasattr(t, "__origin__") and t.__origin__ in (Required, NotRequired): + return _strip_extras(t.__args__[0]) + if isinstance(t, typing._GenericAlias): + stripped_args = tuple(_strip_extras(a) for a in t.__args__) + if stripped_args == t.__args__: + return t + return t.copy_with(stripped_args) + if hasattr(types, "GenericAlias") and isinstance(t, types.GenericAlias): + stripped_args = tuple(_strip_extras(a) for a in t.__args__) + if stripped_args == t.__args__: + return t + return types.GenericAlias(t.__origin__, stripped_args) + if hasattr(types, "UnionType") and isinstance(t, types.UnionType): + stripped_args = tuple(_strip_extras(a) for a in t.__args__) + if stripped_args == t.__args__: + return t + return functools.reduce(operator.or_, stripped_args) + + return t + + def get_type_hints(obj, globalns=None, localns=None, include_extras=False): + """Return type hints for an object. + + This is often the same as obj.__annotations__, but it handles + forward references encoded as string literals, adds Optional[t] if a + default value equal to None is set and recursively replaces all + 'Annotated[T, ...]', 'Required[T]' or 'NotRequired[T]' with 'T' + (unless 'include_extras=True'). + + The argument may be a module, class, method, or function. The annotations + are returned as a dictionary. For classes, annotations include also + inherited members. + + TypeError is raised if the argument is not of a type that can contain + annotations, and an empty dictionary is returned if no annotations are + present. + + BEWARE -- the behavior of globalns and localns is counterintuitive + (unless you are familiar with how eval() and exec() work). The + search order is locals first, then globals. + + - If no dict arguments are passed, an attempt is made to use the + globals from obj (or the respective module's globals for classes), + and these are also used as the locals. If the object does not appear + to have globals, an empty dictionary is used. + + - If one dict argument is passed, it is used for both globals and + locals. + + - If two dict arguments are passed, they specify globals and + locals, respectively. + """ + if hasattr(typing, "Annotated"): + hint = typing.get_type_hints( + obj, globalns=globalns, localns=localns, include_extras=True + ) + else: + hint = typing.get_type_hints(obj, globalns=globalns, localns=localns) + if include_extras: + return hint + return {k: _strip_extras(t) for k, t in hint.items()} + + +# Python 3.9+ has PEP 593 (Annotated) +if hasattr(typing, 'Annotated'): + Annotated = typing.Annotated + # Not exported and not a public API, but needed for get_origin() and get_args() + # to work. + _AnnotatedAlias = typing._AnnotatedAlias +# 3.7-3.8 +else: + class _AnnotatedAlias(typing._GenericAlias, _root=True): + """Runtime representation of an annotated type. 
+ + At its core 'Annotated[t, dec1, dec2, ...]' is an alias for the type 't' + with extra annotations. The alias behaves like a normal typing alias, + instantiating is the same as instantiating the underlying type, binding + it to types is also the same. + """ + def __init__(self, origin, metadata): + if isinstance(origin, _AnnotatedAlias): + metadata = origin.__metadata__ + metadata + origin = origin.__origin__ + super().__init__(origin, origin) + self.__metadata__ = metadata + + def copy_with(self, params): + assert len(params) == 1 + new_type = params[0] + return _AnnotatedAlias(new_type, self.__metadata__) + + def __repr__(self): + return (f"typing_extensions.Annotated[{typing._type_repr(self.__origin__)}, " + f"{', '.join(repr(a) for a in self.__metadata__)}]") + + def __reduce__(self): + return operator.getitem, ( + Annotated, (self.__origin__,) + self.__metadata__ + ) + + def __eq__(self, other): + if not isinstance(other, _AnnotatedAlias): + return NotImplemented + if self.__origin__ != other.__origin__: + return False + return self.__metadata__ == other.__metadata__ + + def __hash__(self): + return hash((self.__origin__, self.__metadata__)) + + class Annotated: + """Add context specific metadata to a type. + + Example: Annotated[int, runtime_check.Unsigned] indicates to the + hypothetical runtime_check module that this type is an unsigned int. + Every other consumer of this type can ignore this metadata and treat + this type as int. + + The first argument to Annotated must be a valid type (and will be in + the __origin__ field), the remaining arguments are kept as a tuple in + the __extra__ field. + + Details: + + - It's an error to call `Annotated` with less than two arguments. + - Nested Annotated are flattened:: + + Annotated[Annotated[T, Ann1, Ann2], Ann3] == Annotated[T, Ann1, Ann2, Ann3] + + - Instantiating an annotated type is equivalent to instantiating the + underlying type:: + + Annotated[C, Ann1](5) == C(5) + + - Annotated can be used as a generic type alias:: + + Optimized = Annotated[T, runtime.Optimize()] + Optimized[int] == Annotated[int, runtime.Optimize()] + + OptimizedList = Annotated[List[T], runtime.Optimize()] + OptimizedList[int] == Annotated[List[int], runtime.Optimize()] + """ + + __slots__ = () + + def __new__(cls, *args, **kwargs): + raise TypeError("Type Annotated cannot be instantiated.") + + @typing._tp_cache + def __class_getitem__(cls, params): + if not isinstance(params, tuple) or len(params) < 2: + raise TypeError("Annotated[...] should be used " + "with at least two arguments (a type and an " + "annotation).") + allowed_special_forms = (ClassVar, Final) + if get_origin(params[0]) in allowed_special_forms: + origin = params[0] + else: + msg = "Annotated[t, ...]: t must be a type." + origin = typing._type_check(params[0], msg) + metadata = tuple(params[1:]) + return _AnnotatedAlias(origin, metadata) + + def __init_subclass__(cls, *args, **kwargs): + raise TypeError( + f"Cannot subclass {cls.__module__}.Annotated" + ) + +# Python 3.8 has get_origin() and get_args() but those implementations aren't +# Annotated-aware, so we can't use those. Python 3.9's versions don't support +# ParamSpecArgs and ParamSpecKwargs, so only Python 3.10's versions will do. 
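Given the version note just above, a tiny sketch of the Annotated-aware behaviour that the backported get_origin/get_args preserve (illustrative only, assuming typing_extensions is importable):

    from typing_extensions import Annotated, get_args, get_origin

    A = Annotated[int, 'metadata']
    assert get_origin(A) is Annotated          # unwraps to the Annotated form
    assert get_args(A) == (int, 'metadata')    # origin type plus its metadata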
+if sys.version_info[:2] >= (3, 10): + get_origin = typing.get_origin + get_args = typing.get_args +# 3.7-3.9 +else: + try: + # 3.9+ + from typing import _BaseGenericAlias + except ImportError: + _BaseGenericAlias = typing._GenericAlias + try: + # 3.9+ + from typing import GenericAlias as _typing_GenericAlias + except ImportError: + _typing_GenericAlias = typing._GenericAlias + + def get_origin(tp): + """Get the unsubscripted version of a type. + + This supports generic types, Callable, Tuple, Union, Literal, Final, ClassVar + and Annotated. Return None for unsupported types. Examples:: + + get_origin(Literal[42]) is Literal + get_origin(int) is None + get_origin(ClassVar[int]) is ClassVar + get_origin(Generic) is Generic + get_origin(Generic[T]) is Generic + get_origin(Union[T, int]) is Union + get_origin(List[Tuple[T, T]][int]) == list + get_origin(P.args) is P + """ + if isinstance(tp, _AnnotatedAlias): + return Annotated + if isinstance(tp, (typing._GenericAlias, _typing_GenericAlias, _BaseGenericAlias, + ParamSpecArgs, ParamSpecKwargs)): + return tp.__origin__ + if tp is typing.Generic: + return typing.Generic + return None + + def get_args(tp): + """Get type arguments with all substitutions performed. + + For unions, basic simplifications used by Union constructor are performed. + Examples:: + get_args(Dict[str, int]) == (str, int) + get_args(int) == () + get_args(Union[int, Union[T, int], str][int]) == (int, str) + get_args(Union[int, Tuple[T, int]][str]) == (int, Tuple[str, int]) + get_args(Callable[[], T][int]) == ([], int) + """ + if isinstance(tp, _AnnotatedAlias): + return (tp.__origin__,) + tp.__metadata__ + if isinstance(tp, (typing._GenericAlias, _typing_GenericAlias)): + if getattr(tp, "_special", False): + return () + res = tp.__args__ + if get_origin(tp) is collections.abc.Callable and res[0] is not Ellipsis: + res = (list(res[:-1]), res[-1]) + return res + return () + + +# 3.10+ +if hasattr(typing, 'TypeAlias'): + TypeAlias = typing.TypeAlias +# 3.9 +elif sys.version_info[:2] >= (3, 9): + class _TypeAliasForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + @_TypeAliasForm + def TypeAlias(self, parameters): + """Special marker indicating that an assignment should + be recognized as a proper type alias definition by type + checkers. + + For example:: + + Predicate: TypeAlias = Callable[..., bool] + + It's invalid when used anywhere except as in the example above. + """ + raise TypeError(f"{self} is not subscriptable") +# 3.7-3.8 +else: + class _TypeAliasForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + TypeAlias = _TypeAliasForm('TypeAlias', + doc="""Special marker indicating that an assignment should + be recognized as a proper type alias definition by type + checkers. 
+ + For example:: + + Predicate: TypeAlias = Callable[..., bool] + + It's invalid when used anywhere except as in the example + above.""") + + +class _DefaultMixin: + """Mixin for TypeVarLike defaults.""" + + __slots__ = () + + def __init__(self, default): + if isinstance(default, (tuple, list)): + self.__default__ = tuple((typing._type_check(d, "Default must be a type") + for d in default)) + elif default: + self.__default__ = typing._type_check(default, "Default must be a type") + else: + self.__default__ = None + + +# Add default and infer_variance parameters from PEP 696 and 695 +class TypeVar(typing.TypeVar, _DefaultMixin, _root=True): + """Type variable.""" + + __module__ = 'typing' + + def __init__(self, name, *constraints, bound=None, + covariant=False, contravariant=False, + default=None, infer_variance=False): + super().__init__(name, *constraints, bound=bound, covariant=covariant, + contravariant=contravariant) + _DefaultMixin.__init__(self, default) + self.__infer_variance__ = infer_variance + + # for pickling: + try: + def_mod = sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + def_mod = None + if def_mod != 'typing_extensions': + self.__module__ = def_mod + + +# Python 3.10+ has PEP 612 +if hasattr(typing, 'ParamSpecArgs'): + ParamSpecArgs = typing.ParamSpecArgs + ParamSpecKwargs = typing.ParamSpecKwargs +# 3.7-3.9 +else: + class _Immutable: + """Mixin to indicate that object should not be copied.""" + __slots__ = () + + def __copy__(self): + return self + + def __deepcopy__(self, memo): + return self + + class ParamSpecArgs(_Immutable): + """The args for a ParamSpec object. + + Given a ParamSpec object P, P.args is an instance of ParamSpecArgs. + + ParamSpecArgs objects have a reference back to their ParamSpec: + + P.args.__origin__ is P + + This type is meant for runtime introspection and has no special meaning to + static type checkers. + """ + def __init__(self, origin): + self.__origin__ = origin + + def __repr__(self): + return f"{self.__origin__.__name__}.args" + + def __eq__(self, other): + if not isinstance(other, ParamSpecArgs): + return NotImplemented + return self.__origin__ == other.__origin__ + + class ParamSpecKwargs(_Immutable): + """The kwargs for a ParamSpec object. + + Given a ParamSpec object P, P.kwargs is an instance of ParamSpecKwargs. + + ParamSpecKwargs objects have a reference back to their ParamSpec: + + P.kwargs.__origin__ is P + + This type is meant for runtime introspection and has no special meaning to + static type checkers. 
+        """
+        def __init__(self, origin):
+            self.__origin__ = origin
+
+        def __repr__(self):
+            return f"{self.__origin__.__name__}.kwargs"
+
+        def __eq__(self, other):
+            if not isinstance(other, ParamSpecKwargs):
+                return NotImplemented
+            return self.__origin__ == other.__origin__
+
+# 3.10+
+if hasattr(typing, 'ParamSpec'):
+
+    # Add default Parameter - PEP 696
+    class ParamSpec(typing.ParamSpec, _DefaultMixin, _root=True):
+        """Parameter specification variable."""
+
+        __module__ = 'typing'
+
+        def __init__(self, name, *, bound=None, covariant=False, contravariant=False,
+                     default=None):
+            super().__init__(name, bound=bound, covariant=covariant,
+                             contravariant=contravariant)
+            _DefaultMixin.__init__(self, default)
+
+            # for pickling:
+            try:
+                def_mod = sys._getframe(1).f_globals.get('__name__', '__main__')
+            except (AttributeError, ValueError):
+                def_mod = None
+            if def_mod != 'typing_extensions':
+                self.__module__ = def_mod
+
+# 3.7-3.9
+else:
+
+    # Inherits from list as a workaround for Callable checks in Python < 3.9.2.
+    class ParamSpec(list, _DefaultMixin):
+        """Parameter specification variable.
+
+        Usage::
+
+           P = ParamSpec('P')
+
+        Parameter specification variables exist primarily for the benefit of static
+        type checkers.  They are used to forward the parameter types of one
+        callable to another callable, a pattern commonly found in higher order
+        functions and decorators.  They are only valid when used in ``Concatenate``,
+        or as the first argument to ``Callable``. In Python 3.10 and higher,
+        they are also supported in user-defined Generics at runtime.
+        See class Generic for more information on generic types.  An
+        example for annotating a decorator::
+
+           T = TypeVar('T')
+           P = ParamSpec('P')
+
+           def add_logging(f: Callable[P, T]) -> Callable[P, T]:
+               '''A type-safe decorator to add logging to a function.'''
+               def inner(*args: P.args, **kwargs: P.kwargs) -> T:
+                   logging.info(f'{f.__name__} was called')
+                   return f(*args, **kwargs)
+               return inner
+
+           @add_logging
+           def add_two(x: float, y: float) -> float:
+               '''Add two numbers together.'''
+               return x + y
+
+        Parameter specification variables defined with covariant=True or
+        contravariant=True can be used to declare covariant or contravariant
+        generic types.  These keyword arguments are valid, but their actual semantics
+        are yet to be decided.  See PEP 612 for details.
+
+        Parameter specification variables can be introspected. e.g.:
+
+           P.__name__ == 'P'
+           P.__bound__ == None
+           P.__covariant__ == False
+           P.__contravariant__ == False
+
+        Note that only parameter specification variables defined in global scope can
+        be pickled.
+        """
+
+        # Trick Generic __parameters__.
+ __class__ = typing.TypeVar + + @property + def args(self): + return ParamSpecArgs(self) + + @property + def kwargs(self): + return ParamSpecKwargs(self) + + def __init__(self, name, *, bound=None, covariant=False, contravariant=False, + default=None): + super().__init__([self]) + self.__name__ = name + self.__covariant__ = bool(covariant) + self.__contravariant__ = bool(contravariant) + if bound: + self.__bound__ = typing._type_check(bound, 'Bound must be a type.') + else: + self.__bound__ = None + _DefaultMixin.__init__(self, default) + + # for pickling: + try: + def_mod = sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + def_mod = None + if def_mod != 'typing_extensions': + self.__module__ = def_mod + + def __repr__(self): + if self.__covariant__: + prefix = '+' + elif self.__contravariant__: + prefix = '-' + else: + prefix = '~' + return prefix + self.__name__ + + def __hash__(self): + return object.__hash__(self) + + def __eq__(self, other): + return self is other + + def __reduce__(self): + return self.__name__ + + # Hack to get typing._type_check to pass. + def __call__(self, *args, **kwargs): + pass + + +# 3.7-3.9 +if not hasattr(typing, 'Concatenate'): + # Inherits from list as a workaround for Callable checks in Python < 3.9.2. + class _ConcatenateGenericAlias(list): + + # Trick Generic into looking into this for __parameters__. + __class__ = typing._GenericAlias + + # Flag in 3.8. + _special = False + + def __init__(self, origin, args): + super().__init__(args) + self.__origin__ = origin + self.__args__ = args + + def __repr__(self): + _type_repr = typing._type_repr + return (f'{_type_repr(self.__origin__)}' + f'[{", ".join(_type_repr(arg) for arg in self.__args__)}]') + + def __hash__(self): + return hash((self.__origin__, self.__args__)) + + # Hack to get typing._type_check to pass in Generic. + def __call__(self, *args, **kwargs): + pass + + @property + def __parameters__(self): + return tuple( + tp for tp in self.__args__ if isinstance(tp, (typing.TypeVar, ParamSpec)) + ) + + +# 3.7-3.9 +@typing._tp_cache +def _concatenate_getitem(self, parameters): + if parameters == (): + raise TypeError("Cannot take a Concatenate of no types.") + if not isinstance(parameters, tuple): + parameters = (parameters,) + if not isinstance(parameters[-1], ParamSpec): + raise TypeError("The last parameter to Concatenate should be a " + "ParamSpec variable.") + msg = "Concatenate[arg, ...]: each arg must be a type." + parameters = tuple(typing._type_check(p, msg) for p in parameters) + return _ConcatenateGenericAlias(self, parameters) + + +# 3.10+ +if hasattr(typing, 'Concatenate'): + Concatenate = typing.Concatenate + _ConcatenateGenericAlias = typing._ConcatenateGenericAlias # noqa +# 3.9 +elif sys.version_info[:2] >= (3, 9): + @_TypeAliasForm + def Concatenate(self, parameters): + """Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a + higher order function which adds, removes or transforms parameters of a + callable. + + For example:: + + Callable[Concatenate[int, P], int] + + See PEP 612 for detailed information. + """ + return _concatenate_getitem(self, parameters) +# 3.7-8 +else: + class _ConcatenateForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' 
+ self._name + + def __getitem__(self, parameters): + return _concatenate_getitem(self, parameters) + + Concatenate = _ConcatenateForm( + 'Concatenate', + doc="""Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a + higher order function which adds, removes or transforms parameters of a + callable. + + For example:: + + Callable[Concatenate[int, P], int] + + See PEP 612 for detailed information. + """) + +# 3.10+ +if hasattr(typing, 'TypeGuard'): + TypeGuard = typing.TypeGuard +# 3.9 +elif sys.version_info[:2] >= (3, 9): + class _TypeGuardForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + @_TypeGuardForm + def TypeGuard(self, parameters): + """Special typing form used to annotate the return type of a user-defined + type guard function. ``TypeGuard`` only accepts a single type argument. + At runtime, functions marked this way should return a boolean. + + ``TypeGuard`` aims to benefit *type narrowing* -- a technique used by static + type checkers to determine a more precise type of an expression within a + program's code flow. Usually type narrowing is done by analyzing + conditional code flow and applying the narrowing to a block of code. The + conditional expression here is sometimes referred to as a "type guard". + + Sometimes it would be convenient to use a user-defined boolean function + as a type guard. Such a function should use ``TypeGuard[...]`` as its + return type to alert static type checkers to this intention. + + Using ``-> TypeGuard`` tells the static type checker that for a given + function: + + 1. The return value is a boolean. + 2. If the return value is ``True``, the type of its argument + is the type inside ``TypeGuard``. + + For example:: + + def is_str(val: Union[str, float]): + # "isinstance" type guard + if isinstance(val, str): + # Type of ``val`` is narrowed to ``str`` + ... + else: + # Else, type of ``val`` is narrowed to ``float``. + ... + + Strict type narrowing is not enforced -- ``TypeB`` need not be a narrower + form of ``TypeA`` (it can even be a wider form) and this may lead to + type-unsafe results. The main reason is to allow for things like + narrowing ``List[object]`` to ``List[str]`` even though the latter is not + a subtype of the former, since ``List`` is invariant. The responsibility of + writing type-safe type guards is left to the user. + + ``TypeGuard`` also works with type variables. For more information, see + PEP 647 (User-Defined Type Guards). + """ + item = typing._type_check(parameters, f'{self} accepts only a single type.') + return typing._GenericAlias(self, (item,)) +# 3.7-3.8 +else: + class _TypeGuardForm(typing._SpecialForm, _root=True): + + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + item = typing._type_check(parameters, + f'{self._name} accepts only a single type') + return typing._GenericAlias(self, (item,)) + + TypeGuard = _TypeGuardForm( + 'TypeGuard', + doc="""Special typing form used to annotate the return type of a user-defined + type guard function. ``TypeGuard`` only accepts a single type argument. + At runtime, functions marked this way should return a boolean. + + ``TypeGuard`` aims to benefit *type narrowing* -- a technique used by static + type checkers to determine a more precise type of an expression within a + program's code flow. Usually type narrowing is done by analyzing + conditional code flow and applying the narrowing to a block of code. 
The + conditional expression here is sometimes referred to as a "type guard". + + Sometimes it would be convenient to use a user-defined boolean function + as a type guard. Such a function should use ``TypeGuard[...]`` as its + return type to alert static type checkers to this intention. + + Using ``-> TypeGuard`` tells the static type checker that for a given + function: + + 1. The return value is a boolean. + 2. If the return value is ``True``, the type of its argument + is the type inside ``TypeGuard``. + + For example:: + + def is_str(val: Union[str, float]): + # "isinstance" type guard + if isinstance(val, str): + # Type of ``val`` is narrowed to ``str`` + ... + else: + # Else, type of ``val`` is narrowed to ``float``. + ... + + Strict type narrowing is not enforced -- ``TypeB`` need not be a narrower + form of ``TypeA`` (it can even be a wider form) and this may lead to + type-unsafe results. The main reason is to allow for things like + narrowing ``List[object]`` to ``List[str]`` even though the latter is not + a subtype of the former, since ``List`` is invariant. The responsibility of + writing type-safe type guards is left to the user. + + ``TypeGuard`` also works with type variables. For more information, see + PEP 647 (User-Defined Type Guards). + """) + + +# Vendored from cpython typing._SpecialFrom +class _SpecialForm(typing._Final, _root=True): + __slots__ = ('_name', '__doc__', '_getitem') + + def __init__(self, getitem): + self._getitem = getitem + self._name = getitem.__name__ + self.__doc__ = getitem.__doc__ + + def __getattr__(self, item): + if item in {'__name__', '__qualname__'}: + return self._name + + raise AttributeError(item) + + def __mro_entries__(self, bases): + raise TypeError(f"Cannot subclass {self!r}") + + def __repr__(self): + return f'typing_extensions.{self._name}' + + def __reduce__(self): + return self._name + + def __call__(self, *args, **kwds): + raise TypeError(f"Cannot instantiate {self!r}") + + def __or__(self, other): + return typing.Union[self, other] + + def __ror__(self, other): + return typing.Union[other, self] + + def __instancecheck__(self, obj): + raise TypeError(f"{self} cannot be used with isinstance()") + + def __subclasscheck__(self, cls): + raise TypeError(f"{self} cannot be used with issubclass()") + + @typing._tp_cache + def __getitem__(self, parameters): + return self._getitem(self, parameters) + + +if hasattr(typing, "LiteralString"): + LiteralString = typing.LiteralString +else: + @_SpecialForm + def LiteralString(self, params): + """Represents an arbitrary literal string. + + Example:: + + from typing_extensions import LiteralString + + def query(sql: LiteralString) -> ...: + ... + + query("SELECT * FROM table") # ok + query(f"SELECT * FROM {input()}") # not ok + + See PEP 675 for details. + + """ + raise TypeError(f"{self} is not subscriptable") + + +if hasattr(typing, "Self"): + Self = typing.Self +else: + @_SpecialForm + def Self(self, params): + """Used to spell the type of "self" in classes. + + Example:: + + from typing import Self + + class ReturnsSelf: + def parse(self, data: bytes) -> Self: + ... + return self + + """ + + raise TypeError(f"{self} is not subscriptable") + + +if hasattr(typing, "Never"): + Never = typing.Never +else: + @_SpecialForm + def Never(self, params): + """The bottom type, a type that has no members. 
+ + This can be used to define a function that should never be + called, or a function that never returns:: + + from typing_extensions import Never + + def never_call_me(arg: Never) -> None: + pass + + def int_or_str(arg: int | str) -> None: + never_call_me(arg) # type checker error + match arg: + case int(): + print("It's an int") + case str(): + print("It's a str") + case _: + never_call_me(arg) # ok, arg is of type Never + + """ + + raise TypeError(f"{self} is not subscriptable") + + +if hasattr(typing, 'Required'): + Required = typing.Required + NotRequired = typing.NotRequired +elif sys.version_info[:2] >= (3, 9): + class _ExtensionsSpecialForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + @_ExtensionsSpecialForm + def Required(self, parameters): + """A special typing construct to mark a key of a total=False TypedDict + as required. For example: + + class Movie(TypedDict, total=False): + title: Required[str] + year: int + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + + There is no runtime checking that a required key is actually provided + when instantiating a related TypedDict. + """ + item = typing._type_check(parameters, f'{self._name} accepts only a single type.') + return typing._GenericAlias(self, (item,)) + + @_ExtensionsSpecialForm + def NotRequired(self, parameters): + """A special typing construct to mark a key of a TypedDict as + potentially missing. For example: + + class Movie(TypedDict): + title: str + year: NotRequired[int] + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + """ + item = typing._type_check(parameters, f'{self._name} accepts only a single type.') + return typing._GenericAlias(self, (item,)) + +else: + class _RequiredForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + item = typing._type_check(parameters, + f'{self._name} accepts only a single type.') + return typing._GenericAlias(self, (item,)) + + Required = _RequiredForm( + 'Required', + doc="""A special typing construct to mark a key of a total=False TypedDict + as required. For example: + + class Movie(TypedDict, total=False): + title: Required[str] + year: int + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + + There is no runtime checking that a required key is actually provided + when instantiating a related TypedDict. + """) + NotRequired = _RequiredForm( + 'NotRequired', + doc="""A special typing construct to mark a key of a TypedDict as + potentially missing. For example: + + class Movie(TypedDict): + title: str + year: NotRequired[int] + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + """) + + +if hasattr(typing, "Unpack"): # 3.11+ + Unpack = typing.Unpack +elif sys.version_info[:2] >= (3, 9): + class _UnpackSpecialForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + class _UnpackAlias(typing._GenericAlias, _root=True): + __class__ = typing.TypeVar + + @_UnpackSpecialForm + def Unpack(self, parameters): + """A special typing construct to unpack a variadic type. For example: + + Shape = TypeVarTuple('Shape') + Batch = NewType('Batch', int) + + def add_batch_axis( + x: Array[Unpack[Shape]] + ) -> Array[Batch, Unpack[Shape]]: ... 
+ + """ + item = typing._type_check(parameters, f'{self._name} accepts only a single type.') + return _UnpackAlias(self, (item,)) + + def _is_unpack(obj): + return isinstance(obj, _UnpackAlias) + +else: + class _UnpackAlias(typing._GenericAlias, _root=True): + __class__ = typing.TypeVar + + class _UnpackForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + item = typing._type_check(parameters, + f'{self._name} accepts only a single type.') + return _UnpackAlias(self, (item,)) + + Unpack = _UnpackForm( + 'Unpack', + doc="""A special typing construct to unpack a variadic type. For example: + + Shape = TypeVarTuple('Shape') + Batch = NewType('Batch', int) + + def add_batch_axis( + x: Array[Unpack[Shape]] + ) -> Array[Batch, Unpack[Shape]]: ... + + """) + + def _is_unpack(obj): + return isinstance(obj, _UnpackAlias) + + +if hasattr(typing, "TypeVarTuple"): # 3.11+ + + # Add default Parameter - PEP 696 + class TypeVarTuple(typing.TypeVarTuple, _DefaultMixin, _root=True): + """Type variable tuple.""" + + def __init__(self, name, *, default=None): + super().__init__(name) + _DefaultMixin.__init__(self, default) + + # for pickling: + try: + def_mod = sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + def_mod = None + if def_mod != 'typing_extensions': + self.__module__ = def_mod + +else: + class TypeVarTuple(_DefaultMixin): + """Type variable tuple. + + Usage:: + + Ts = TypeVarTuple('Ts') + + In the same way that a normal type variable is a stand-in for a single + type such as ``int``, a type variable *tuple* is a stand-in for a *tuple* + type such as ``Tuple[int, str]``. + + Type variable tuples can be used in ``Generic`` declarations. + Consider the following example:: + + class Array(Generic[*Ts]): ... + + The ``Ts`` type variable tuple here behaves like ``tuple[T1, T2]``, + where ``T1`` and ``T2`` are type variables. To use these type variables + as type parameters of ``Array``, we must *unpack* the type variable tuple using + the star operator: ``*Ts``. The signature of ``Array`` then behaves + as if we had simply written ``class Array(Generic[T1, T2]): ...``. + In contrast to ``Generic[T1, T2]``, however, ``Generic[*Shape]`` allows + us to parameterise the class with an *arbitrary* number of type parameters. + + Type variable tuples can be used anywhere a normal ``TypeVar`` can. + This includes class definitions, as shown above, as well as function + signatures and variable annotations:: + + class Array(Generic[*Ts]): + + def __init__(self, shape: Tuple[*Ts]): + self._shape: Tuple[*Ts] = shape + + def get_shape(self) -> Tuple[*Ts]: + return self._shape + + shape = (Height(480), Width(640)) + x: Array[Height, Width] = Array(shape) + y = abs(x) # Inferred type is Array[Height, Width] + z = x + x # ... is Array[Height, Width] + x.get_shape() # ... is tuple[Height, Width] + + """ + + # Trick Generic __parameters__. 
+ __class__ = typing.TypeVar + + def __iter__(self): + yield self.__unpacked__ + + def __init__(self, name, *, default=None): + self.__name__ = name + _DefaultMixin.__init__(self, default) + + # for pickling: + try: + def_mod = sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + def_mod = None + if def_mod != 'typing_extensions': + self.__module__ = def_mod + + self.__unpacked__ = Unpack[self] + + def __repr__(self): + return self.__name__ + + def __hash__(self): + return object.__hash__(self) + + def __eq__(self, other): + return self is other + + def __reduce__(self): + return self.__name__ + + def __init_subclass__(self, *args, **kwds): + if '_root' not in kwds: + raise TypeError("Cannot subclass special typing classes") + + +if hasattr(typing, "reveal_type"): + reveal_type = typing.reveal_type +else: + def reveal_type(__obj: T) -> T: + """Reveal the inferred type of a variable. + + When a static type checker encounters a call to ``reveal_type()``, + it will emit the inferred type of the argument:: + + x: int = 1 + reveal_type(x) + + Running a static type checker (e.g., ``mypy``) on this example + will produce output similar to 'Revealed type is "builtins.int"'. + + At runtime, the function prints the runtime type of the + argument and returns it unchanged. + + """ + print(f"Runtime type is {type(__obj).__name__!r}", file=sys.stderr) + return __obj + + +if hasattr(typing, "assert_never"): + assert_never = typing.assert_never +else: + def assert_never(__arg: Never) -> Never: + """Assert to the type checker that a line of code is unreachable. + + Example:: + + def int_or_str(arg: int | str) -> None: + match arg: + case int(): + print("It's an int") + case str(): + print("It's a str") + case _: + assert_never(arg) + + If a type checker finds that a call to assert_never() is + reachable, it will emit an error. + + At runtime, this throws an exception when called. + + """ + raise AssertionError("Expected code to be unreachable") + + +if hasattr(typing, 'dataclass_transform'): + dataclass_transform = typing.dataclass_transform +else: + def dataclass_transform( + *, + eq_default: bool = True, + order_default: bool = False, + kw_only_default: bool = False, + field_specifiers: typing.Tuple[ + typing.Union[typing.Type[typing.Any], typing.Callable[..., typing.Any]], + ... + ] = (), + **kwargs: typing.Any, + ) -> typing.Callable[[T], T]: + """Decorator that marks a function, class, or metaclass as providing + dataclass-like behavior. + + Example: + + from typing_extensions import dataclass_transform + + _T = TypeVar("_T") + + # Used on a decorator function + @dataclass_transform() + def create_model(cls: type[_T]) -> type[_T]: + ... + return cls + + @create_model + class CustomerModel: + id: int + name: str + + # Used on a base class + @dataclass_transform() + class ModelBase: ... + + class CustomerModel(ModelBase): + id: int + name: str + + # Used on a metaclass + @dataclass_transform() + class ModelMeta(type): ... + + class ModelBase(metaclass=ModelMeta): ... + + class CustomerModel(ModelBase): + id: int + name: str + + Each of the ``CustomerModel`` classes defined in this example will now + behave similarly to a dataclass created with the ``@dataclasses.dataclass`` + decorator. For example, the type checker will synthesize an ``__init__`` + method. + + The arguments to this decorator can be used to customize this behavior: + - ``eq_default`` indicates whether the ``eq`` parameter is assumed to be + True or False if it is omitted by the caller. 
+ - ``order_default`` indicates whether the ``order`` parameter is + assumed to be True or False if it is omitted by the caller. + - ``kw_only_default`` indicates whether the ``kw_only`` parameter is + assumed to be True or False if it is omitted by the caller. + - ``field_specifiers`` specifies a static list of supported classes + or functions that describe fields, similar to ``dataclasses.field()``. + + At runtime, this decorator records its arguments in the + ``__dataclass_transform__`` attribute on the decorated object. + + See PEP 681 for details. + + """ + def decorator(cls_or_fn): + cls_or_fn.__dataclass_transform__ = { + "eq_default": eq_default, + "order_default": order_default, + "kw_only_default": kw_only_default, + "field_specifiers": field_specifiers, + "kwargs": kwargs, + } + return cls_or_fn + return decorator + + +if hasattr(typing, "override"): + override = typing.override +else: + _F = typing.TypeVar("_F", bound=typing.Callable[..., typing.Any]) + + def override(__arg: _F) -> _F: + """Indicate that a method is intended to override a method in a base class. + + Usage: + + class Base: + def method(self) -> None: ... + pass + + class Child(Base): + @override + def method(self) -> None: + super().method() + + When this decorator is applied to a method, the type checker will + validate that it overrides a method with the same name on a base class. + This helps prevent bugs that may occur when a base class is changed + without an equivalent change to a child class. + + See PEP 698 for details. + + """ + return __arg + + +# We have to do some monkey patching to deal with the dual nature of +# Unpack/TypeVarTuple: +# - We want Unpack to be a kind of TypeVar so it gets accepted in +# Generic[Unpack[Ts]] +# - We want it to *not* be treated as a TypeVar for the purposes of +# counting generic parameters, so that when we subscript a generic, +# the runtime doesn't try to substitute the Unpack with the subscripted type. +if not hasattr(typing, "TypeVarTuple"): + typing._collect_type_vars = _collect_type_vars + typing._check_generic = _check_generic + + +# Backport typing.NamedTuple as it exists in Python 3.11. +# In 3.11, the ability to define generic `NamedTuple`s was supported. +# This was explicitly disallowed in 3.9-3.10, and only half-worked in <=3.8. 
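+# Editorial sketch, not part of the vendored module: the generic NamedTuple
+# use the comment above refers to, which the backport below makes work on
+# Python < 3.11 (assuming this file is importable as `typing_extensions`):
+#
+#     from typing import Generic, TypeVar
+#     from typing_extensions import NamedTuple
+#
+#     T = TypeVar("T")
+#
+#     class Pair(NamedTuple, Generic[T]):
+#         first: T
+#         second: T
+#
+#     Pair[int]         # subscriptable because __class_getitem__ is grafted on
+#     Pair(1, 2).first  # plain namedtuple behaviour is unchanged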
+if sys.version_info >= (3, 11): + NamedTuple = typing.NamedTuple +else: + def _caller(): + try: + return sys._getframe(2).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): # For platforms without _getframe() + return None + + def _make_nmtuple(name, types, module, defaults=()): + fields = [n for n, t in types] + annotations = {n: typing._type_check(t, f"field {n} annotation must be a type") + for n, t in types} + nm_tpl = collections.namedtuple(name, fields, + defaults=defaults, module=module) + nm_tpl.__annotations__ = nm_tpl.__new__.__annotations__ = annotations + # The `_field_types` attribute was removed in 3.9; + # in earlier versions, it is the same as the `__annotations__` attribute + if sys.version_info < (3, 9): + nm_tpl._field_types = annotations + return nm_tpl + + _prohibited_namedtuple_fields = typing._prohibited + _special_namedtuple_fields = frozenset({'__module__', '__name__', '__annotations__'}) + + class _NamedTupleMeta(type): + def __new__(cls, typename, bases, ns): + assert _NamedTuple in bases + for base in bases: + if base is not _NamedTuple and base is not typing.Generic: + raise TypeError( + 'can only inherit from a NamedTuple type and Generic') + bases = tuple(tuple if base is _NamedTuple else base for base in bases) + types = ns.get('__annotations__', {}) + default_names = [] + for field_name in types: + if field_name in ns: + default_names.append(field_name) + elif default_names: + raise TypeError(f"Non-default namedtuple field {field_name} " + f"cannot follow default field" + f"{'s' if len(default_names) > 1 else ''} " + f"{', '.join(default_names)}") + nm_tpl = _make_nmtuple( + typename, types.items(), + defaults=[ns[n] for n in default_names], + module=ns['__module__'] + ) + nm_tpl.__bases__ = bases + if typing.Generic in bases: + class_getitem = typing.Generic.__class_getitem__.__func__ + nm_tpl.__class_getitem__ = classmethod(class_getitem) + # update from user namespace without overriding special namedtuple attributes + for key in ns: + if key in _prohibited_namedtuple_fields: + raise AttributeError("Cannot overwrite NamedTuple attribute " + key) + elif key not in _special_namedtuple_fields and key not in nm_tpl._fields: + setattr(nm_tpl, key, ns[key]) + if typing.Generic in bases: + nm_tpl.__init_subclass__() + return nm_tpl + + def NamedTuple(__typename, __fields=None, **kwargs): + if __fields is None: + __fields = kwargs.items() + elif kwargs: + raise TypeError("Either list of fields or keywords" + " can be provided to NamedTuple, not both") + return _make_nmtuple(__typename, __fields, module=_caller()) + + NamedTuple.__doc__ = typing.NamedTuple.__doc__ + _NamedTuple = type.__new__(_NamedTupleMeta, 'NamedTuple', (), {}) + + # On 3.8+, alter the signature so that it matches typing.NamedTuple. + # The signature of typing.NamedTuple on >=3.8 is invalid syntax in Python 3.7, + # so just leave the signature as it is on 3.7. 
+ if sys.version_info >= (3, 8): + NamedTuple.__text_signature__ = '(typename, fields=None, /, **kwargs)' + + def _namedtuple_mro_entries(bases): + assert NamedTuple in bases + return (_NamedTuple,) + + NamedTuple.__mro_entries__ = _namedtuple_mro_entries diff --git a/lib/pkg_resources/_vendor/vendored.txt b/lib/pkg_resources/_vendor/vendored.txt index 444ed25b..4cd4ab8c 100644 --- a/lib/pkg_resources/_vendor/vendored.txt +++ b/lib/pkg_resources/_vendor/vendored.txt @@ -1,3 +1,11 @@ -packaging==21.2 -pyparsing==2.2.1 -appdirs==1.4.3 +packaging==23.1 + +platformdirs==2.6.2 +# required for platformdirs on Python < 3.8 +typing_extensions==4.4.0 + +jaraco.text==3.7.0 +# required for jaraco.text on older Pythons +importlib_resources==5.10.2 +# required for importlib_resources on older Pythons +zipp==3.7.0 diff --git a/lib/pkg_resources/_vendor/zipp-3.7.0.dist-info/top_level.txt b/lib/pkg_resources/_vendor/zipp-3.7.0.dist-info/top_level.txt new file mode 100644 index 00000000..e82f676f --- /dev/null +++ b/lib/pkg_resources/_vendor/zipp-3.7.0.dist-info/top_level.txt @@ -0,0 +1 @@ +zipp diff --git a/lib/pkg_resources/_vendor/zipp.py b/lib/pkg_resources/_vendor/zipp.py new file mode 100644 index 00000000..26b723c1 --- /dev/null +++ b/lib/pkg_resources/_vendor/zipp.py @@ -0,0 +1,329 @@ +import io +import posixpath +import zipfile +import itertools +import contextlib +import sys +import pathlib + +if sys.version_info < (3, 7): + from collections import OrderedDict +else: + OrderedDict = dict + + +__all__ = ['Path'] + + +def _parents(path): + """ + Given a path with elements separated by + posixpath.sep, generate all parents of that path. + + >>> list(_parents('b/d')) + ['b'] + >>> list(_parents('/b/d/')) + ['/b'] + >>> list(_parents('b/d/f/')) + ['b/d', 'b'] + >>> list(_parents('b')) + [] + >>> list(_parents('')) + [] + """ + return itertools.islice(_ancestry(path), 1, None) + + +def _ancestry(path): + """ + Given a path with elements separated by + posixpath.sep, generate all elements of that path + + >>> list(_ancestry('b/d')) + ['b/d', 'b'] + >>> list(_ancestry('/b/d/')) + ['/b/d', '/b'] + >>> list(_ancestry('b/d/f/')) + ['b/d/f', 'b/d', 'b'] + >>> list(_ancestry('b')) + ['b'] + >>> list(_ancestry('')) + [] + """ + path = path.rstrip(posixpath.sep) + while path and path != posixpath.sep: + yield path + path, tail = posixpath.split(path) + + +_dedupe = OrderedDict.fromkeys +"""Deduplicate an iterable in original order""" + + +def _difference(minuend, subtrahend): + """ + Return items in minuend not in subtrahend, retaining order + with O(1) lookup. + """ + return itertools.filterfalse(set(subtrahend).__contains__, minuend) + + +class CompleteDirs(zipfile.ZipFile): + """ + A ZipFile subclass that ensures that implied directories + are always included in the namelist. + """ + + @staticmethod + def _implied_dirs(names): + parents = itertools.chain.from_iterable(map(_parents, names)) + as_dirs = (p + posixpath.sep for p in parents) + return _dedupe(_difference(as_dirs, names)) + + def namelist(self): + names = super(CompleteDirs, self).namelist() + return names + list(self._implied_dirs(names)) + + def _name_set(self): + return set(self.namelist()) + + def resolve_dir(self, name): + """ + If the name represents a directory, return that name + as a directory (with the trailing slash). 
+ """ + names = self._name_set() + dirname = name + '/' + dir_match = name not in names and dirname in names + return dirname if dir_match else name + + @classmethod + def make(cls, source): + """ + Given a source (filename or zipfile), return an + appropriate CompleteDirs subclass. + """ + if isinstance(source, CompleteDirs): + return source + + if not isinstance(source, zipfile.ZipFile): + return cls(_pathlib_compat(source)) + + # Only allow for FastLookup when supplied zipfile is read-only + if 'r' not in source.mode: + cls = CompleteDirs + + source.__class__ = cls + return source + + +class FastLookup(CompleteDirs): + """ + ZipFile subclass to ensure implicit + dirs exist and are resolved rapidly. + """ + + def namelist(self): + with contextlib.suppress(AttributeError): + return self.__names + self.__names = super(FastLookup, self).namelist() + return self.__names + + def _name_set(self): + with contextlib.suppress(AttributeError): + return self.__lookup + self.__lookup = super(FastLookup, self)._name_set() + return self.__lookup + + +def _pathlib_compat(path): + """ + For path-like objects, convert to a filename for compatibility + on Python 3.6.1 and earlier. + """ + try: + return path.__fspath__() + except AttributeError: + return str(path) + + +class Path: + """ + A pathlib-compatible interface for zip files. + + Consider a zip file with this structure:: + + . + ├── a.txt + └── b + ├── c.txt + └── d + └── e.txt + + >>> data = io.BytesIO() + >>> zf = zipfile.ZipFile(data, 'w') + >>> zf.writestr('a.txt', 'content of a') + >>> zf.writestr('b/c.txt', 'content of c') + >>> zf.writestr('b/d/e.txt', 'content of e') + >>> zf.filename = 'mem/abcde.zip' + + Path accepts the zipfile object itself or a filename + + >>> root = Path(zf) + + From there, several path operations are available. + + Directory iteration (including the zip file itself): + + >>> a, b = root.iterdir() + >>> a + Path('mem/abcde.zip', 'a.txt') + >>> b + Path('mem/abcde.zip', 'b/') + + name property: + + >>> b.name + 'b' + + join with divide operator: + + >>> c = b / 'c.txt' + >>> c + Path('mem/abcde.zip', 'b/c.txt') + >>> c.name + 'c.txt' + + Read text: + + >>> c.read_text() + 'content of c' + + existence: + + >>> c.exists() + True + >>> (b / 'missing.txt').exists() + False + + Coercion to string: + + >>> import os + >>> str(c).replace(os.sep, posixpath.sep) + 'mem/abcde.zip/b/c.txt' + + At the root, ``name``, ``filename``, and ``parent`` + resolve to the zipfile. Note these attributes are not + valid and will raise a ``ValueError`` if the zipfile + has no filename. + + >>> root.name + 'abcde.zip' + >>> str(root.filename).replace(os.sep, posixpath.sep) + 'mem/abcde.zip' + >>> str(root.parent) + 'mem' + """ + + __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" + + def __init__(self, root, at=""): + """ + Construct a Path from a ZipFile or filename. + + Note: When the source is an existing ZipFile object, + its type (__class__) will be mutated to a + specialized type. If the caller wishes to retain the + original type, the caller should either create a + separate ZipFile object or pass a filename. + """ + self.root = FastLookup.make(root) + self.at = at + + def open(self, mode='r', *args, pwd=None, **kwargs): + """ + Open this entry as text or binary following the semantics + of ``pathlib.Path.open()`` by passing arguments through + to io.TextIOWrapper(). 
+ """ + if self.is_dir(): + raise IsADirectoryError(self) + zip_mode = mode[0] + if not self.exists() and zip_mode == 'r': + raise FileNotFoundError(self) + stream = self.root.open(self.at, zip_mode, pwd=pwd) + if 'b' in mode: + if args or kwargs: + raise ValueError("encoding args invalid for binary operation") + return stream + return io.TextIOWrapper(stream, *args, **kwargs) + + @property + def name(self): + return pathlib.Path(self.at).name or self.filename.name + + @property + def suffix(self): + return pathlib.Path(self.at).suffix or self.filename.suffix + + @property + def suffixes(self): + return pathlib.Path(self.at).suffixes or self.filename.suffixes + + @property + def stem(self): + return pathlib.Path(self.at).stem or self.filename.stem + + @property + def filename(self): + return pathlib.Path(self.root.filename).joinpath(self.at) + + def read_text(self, *args, **kwargs): + with self.open('r', *args, **kwargs) as strm: + return strm.read() + + def read_bytes(self): + with self.open('rb') as strm: + return strm.read() + + def _is_child(self, path): + return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") + + def _next(self, at): + return self.__class__(self.root, at) + + def is_dir(self): + return not self.at or self.at.endswith("/") + + def is_file(self): + return self.exists() and not self.is_dir() + + def exists(self): + return self.at in self.root._name_set() + + def iterdir(self): + if not self.is_dir(): + raise ValueError("Can't listdir a file") + subs = map(self._next, self.root.namelist()) + return filter(self._is_child, subs) + + def __str__(self): + return posixpath.join(self.root.filename, self.at) + + def __repr__(self): + return self.__repr.format(self=self) + + def joinpath(self, *other): + next = posixpath.join(self.at, *map(_pathlib_compat, other)) + return self._next(self.root.resolve_dir(next)) + + __truediv__ = joinpath + + @property + def parent(self): + if not self.at: + return self.filename.parent + parent_at = posixpath.dirname(self.at.rstrip('/')) + if parent_at: + parent_at += '/' + return self._next(parent_at) diff --git a/lib/pkg_resources/api_tests.txt b/lib/pkg_resources/api_tests.txt deleted file mode 100644 index ded18800..00000000 --- a/lib/pkg_resources/api_tests.txt +++ /dev/null @@ -1,401 +0,0 @@ -Pluggable Distributions of Python Software -========================================== - -Distributions -------------- - -A "Distribution" is a collection of files that represent a "Release" of a -"Project" as of a particular point in time, denoted by a -"Version":: - - >>> import sys, pkg_resources - >>> from pkg_resources import Distribution - >>> Distribution(project_name="Foo", version="1.2") - Foo 1.2 - -Distributions have a location, which can be a filename, URL, or really anything -else you care to use:: - - >>> dist = Distribution( - ... location="http://example.com/something", - ... project_name="Bar", version="0.9" - ... 
) - - >>> dist - Bar 0.9 (http://example.com/something) - - -Distributions have various introspectable attributes:: - - >>> dist.location - 'http://example.com/something' - - >>> dist.project_name - 'Bar' - - >>> dist.version - '0.9' - - >>> dist.py_version == '{}.{}'.format(*sys.version_info) - True - - >>> print(dist.platform) - None - -Including various computed attributes:: - - >>> from pkg_resources import parse_version - >>> dist.parsed_version == parse_version(dist.version) - True - - >>> dist.key # case-insensitive form of the project name - 'bar' - -Distributions are compared (and hashed) by version first:: - - >>> Distribution(version='1.0') == Distribution(version='1.0') - True - >>> Distribution(version='1.0') == Distribution(version='1.1') - False - >>> Distribution(version='1.0') < Distribution(version='1.1') - True - -but also by project name (case-insensitive), platform, Python version, -location, etc.:: - - >>> Distribution(project_name="Foo",version="1.0") == \ - ... Distribution(project_name="Foo",version="1.0") - True - - >>> Distribution(project_name="Foo",version="1.0") == \ - ... Distribution(project_name="foo",version="1.0") - True - - >>> Distribution(project_name="Foo",version="1.0") == \ - ... Distribution(project_name="Foo",version="1.1") - False - - >>> Distribution(project_name="Foo",py_version="2.3",version="1.0") == \ - ... Distribution(project_name="Foo",py_version="2.4",version="1.0") - False - - >>> Distribution(location="spam",version="1.0") == \ - ... Distribution(location="spam",version="1.0") - True - - >>> Distribution(location="spam",version="1.0") == \ - ... Distribution(location="baz",version="1.0") - False - - - -Hash and compare distribution by prio/plat - -Get version from metadata -provider capabilities -egg_name() -as_requirement() -from_location, from_filename (w/path normalization) - -Releases may have zero or more "Requirements", which indicate -what releases of another project the release requires in order to -function. A Requirement names the other project, expresses some criteria -as to what releases of that project are acceptable, and lists any "Extras" -that the requiring release may need from that project. (An Extra is an -optional feature of a Release, that can only be used if its additional -Requirements are satisfied.) - - - -The Working Set ---------------- - -A collection of active distributions is called a Working Set. Note that a -Working Set can contain any importable distribution, not just pluggable ones. -For example, the Python standard library is an importable distribution that -will usually be part of the Working Set, even though it is not pluggable. -Similarly, when you are doing development work on a project, the files you are -editing are also a Distribution. (And, with a little attention to the -directory names used, and including some additional metadata, such a -"development distribution" can be made pluggable as well.) - - >>> from pkg_resources import WorkingSet - -A working set's entries are the sys.path entries that correspond to the active -distributions. 
By default, the working set's entries are the items on -``sys.path``:: - - >>> ws = WorkingSet() - >>> ws.entries == sys.path - True - -But you can also create an empty working set explicitly, and add distributions -to it:: - - >>> ws = WorkingSet([]) - >>> ws.add(dist) - >>> ws.entries - ['http://example.com/something'] - >>> dist in ws - True - >>> Distribution('foo',version="") in ws - False - -And you can iterate over its distributions:: - - >>> list(ws) - [Bar 0.9 (http://example.com/something)] - -Adding the same distribution more than once is a no-op:: - - >>> ws.add(dist) - >>> list(ws) - [Bar 0.9 (http://example.com/something)] - -For that matter, adding multiple distributions for the same project also does -nothing, because a working set can only hold one active distribution per -project -- the first one added to it:: - - >>> ws.add( - ... Distribution( - ... 'http://example.com/something', project_name="Bar", - ... version="7.2" - ... ) - ... ) - >>> list(ws) - [Bar 0.9 (http://example.com/something)] - -You can append a path entry to a working set using ``add_entry()``:: - - >>> ws.entries - ['http://example.com/something'] - >>> ws.add_entry(pkg_resources.__file__) - >>> ws.entries - ['http://example.com/something', '...pkg_resources...'] - -Multiple additions result in multiple entries, even if the entry is already in -the working set (because ``sys.path`` can contain the same entry more than -once):: - - >>> ws.add_entry(pkg_resources.__file__) - >>> ws.entries - ['...example.com...', '...pkg_resources...', '...pkg_resources...'] - -And you can specify the path entry a distribution was found under, using the -optional second parameter to ``add()``:: - - >>> ws = WorkingSet([]) - >>> ws.add(dist,"foo") - >>> ws.entries - ['foo'] - -But even if a distribution is found under multiple path entries, it still only -shows up once when iterating the working set: - - >>> ws.add_entry(ws.entries[0]) - >>> list(ws) - [Bar 0.9 (http://example.com/something)] - -You can ask a WorkingSet to ``find()`` a distribution matching a requirement:: - - >>> from pkg_resources import Requirement - >>> print(ws.find(Requirement.parse("Foo==1.0"))) # no match, return None - None - - >>> ws.find(Requirement.parse("Bar==0.9")) # match, return distribution - Bar 0.9 (http://example.com/something) - -Note that asking for a conflicting version of a distribution already in a -working set triggers a ``pkg_resources.VersionConflict`` error: - - >>> try: - ... ws.find(Requirement.parse("Bar==1.0")) - ... except pkg_resources.VersionConflict as exc: - ... print(str(exc)) - ... else: - ... raise AssertionError("VersionConflict was not raised") - (Bar 0.9 (http://example.com/something), Requirement.parse('Bar==1.0')) - -You can subscribe a callback function to receive notifications whenever a new -distribution is added to a working set. 
The callback is immediately invoked -once for each existing distribution in the working set, and then is called -again for new distributions added thereafter:: - - >>> def added(dist): print("Added %s" % dist) - >>> ws.subscribe(added) - Added Bar 0.9 - >>> foo12 = Distribution(project_name="Foo", version="1.2", location="f12") - >>> ws.add(foo12) - Added Foo 1.2 - -Note, however, that only the first distribution added for a given project name -will trigger a callback, even during the initial ``subscribe()`` callback:: - - >>> foo14 = Distribution(project_name="Foo", version="1.4", location="f14") - >>> ws.add(foo14) # no callback, because Foo 1.2 is already active - - >>> ws = WorkingSet([]) - >>> ws.add(foo12) - >>> ws.add(foo14) - >>> ws.subscribe(added) - Added Foo 1.2 - -And adding a callback more than once has no effect, either:: - - >>> ws.subscribe(added) # no callbacks - - # and no double-callbacks on subsequent additions, either - >>> just_a_test = Distribution(project_name="JustATest", version="0.99") - >>> ws.add(just_a_test) - Added JustATest 0.99 - - -Finding Plugins ---------------- - -``WorkingSet`` objects can be used to figure out what plugins in an -``Environment`` can be loaded without any resolution errors:: - - >>> from pkg_resources import Environment - - >>> plugins = Environment([]) # normally, a list of plugin directories - >>> plugins.add(foo12) - >>> plugins.add(foo14) - >>> plugins.add(just_a_test) - -In the simplest case, we just get the newest version of each distribution in -the plugin environment:: - - >>> ws = WorkingSet([]) - >>> ws.find_plugins(plugins) - ([JustATest 0.99, Foo 1.4 (f14)], {}) - -But if there's a problem with a version conflict or missing requirements, the -method falls back to older versions, and the error info dict will contain an -exception instance for each unloadable plugin:: - - >>> ws.add(foo12) # this will conflict with Foo 1.4 - >>> ws.find_plugins(plugins) - ([JustATest 0.99, Foo 1.2 (f12)], {Foo 1.4 (f14): VersionConflict(...)}) - -But if you disallow fallbacks, the failed plugin will be skipped instead of -trying older versions:: - - >>> ws.find_plugins(plugins, fallback=False) - ([JustATest 0.99], {Foo 1.4 (f14): VersionConflict(...)}) - - - -Platform Compatibility Rules ----------------------------- - -On the Mac, there are potential compatibility issues for modules compiled -on newer versions of macOS than what the user is running. Additionally, -macOS will soon have two platforms to contend with: Intel and PowerPC. - -Basic equality works as on other platforms:: - - >>> from pkg_resources import compatible_platforms as cp - >>> reqd = 'macosx-10.4-ppc' - >>> cp(reqd, reqd) - True - >>> cp("win32", reqd) - False - -Distributions made on other machine types are not compatible:: - - >>> cp("macosx-10.4-i386", reqd) - False - -Distributions made on earlier versions of the OS are compatible, as -long as they are from the same top-level version. 
The patchlevel version -number does not matter:: - - >>> cp("macosx-10.4-ppc", reqd) - True - >>> cp("macosx-10.3-ppc", reqd) - True - >>> cp("macosx-10.5-ppc", reqd) - False - >>> cp("macosx-9.5-ppc", reqd) - False - -Backwards compatibility for packages made via earlier versions of -setuptools is provided as well:: - - >>> cp("darwin-8.2.0-Power_Macintosh", reqd) - True - >>> cp("darwin-7.2.0-Power_Macintosh", reqd) - True - >>> cp("darwin-8.2.0-Power_Macintosh", "macosx-10.3-ppc") - False - - -Environment Markers -------------------- - - >>> from pkg_resources import invalid_marker as im, evaluate_marker as em - >>> import os - - >>> print(im("sys_platform")) - Invalid marker: 'sys_platform', parse error at '' - - >>> print(im("sys_platform==")) - Invalid marker: 'sys_platform==', parse error at '' - - >>> print(im("sys_platform=='win32'")) - False - - >>> print(im("sys=='x'")) - Invalid marker: "sys=='x'", parse error at "sys=='x'" - - >>> print(im("(extra)")) - Invalid marker: '(extra)', parse error at ')' - - >>> print(im("(extra")) - Invalid marker: '(extra', parse error at '' - - >>> print(im("os.open('foo')=='y'")) - Invalid marker: "os.open('foo')=='y'", parse error at 'os.open(' - - >>> print(im("'x'=='y' and os.open('foo')=='y'")) # no short-circuit! - Invalid marker: "'x'=='y' and os.open('foo')=='y'", parse error at 'and os.o' - - >>> print(im("'x'=='x' or os.open('foo')=='y'")) # no short-circuit! - Invalid marker: "'x'=='x' or os.open('foo')=='y'", parse error at 'or os.op' - - >>> print(im("'x' < 'y' < 'z'")) - Invalid marker: "'x' < 'y' < 'z'", parse error at "< 'z'" - - >>> print(im("r'x'=='x'")) - Invalid marker: "r'x'=='x'", parse error at "r'x'=='x" - - >>> print(im("'''x'''=='x'")) - Invalid marker: "'''x'''=='x'", parse error at "'x'''=='" - - >>> print(im('"""x"""=="x"')) - Invalid marker: '"""x"""=="x"', parse error at '"x"""=="' - - >>> print(im(r"x\n=='x'")) - Invalid marker: "x\\n=='x'", parse error at "x\\n=='x'" - - >>> print(im("os.open=='y'")) - Invalid marker: "os.open=='y'", parse error at 'os.open=' - - >>> em("sys_platform=='win32'") == (sys.platform=='win32') - True - - >>> em("python_version >= '2.7'") - True - - >>> em("python_version > '2.6'") - True - - >>> im("implementation_name=='cpython'") - False - - >>> im("platform_python_implementation=='CPython'") - False - - >>> im("implementation_version=='3.5.1'") - False diff --git a/lib/pkg_resources/extern/__init__.py b/lib/pkg_resources/extern/__init__.py index fed59295..948bcc60 100644 --- a/lib/pkg_resources/extern/__init__.py +++ b/lib/pkg_resources/extern/__init__.py @@ -58,7 +58,8 @@ class VendorImporter: """Return a module spec for vendored names.""" return ( importlib.util.spec_from_loader(fullname, self) - if self._module_matches_namespace(fullname) else None + if self._module_matches_namespace(fullname) + else None ) def install(self): @@ -69,5 +70,11 @@ class VendorImporter: sys.meta_path.append(self) -names = 'packaging', 'pyparsing', 'appdirs' +names = ( + 'packaging', + 'platformdirs', + 'jaraco', + 'importlib_resources', + 'more_itertools', +) VendorImporter(__name__, names).install() diff --git a/lib/pygazelle/torrent.py b/lib/pygazelle/torrent.py index 52ba13c7..a30fa7ea 100644 --- a/lib/pygazelle/torrent.py +++ b/lib/pygazelle/torrent.py @@ -65,7 +65,7 @@ class Torrent(object): self.snatched = torrent_json_response['torrent']['snatched'] self.time = torrent_json_response['torrent']['time'] self.description = torrent_json_response['torrent']['description'] - self.file_list = [ 
re.match("(.+){{{(\d+)}}}", item).groups() + self.file_list = [ re.match(r"(.+){{{(\d+)}}}", item).groups() for item in torrent_json_response['torrent']['fileList'].split("|||") ] # tuple ( filename, filesize ) self.file_path = torrent_json_response['torrent']['filePath'] self.user = self.parent_api.get_user(torrent_json_response['torrent']['userId']) @@ -122,7 +122,7 @@ class Torrent(object): self.free_torrent = group_torrent_json_response['freeTorrent'] self.time = group_torrent_json_response['time'] self.description = group_torrent_json_response['description'] - self.file_list = [ re.match("(.+){{{(\d+)}}}", item).groups() + self.file_list = [ re.match(r"(.+){{{(\d+)}}}", item).groups() for item in group_torrent_json_response['fileList'].split("|||") ] # tuple ( filename, filesize ) self.file_path = group_torrent_json_response['filePath'] self.user = self.parent_api.get_user(group_torrent_json_response['userId']) diff --git a/lib/slskd_api/__init__.py b/lib/slskd_api/__init__.py new file mode 100644 index 00000000..49671a41 --- /dev/null +++ b/lib/slskd_api/__init__.py @@ -0,0 +1,18 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .client import SlskdClient, MetricsApi + +__all__ = ('SlskdClient', 'MetricsApi') \ No newline at end of file diff --git a/lib/slskd_api/apis/__init__.py b/lib/slskd_api/apis/__init__.py new file mode 100644 index 00000000..5c749412 --- /dev/null +++ b/lib/slskd_api/apis/__init__.py @@ -0,0 +1,44 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
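+# Editorial sketch, not part of the vendored package: typical client usage as
+# documented by upstream slskd-api (the constructor arguments are an
+# assumption based on that project's README; the host and key below are
+# placeholders):
+#
+#     import slskd_api
+#
+#     slskd = slskd_api.SlskdClient('http://localhost:5030', api_key='<key>')
+#     slskd.application.version()   # ApplicationApi, defined in this package
+#     slskd.application.state()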
+ +from .application import ApplicationApi +from .conversations import ConversationsApi +from .logs import LogsApi +from .options import OptionsApi +from .public_chat import PublicChatApi +from .relay import RelayApi +from .rooms import RoomsApi +from .searches import SearchesApi +from .server import ServerApi +from .session import SessionApi +from .shares import SharesApi +from .transfers import TransfersApi +from .users import UsersApi + +__all__ = ( + 'ApplicationApi', + 'ConversationsApi', + 'LogsApi', + 'OptionsApi', + 'PublicChatApi', + 'RelayApi', + 'RoomsApi', + 'SearchesApi', + 'ServerApi', + 'SessionApi', + 'SharesApi', + 'TransfersApi', + 'UsersApi' +) diff --git a/lib/slskd_api/apis/application.py b/lib/slskd_api/apis/application.py new file mode 100644 index 00000000..aa79f4a8 --- /dev/null +++ b/lib/slskd_api/apis/application.py @@ -0,0 +1,91 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .base import * + +class ApplicationApi(BaseApi): + """ + This class contains the methods to interact with the Application API. + """ + + def state(self) -> dict: + """ + Gets the current state of the application. + """ + url = self.api_url + '/application' + response = self.session.get(url) + return response.json() + + + def stop(self) -> bool: + """ + Stops the application. Only works with token (usr/pwd login). 'Unauthorized' with API-Key. + + :return: True if successful. + """ + url = self.api_url + '/application' + response = self.session.delete(url) + return response.ok + + + def restart(self) -> bool: + """ + Restarts the application. Only works with token (usr/pwd login). 'Unauthorized' with API-Key. + + :return: True if successful. + """ + url = self.api_url + '/application' + response = self.session.put(url) + return response.ok + + + def version(self) -> str: + """ + Gets the current application version. + """ + url = self.api_url + '/application/version' + response = self.session.get(url) + return response.json() + + + def check_updates(self, forceCheck: bool = False) -> dict: + """ + Checks for updates. + """ + url = self.api_url + '/application/version/latest' + params = dict( + forceCheck=forceCheck + ) + response = self.session.get(url, params=params) + return response.json() + + + def gc(self) -> bool: + """ + Forces garbage collection. + + :return: True if successful. 
+ """ + url = self.api_url + '/application/gc' + response = self.session.post(url) + return response.ok + + +# Not supposed to be part of the external API +# More info in the Github discussion: https://github.com/slskd/slskd/discussions/910 + # def dump(self): + # url = self.api_url + '/application/dump' + # response = self.session.get(url) + # return response.json() \ No newline at end of file diff --git a/lib/slskd_api/apis/base.py b/lib/slskd_api/apis/base.py new file mode 100644 index 00000000..7a2ace31 --- /dev/null +++ b/lib/slskd_api/apis/base.py @@ -0,0 +1,26 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import requests +from urllib.parse import quote + +class BaseApi: + """ + Base class where api-url and headers are set for all requests. + """ + + def __init__(self, api_url: str, session: requests.Session): + self.api_url = api_url + self.session = session \ No newline at end of file diff --git a/lib/slskd_api/apis/conversations.py b/lib/slskd_api/apis/conversations.py new file mode 100644 index 00000000..43557352 --- /dev/null +++ b/lib/slskd_api/apis/conversations.py @@ -0,0 +1,103 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .base import * + +class ConversationsApi(BaseApi): + """ + This class contains the methods to interact with the Conversations API. + """ + + def acknowledge(self, username: str, id: int) -> bool: + """ + Acknowledges the given message id for the given username. + + :return: True if successful. + """ + url = self.api_url + f'/conversations/{quote(username)}/{id}' + response = self.session.put(url) + return response.ok + + + def acknowledge_all(self, username: str) -> bool: + """ + Acknowledges all messages from the given username. + + :return: True if successful. + """ + url = self.api_url + f'/conversations/{quote(username)}' + response = self.session.put(url) + return response.ok + + + def delete(self, username: str) -> bool: + """ + Closes the conversation associated with the given username. + + :return: True if successful. + """ + url = self.api_url + f'/conversations/{quote(username)}' + response = self.session.delete(url) + return response.ok + + + def get(self, username: str, includeMessages: bool = True) -> dict: + """ + Gets the conversation associated with the specified username. 
+ """ + url = self.api_url + f'/conversations/{quote(username)}' + params = dict( + includeMessages=includeMessages + ) + response = self.session.get(url, params=params) + return response.json() + + + def send(self, username: str, message: str) -> bool: + """ + Sends a private message to the specified username. + + :return: True if successful. + """ + url = self.api_url + f'/conversations/{quote(username)}' + response = self.session.post(url, json=message) + return response.ok + + + def get_all(self, includeInactive: bool = False, unAcknowledgedOnly : bool = False) -> list: + """ + Gets all active conversations. + """ + url = self.api_url + '/conversations' + params = dict( + includeInactive=includeInactive, + unAcknowledgedOnly=unAcknowledgedOnly + ) + response = self.session.get(url, params=params) + return response.json() + + + def get_messages(self, username: str, unAcknowledgedOnly : bool = False) -> list: + """ + Gets all messages associated with the specified username. + """ + url = self.api_url + f'/conversations/{quote(username)}/messages' + params = dict( + username=username, + unAcknowledgedOnly=unAcknowledgedOnly + ) + response = self.session.get(url, params=params) + return response.json() + \ No newline at end of file diff --git a/lib/slskd_api/apis/logs.py b/lib/slskd_api/apis/logs.py new file mode 100644 index 00000000..0248e3ab --- /dev/null +++ b/lib/slskd_api/apis/logs.py @@ -0,0 +1,29 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .base import * + +class LogsApi(BaseApi): + """ + This class contains the methods to interact with the Logs API. + """ + + def get(self) -> list: + """ + Gets the last few application logs. + """ + url = self.api_url + '/logs' + response = self.session.get(url) + return response.json() \ No newline at end of file diff --git a/lib/slskd_api/apis/options.py b/lib/slskd_api/apis/options.py new file mode 100644 index 00000000..30f655ad --- /dev/null +++ b/lib/slskd_api/apis/options.py @@ -0,0 +1,93 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .base import * + +class OptionsApi(BaseApi): + """ + This class contains the methods to interact with the Options API. + """ + + def get(self) -> dict: + """ + Gets the current application options. 
+ """ + url = self.api_url + '/options' + response = self.session.get(url) + return response.json() + + + def get_startup(self) -> dict: + """ + Gets the application options provided at startup. + """ + url = self.api_url + '/options/startup' + response = self.session.get(url) + return response.json() + + + def debug(self) -> str: + """ + Gets the debug view of the current application options. + debug and remote_configuration must be set to true. + Only works with token (usr/pwd login). 'Unauthorized' with API-Key. + """ + url = self.api_url + '/options/debug' + response = self.session.get(url) + return response.json() + + + def yaml_location(self) -> str: + """ + Gets the path of the yaml config file. remote_configuration must be set to true. + Only works with token (usr/pwd login). 'Unauthorized' with API-Key. + """ + url = self.api_url + '/options/yaml/location' + response = self.session.get(url) + return response.json() + + + def download_yaml(self) -> str: + """ + Gets the content of the yaml config file as text. remote_configuration must be set to true. + Only works with token (usr/pwd login). 'Unauthorized' with API-Key. + """ + url = self.api_url + '/options/yaml' + response = self.session.get(url) + return response.json() + + + def upload_yaml(self, yaml_content: str) -> bool: + """ + Sets the content of the yaml config file. remote_configuration must be set to true. + Only works with token (usr/pwd login). 'Unauthorized' with API-Key. + + :return: True if successful. + """ + url = self.api_url + '/options/yaml' + response = self.session.post(url, json=yaml_content) + return response.ok + + + def validate_yaml(self, yaml_content: str) -> str: + """ + Validates the provided yaml string. remote_configuration must be set to true. + Only works with token (usr/pwd login). 'Unauthorized' with API-Key. + + :return: Empty string if validation successful. Error message otherwise. + """ + url = self.api_url + '/options/yaml/validate' + response = self.session.post(url, json=yaml_content) + return response.text \ No newline at end of file diff --git a/lib/slskd_api/apis/public_chat.py b/lib/slskd_api/apis/public_chat.py new file mode 100644 index 00000000..0dc17ff3 --- /dev/null +++ b/lib/slskd_api/apis/public_chat.py @@ -0,0 +1,42 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .base import * + +class PublicChatApi(BaseApi): + """ + [UNTESTED] This class contains the methods to interact with the PublicChat API. + """ + + def start(self) -> bool: + """ + Starts public chat. + + :return: True if successful. + """ + url = self.api_url + '/publicchat' + response = self.session.post(url) + return response.ok + + + def stop(self) -> bool: + """ + Stops public chat. + + :return: True if successful. 
+ """ + url = self.api_url + '/publicchat' + response = self.session.delete(url) + return response.ok diff --git a/lib/slskd_api/apis/relay.py b/lib/slskd_api/apis/relay.py new file mode 100644 index 00000000..bd006f2a --- /dev/null +++ b/lib/slskd_api/apis/relay.py @@ -0,0 +1,75 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .base import * + +class RelayApi(BaseApi): + """ + [UNTESTED] This class contains the methods to interact with the Relay API. + """ + + def connect(self) -> bool: + """ + Connects to the configured controller. + + :return: True if successful. + """ + url = self.api_url + '/relay/agent' + response = self.session.put(url) + return response.ok + + + def disconnect(self) -> bool: + """ + Disconnects from the connected controller. + + :return: True if successful. + """ + url = self.api_url + '/relay/agent' + response = self.session.delete(url) + return response.ok + + + def download_file(self, token: str) -> bool: + """ + Downloads a file from the connected controller. + + :return: True if successful. + """ + url = self.api_url + f'/relay/controller/downloads/{token}' + response = self.session.get(url) + return response.ok + + + def upload_file(self, token: str) -> bool: + """ + Uploads a file from the connected controller. + + :return: True if successful. + """ + url = self.api_url + f'/relay/controller/files/{token}' + response = self.session.post(url) + return response.ok + + + def upload_share_info(self, token: str) -> bool: + """ + Uploads share information to the connected controller. + + :return: True if successful. + """ + url = self.api_url + f'/relay/controller/shares/{token}' + response = self.session.post(url) + return response.ok \ No newline at end of file diff --git a/lib/slskd_api/apis/rooms.py b/lib/slskd_api/apis/rooms.py new file mode 100644 index 00000000..98fb1933 --- /dev/null +++ b/lib/slskd_api/apis/rooms.py @@ -0,0 +1,124 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .base import * + +class RoomsApi(BaseApi): + """ + This class contains the methods to interact with the Rooms API. + """ + + def get_all_joined(self) -> list: + """ + Gets all joined rooms. + + :return: Names of the joined rooms. 
+ """ + url = self.api_url + '/rooms/joined' + response = self.session.get(url) + return response.json() + + + def join(self, roomName: str) -> dict: + """ + Joins a room. + + :return: room info: name, isPrivate, users, messages + """ + url = self.api_url + '/rooms/joined' + response = self.session.post(url, json=roomName) + return response.json() + + + def get_joined(self, roomName: str) -> dict: + """ + Gets the specified room. + + :return: room info: name, isPrivate, users, messages + """ + url = self.api_url + f'/rooms/joined/{quote(roomName)}' + response = self.session.get(url) + return response.json() + + + def leave(self, roomName: str) -> bool: + """ + Leaves a room. + + :return: True if successful. + """ + url = self.api_url + f'/rooms/joined/{quote(roomName)}' + response = self.session.delete(url) + return response.ok + + + def send(self, roomName: str, message: str) -> bool: + """ + Sends a message to the specified room. + + :return: True if successful. + """ + url = self.api_url + f'/rooms/joined/{quote(roomName)}/messages' + response = self.session.post(url, json=message) + return response.ok + + + def get_messages(self, roomName: str) -> list: + """ + Gets the current list of messages for the specified room. + """ + url = self.api_url + f'/rooms/joined/{quote(roomName)}/messages' + response = self.session.get(url) + return response.json() + + + def set_ticker(self, roomName: str, ticker: str) -> bool: + """ + Sets a ticker for the specified room. + + :return: True if successful. + """ + url = self.api_url + f'/rooms/joined/{quote(roomName)}/ticker' + response = self.session.post(url, json=ticker) + return response.ok + + + def add_member(self, roomName: str, username: str) -> bool: + """ + Adds a member to a private room. + + :return: True if successful. + """ + url = self.api_url + f'/rooms/joined/{quote(roomName)}/members' + response = self.session.post(url, json=username) + return response.ok + + + def get_users(self, roomName: str) -> list: + """ + Gets the current list of users for the specified joined room. + """ + url = self.api_url + f'/rooms/joined/{quote(roomName)}/users' + response = self.session.get(url) + return response.json() + + + def get_all(self) -> list: + """ + Gets a list of rooms from the server. + """ + url = self.api_url + '/rooms/available' + response = self.session.get(url) + return response.json() \ No newline at end of file diff --git a/lib/slskd_api/apis/searches.py b/lib/slskd_api/apis/searches.py new file mode 100644 index 00000000..1f521c72 --- /dev/null +++ b/lib/slskd_api/apis/searches.py @@ -0,0 +1,126 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .base import * +import uuid +from typing import Optional + +class SearchesApi(BaseApi): + """ + Class that handles operations on searches. 
+    """
+
+    def search_text(self,
+                    searchText: str,
+                    id: Optional[str] = None,
+                    fileLimit: int = 10000,
+                    filterResponses: bool = True,
+                    maximumPeerQueueLength: int = 1000000,
+                    minimumPeerUploadSpeed: int = 0,
+                    minimumResponseFileCount: int = 1,
+                    responseLimit: int = 100,
+                    searchTimeout: int = 15000
+        ) -> dict:
+        """
+        Performs a search for the specified request.
+
+        :param searchText: Search query
+        :param id: uuid of the search. One will be generated if None.
+        :param fileLimit: Max number of file results
+        :param filterResponses: Filter unreachable users from the results
+        :param maximumPeerQueueLength: Max queue length
+        :param minimumPeerUploadSpeed: Min upload speed in bit/s
+        :param minimumResponseFileCount: Min number of matching files per user
+        :param responseLimit: Max number of user results
+        :param searchTimeout: Search timeout in ms
+        :return: Info about the search (no results!)
+        """
+
+        url = self.api_url + '/searches'
+
+        try:
+            id = str(uuid.UUID(id))  # check if given id is a valid uuid
+        except (ValueError, TypeError):
+            id = str(uuid.uuid1())  # otherwise generate a new one
+
+        data = {
+            "id": id,
+            "fileLimit": fileLimit,
+            "filterResponses": filterResponses,
+            "maximumPeerQueueLength": maximumPeerQueueLength,
+            "minimumPeerUploadSpeed": minimumPeerUploadSpeed,
+            "minimumResponseFileCount": minimumResponseFileCount,
+            "responseLimit": responseLimit,
+            "searchText": searchText,
+            "searchTimeout": searchTimeout,
+        }
+        response = self.session.post(url, json=data)
+        return response.json()
+
+
+    def get_all(self) -> list:
+        """
+        Gets the list of active and completed searches.
+        """
+        url = self.api_url + '/searches'
+        response = self.session.get(url)
+        return response.json()
+
+
+    def state(self, id: str, includeResponses: bool = False) -> dict:
+        """
+        Gets the state of the search corresponding to the specified id.
+
+        :param id: uuid of the search.
+        :param includeResponses: Include responses (search result list) in the returned dict
+        :return: Info about the search
+        """
+        url = self.api_url + f'/searches/{id}'
+        params = dict(
+            includeResponses=includeResponses
+        )
+        response = self.session.get(url, params=params)
+        return response.json()
+
+
+    def stop(self, id: str) -> bool:
+        """
+        Stops the search corresponding to the specified id.
+
+        :return: True if successful.
+        """
+        url = self.api_url + f'/searches/{id}'
+        response = self.session.put(url)
+        return response.ok
+
+
+    def delete(self, id: str) -> bool:
+        """
+        Deletes the search corresponding to the specified id.
+
+        :return: True if successful.
+        """
+        url = self.api_url + f'/searches/{id}'
+        response = self.session.delete(url)
+        return response.ok
+
+
+    def search_responses(self, id: str) -> list:
+        """
+        Gets search responses corresponding to the specified id.
+        """
+        url = self.api_url + f'/searches/{id}/responses'
+        response = self.session.get(url)
+        return response.json()
\ No newline at end of file
diff --git a/lib/slskd_api/apis/server.py b/lib/slskd_api/apis/server.py
new file mode 100644
index 00000000..5365dde1
--- /dev/null
+++ b/lib/slskd_api/apis/server.py
@@ -0,0 +1,51 @@
+# Copyright (C) 2023 bigoulours
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
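An editor's note on the SearchesApi just added: search_text returns only the search record, never results, so callers poll state() and then fetch search_responses(). A rough sketch, assuming a client instance named slskd (constructed as shown in the SlskdClient docstring later in this patch); the response field names used here (isComplete, username, files) follow slskd's API and are an assumption, not something this wrapper guarantees:

    import time

    search = slskd.searches.search_text("artist album")     # kicks off the search
    sid = search['id']
    while not slskd.searches.state(sid).get('isComplete'):  # poll until the search finishes
        time.sleep(1)
    for peer in slskd.searches.search_responses(sid):       # one entry per responding user
        print(peer['username'], len(peer['files']))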
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .base import * + +class ServerApi(BaseApi): + """ + This class contains the methods to interact with the Server API. + """ + + def connect(self) -> bool: + """ + Connects the client. + + :return: True if successful. + """ + url = self.api_url + '/server' + response = self.session.put(url) + return response.ok + + + def disconnect(self) -> bool: + """ + Disconnects the client. + + :return: True if successful. + """ + url = self.api_url + '/server' + response = self.session.delete(url, json='') + return response.ok + + + def state(self) -> dict: + """ + Retrieves the current state of the server. + """ + url = self.api_url + '/server' + response = self.session.get(url) + return response.json() \ No newline at end of file diff --git a/lib/slskd_api/apis/session.py b/lib/slskd_api/apis/session.py new file mode 100644 index 00000000..76e28ed4 --- /dev/null +++ b/lib/slskd_api/apis/session.py @@ -0,0 +1,53 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .base import * + +class SessionApi(BaseApi): + """ + This class contains the methods to interact with the Session API. + """ + + def auth_valid(self) -> bool: + """ + Checks whether the provided authentication is valid. + """ + url = self.api_url + '/session' + response = self.session.get(url) + return response.ok + + + def login(self, username: str, password: str) -> dict: + """ + Logs in. + + :return: Session info for the given user incl. token. + """ + url = self.api_url + '/session' + data = { + 'username': username, + 'password': password + } + response = self.session.post(url, json=data) + return response.json() + + + def security_enabled(self) -> bool: + """ + Checks whether security is enabled. + """ + url = self.api_url + '/session/enabled' + response = self.session.get(url) + return response.json() \ No newline at end of file diff --git a/lib/slskd_api/apis/shares.py b/lib/slskd_api/apis/shares.py new file mode 100644 index 00000000..2e92642d --- /dev/null +++ b/lib/slskd_api/apis/shares.py @@ -0,0 +1,78 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .base import * + +class SharesApi(BaseApi): + """ + This class contains the methods to interact with the Shares API. + """ + + def get_all(self) -> dict: + """ + Gets the current list of shares. + """ + url = self.api_url + '/shares' + response = self.session.get(url) + return response.json() + + + def start_scan(self) -> bool: + """ + Initiates a scan of the configured shares. + + :return: True if successful. + """ + url = self.api_url + '/shares' + response = self.session.put(url) + return response.ok + + + def cancel_scan(self) -> bool: + """ + Cancels a share scan, if one is running. + + :return: True if successful. + """ + url = self.api_url + '/shares' + response = self.session.delete(url) + return response.ok + + + def get(self, id: str) -> dict: + """ + Gets the share associated with the specified id. + """ + url = self.api_url + f'/shares/{id}' + response = self.session.get(url) + return response.json() + + + def all_contents(self) -> list: + """ + Returns a list of all shared directories and files. + """ + url = self.api_url + '/shares/contents' + response = self.session.get(url) + return response.json() + + + def contents(self, id: str) -> list: + """ + Gets the contents of the share associated with the specified id. + """ + url = self.api_url + f'/shares/{id}/contents' + response = self.session.get(url) + return response.json() \ No newline at end of file diff --git a/lib/slskd_api/apis/transfers.py b/lib/slskd_api/apis/transfers.py new file mode 100644 index 00000000..6bbae544 --- /dev/null +++ b/lib/slskd_api/apis/transfers.py @@ -0,0 +1,157 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .base import * +from typing import Union + +class TransfersApi(BaseApi): + """ + This class contains the methods to interact with the Transfers API. + """ + + def cancel_download(self, username: str, id:str, remove: bool = False) -> bool: + """ + Cancels the specified download. + + :return: True if successful. + """ + url = self.api_url + f'/transfers/downloads/{quote(username)}/{id}' + params = dict( + remove=remove + ) + response = self.session.delete(url, params=params) + return response.ok + + + def get_download(self, username: str, id: str) -> dict: + """ + Gets the specified download. + """ + url = self.api_url + f'/transfers/downloads/{quote(username)}/{id}' + response = self.session.get(url) + return response.json() + + + def remove_completed_downloads(self) -> bool: + """ + Removes all completed downloads, regardless of whether they failed or succeeded. + + :return: True if successful. 
+ """ + url = self.api_url + '/transfers/downloads/all/completed' + response = self.session.delete(url) + return response.ok + + + def cancel_upload(self, username: str, id: str, remove: bool = False) -> bool: + """ + Cancels the specified upload. + + :return: True if successful. + """ + url = self.api_url + f'/transfers/uploads/{quote(username)}/{id}' + params = dict( + remove=remove + ) + response = self.session.delete(url, params=params) + return response.ok + + + def get_upload(self, username: str, id: str) -> dict: + """ + Gets the specified upload. + """ + url = self.api_url + f'/transfers/uploads/{quote(username)}/{id}' + response = self.session.get(url) + return response.json() + + + def remove_completed_uploads(self) -> bool: + """ + Removes all completed uploads, regardless of whether they failed or succeeded. + + :return: True if successful. + """ + url = self.api_url + '/transfers/uploads/all/completed' + response = self.session.delete(url) + return response.ok + + + def enqueue(self, username: str, files: list) -> bool: + """ + Enqueues the specified download. + + :param username: User to download from. + :param files: A list of dictionaries in the same form as what's returned + by :py:func:`~slskd_api.apis.SearchesApi.search_responses`: + [{'filename': , 'size': }...] + :return: True if successful. + """ + url = self.api_url + f'/transfers/downloads/{quote(username)}' + response = self.session.post(url, json=files) + return response.ok + + + def get_downloads(self, username: str) -> dict: + """ + Gets all downloads for the specified username. + """ + url = self.api_url + f'/transfers/downloads/{quote(username)}' + response = self.session.get(url) + return response.json() + + + def get_all_downloads(self, includeRemoved: bool = False) -> list: + """ + Gets all downloads. + """ + url = self.api_url + '/transfers/downloads/' + params = dict( + includeRemoved=includeRemoved + ) + response = self.session.get(url, params=params) + return response.json() + + + def get_queue_position(self, username: str, id: str) -> Union[int,str]: + """ + Gets the download for the specified username matching the specified filename, and requests the current place in the remote queue of the specified download. + + :return: Queue position or error message + """ + url = self.api_url + f'/transfers/downloads/{quote(username)}/{id}/position' + response = self.session.get(url) + return response.json() + + + def get_all_uploads(self, includeRemoved: bool = False) -> list: + """ + Gets all uploads. + """ + url = self.api_url + '/transfers/uploads/' + params = dict( + includeRemoved=includeRemoved + ) + response = self.session.get(url, params=params) + return response.json() + + + def get_uploads(self, username: str) -> dict: + """ + Gets all uploads for the specified username. + """ + url = self.api_url + f'/transfers/uploads/{quote(username)}' + response = self.session.get(url) + return response.json() \ No newline at end of file diff --git a/lib/slskd_api/apis/users.py b/lib/slskd_api/apis/users.py new file mode 100644 index 00000000..82a1b9bf --- /dev/null +++ b/lib/slskd_api/apis/users.py @@ -0,0 +1,79 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
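A companion note to the search sketch above: TransfersApi.enqueue takes exactly the file dictionaries that SearchesApi.search_responses returns, so turning a search hit into a download is mostly a reshaping step. Continuing with the hypothetical peer dict from that sketch:

    # Keep only the fields the enqueue docstring documents: filename and size.
    wanted = [{'filename': f['filename'], 'size': f['size']}
              for f in peer['files']]
    if slskd.transfers.enqueue(peer['username'], wanted):
        queued = slskd.transfers.get_downloads(peer['username'])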
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .base import * + +class UsersApi(BaseApi): + """ + This class contains the methods to interact with the Users API. + """ + + def address(self, username: str) -> dict: + """ + Retrieves the address of the specified username. + """ + url = self.api_url + f'/users/{quote(username)}/endpoint' + response = self.session.get(url) + return response.json() + + + def browse(self, username: str) -> dict: + """ + Retrieves the files shared by the specified username. + """ + url = self.api_url + f'/users/{quote(username)}/browse' + response = self.session.get(url) + return response.json() + + + def browsing_status(self, username: str) -> dict: + """ + Retrieves the status of the current browse operation for the specified username, if any. + Will return error 404 if called after the browsing operation has ended. + Best called asynchronously while :py:func:`browse` is still running. + """ + url = self.api_url + f'/users/{quote(username)}/browse/status' + response = self.session.get(url) + return response.json() + + + def directory(self, username: str, directory: str) -> dict: + """ + Retrieves the files from the specified directory from the specified username. + """ + url = self.api_url + f'/users/{quote(username)}/directory' + data = { + "directory": directory + } + response = self.session.post(url, json=data) + return response.json() + + + def info(self, username: str) -> dict: + """ + Retrieves information about the specified username. + """ + url = self.api_url + f'/users/{quote(username)}/info' + response = self.session.get(url) + return response.json() + + + def status(self, username: str) -> dict: + """ + Retrieves status for the specified username. + """ + url = self.api_url + f'/users/{quote(username)}/status' + response = self.session.get(url) + return response.json() \ No newline at end of file diff --git a/lib/slskd_api/client.py b/lib/slskd_api/client.py new file mode 100644 index 00000000..961a6839 --- /dev/null +++ b/lib/slskd_api/client.py @@ -0,0 +1,123 @@ +# Copyright (C) 2023 bigoulours +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
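The client module below ties the endpoint classes together, and construction is the only place authentication happens: an API key, a username/password pair (exchanged for a bearer token through SessionApi.login), or a pre-obtained token. A short sketch; the host, key, and credentials are placeholders, and 5030 is only slskd's documented default port:

    import slskd_api

    slskd = slskd_api.SlskdClient('http://localhost:5030', api_key='my-key')
    # or: slskd_api.SlskdClient('http://localhost:5030', username='u', password='p')
    # or: slskd_api.SlskdClient('http://localhost:5030', token='<jwt>')

    print(slskd.application.version())      # server version string
    print(slskd.users.status('some_user'))  # presence info for a peer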
+ +API_VERSION = 'v0' + +import requests +from urllib.parse import urljoin +from functools import reduce +from base64 import b64encode +from slskd_api.apis import * + + +class HTTPAdapterTimeout(requests.adapters.HTTPAdapter): + def __init__(self, timeout=None, **kwargs): + super().__init__(**kwargs) + self.timeout = timeout + + def send(self, *args, **kwargs): + kwargs['timeout'] = self.timeout + return super().send(*args, **kwargs) + + +class SlskdClient: + """ + The main class that allows access to the different APIs of a slskd instance. + An API-Key with appropriate permissions (`readwrite` for most use cases) must be set in slskd config file. + Alternatively, provide your username and password. Requests error status raise corresponding error. + Usage:: + slskd = slskd_api.SlskdClient(host, api_key, url_base) + app_status = slskd.application.state() + """ + + def __init__(self, + host: str, + api_key: str = None, + url_base: str = '/', + username: str = None, + password: str = None, + token: str = None, + verify_ssl: bool = True, + timeout: float = None # requests timeout in seconds + ): + api_url = reduce(urljoin, [f'{host}/', f'{url_base}/', f'api/{API_VERSION}']) + + session = requests.Session() + session.adapters['http://'] = HTTPAdapterTimeout(timeout=timeout) + session.adapters['https://'] = HTTPAdapterTimeout(timeout=timeout) + session.hooks = {'response': lambda r, *args, **kwargs: r.raise_for_status()} + session.headers.update({'accept': '*/*'}) + session.verify = verify_ssl + + header = {} + + if api_key: + header['X-API-Key'] = api_key + elif username and password: + header['Authorization'] = 'Bearer ' + \ + SessionApi(api_url, session).login(username, password).get('token', '') + elif token: + header['Authorization'] = 'Bearer ' + token + else: + raise ValueError('Please provide an API-Key, a valid token or username/password.') + + session.headers.update(header) + + base_args = (api_url, session) + + self.application = ApplicationApi(*base_args) + self.conversations = ConversationsApi(*base_args) + self.logs = LogsApi(*base_args) + self.options = OptionsApi(*base_args) + self.public_chat = PublicChatApi(*base_args) + self.relay = RelayApi(*base_args) + self.rooms = RoomsApi(*base_args) + self.searches = SearchesApi(*base_args) + self.server = ServerApi(*base_args) + self.session = SessionApi(*base_args) + self.shares = SharesApi(*base_args) + self.transfers = TransfersApi(*base_args) + self.users = UsersApi(*base_args) + + +class MetricsApi: + """ + Getting the metrics works with a different endpoint. Default: :5030/metrics. + Metrics should be first activated in slskd config file. + User/pass is independent from the main application and default value (slskd:slskd) should be changed. + Usage:: + metrics_api = slskd_api.MetricsApi(host, metrics_usr='slskd', metrics_pwd='slskd') + metrics = metrics_api.get() + """ + + def __init__(self, + host: str, + metrics_usr: str = 'slskd', + metrics_pwd: str = 'slskd', + metrics_url_base: str = '/metrics' + ): + self.metrics_url = urljoin(host, metrics_url_base) + basic_auth = b64encode(bytes(f'{metrics_usr}:{metrics_pwd}', 'utf-8')) + self.header = { + 'accept': '*/*', + 'Authorization': f'Basic {basic_auth.decode()}' + } + + def get(self) -> str: + """ + Gets the Prometheus metrics as text. 
+ """ + response = requests.get(self.metrics_url, headers=self.header) + return response.text diff --git a/lib/tzlocal/LICENSE.txt b/lib/tzlocal/LICENSE.txt deleted file mode 100644 index 0e259d42..00000000 --- a/lib/tzlocal/LICENSE.txt +++ /dev/null @@ -1,121 +0,0 @@ -Creative Commons Legal Code - -CC0 1.0 Universal - - CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE - LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN - ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS - INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES - REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS - PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM - THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED - HEREUNDER. - -Statement of Purpose - -The laws of most jurisdictions throughout the world automatically confer -exclusive Copyright and Related Rights (defined below) upon the creator -and subsequent owner(s) (each and all, an "owner") of an original work of -authorship and/or a database (each, a "Work"). - -Certain owners wish to permanently relinquish those rights to a Work for -the purpose of contributing to a commons of creative, cultural and -scientific works ("Commons") that the public can reliably and without fear -of later claims of infringement build upon, modify, incorporate in other -works, reuse and redistribute as freely as possible in any form whatsoever -and for any purposes, including without limitation commercial purposes. -These owners may contribute to the Commons to promote the ideal of a free -culture and the further production of creative, cultural and scientific -works, or to gain reputation or greater distribution for their Work in -part through the use and efforts of others. - -For these and/or other purposes and motivations, and without any -expectation of additional consideration or compensation, the person -associating CC0 with a Work (the "Affirmer"), to the extent that he or she -is an owner of Copyright and Related Rights in the Work, voluntarily -elects to apply CC0 to the Work and publicly distribute the Work under its -terms, with knowledge of his or her Copyright and Related Rights in the -Work and the meaning and intended legal effect of CC0 on those rights. - -1. Copyright and Related Rights. A Work made available under CC0 may be -protected by copyright and related or neighboring rights ("Copyright and -Related Rights"). Copyright and Related Rights include, but are not -limited to, the following: - - i. the right to reproduce, adapt, distribute, perform, display, - communicate, and translate a Work; - ii. moral rights retained by the original author(s) and/or performer(s); -iii. publicity and privacy rights pertaining to a person's image or - likeness depicted in a Work; - iv. rights protecting against unfair competition in regards to a Work, - subject to the limitations in paragraph 4(a), below; - v. rights protecting the extraction, dissemination, use and reuse of data - in a Work; - vi. database rights (such as those arising under Directive 96/9/EC of the - European Parliament and of the Council of 11 March 1996 on the legal - protection of databases, and under any national implementation - thereof, including any amended or successor version of such - directive); and -vii. other similar, equivalent or corresponding rights throughout the - world based on applicable law or treaty, and any national - implementations thereof. - -2. Waiver. 
To the greatest extent permitted by, but not in contravention -of, applicable law, Affirmer hereby overtly, fully, permanently, -irrevocably and unconditionally waives, abandons, and surrenders all of -Affirmer's Copyright and Related Rights and associated claims and causes -of action, whether now known or unknown (including existing as well as -future claims and causes of action), in the Work (i) in all territories -worldwide, (ii) for the maximum duration provided by applicable law or -treaty (including future time extensions), (iii) in any current or future -medium and for any number of copies, and (iv) for any purpose whatsoever, -including without limitation commercial, advertising or promotional -purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each -member of the public at large and to the detriment of Affirmer's heirs and -successors, fully intending that such Waiver shall not be subject to -revocation, rescission, cancellation, termination, or any other legal or -equitable action to disrupt the quiet enjoyment of the Work by the public -as contemplated by Affirmer's express Statement of Purpose. - -3. Public License Fallback. Should any part of the Waiver for any reason -be judged legally invalid or ineffective under applicable law, then the -Waiver shall be preserved to the maximum extent permitted taking into -account Affirmer's express Statement of Purpose. In addition, to the -extent the Waiver is so judged Affirmer hereby grants to each affected -person a royalty-free, non transferable, non sublicensable, non exclusive, -irrevocable and unconditional license to exercise Affirmer's Copyright and -Related Rights in the Work (i) in all territories worldwide, (ii) for the -maximum duration provided by applicable law or treaty (including future -time extensions), (iii) in any current or future medium and for any number -of copies, and (iv) for any purpose whatsoever, including without -limitation commercial, advertising or promotional purposes (the -"License"). The License shall be deemed effective as of the date CC0 was -applied by Affirmer to the Work. Should any part of the License for any -reason be judged legally invalid or ineffective under applicable law, such -partial invalidity or ineffectiveness shall not invalidate the remainder -of the License, and in such case Affirmer hereby affirms that he or she -will not (i) exercise any of his or her remaining Copyright and Related -Rights in the Work or (ii) assert any associated claims and causes of -action with respect to the Work, in either case contrary to Affirmer's -express Statement of Purpose. - -4. Limitations and Disclaimers. - - a. No trademark or patent rights held by Affirmer are waived, abandoned, - surrendered, licensed or otherwise affected by this document. - b. Affirmer offers the Work as-is and makes no representations or - warranties of any kind concerning the Work, express, implied, - statutory or otherwise, including without limitation warranties of - title, merchantability, fitness for a particular purpose, non - infringement, or the absence of latent or other defects, accuracy, or - the present or absence of errors, whether or not discoverable, all to - the greatest extent permissible under applicable law. - c. Affirmer disclaims responsibility for clearing rights of other persons - that may apply to the Work or any use thereof, including without - limitation any person's Copyright and Related Rights in the Work. 
- Further, Affirmer disclaims responsibility for obtaining any necessary - consents, permissions or other rights required for any use of the - Work. - d. Affirmer understands and acknowledges that Creative Commons is not a - party to this document and has no duty or obligation with respect to - this CC0 or use of the Work. diff --git a/lib/tzlocal/README.rst b/lib/tzlocal/README.rst deleted file mode 100644 index 09e9bfe8..00000000 --- a/lib/tzlocal/README.rst +++ /dev/null @@ -1,80 +0,0 @@ -tzlocal -======= - -This Python module returns a `tzinfo` object with the local timezone information under Unix and Win-32. -It requires `pytz`, and returns `pytz` `tzinfo` objects. - -This module attempts to fix a glaring hole in `pytz`, that there is no way to -get the local timezone information, unless you know the zoneinfo name, and -under several Linux distros that's hard or impossible to figure out. - -Also, with Windows different timezone system using pytz isn't of much use -unless you separately configure the zoneinfo timezone name. - -With `tzlocal` you only need to call `get_localzone()` and you will get a -`tzinfo` object with the local time zone info. On some Unices you will still -not get to know what the timezone name is, but you don't need that when you -have the tzinfo file. However, if the timezone name is readily available it -will be used. - - -Supported systems ------------------ - -These are the systems that are in theory supported: - - * Windows 2000 and later - - * Any unix-like system with a /etc/localtime or /usr/local/etc/localtime - -If you have one of the above systems and it does not work, it's a bug. -Please report it. - - -Usage ------ - -Load the local timezone: - - >>> from tzlocal import get_localzone - >>> tz = get_localzone() - >>> tz - - -Create a local datetime: - - >>> from datetime import datetime - >>> dt = tz.localize(datetime.now()) - >>> dt - datetime.datetime(2012, 9, 11, 14, 43, 42, 518871, tzinfo=) - -Lookup another timezone with `pytz`: - - >>> import pytz - >>> eastern = pytz.timezone('US/Eastern') - -Convert the datetime: - - >>> dt.astimezone(eastern) - datetime.datetime(2012, 9, 11, 8, 43, 42, 518871, tzinfo=) - - -Maintainer ----------- - -* Lennart Regebro, regebro@gmail.com - -Contributors ------------- - -* Marc Van Olmen -* Benjamen Meyer -* Manuel Ebert -* Xiaokun Zhu - -(Sorry if I forgot someone) - -License -------- - -* CC0 1.0 Universal http://creativecommons.org/publicdomain/zero/1.0/ diff --git a/lib/tzlocal/__init__.py b/lib/tzlocal/__init__.py index df7a66b9..8296a15d 100644 --- a/lib/tzlocal/__init__.py +++ b/lib/tzlocal/__init__.py @@ -1,7 +1,19 @@ import sys -if sys.platform == 'win32': - from tzlocal.win32 import get_localzone, reload_localzone -elif 'darwin' in sys.platform: - from tzlocal.darwin import get_localzone, reload_localzone + +if sys.platform == "win32": + from tzlocal.win32 import ( + get_localzone, + get_localzone_name, + reload_localzone, + ) else: - from tzlocal.unix import get_localzone, reload_localzone + from tzlocal.unix import get_localzone, get_localzone_name, reload_localzone + +from tzlocal.utils import assert_tz_offset + +__all__ = [ + "get_localzone", + "get_localzone_name", + "reload_localzone", + "assert_tz_offset", +] diff --git a/lib/tzlocal/darwin.py b/lib/tzlocal/darwin.py deleted file mode 100644 index a3a79eac..00000000 --- a/lib/tzlocal/darwin.py +++ /dev/null @@ -1,27 +0,0 @@ - -import os -import pytz - -_cache_tz = None - -def _get_localzone(): - tzname = os.popen("systemsetup 
-gettimezone").read().replace("Time Zone: ", "").strip() - if not tzname or tzname not in pytz.all_timezones_set: - # link will be something like /usr/share/zoneinfo/America/Los_Angeles. - link = os.readlink("/etc/localtime") - tzname = link[link.rfind("zoneinfo/") + 9:] - return pytz.timezone(tzname) - -def get_localzone(): - """Get the computers configured local timezone, if any.""" - global _cache_tz - if _cache_tz is None: - _cache_tz = _get_localzone() - return _cache_tz - -def reload_localzone(): - """Reload the cached localzone. You need to call this if the timezone has changed.""" - global _cache_tz - _cache_tz = _get_localzone() - return _cache_tz - diff --git a/lib/tzlocal/py.typed b/lib/tzlocal/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/lib/tzlocal/tests.py b/lib/tzlocal/tests.py deleted file mode 100644 index 49dd0aef..00000000 --- a/lib/tzlocal/tests.py +++ /dev/null @@ -1,64 +0,0 @@ -import sys -import os -from datetime import datetime -import unittest -import pytz -import tzlocal.unix - -class TzLocalTests(unittest.TestCase): - - def test_env(self): - tz_harare = tzlocal.unix._tz_from_env(':Africa/Harare') - self.assertEqual(tz_harare.zone, 'Africa/Harare') - - # Some Unices allow this as well, so we must allow it: - tz_harare = tzlocal.unix._tz_from_env('Africa/Harare') - self.assertEqual(tz_harare.zone, 'Africa/Harare') - - local_path = os.path.split(__file__)[0] - tz_local = tzlocal.unix._tz_from_env(':' + os.path.join(local_path, 'test_data', 'Harare')) - self.assertEqual(tz_local.zone, 'local') - # Make sure the local timezone is the same as the Harare one above. - # We test this with a past date, so that we don't run into future changes - # of the Harare timezone. - dt = datetime(2012, 1, 1, 5) - self.assertEqual(tz_harare.localize(dt), tz_local.localize(dt)) - - # Non-zoneinfo timezones are not supported in the TZ environment. 
- self.assertRaises(pytz.UnknownTimeZoneError, tzlocal.unix._tz_from_env, 'GMT+03:00') - - def test_timezone(self): - # Most versions of Ubuntu - local_path = os.path.split(__file__)[0] - tz = tzlocal.unix._get_localzone(_root=os.path.join(local_path, 'test_data', 'timezone')) - self.assertEqual(tz.zone, 'Africa/Harare') - - def test_zone_setting(self): - # A ZONE setting in /etc/sysconfig/clock, f ex CentOS - local_path = os.path.split(__file__)[0] - tz = tzlocal.unix._get_localzone(_root=os.path.join(local_path, 'test_data', 'zone_setting')) - self.assertEqual(tz.zone, 'Africa/Harare') - - def test_timezone_setting(self): - # A ZONE setting in /etc/conf.d/clock, f ex Gentoo - local_path = os.path.split(__file__)[0] - tz = tzlocal.unix._get_localzone(_root=os.path.join(local_path, 'test_data', 'timezone_setting')) - self.assertEqual(tz.zone, 'Africa/Harare') - - def test_only_localtime(self): - local_path = os.path.split(__file__)[0] - tz = tzlocal.unix._get_localzone(_root=os.path.join(local_path, 'test_data', 'localtime')) - self.assertEqual(tz.zone, 'local') - dt = datetime(2012, 1, 1, 5) - self.assertEqual(pytz.timezone('Africa/Harare').localize(dt), tz.localize(dt)) - -if sys.platform == 'win32': - - import tzlocal.win32 - class TzWin32Tests(unittest.TestCase): - - def test_win32(self): - tzlocal.win32.get_localzone() - -if __name__ == '__main__': - unittest.main() diff --git a/lib/tzlocal/unix.py b/lib/tzlocal/unix.py index c0d5eb46..117c18d3 100644 --- a/lib/tzlocal/unix.py +++ b/lib/tzlocal/unix.py @@ -1,115 +1,231 @@ - +import logging import os import re -import pytz +import sys +import warnings +from datetime import timezone + +from tzlocal import utils + +if sys.version_info >= (3, 9): + import zoneinfo # pragma: no cover +else: + from backports import zoneinfo # pragma: no cover _cache_tz = None +_cache_tz_name = None -def _tz_from_env(tzenv): - if tzenv[0] == ':': - tzenv = tzenv[1:] +log = logging.getLogger("tzlocal") - # TZ specifies a file - if os.path.exists(tzenv): - with open(tzenv, 'rb') as tzfile: - return pytz.tzfile.build_tzinfo('local', tzfile) - # TZ specifies a zoneinfo zone. - try: - tz = pytz.timezone(tzenv) - # That worked, so we return this: - return tz - except pytz.UnknownTimeZoneError: - raise pytz.UnknownTimeZoneError( - "tzlocal() does not support non-zoneinfo timezones like %s. \n" - "Please use a timezone in the form of Continent/City") - -def _get_localzone(_root='/'): +def _get_localzone_name(_root="/"): """Tries to find the local timezone configuration. - This method prefers finding the timezone name and passing that to pytz, - over passing in the localtime file, as in the later case the zoneinfo - name is unknown. + This method finds the timezone name, if it can, or it returns None. The parameter _root makes the function look for files like /etc/localtime beneath the _root directory. This is primarily used by the tests. In normal usage you call the function without parameters.""" - tzenv = os.environ.get('TZ') + # First try the ENV setting. + tzenv = utils._tz_name_from_env() if tzenv: + return tzenv + + # Are we under Termux on Android? 
+ if os.path.exists(os.path.join(_root, "system/bin/getprop")): + log.debug("This looks like Termux") + + import subprocess + try: - return _tz_from_env(tzenv) - except pytz.UnknownTimeZoneError: + androidtz = ( + subprocess.check_output(["getprop", "persist.sys.timezone"]) + .strip() + .decode() + ) + return androidtz + except (OSError, subprocess.CalledProcessError): + # proot environment or failed to getprop + log.debug("It's not termux?") pass # Now look for distribution specific configuration files # that contain the timezone name. - tzpath = os.path.join(_root, 'etc/timezone') - if os.path.exists(tzpath): - with open(tzpath, 'rb') as tzfile: - data = tzfile.read() - # Issue #3 was that /etc/timezone was a zoneinfo file. - # That's a misconfiguration, but we need to handle it gracefully: - if data[:5] != 'TZif2': - etctz = data.strip().decode() - # Get rid of host definitions and comments: - if ' ' in etctz: - etctz, dummy = etctz.split(' ', 1) - if '#' in etctz: - etctz, dummy = etctz.split('#', 1) - return pytz.timezone(etctz.replace(' ', '_')) + # Stick all of them in a dict, to compare later. + found_configs = {} + + for configfile in ("etc/timezone", "var/db/zoneinfo"): + tzpath = os.path.join(_root, configfile) + try: + with open(tzpath) as tzfile: + data = tzfile.read() + log.debug(f"{tzpath} found, contents:\n {data}") + + etctz = data.strip("/ \t\r\n") + if not etctz: + # Empty file, skip + continue + for etctz in etctz.splitlines(): + # Get rid of host definitions and comments: + if " " in etctz: + etctz, dummy = etctz.split(" ", 1) + if "#" in etctz: + etctz, dummy = etctz.split("#", 1) + if not etctz: + continue + + found_configs[tzpath] = etctz.replace(" ", "_") + + except (OSError, UnicodeDecodeError): + # File doesn't exist or is a directory, or it's a binary file. + continue # CentOS has a ZONE setting in /etc/sysconfig/clock, # OpenSUSE has a TIMEZONE setting in /etc/sysconfig/clock and # Gentoo has a TIMEZONE setting in /etc/conf.d/clock # We look through these files for a timezone: - zone_re = re.compile('\s*ZONE\s*=\s*\"') - timezone_re = re.compile('\s*TIMEZONE\s*=\s*\"') - end_re = re.compile('\"') + zone_re = re.compile(r"\s*ZONE\s*=\s*\"") + timezone_re = re.compile(r"\s*TIMEZONE\s*=\s*\"") + end_re = re.compile('"') - for filename in ('etc/sysconfig/clock', 'etc/conf.d/clock'): + for filename in ("etc/sysconfig/clock", "etc/conf.d/clock"): tzpath = os.path.join(_root, filename) - if not os.path.exists(tzpath): + try: + with open(tzpath, "rt") as tzfile: + data = tzfile.readlines() + log.debug(f"{tzpath} found, contents:\n {data}") + + for line in data: + # Look for the ZONE= setting. + match = zone_re.match(line) + if match is None: + # No ZONE= setting. Look for the TIMEZONE= setting. + match = timezone_re.match(line) + if match is not None: + # Some setting existed + line = line[match.end() :] + etctz = line[: end_re.search(line).start()] + + # We found a timezone + found_configs[tzpath] = etctz.replace(" ", "_") + + except (OSError, UnicodeDecodeError): + # UnicodeDecode handles when clock is symlink to /etc/localtime continue - with open(tzpath, 'rt') as tzfile: - data = tzfile.readlines() - for line in data: - # Look for the ZONE= setting. - match = zone_re.match(line) - if match is None: - # No ZONE= setting. Look for the TIMEZONE= setting. 
- match = timezone_re.match(line) - if match is not None: - # Some setting existed - line = line[match.end():] - etctz = line[:end_re.search(line).start()] + # systemd distributions use symlinks that include the zone name, + # see manpage of localtime(5) and timedatectl(1) + tzpath = os.path.join(_root, "etc/localtime") + if os.path.exists(tzpath) and os.path.islink(tzpath): + log.debug(f"{tzpath} found") + etctz = os.path.realpath(tzpath) + start = etctz.find("/") + 1 + while start != 0: + etctz = etctz[start:] + try: + zoneinfo.ZoneInfo(etctz) + tzinfo = f"{tzpath} is a symlink to" + found_configs[tzinfo] = etctz.replace(" ", "_") + # Only need first valid relative path in simlink. + break + except zoneinfo.ZoneInfoNotFoundError: + pass + start = etctz.find("/") + 1 - # We found a timezone - return pytz.timezone(etctz.replace(' ', '_')) + if len(found_configs) > 0: + log.debug(f"{len(found_configs)} found:\n {found_configs}") + # We found some explicit config of some sort! + if len(found_configs) > 1: + # Uh-oh, multiple configs. See if they match: + unique_tzs = set() + zoneinfopath = os.path.join(_root, "usr", "share", "zoneinfo") + directory_depth = len(zoneinfopath.split(os.path.sep)) - # No explicit setting existed. Use localtime - for filename in ('etc/localtime', 'usr/local/etc/localtime'): - tzpath = os.path.join(_root, filename) + for tzname in found_configs.values(): + # Look them up in /usr/share/zoneinfo, and find what they + # really point to: + path = os.path.realpath(os.path.join(zoneinfopath, *tzname.split("/"))) + real_zone_name = "/".join(path.split(os.path.sep)[directory_depth:]) + unique_tzs.add(real_zone_name) - if not os.path.exists(tzpath): - continue - with open(tzpath, 'rb') as tzfile: - return pytz.tzfile.build_tzinfo('local', tzfile) + if len(unique_tzs) != 1: + message = "Multiple conflicting time zone configurations found:\n" + for key, value in found_configs.items(): + message += f"{key}: {value}\n" + message += "Fix the configuration, or set the time zone in a TZ environment variable.\n" + raise zoneinfo.ZoneInfoNotFoundError(message) - raise pytz.UnknownTimeZoneError('Can not find any timezone configuration') + # We found exactly one config! Use it. + return list(found_configs.values())[0] -def get_localzone(): + +def _get_localzone(_root="/"): + """Creates a timezone object from the timezone name. + + If there is no timezone config, it will try to create a file from the + localtime timezone, and if there isn't one, it will default to UTC. + + The parameter _root makes the function look for files like /etc/localtime + beneath the _root directory. This is primarily used by the tests. + In normal usage you call the function without parameters.""" + + # First try the ENV setting. + tzenv = utils._tz_from_env() + if tzenv: + return tzenv + + tzname = _get_localzone_name(_root) + if tzname is None: + # No explicit setting existed. Use localtime + log.debug("No explicit setting existed. Use localtime") + for filename in ("etc/localtime", "usr/local/etc/localtime"): + tzpath = os.path.join(_root, filename) + + if not os.path.exists(tzpath): + continue + with open(tzpath, "rb") as tzfile: + tz = zoneinfo.ZoneInfo.from_file(tzfile, key="local") + break + else: + warnings.warn("Can not find any timezone configuration, defaulting to UTC.") + tz = timezone.utc + else: + tz = zoneinfo.ZoneInfo(tzname) + + if _root == "/": + # We are using a file in etc to name the timezone. 
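+        # A stale config may name a zone the system no longer uses.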
+ # Verify that the timezone specified there is actually used: + utils.assert_tz_offset(tz, error=False) + return tz + + +def get_localzone_name() -> str: + """Get the computers configured local timezone name, if any.""" + global _cache_tz_name + if _cache_tz_name is None: + _cache_tz_name = _get_localzone_name() + + return _cache_tz_name + + +def get_localzone() -> zoneinfo.ZoneInfo: """Get the computers configured local timezone, if any.""" + global _cache_tz if _cache_tz is None: _cache_tz = _get_localzone() + return _cache_tz -def reload_localzone(): + +def reload_localzone() -> zoneinfo.ZoneInfo: """Reload the cached localzone. You need to call this if the timezone has changed.""" + global _cache_tz_name global _cache_tz + _cache_tz_name = _get_localzone_name() _cache_tz = _get_localzone() + return _cache_tz diff --git a/lib/tzlocal/utils.py b/lib/tzlocal/utils.py new file mode 100644 index 00000000..3990535f --- /dev/null +++ b/lib/tzlocal/utils.py @@ -0,0 +1,112 @@ +import calendar +import datetime +import logging +import os +import time +import warnings + +try: + import zoneinfo # pragma: no cover +except ImportError: + from backports import zoneinfo # pragma: no cover + +from tzlocal import windows_tz + +log = logging.getLogger("tzlocal") + + +def get_tz_offset(tz): + """Get timezone's offset using built-in function datetime.utcoffset().""" + return int(datetime.datetime.now(tz).utcoffset().total_seconds()) + + +def assert_tz_offset(tz, error=True): + """Assert that system's timezone offset equals to the timezone offset found. + + If they don't match, we probably have a misconfiguration, for example, an + incorrect timezone set in /etc/timezone file in systemd distributions. + + If error is True, this method will raise a ValueError, otherwise it will + emit a warning. + """ + + tz_offset = get_tz_offset(tz) + system_offset = calendar.timegm(time.localtime()) - calendar.timegm(time.gmtime()) + # No one has timezone offsets less than a minute, so this should be close enough: + if abs(tz_offset - system_offset) > 60: + msg = ( + f"Timezone offset does not match system offset: {tz_offset} != {system_offset}. " + "Please, check your config files." + ) + if error: + raise ValueError(msg) + warnings.warn(msg) + + +def _tz_name_from_env(tzenv=None): + if tzenv is None: + tzenv = os.environ.get("TZ") + + if not tzenv: + return None + + log.debug(f"Found a TZ environment: {tzenv}") + + if tzenv[0] == ":": + tzenv = tzenv[1:] + + if tzenv in windows_tz.tz_win: + # Yup, it's a timezone + return tzenv + + if os.path.isabs(tzenv) and os.path.exists(tzenv): + # It's a file specification, expand it, if possible + parts = os.path.realpath(tzenv).split(os.sep) + + # Is it a zone info zone? + possible_tz = "/".join(parts[-2:]) + if possible_tz in windows_tz.tz_win: + # Yup, it is + return possible_tz + + # Maybe it's a short one, like UTC? 
+ if parts[-1] in windows_tz.tz_win: + # Indeed + return parts[-1] + + log.debug("TZ does not contain a time zone name") + return None + + +def _tz_from_env(tzenv=None): + if tzenv is None: + tzenv = os.environ.get("TZ") + + if not tzenv: + return None + + # Some weird format that exists: + if tzenv[0] == ":": + tzenv = tzenv[1:] + + # TZ specifies a file + if os.path.isabs(tzenv) and os.path.exists(tzenv): + # Try to see if we can figure out the name + tzname = _tz_name_from_env(tzenv) + if not tzname: + # Nope, not a standard timezone name, just take the filename + tzname = tzenv.split(os.sep)[-1] + with open(tzenv, "rb") as tzfile: + return zoneinfo.ZoneInfo.from_file(tzfile, key=tzname) + + # TZ must specify a zoneinfo zone. + try: + tz = zoneinfo.ZoneInfo(tzenv) + # That worked, so we return this: + return tz + except zoneinfo.ZoneInfoNotFoundError: + # Nope, it's something like "PST4DST" etc, we can't handle that. + raise zoneinfo.ZoneInfoNotFoundError( + f"tzlocal() does not support non-zoneinfo timezones like {tzenv}. \n" + "Please use a timezone in the form of Continent/City" + ) from None diff --git a/lib/tzlocal/win32.py b/lib/tzlocal/win32.py index e80c916a..2fa59fe6 100644 --- a/lib/tzlocal/win32.py +++ b/lib/tzlocal/win32.py @@ -1,72 +1,78 @@ +import logging +from datetime import datetime + try: - import winreg as winreg + import _winreg as winreg except ImportError: import winreg +try: + import zoneinfo # pragma: no cover +except ImportError: + from backports import zoneinfo # pragma: no cover + +from tzlocal import utils from tzlocal.windows_tz import win_tz -import pytz _cache_tz = None +_cache_tz_name = None + +log = logging.getLogger("tzlocal") + def valuestodict(key): """Convert a registry key's values to a dictionary.""" - dict = {} + result = {} size = winreg.QueryInfoKey(key)[1] for i in range(size): data = winreg.EnumValue(key, i) - dict[data[0]] = data[1] - return dict + result[data[0]] = data[1] + return result -def get_localzone_name(): + +def _get_dst_info(tz): + # Find the offset for when it doesn't have DST: + dst_offset = std_offset = None + has_dst = False + year = datetime.now().year + for dt in (datetime(year, 1, 1), datetime(year, 6, 1)): + if tz.dst(dt).total_seconds() == 0.0: + # OK, no DST during winter, get this offset + std_offset = tz.utcoffset(dt).total_seconds() + else: + has_dst = True + + return has_dst, std_offset, dst_offset + + +def _get_localzone_name(): # Windows is special. It has unique time zone names (in several # meanings of the word) available, but unfortunately, they can be # translated to the language of the operating system, so we need to # do a backwards lookup, by going through all time zones and see which # one matches. + tzenv = utils._tz_name_from_env() + if tzenv: + return tzenv + + log.debug("Looking up time zone info from registry") handle = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) TZLOCALKEYNAME = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation" localtz = winreg.OpenKey(handle, TZLOCALKEYNAME) keyvalues = valuestodict(localtz) localtz.Close() - if 'TimeZoneKeyName' in keyvalues: - # Windows 7 (and Vista?) + + if "TimeZoneKeyName" in keyvalues: + # Windows 7 and later # For some reason this returns a string with loads of NUL bytes at # least on some systems. I don't know if this is a bug somewhere, I # just work around it. 
- tzkeyname = keyvalues['TimeZoneKeyName'].split('\x00', 1)[0] + tzkeyname = keyvalues["TimeZoneKeyName"].split("\x00", 1)[0] else: - # Windows 2000 or XP - - # This is the localized name: - tzwin = keyvalues['StandardName'] - - # Open the list of timezones to look up the real name: - TZKEYNAME = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones" - tzkey = winreg.OpenKey(handle, TZKEYNAME) - - # Now, match this value to Time Zone information - tzkeyname = None - for i in range(winreg.QueryInfoKey(tzkey)[0]): - subkey = winreg.EnumKey(tzkey, i) - sub = winreg.OpenKey(tzkey, subkey) - data = valuestodict(sub) - sub.Close() - try: - if data['Std'] == tzwin: - tzkeyname = subkey - break - except KeyError: - # This timezone didn't have proper configuration. - # Ignore it. - pass - - tzkey.Close() - handle.Close() - - if tzkeyname is None: - raise LookupError('Can not find Windows timezone configuration') + # Don't support XP any longer + raise LookupError("Can not find Windows timezone configuration") timezone = win_tz.get(tzkeyname) if timezone is None: @@ -76,18 +82,66 @@ def get_localzone_name(): # Return what we have. if timezone is None: - raise pytz.UnknownTimeZoneError('Can not find timezone ' + tzkeyname) + raise zoneinfo.ZoneInfoNotFoundError(tzkeyname) + + if keyvalues.get("DynamicDaylightTimeDisabled", 0) == 1: + # DST is disabled, so don't return the timezone name, + # instead return Etc/GMT+offset + + tz = zoneinfo.ZoneInfo(timezone) + has_dst, std_offset, dst_offset = _get_dst_info(tz) + if not has_dst: + # The DST is turned off in the windows configuration, + # but this timezone doesn't have DST so it doesn't matter + return timezone + + if std_offset is None: + raise zoneinfo.ZoneInfoNotFoundError( + f"{tzkeyname} claims to not have a non-DST time!?" + ) + + if std_offset % 3600: + # I can't convert this to an hourly offset + raise zoneinfo.ZoneInfoNotFoundError( + f"tzlocal can't support disabling DST in the {timezone} zone." + ) + + # This has whole hours as offset, return it as Etc/GMT + return f"Etc/GMT{-std_offset//3600:+.0f}" return timezone -def get_localzone(): + +def get_localzone_name() -> str: + """Get the zoneinfo timezone name that matches the Windows-configured timezone.""" + global _cache_tz_name + if _cache_tz_name is None: + _cache_tz_name = _get_localzone_name() + + return _cache_tz_name + + +def get_localzone() -> zoneinfo.ZoneInfo: """Returns the zoneinfo-based tzinfo object that matches the Windows-configured timezone.""" + global _cache_tz if _cache_tz is None: - _cache_tz = pytz.timezone(get_localzone_name()) + _cache_tz = zoneinfo.ZoneInfo(get_localzone_name()) + + if not utils._tz_name_from_env(): + # If the timezone does NOT come from a TZ environment variable, + # verify that it's correct. If it's from the environment, + # we accept it, this is so you can run tests with different timezones. + utils.assert_tz_offset(_cache_tz, error=False) + return _cache_tz -def reload_localzone(): + +def reload_localzone() -> zoneinfo.ZoneInfo: """Reload the cached localzone. 
You need to call this if the timezone has changed.""" global _cache_tz - _cache_tz = pytz.timezone(get_localzone_name()) + global _cache_tz_name + _cache_tz_name = _get_localzone_name() + _cache_tz = zoneinfo.ZoneInfo(_cache_tz_name) + utils.assert_tz_offset(_cache_tz, error=False) + return _cache_tz diff --git a/lib/tzlocal/windows_tz.py b/lib/tzlocal/windows_tz.py index 0790bb48..3d475760 100644 --- a/lib/tzlocal/windows_tz.py +++ b/lib/tzlocal/windows_tz.py @@ -1,542 +1,736 @@ -# This file is autogenerated by the get_windows_info.py script +# This file is autogenerated by the update_windows_mapping.py script # Do not edit. -win_tz = {'AUS Central Standard Time': 'Australia/Darwin', - 'AUS Eastern Standard Time': 'Australia/Sydney', - 'Afghanistan Standard Time': 'Asia/Kabul', - 'Alaskan Standard Time': 'America/Anchorage', - 'Arab Standard Time': 'Asia/Riyadh', - 'Arabian Standard Time': 'Asia/Dubai', - 'Arabic Standard Time': 'Asia/Baghdad', - 'Argentina Standard Time': 'America/Buenos_Aires', - 'Atlantic Standard Time': 'America/Halifax', - 'Azerbaijan Standard Time': 'Asia/Baku', - 'Azores Standard Time': 'Atlantic/Azores', - 'Bahia Standard Time': 'America/Bahia', - 'Bangladesh Standard Time': 'Asia/Dhaka', - 'Belarus Standard Time': 'Europe/Minsk', - 'Canada Central Standard Time': 'America/Regina', - 'Cape Verde Standard Time': 'Atlantic/Cape_Verde', - 'Caucasus Standard Time': 'Asia/Yerevan', - 'Cen. Australia Standard Time': 'Australia/Adelaide', - 'Central America Standard Time': 'America/Guatemala', - 'Central Asia Standard Time': 'Asia/Almaty', - 'Central Brazilian Standard Time': 'America/Cuiaba', - 'Central Europe Standard Time': 'Europe/Budapest', - 'Central European Standard Time': 'Europe/Warsaw', - 'Central Pacific Standard Time': 'Pacific/Guadalcanal', - 'Central Standard Time': 'America/Chicago', - 'Central Standard Time (Mexico)': 'America/Mexico_City', - 'China Standard Time': 'Asia/Shanghai', - 'Dateline Standard Time': 'Etc/GMT+12', - 'E. Africa Standard Time': 'Africa/Nairobi', - 'E. Australia Standard Time': 'Australia/Brisbane', - 'E. South America Standard Time': 'America/Sao_Paulo', - 'Eastern Standard Time': 'America/New_York', - 'Egypt Standard Time': 'Africa/Cairo', - 'Ekaterinburg Standard Time': 'Asia/Yekaterinburg', - 'FLE Standard Time': 'Europe/Kiev', - 'Fiji Standard Time': 'Pacific/Fiji', - 'GMT Standard Time': 'Europe/London', - 'GTB Standard Time': 'Europe/Bucharest', - 'Georgian Standard Time': 'Asia/Tbilisi', - 'Greenland Standard Time': 'America/Godthab', - 'Greenwich Standard Time': 'Atlantic/Reykjavik', - 'Hawaiian Standard Time': 'Pacific/Honolulu', - 'India Standard Time': 'Asia/Calcutta', - 'Iran Standard Time': 'Asia/Tehran', - 'Israel Standard Time': 'Asia/Jerusalem', - 'Jordan Standard Time': 'Asia/Amman', - 'Kaliningrad Standard Time': 'Europe/Kaliningrad', - 'Korea Standard Time': 'Asia/Seoul', - 'Libya Standard Time': 'Africa/Tripoli', - 'Line Islands Standard Time': 'Pacific/Kiritimati', - 'Magadan Standard Time': 'Asia/Magadan', - 'Mauritius Standard Time': 'Indian/Mauritius', - 'Middle East Standard Time': 'Asia/Beirut', - 'Montevideo Standard Time': 'America/Montevideo', - 'Morocco Standard Time': 'Africa/Casablanca', - 'Mountain Standard Time': 'America/Denver', - 'Mountain Standard Time (Mexico)': 'America/Chihuahua', - 'Myanmar Standard Time': 'Asia/Rangoon', - 'N. 
Central Asia Standard Time': 'Asia/Novosibirsk', - 'Namibia Standard Time': 'Africa/Windhoek', - 'Nepal Standard Time': 'Asia/Katmandu', - 'New Zealand Standard Time': 'Pacific/Auckland', - 'Newfoundland Standard Time': 'America/St_Johns', - 'North Asia East Standard Time': 'Asia/Irkutsk', - 'North Asia Standard Time': 'Asia/Krasnoyarsk', - 'Pacific SA Standard Time': 'America/Santiago', - 'Pacific Standard Time': 'America/Los_Angeles', - 'Pacific Standard Time (Mexico)': 'America/Santa_Isabel', - 'Pakistan Standard Time': 'Asia/Karachi', - 'Paraguay Standard Time': 'America/Asuncion', - 'Romance Standard Time': 'Europe/Paris', - 'Russia Time Zone 10': 'Asia/Srednekolymsk', - 'Russia Time Zone 11': 'Asia/Kamchatka', - 'Russia Time Zone 3': 'Europe/Samara', - 'Russian Standard Time': 'Europe/Moscow', - 'SA Eastern Standard Time': 'America/Cayenne', - 'SA Pacific Standard Time': 'America/Bogota', - 'SA Western Standard Time': 'America/La_Paz', - 'SE Asia Standard Time': 'Asia/Bangkok', - 'Samoa Standard Time': 'Pacific/Apia', - 'Singapore Standard Time': 'Asia/Singapore', - 'South Africa Standard Time': 'Africa/Johannesburg', - 'Sri Lanka Standard Time': 'Asia/Colombo', - 'Syria Standard Time': 'Asia/Damascus', - 'Taipei Standard Time': 'Asia/Taipei', - 'Tasmania Standard Time': 'Australia/Hobart', - 'Tokyo Standard Time': 'Asia/Tokyo', - 'Tonga Standard Time': 'Pacific/Tongatapu', - 'Turkey Standard Time': 'Europe/Istanbul', - 'US Eastern Standard Time': 'America/Indianapolis', - 'US Mountain Standard Time': 'America/Phoenix', - 'UTC': 'Etc/GMT', - 'UTC+12': 'Etc/GMT-12', - 'UTC-02': 'Etc/GMT+2', - 'UTC-11': 'Etc/GMT+11', - 'Ulaanbaatar Standard Time': 'Asia/Ulaanbaatar', - 'Venezuela Standard Time': 'America/Caracas', - 'Vladivostok Standard Time': 'Asia/Vladivostok', - 'W. Australia Standard Time': 'Australia/Perth', - 'W. Central Africa Standard Time': 'Africa/Lagos', - 'W. Europe Standard Time': 'Europe/Berlin', - 'West Asia Standard Time': 'Asia/Tashkent', - 'West Pacific Standard Time': 'Pacific/Port_Moresby', - 'Yakutsk Standard Time': 'Asia/Yakutsk'} +win_tz = { + "AUS Central Standard Time": "Australia/Darwin", + "AUS Eastern Standard Time": "Australia/Sydney", + "Afghanistan Standard Time": "Asia/Kabul", + "Alaskan Standard Time": "America/Anchorage", + "Aleutian Standard Time": "America/Adak", + "Altai Standard Time": "Asia/Barnaul", + "Arab Standard Time": "Asia/Riyadh", + "Arabian Standard Time": "Asia/Dubai", + "Arabic Standard Time": "Asia/Baghdad", + "Argentina Standard Time": "America/Buenos_Aires", + "Astrakhan Standard Time": "Europe/Astrakhan", + "Atlantic Standard Time": "America/Halifax", + "Aus Central W. Standard Time": "Australia/Eucla", + "Azerbaijan Standard Time": "Asia/Baku", + "Azores Standard Time": "Atlantic/Azores", + "Bahia Standard Time": "America/Bahia", + "Bangladesh Standard Time": "Asia/Dhaka", + "Belarus Standard Time": "Europe/Minsk", + "Bougainville Standard Time": "Pacific/Bougainville", + "Canada Central Standard Time": "America/Regina", + "Cape Verde Standard Time": "Atlantic/Cape_Verde", + "Caucasus Standard Time": "Asia/Yerevan", + "Cen. 
Australia Standard Time": "Australia/Adelaide", + "Central America Standard Time": "America/Guatemala", + "Central Asia Standard Time": "Asia/Almaty", + "Central Brazilian Standard Time": "America/Cuiaba", + "Central Europe Standard Time": "Europe/Budapest", + "Central European Standard Time": "Europe/Warsaw", + "Central Pacific Standard Time": "Pacific/Guadalcanal", + "Central Standard Time": "America/Chicago", + "Central Standard Time (Mexico)": "America/Mexico_City", + "Chatham Islands Standard Time": "Pacific/Chatham", + "China Standard Time": "Asia/Shanghai", + "Cuba Standard Time": "America/Havana", + "Dateline Standard Time": "Etc/GMT+12", + "E. Africa Standard Time": "Africa/Nairobi", + "E. Australia Standard Time": "Australia/Brisbane", + "E. Europe Standard Time": "Europe/Chisinau", + "E. South America Standard Time": "America/Sao_Paulo", + "Easter Island Standard Time": "Pacific/Easter", + "Eastern Standard Time": "America/New_York", + "Eastern Standard Time (Mexico)": "America/Cancun", + "Egypt Standard Time": "Africa/Cairo", + "Ekaterinburg Standard Time": "Asia/Yekaterinburg", + "FLE Standard Time": "Europe/Kiev", + "Fiji Standard Time": "Pacific/Fiji", + "GMT Standard Time": "Europe/London", + "GTB Standard Time": "Europe/Bucharest", + "Georgian Standard Time": "Asia/Tbilisi", + "Greenland Standard Time": "America/Godthab", + "Greenwich Standard Time": "Atlantic/Reykjavik", + "Haiti Standard Time": "America/Port-au-Prince", + "Hawaiian Standard Time": "Pacific/Honolulu", + "India Standard Time": "Asia/Calcutta", + "Iran Standard Time": "Asia/Tehran", + "Israel Standard Time": "Asia/Jerusalem", + "Jordan Standard Time": "Asia/Amman", + "Kaliningrad Standard Time": "Europe/Kaliningrad", + "Korea Standard Time": "Asia/Seoul", + "Libya Standard Time": "Africa/Tripoli", + "Line Islands Standard Time": "Pacific/Kiritimati", + "Lord Howe Standard Time": "Australia/Lord_Howe", + "Magadan Standard Time": "Asia/Magadan", + "Magallanes Standard Time": "America/Punta_Arenas", + "Marquesas Standard Time": "Pacific/Marquesas", + "Mauritius Standard Time": "Indian/Mauritius", + "Middle East Standard Time": "Asia/Beirut", + "Montevideo Standard Time": "America/Montevideo", + "Morocco Standard Time": "Africa/Casablanca", + "Mountain Standard Time": "America/Denver", + "Mountain Standard Time (Mexico)": "America/Mazatlan", + "Myanmar Standard Time": "Asia/Rangoon", + "N. 
Central Asia Standard Time": "Asia/Novosibirsk", + "Namibia Standard Time": "Africa/Windhoek", + "Nepal Standard Time": "Asia/Katmandu", + "New Zealand Standard Time": "Pacific/Auckland", + "Newfoundland Standard Time": "America/St_Johns", + "Norfolk Standard Time": "Pacific/Norfolk", + "North Asia East Standard Time": "Asia/Irkutsk", + "North Asia Standard Time": "Asia/Krasnoyarsk", + "North Korea Standard Time": "Asia/Pyongyang", + "Omsk Standard Time": "Asia/Omsk", + "Pacific SA Standard Time": "America/Santiago", + "Pacific Standard Time": "America/Los_Angeles", + "Pacific Standard Time (Mexico)": "America/Tijuana", + "Pakistan Standard Time": "Asia/Karachi", + "Paraguay Standard Time": "America/Asuncion", + "Qyzylorda Standard Time": "Asia/Qyzylorda", + "Romance Standard Time": "Europe/Paris", + "Russia Time Zone 10": "Asia/Srednekolymsk", + "Russia Time Zone 11": "Asia/Kamchatka", + "Russia Time Zone 3": "Europe/Samara", + "Russian Standard Time": "Europe/Moscow", + "SA Eastern Standard Time": "America/Cayenne", + "SA Pacific Standard Time": "America/Bogota", + "SA Western Standard Time": "America/La_Paz", + "SE Asia Standard Time": "Asia/Bangkok", + "Saint Pierre Standard Time": "America/Miquelon", + "Sakhalin Standard Time": "Asia/Sakhalin", + "Samoa Standard Time": "Pacific/Apia", + "Sao Tome Standard Time": "Africa/Sao_Tome", + "Saratov Standard Time": "Europe/Saratov", + "Singapore Standard Time": "Asia/Singapore", + "South Africa Standard Time": "Africa/Johannesburg", + "South Sudan Standard Time": "Africa/Juba", + "Sri Lanka Standard Time": "Asia/Colombo", + "Sudan Standard Time": "Africa/Khartoum", + "Syria Standard Time": "Asia/Damascus", + "Taipei Standard Time": "Asia/Taipei", + "Tasmania Standard Time": "Australia/Hobart", + "Tocantins Standard Time": "America/Araguaina", + "Tokyo Standard Time": "Asia/Tokyo", + "Tomsk Standard Time": "Asia/Tomsk", + "Tonga Standard Time": "Pacific/Tongatapu", + "Transbaikal Standard Time": "Asia/Chita", + "Turkey Standard Time": "Europe/Istanbul", + "Turks And Caicos Standard Time": "America/Grand_Turk", + "US Eastern Standard Time": "America/Indianapolis", + "US Mountain Standard Time": "America/Phoenix", + "UTC": "Etc/UTC", + "UTC+12": "Etc/GMT-12", + "UTC+13": "Etc/GMT-13", + "UTC-02": "Etc/GMT+2", + "UTC-08": "Etc/GMT+8", + "UTC-09": "Etc/GMT+9", + "UTC-11": "Etc/GMT+11", + "Ulaanbaatar Standard Time": "Asia/Ulaanbaatar", + "Venezuela Standard Time": "America/Caracas", + "Vladivostok Standard Time": "Asia/Vladivostok", + "Volgograd Standard Time": "Europe/Volgograd", + "W. Australia Standard Time": "Australia/Perth", + "W. Central Africa Standard Time": "Africa/Lagos", + "W. Europe Standard Time": "Europe/Berlin", + "W. Mongolia Standard Time": "Asia/Hovd", + "West Asia Standard Time": "Asia/Tashkent", + "West Bank Standard Time": "Asia/Hebron", + "West Pacific Standard Time": "Pacific/Port_Moresby", + "Yakutsk Standard Time": "Asia/Yakutsk", + "Yukon Standard Time": "America/Whitehorse", +} # Old name for the win_tz variable: tz_names = win_tz -tz_win = {'Africa/Abidjan': 'Greenwich Standard Time', - 'Africa/Accra': 'Greenwich Standard Time', - 'Africa/Addis_Ababa': 'E. Africa Standard Time', - 'Africa/Algiers': 'W. Central Africa Standard Time', - 'Africa/Asmera': 'E. Africa Standard Time', - 'Africa/Bamako': 'Greenwich Standard Time', - 'Africa/Bangui': 'W. 
Central Africa Standard Time', - 'Africa/Banjul': 'Greenwich Standard Time', - 'Africa/Bissau': 'Greenwich Standard Time', - 'Africa/Blantyre': 'South Africa Standard Time', - 'Africa/Brazzaville': 'W. Central Africa Standard Time', - 'Africa/Bujumbura': 'South Africa Standard Time', - 'Africa/Cairo': 'Egypt Standard Time', - 'Africa/Casablanca': 'Morocco Standard Time', - 'Africa/Ceuta': 'Romance Standard Time', - 'Africa/Conakry': 'Greenwich Standard Time', - 'Africa/Dakar': 'Greenwich Standard Time', - 'Africa/Dar_es_Salaam': 'E. Africa Standard Time', - 'Africa/Djibouti': 'E. Africa Standard Time', - 'Africa/Douala': 'W. Central Africa Standard Time', - 'Africa/El_Aaiun': 'Morocco Standard Time', - 'Africa/Freetown': 'Greenwich Standard Time', - 'Africa/Gaborone': 'South Africa Standard Time', - 'Africa/Harare': 'South Africa Standard Time', - 'Africa/Johannesburg': 'South Africa Standard Time', - 'Africa/Juba': 'E. Africa Standard Time', - 'Africa/Kampala': 'E. Africa Standard Time', - 'Africa/Khartoum': 'E. Africa Standard Time', - 'Africa/Kigali': 'South Africa Standard Time', - 'Africa/Kinshasa': 'W. Central Africa Standard Time', - 'Africa/Lagos': 'W. Central Africa Standard Time', - 'Africa/Libreville': 'W. Central Africa Standard Time', - 'Africa/Lome': 'Greenwich Standard Time', - 'Africa/Luanda': 'W. Central Africa Standard Time', - 'Africa/Lubumbashi': 'South Africa Standard Time', - 'Africa/Lusaka': 'South Africa Standard Time', - 'Africa/Malabo': 'W. Central Africa Standard Time', - 'Africa/Maputo': 'South Africa Standard Time', - 'Africa/Maseru': 'South Africa Standard Time', - 'Africa/Mbabane': 'South Africa Standard Time', - 'Africa/Mogadishu': 'E. Africa Standard Time', - 'Africa/Monrovia': 'Greenwich Standard Time', - 'Africa/Nairobi': 'E. Africa Standard Time', - 'Africa/Ndjamena': 'W. Central Africa Standard Time', - 'Africa/Niamey': 'W. Central Africa Standard Time', - 'Africa/Nouakchott': 'Greenwich Standard Time', - 'Africa/Ouagadougou': 'Greenwich Standard Time', - 'Africa/Porto-Novo': 'W. Central Africa Standard Time', - 'Africa/Sao_Tome': 'Greenwich Standard Time', - 'Africa/Tripoli': 'Libya Standard Time', - 'Africa/Tunis': 'W. 
Central Africa Standard Time', - 'Africa/Windhoek': 'Namibia Standard Time', - 'America/Anchorage': 'Alaskan Standard Time', - 'America/Anguilla': 'SA Western Standard Time', - 'America/Antigua': 'SA Western Standard Time', - 'America/Araguaina': 'SA Eastern Standard Time', - 'America/Argentina/La_Rioja': 'Argentina Standard Time', - 'America/Argentina/Rio_Gallegos': 'Argentina Standard Time', - 'America/Argentina/Salta': 'Argentina Standard Time', - 'America/Argentina/San_Juan': 'Argentina Standard Time', - 'America/Argentina/San_Luis': 'Argentina Standard Time', - 'America/Argentina/Tucuman': 'Argentina Standard Time', - 'America/Argentina/Ushuaia': 'Argentina Standard Time', - 'America/Aruba': 'SA Western Standard Time', - 'America/Asuncion': 'Paraguay Standard Time', - 'America/Bahia': 'Bahia Standard Time', - 'America/Bahia_Banderas': 'Central Standard Time (Mexico)', - 'America/Barbados': 'SA Western Standard Time', - 'America/Belem': 'SA Eastern Standard Time', - 'America/Belize': 'Central America Standard Time', - 'America/Blanc-Sablon': 'SA Western Standard Time', - 'America/Boa_Vista': 'SA Western Standard Time', - 'America/Bogota': 'SA Pacific Standard Time', - 'America/Boise': 'Mountain Standard Time', - 'America/Buenos_Aires': 'Argentina Standard Time', - 'America/Cambridge_Bay': 'Mountain Standard Time', - 'America/Campo_Grande': 'Central Brazilian Standard Time', - 'America/Cancun': 'Central Standard Time (Mexico)', - 'America/Caracas': 'Venezuela Standard Time', - 'America/Catamarca': 'Argentina Standard Time', - 'America/Cayenne': 'SA Eastern Standard Time', - 'America/Cayman': 'SA Pacific Standard Time', - 'America/Chicago': 'Central Standard Time', - 'America/Chihuahua': 'Mountain Standard Time (Mexico)', - 'America/Coral_Harbour': 'SA Pacific Standard Time', - 'America/Cordoba': 'Argentina Standard Time', - 'America/Costa_Rica': 'Central America Standard Time', - 'America/Creston': 'US Mountain Standard Time', - 'America/Cuiaba': 'Central Brazilian Standard Time', - 'America/Curacao': 'SA Western Standard Time', - 'America/Danmarkshavn': 'UTC', - 'America/Dawson': 'Pacific Standard Time', - 'America/Dawson_Creek': 'US Mountain Standard Time', - 'America/Denver': 'Mountain Standard Time', - 'America/Detroit': 'Eastern Standard Time', - 'America/Dominica': 'SA Western Standard Time', - 'America/Edmonton': 'Mountain Standard Time', - 'America/Eirunepe': 'SA Pacific Standard Time', - 'America/El_Salvador': 'Central America Standard Time', - 'America/Fortaleza': 'SA Eastern Standard Time', - 'America/Glace_Bay': 'Atlantic Standard Time', - 'America/Godthab': 'Greenland Standard Time', - 'America/Goose_Bay': 'Atlantic Standard Time', - 'America/Grand_Turk': 'SA Western Standard Time', - 'America/Grenada': 'SA Western Standard Time', - 'America/Guadeloupe': 'SA Western Standard Time', - 'America/Guatemala': 'Central America Standard Time', - 'America/Guayaquil': 'SA Pacific Standard Time', - 'America/Guyana': 'SA Western Standard Time', - 'America/Halifax': 'Atlantic Standard Time', - 'America/Havana': 'Eastern Standard Time', - 'America/Hermosillo': 'US Mountain Standard Time', - 'America/Indiana/Knox': 'Central Standard Time', - 'America/Indiana/Marengo': 'US Eastern Standard Time', - 'America/Indiana/Petersburg': 'Eastern Standard Time', - 'America/Indiana/Tell_City': 'Central Standard Time', - 'America/Indiana/Vevay': 'US Eastern Standard Time', - 'America/Indiana/Vincennes': 'Eastern Standard Time', - 'America/Indiana/Winamac': 'Eastern Standard Time', - 
'America/Indianapolis': 'US Eastern Standard Time', - 'America/Inuvik': 'Mountain Standard Time', - 'America/Iqaluit': 'Eastern Standard Time', - 'America/Jamaica': 'SA Pacific Standard Time', - 'America/Jujuy': 'Argentina Standard Time', - 'America/Juneau': 'Alaskan Standard Time', - 'America/Kentucky/Monticello': 'Eastern Standard Time', - 'America/Kralendijk': 'SA Western Standard Time', - 'America/La_Paz': 'SA Western Standard Time', - 'America/Lima': 'SA Pacific Standard Time', - 'America/Los_Angeles': 'Pacific Standard Time', - 'America/Louisville': 'Eastern Standard Time', - 'America/Lower_Princes': 'SA Western Standard Time', - 'America/Maceio': 'SA Eastern Standard Time', - 'America/Managua': 'Central America Standard Time', - 'America/Manaus': 'SA Western Standard Time', - 'America/Marigot': 'SA Western Standard Time', - 'America/Martinique': 'SA Western Standard Time', - 'America/Matamoros': 'Central Standard Time', - 'America/Mazatlan': 'Mountain Standard Time (Mexico)', - 'America/Mendoza': 'Argentina Standard Time', - 'America/Menominee': 'Central Standard Time', - 'America/Merida': 'Central Standard Time (Mexico)', - 'America/Mexico_City': 'Central Standard Time (Mexico)', - 'America/Moncton': 'Atlantic Standard Time', - 'America/Monterrey': 'Central Standard Time (Mexico)', - 'America/Montevideo': 'Montevideo Standard Time', - 'America/Montreal': 'Eastern Standard Time', - 'America/Montserrat': 'SA Western Standard Time', - 'America/Nassau': 'Eastern Standard Time', - 'America/New_York': 'Eastern Standard Time', - 'America/Nipigon': 'Eastern Standard Time', - 'America/Nome': 'Alaskan Standard Time', - 'America/Noronha': 'UTC-02', - 'America/North_Dakota/Beulah': 'Central Standard Time', - 'America/North_Dakota/Center': 'Central Standard Time', - 'America/North_Dakota/New_Salem': 'Central Standard Time', - 'America/Ojinaga': 'Mountain Standard Time', - 'America/Panama': 'SA Pacific Standard Time', - 'America/Pangnirtung': 'Eastern Standard Time', - 'America/Paramaribo': 'SA Eastern Standard Time', - 'America/Phoenix': 'US Mountain Standard Time', - 'America/Port-au-Prince': 'Eastern Standard Time', - 'America/Port_of_Spain': 'SA Western Standard Time', - 'America/Porto_Velho': 'SA Western Standard Time', - 'America/Puerto_Rico': 'SA Western Standard Time', - 'America/Rainy_River': 'Central Standard Time', - 'America/Rankin_Inlet': 'Central Standard Time', - 'America/Recife': 'SA Eastern Standard Time', - 'America/Regina': 'Canada Central Standard Time', - 'America/Resolute': 'Central Standard Time', - 'America/Rio_Branco': 'SA Pacific Standard Time', - 'America/Santa_Isabel': 'Pacific Standard Time (Mexico)', - 'America/Santarem': 'SA Eastern Standard Time', - 'America/Santiago': 'Pacific SA Standard Time', - 'America/Santo_Domingo': 'SA Western Standard Time', - 'America/Sao_Paulo': 'E. 
South America Standard Time', - 'America/Scoresbysund': 'Azores Standard Time', - 'America/Sitka': 'Alaskan Standard Time', - 'America/St_Barthelemy': 'SA Western Standard Time', - 'America/St_Johns': 'Newfoundland Standard Time', - 'America/St_Kitts': 'SA Western Standard Time', - 'America/St_Lucia': 'SA Western Standard Time', - 'America/St_Thomas': 'SA Western Standard Time', - 'America/St_Vincent': 'SA Western Standard Time', - 'America/Swift_Current': 'Canada Central Standard Time', - 'America/Tegucigalpa': 'Central America Standard Time', - 'America/Thule': 'Atlantic Standard Time', - 'America/Thunder_Bay': 'Eastern Standard Time', - 'America/Tijuana': 'Pacific Standard Time', - 'America/Toronto': 'Eastern Standard Time', - 'America/Tortola': 'SA Western Standard Time', - 'America/Vancouver': 'Pacific Standard Time', - 'America/Whitehorse': 'Pacific Standard Time', - 'America/Winnipeg': 'Central Standard Time', - 'America/Yakutat': 'Alaskan Standard Time', - 'America/Yellowknife': 'Mountain Standard Time', - 'Antarctica/Casey': 'W. Australia Standard Time', - 'Antarctica/Davis': 'SE Asia Standard Time', - 'Antarctica/DumontDUrville': 'West Pacific Standard Time', - 'Antarctica/Macquarie': 'Central Pacific Standard Time', - 'Antarctica/Mawson': 'West Asia Standard Time', - 'Antarctica/McMurdo': 'New Zealand Standard Time', - 'Antarctica/Palmer': 'Pacific SA Standard Time', - 'Antarctica/Rothera': 'SA Eastern Standard Time', - 'Antarctica/Syowa': 'E. Africa Standard Time', - 'Antarctica/Vostok': 'Central Asia Standard Time', - 'Arctic/Longyearbyen': 'W. Europe Standard Time', - 'Asia/Aden': 'Arab Standard Time', - 'Asia/Almaty': 'Central Asia Standard Time', - 'Asia/Amman': 'Jordan Standard Time', - 'Asia/Anadyr': 'Russia Time Zone 11', - 'Asia/Aqtau': 'West Asia Standard Time', - 'Asia/Aqtobe': 'West Asia Standard Time', - 'Asia/Ashgabat': 'West Asia Standard Time', - 'Asia/Baghdad': 'Arabic Standard Time', - 'Asia/Bahrain': 'Arab Standard Time', - 'Asia/Baku': 'Azerbaijan Standard Time', - 'Asia/Bangkok': 'SE Asia Standard Time', - 'Asia/Beirut': 'Middle East Standard Time', - 'Asia/Bishkek': 'Central Asia Standard Time', - 'Asia/Brunei': 'Singapore Standard Time', - 'Asia/Calcutta': 'India Standard Time', - 'Asia/Chita': 'North Asia East Standard Time', - 'Asia/Choibalsan': 'Ulaanbaatar Standard Time', - 'Asia/Colombo': 'Sri Lanka Standard Time', - 'Asia/Damascus': 'Syria Standard Time', - 'Asia/Dhaka': 'Bangladesh Standard Time', - 'Asia/Dili': 'Tokyo Standard Time', - 'Asia/Dubai': 'Arabian Standard Time', - 'Asia/Dushanbe': 'West Asia Standard Time', - 'Asia/Hong_Kong': 'China Standard Time', - 'Asia/Hovd': 'SE Asia Standard Time', - 'Asia/Irkutsk': 'North Asia East Standard Time', - 'Asia/Jakarta': 'SE Asia Standard Time', - 'Asia/Jayapura': 'Tokyo Standard Time', - 'Asia/Jerusalem': 'Israel Standard Time', - 'Asia/Kabul': 'Afghanistan Standard Time', - 'Asia/Kamchatka': 'Russia Time Zone 11', - 'Asia/Karachi': 'Pakistan Standard Time', - 'Asia/Katmandu': 'Nepal Standard Time', - 'Asia/Khandyga': 'Yakutsk Standard Time', - 'Asia/Krasnoyarsk': 'North Asia Standard Time', - 'Asia/Kuala_Lumpur': 'Singapore Standard Time', - 'Asia/Kuching': 'Singapore Standard Time', - 'Asia/Kuwait': 'Arab Standard Time', - 'Asia/Macau': 'China Standard Time', - 'Asia/Magadan': 'Magadan Standard Time', - 'Asia/Makassar': 'Singapore Standard Time', - 'Asia/Manila': 'Singapore Standard Time', - 'Asia/Muscat': 'Arabian Standard Time', - 'Asia/Nicosia': 'GTB Standard Time', - 'Asia/Novokuznetsk': 'North 
Asia Standard Time', - 'Asia/Novosibirsk': 'N. Central Asia Standard Time', - 'Asia/Omsk': 'N. Central Asia Standard Time', - 'Asia/Oral': 'West Asia Standard Time', - 'Asia/Phnom_Penh': 'SE Asia Standard Time', - 'Asia/Pontianak': 'SE Asia Standard Time', - 'Asia/Pyongyang': 'Korea Standard Time', - 'Asia/Qatar': 'Arab Standard Time', - 'Asia/Qyzylorda': 'Central Asia Standard Time', - 'Asia/Rangoon': 'Myanmar Standard Time', - 'Asia/Riyadh': 'Arab Standard Time', - 'Asia/Saigon': 'SE Asia Standard Time', - 'Asia/Sakhalin': 'Vladivostok Standard Time', - 'Asia/Samarkand': 'West Asia Standard Time', - 'Asia/Seoul': 'Korea Standard Time', - 'Asia/Shanghai': 'China Standard Time', - 'Asia/Singapore': 'Singapore Standard Time', - 'Asia/Srednekolymsk': 'Russia Time Zone 10', - 'Asia/Taipei': 'Taipei Standard Time', - 'Asia/Tashkent': 'West Asia Standard Time', - 'Asia/Tbilisi': 'Georgian Standard Time', - 'Asia/Tehran': 'Iran Standard Time', - 'Asia/Thimphu': 'Bangladesh Standard Time', - 'Asia/Tokyo': 'Tokyo Standard Time', - 'Asia/Ulaanbaatar': 'Ulaanbaatar Standard Time', - 'Asia/Urumqi': 'Central Asia Standard Time', - 'Asia/Ust-Nera': 'Vladivostok Standard Time', - 'Asia/Vientiane': 'SE Asia Standard Time', - 'Asia/Vladivostok': 'Vladivostok Standard Time', - 'Asia/Yakutsk': 'Yakutsk Standard Time', - 'Asia/Yekaterinburg': 'Ekaterinburg Standard Time', - 'Asia/Yerevan': 'Caucasus Standard Time', - 'Atlantic/Azores': 'Azores Standard Time', - 'Atlantic/Bermuda': 'Atlantic Standard Time', - 'Atlantic/Canary': 'GMT Standard Time', - 'Atlantic/Cape_Verde': 'Cape Verde Standard Time', - 'Atlantic/Faeroe': 'GMT Standard Time', - 'Atlantic/Madeira': 'GMT Standard Time', - 'Atlantic/Reykjavik': 'Greenwich Standard Time', - 'Atlantic/South_Georgia': 'UTC-02', - 'Atlantic/St_Helena': 'Greenwich Standard Time', - 'Atlantic/Stanley': 'SA Eastern Standard Time', - 'Australia/Adelaide': 'Cen. Australia Standard Time', - 'Australia/Brisbane': 'E. Australia Standard Time', - 'Australia/Broken_Hill': 'Cen. Australia Standard Time', - 'Australia/Currie': 'Tasmania Standard Time', - 'Australia/Darwin': 'AUS Central Standard Time', - 'Australia/Hobart': 'Tasmania Standard Time', - 'Australia/Lindeman': 'E. Australia Standard Time', - 'Australia/Melbourne': 'AUS Eastern Standard Time', - 'Australia/Perth': 'W. Australia Standard Time', - 'Australia/Sydney': 'AUS Eastern Standard Time', - 'CST6CDT': 'Central Standard Time', - 'EST5EDT': 'Eastern Standard Time', - 'Etc/GMT': 'UTC', - 'Etc/GMT+1': 'Cape Verde Standard Time', - 'Etc/GMT+10': 'Hawaiian Standard Time', - 'Etc/GMT+11': 'UTC-11', - 'Etc/GMT+12': 'Dateline Standard Time', - 'Etc/GMT+2': 'UTC-02', - 'Etc/GMT+3': 'SA Eastern Standard Time', - 'Etc/GMT+4': 'SA Western Standard Time', - 'Etc/GMT+5': 'SA Pacific Standard Time', - 'Etc/GMT+6': 'Central America Standard Time', - 'Etc/GMT+7': 'US Mountain Standard Time', - 'Etc/GMT-1': 'W. Central Africa Standard Time', - 'Etc/GMT-10': 'West Pacific Standard Time', - 'Etc/GMT-11': 'Central Pacific Standard Time', - 'Etc/GMT-12': 'UTC+12', - 'Etc/GMT-13': 'Tonga Standard Time', - 'Etc/GMT-14': 'Line Islands Standard Time', - 'Etc/GMT-2': 'South Africa Standard Time', - 'Etc/GMT-3': 'E. Africa Standard Time', - 'Etc/GMT-4': 'Arabian Standard Time', - 'Etc/GMT-5': 'West Asia Standard Time', - 'Etc/GMT-6': 'Central Asia Standard Time', - 'Etc/GMT-7': 'SE Asia Standard Time', - 'Etc/GMT-8': 'Singapore Standard Time', - 'Etc/GMT-9': 'Tokyo Standard Time', - 'Etc/UTC': 'UTC', - 'Europe/Amsterdam': 'W. 
Europe Standard Time', - 'Europe/Andorra': 'W. Europe Standard Time', - 'Europe/Athens': 'GTB Standard Time', - 'Europe/Belgrade': 'Central Europe Standard Time', - 'Europe/Berlin': 'W. Europe Standard Time', - 'Europe/Bratislava': 'Central Europe Standard Time', - 'Europe/Brussels': 'Romance Standard Time', - 'Europe/Bucharest': 'GTB Standard Time', - 'Europe/Budapest': 'Central Europe Standard Time', - 'Europe/Busingen': 'W. Europe Standard Time', - 'Europe/Chisinau': 'GTB Standard Time', - 'Europe/Copenhagen': 'Romance Standard Time', - 'Europe/Dublin': 'GMT Standard Time', - 'Europe/Gibraltar': 'W. Europe Standard Time', - 'Europe/Guernsey': 'GMT Standard Time', - 'Europe/Helsinki': 'FLE Standard Time', - 'Europe/Isle_of_Man': 'GMT Standard Time', - 'Europe/Istanbul': 'Turkey Standard Time', - 'Europe/Jersey': 'GMT Standard Time', - 'Europe/Kaliningrad': 'Kaliningrad Standard Time', - 'Europe/Kiev': 'FLE Standard Time', - 'Europe/Lisbon': 'GMT Standard Time', - 'Europe/Ljubljana': 'Central Europe Standard Time', - 'Europe/London': 'GMT Standard Time', - 'Europe/Luxembourg': 'W. Europe Standard Time', - 'Europe/Madrid': 'Romance Standard Time', - 'Europe/Malta': 'W. Europe Standard Time', - 'Europe/Mariehamn': 'FLE Standard Time', - 'Europe/Minsk': 'Belarus Standard Time', - 'Europe/Monaco': 'W. Europe Standard Time', - 'Europe/Moscow': 'Russian Standard Time', - 'Europe/Oslo': 'W. Europe Standard Time', - 'Europe/Paris': 'Romance Standard Time', - 'Europe/Podgorica': 'Central Europe Standard Time', - 'Europe/Prague': 'Central Europe Standard Time', - 'Europe/Riga': 'FLE Standard Time', - 'Europe/Rome': 'W. Europe Standard Time', - 'Europe/Samara': 'Russia Time Zone 3', - 'Europe/San_Marino': 'W. Europe Standard Time', - 'Europe/Sarajevo': 'Central European Standard Time', - 'Europe/Simferopol': 'Russian Standard Time', - 'Europe/Skopje': 'Central European Standard Time', - 'Europe/Sofia': 'FLE Standard Time', - 'Europe/Stockholm': 'W. Europe Standard Time', - 'Europe/Tallinn': 'FLE Standard Time', - 'Europe/Tirane': 'Central Europe Standard Time', - 'Europe/Uzhgorod': 'FLE Standard Time', - 'Europe/Vaduz': 'W. Europe Standard Time', - 'Europe/Vatican': 'W. Europe Standard Time', - 'Europe/Vienna': 'W. Europe Standard Time', - 'Europe/Vilnius': 'FLE Standard Time', - 'Europe/Volgograd': 'Russian Standard Time', - 'Europe/Warsaw': 'Central European Standard Time', - 'Europe/Zagreb': 'Central European Standard Time', - 'Europe/Zaporozhye': 'FLE Standard Time', - 'Europe/Zurich': 'W. Europe Standard Time', - 'Indian/Antananarivo': 'E. Africa Standard Time', - 'Indian/Chagos': 'Central Asia Standard Time', - 'Indian/Christmas': 'SE Asia Standard Time', - 'Indian/Cocos': 'Myanmar Standard Time', - 'Indian/Comoro': 'E. Africa Standard Time', - 'Indian/Kerguelen': 'West Asia Standard Time', - 'Indian/Mahe': 'Mauritius Standard Time', - 'Indian/Maldives': 'West Asia Standard Time', - 'Indian/Mauritius': 'Mauritius Standard Time', - 'Indian/Mayotte': 'E. 
Africa Standard Time', - 'Indian/Reunion': 'Mauritius Standard Time', - 'MST7MDT': 'Mountain Standard Time', - 'PST8PDT': 'Pacific Standard Time', - 'Pacific/Apia': 'Samoa Standard Time', - 'Pacific/Auckland': 'New Zealand Standard Time', - 'Pacific/Efate': 'Central Pacific Standard Time', - 'Pacific/Enderbury': 'Tonga Standard Time', - 'Pacific/Fakaofo': 'Tonga Standard Time', - 'Pacific/Fiji': 'Fiji Standard Time', - 'Pacific/Funafuti': 'UTC+12', - 'Pacific/Galapagos': 'Central America Standard Time', - 'Pacific/Guadalcanal': 'Central Pacific Standard Time', - 'Pacific/Guam': 'West Pacific Standard Time', - 'Pacific/Honolulu': 'Hawaiian Standard Time', - 'Pacific/Johnston': 'Hawaiian Standard Time', - 'Pacific/Kiritimati': 'Line Islands Standard Time', - 'Pacific/Kosrae': 'Central Pacific Standard Time', - 'Pacific/Kwajalein': 'UTC+12', - 'Pacific/Majuro': 'UTC+12', - 'Pacific/Midway': 'UTC-11', - 'Pacific/Nauru': 'UTC+12', - 'Pacific/Niue': 'UTC-11', - 'Pacific/Noumea': 'Central Pacific Standard Time', - 'Pacific/Pago_Pago': 'UTC-11', - 'Pacific/Palau': 'Tokyo Standard Time', - 'Pacific/Ponape': 'Central Pacific Standard Time', - 'Pacific/Port_Moresby': 'West Pacific Standard Time', - 'Pacific/Rarotonga': 'Hawaiian Standard Time', - 'Pacific/Saipan': 'West Pacific Standard Time', - 'Pacific/Tahiti': 'Hawaiian Standard Time', - 'Pacific/Tarawa': 'UTC+12', - 'Pacific/Tongatapu': 'Tonga Standard Time', - 'Pacific/Truk': 'West Pacific Standard Time', - 'Pacific/Wake': 'UTC+12', - 'Pacific/Wallis': 'UTC+12'} +tz_win = { + "": "Central Standard Time (Mexico)", + "Africa/Abidjan": "Greenwich Standard Time", + "Africa/Accra": "Greenwich Standard Time", + "Africa/Addis_Ababa": "E. Africa Standard Time", + "Africa/Algiers": "W. Central Africa Standard Time", + "Africa/Asmara": "E. Africa Standard Time", + "Africa/Asmera": "E. Africa Standard Time", + "Africa/Bamako": "Greenwich Standard Time", + "Africa/Bangui": "W. Central Africa Standard Time", + "Africa/Banjul": "Greenwich Standard Time", + "Africa/Bissau": "Greenwich Standard Time", + "Africa/Blantyre": "South Africa Standard Time", + "Africa/Brazzaville": "W. Central Africa Standard Time", + "Africa/Bujumbura": "South Africa Standard Time", + "Africa/Cairo": "Egypt Standard Time", + "Africa/Casablanca": "Morocco Standard Time", + "Africa/Ceuta": "Romance Standard Time", + "Africa/Conakry": "Greenwich Standard Time", + "Africa/Dakar": "Greenwich Standard Time", + "Africa/Dar_es_Salaam": "E. Africa Standard Time", + "Africa/Djibouti": "E. Africa Standard Time", + "Africa/Douala": "W. Central Africa Standard Time", + "Africa/El_Aaiun": "Morocco Standard Time", + "Africa/Freetown": "Greenwich Standard Time", + "Africa/Gaborone": "South Africa Standard Time", + "Africa/Harare": "South Africa Standard Time", + "Africa/Johannesburg": "South Africa Standard Time", + "Africa/Juba": "South Sudan Standard Time", + "Africa/Kampala": "E. Africa Standard Time", + "Africa/Khartoum": "Sudan Standard Time", + "Africa/Kigali": "South Africa Standard Time", + "Africa/Kinshasa": "W. Central Africa Standard Time", + "Africa/Lagos": "W. Central Africa Standard Time", + "Africa/Libreville": "W. Central Africa Standard Time", + "Africa/Lome": "Greenwich Standard Time", + "Africa/Luanda": "W. Central Africa Standard Time", + "Africa/Lubumbashi": "South Africa Standard Time", + "Africa/Lusaka": "South Africa Standard Time", + "Africa/Malabo": "W. 
Central Africa Standard Time", + "Africa/Maputo": "South Africa Standard Time", + "Africa/Maseru": "South Africa Standard Time", + "Africa/Mbabane": "South Africa Standard Time", + "Africa/Mogadishu": "E. Africa Standard Time", + "Africa/Monrovia": "Greenwich Standard Time", + "Africa/Nairobi": "E. Africa Standard Time", + "Africa/Ndjamena": "W. Central Africa Standard Time", + "Africa/Niamey": "W. Central Africa Standard Time", + "Africa/Nouakchott": "Greenwich Standard Time", + "Africa/Ouagadougou": "Greenwich Standard Time", + "Africa/Porto-Novo": "W. Central Africa Standard Time", + "Africa/Sao_Tome": "Sao Tome Standard Time", + "Africa/Timbuktu": "Greenwich Standard Time", + "Africa/Tripoli": "Libya Standard Time", + "Africa/Tunis": "W. Central Africa Standard Time", + "Africa/Windhoek": "Namibia Standard Time", + "America/Adak": "Aleutian Standard Time", + "America/Anchorage": "Alaskan Standard Time", + "America/Anguilla": "SA Western Standard Time", + "America/Antigua": "SA Western Standard Time", + "America/Araguaina": "Tocantins Standard Time", + "America/Argentina/Buenos_Aires": "Argentina Standard Time", + "America/Argentina/Catamarca": "Argentina Standard Time", + "America/Argentina/ComodRivadavia": "Argentina Standard Time", + "America/Argentina/Cordoba": "Argentina Standard Time", + "America/Argentina/Jujuy": "Argentina Standard Time", + "America/Argentina/La_Rioja": "Argentina Standard Time", + "America/Argentina/Mendoza": "Argentina Standard Time", + "America/Argentina/Rio_Gallegos": "Argentina Standard Time", + "America/Argentina/Salta": "Argentina Standard Time", + "America/Argentina/San_Juan": "Argentina Standard Time", + "America/Argentina/San_Luis": "Argentina Standard Time", + "America/Argentina/Tucuman": "Argentina Standard Time", + "America/Argentina/Ushuaia": "Argentina Standard Time", + "America/Aruba": "SA Western Standard Time", + "America/Asuncion": "Paraguay Standard Time", + "America/Atikokan": "SA Pacific Standard Time", + "America/Atka": "Aleutian Standard Time", + "America/Bahia": "Bahia Standard Time", + "America/Bahia_Banderas": "Central Standard Time (Mexico)", + "America/Barbados": "SA Western Standard Time", + "America/Belem": "SA Eastern Standard Time", + "America/Belize": "Central America Standard Time", + "America/Blanc-Sablon": "SA Western Standard Time", + "America/Boa_Vista": "SA Western Standard Time", + "America/Bogota": "SA Pacific Standard Time", + "America/Boise": "Mountain Standard Time", + "America/Buenos_Aires": "Argentina Standard Time", + "America/Cambridge_Bay": "Mountain Standard Time", + "America/Campo_Grande": "Central Brazilian Standard Time", + "America/Cancun": "Eastern Standard Time (Mexico)", + "America/Caracas": "Venezuela Standard Time", + "America/Catamarca": "Argentina Standard Time", + "America/Cayenne": "SA Eastern Standard Time", + "America/Cayman": "SA Pacific Standard Time", + "America/Chicago": "Central Standard Time", + "America/Chihuahua": "Central Standard Time (Mexico)", + "America/Ciudad_Juarez": "Mountain Standard Time", + "America/Coral_Harbour": "SA Pacific Standard Time", + "America/Cordoba": "Argentina Standard Time", + "America/Costa_Rica": "Central America Standard Time", + "America/Creston": "US Mountain Standard Time", + "America/Cuiaba": "Central Brazilian Standard Time", + "America/Curacao": "SA Western Standard Time", + "America/Danmarkshavn": "Greenwich Standard Time", + "America/Dawson": "Yukon Standard Time", + "America/Dawson_Creek": "US Mountain Standard Time", + "America/Denver": "Mountain 
Standard Time", + "America/Detroit": "Eastern Standard Time", + "America/Dominica": "SA Western Standard Time", + "America/Edmonton": "Mountain Standard Time", + "America/Eirunepe": "SA Pacific Standard Time", + "America/El_Salvador": "Central America Standard Time", + "America/Ensenada": "Pacific Standard Time (Mexico)", + "America/Fort_Nelson": "US Mountain Standard Time", + "America/Fort_Wayne": "US Eastern Standard Time", + "America/Fortaleza": "SA Eastern Standard Time", + "America/Glace_Bay": "Atlantic Standard Time", + "America/Godthab": "Greenland Standard Time", + "America/Goose_Bay": "Atlantic Standard Time", + "America/Grand_Turk": "Turks And Caicos Standard Time", + "America/Grenada": "SA Western Standard Time", + "America/Guadeloupe": "SA Western Standard Time", + "America/Guatemala": "Central America Standard Time", + "America/Guayaquil": "SA Pacific Standard Time", + "America/Guyana": "SA Western Standard Time", + "America/Halifax": "Atlantic Standard Time", + "America/Havana": "Cuba Standard Time", + "America/Hermosillo": "US Mountain Standard Time", + "America/Indiana/Indianapolis": "US Eastern Standard Time", + "America/Indiana/Knox": "Central Standard Time", + "America/Indiana/Marengo": "US Eastern Standard Time", + "America/Indiana/Petersburg": "Eastern Standard Time", + "America/Indiana/Tell_City": "Central Standard Time", + "America/Indiana/Vevay": "US Eastern Standard Time", + "America/Indiana/Vincennes": "Eastern Standard Time", + "America/Indiana/Winamac": "Eastern Standard Time", + "America/Indianapolis": "US Eastern Standard Time", + "America/Inuvik": "Mountain Standard Time", + "America/Iqaluit": "Eastern Standard Time", + "America/Jamaica": "SA Pacific Standard Time", + "America/Jujuy": "Argentina Standard Time", + "America/Juneau": "Alaskan Standard Time", + "America/Kentucky/Louisville": "Eastern Standard Time", + "America/Kentucky/Monticello": "Eastern Standard Time", + "America/Knox_IN": "Central Standard Time", + "America/Kralendijk": "SA Western Standard Time", + "America/La_Paz": "SA Western Standard Time", + "America/Lima": "SA Pacific Standard Time", + "America/Los_Angeles": "Pacific Standard Time", + "America/Louisville": "Eastern Standard Time", + "America/Lower_Princes": "SA Western Standard Time", + "America/Maceio": "SA Eastern Standard Time", + "America/Managua": "Central America Standard Time", + "America/Manaus": "SA Western Standard Time", + "America/Marigot": "SA Western Standard Time", + "America/Martinique": "SA Western Standard Time", + "America/Matamoros": "Central Standard Time", + "America/Mazatlan": "Mountain Standard Time (Mexico)", + "America/Mendoza": "Argentina Standard Time", + "America/Menominee": "Central Standard Time", + "America/Merida": "Central Standard Time (Mexico)", + "America/Metlakatla": "Alaskan Standard Time", + "America/Mexico_City": "Central Standard Time (Mexico)", + "America/Miquelon": "Saint Pierre Standard Time", + "America/Moncton": "Atlantic Standard Time", + "America/Monterrey": "Central Standard Time (Mexico)", + "America/Montevideo": "Montevideo Standard Time", + "America/Montreal": "Eastern Standard Time", + "America/Montserrat": "SA Western Standard Time", + "America/Nassau": "Eastern Standard Time", + "America/New_York": "Eastern Standard Time", + "America/Nipigon": "Eastern Standard Time", + "America/Nome": "Alaskan Standard Time", + "America/Noronha": "UTC-02", + "America/North_Dakota/Beulah": "Central Standard Time", + "America/North_Dakota/Center": "Central Standard Time", + 
"America/North_Dakota/New_Salem": "Central Standard Time", + "America/Nuuk": "Greenland Standard Time", + "America/Ojinaga": "Central Standard Time", + "America/Panama": "SA Pacific Standard Time", + "America/Pangnirtung": "Eastern Standard Time", + "America/Paramaribo": "SA Eastern Standard Time", + "America/Phoenix": "US Mountain Standard Time", + "America/Port-au-Prince": "Haiti Standard Time", + "America/Port_of_Spain": "SA Western Standard Time", + "America/Porto_Acre": "SA Pacific Standard Time", + "America/Porto_Velho": "SA Western Standard Time", + "America/Puerto_Rico": "SA Western Standard Time", + "America/Punta_Arenas": "Magallanes Standard Time", + "America/Rainy_River": "Central Standard Time", + "America/Rankin_Inlet": "Central Standard Time", + "America/Recife": "SA Eastern Standard Time", + "America/Regina": "Canada Central Standard Time", + "America/Resolute": "Central Standard Time", + "America/Rio_Branco": "SA Pacific Standard Time", + "America/Rosario": "Argentina Standard Time", + "America/Santa_Isabel": "Pacific Standard Time (Mexico)", + "America/Santarem": "SA Eastern Standard Time", + "America/Santiago": "Pacific SA Standard Time", + "America/Santo_Domingo": "SA Western Standard Time", + "America/Sao_Paulo": "E. South America Standard Time", + "America/Scoresbysund": "Azores Standard Time", + "America/Shiprock": "Mountain Standard Time", + "America/Sitka": "Alaskan Standard Time", + "America/St_Barthelemy": "SA Western Standard Time", + "America/St_Johns": "Newfoundland Standard Time", + "America/St_Kitts": "SA Western Standard Time", + "America/St_Lucia": "SA Western Standard Time", + "America/St_Thomas": "SA Western Standard Time", + "America/St_Vincent": "SA Western Standard Time", + "America/Swift_Current": "Canada Central Standard Time", + "America/Tegucigalpa": "Central America Standard Time", + "America/Thule": "Atlantic Standard Time", + "America/Thunder_Bay": "Eastern Standard Time", + "America/Tijuana": "Pacific Standard Time (Mexico)", + "America/Toronto": "Eastern Standard Time", + "America/Tortola": "SA Western Standard Time", + "America/Vancouver": "Pacific Standard Time", + "America/Virgin": "SA Western Standard Time", + "America/Whitehorse": "Yukon Standard Time", + "America/Winnipeg": "Central Standard Time", + "America/Yakutat": "Alaskan Standard Time", + "America/Yellowknife": "Mountain Standard Time", + "Antarctica/Casey": "Central Pacific Standard Time", + "Antarctica/Davis": "SE Asia Standard Time", + "Antarctica/DumontDUrville": "West Pacific Standard Time", + "Antarctica/Macquarie": "Tasmania Standard Time", + "Antarctica/Mawson": "West Asia Standard Time", + "Antarctica/McMurdo": "New Zealand Standard Time", + "Antarctica/Palmer": "SA Eastern Standard Time", + "Antarctica/Rothera": "SA Eastern Standard Time", + "Antarctica/South_Pole": "New Zealand Standard Time", + "Antarctica/Syowa": "E. Africa Standard Time", + "Antarctica/Vostok": "Central Asia Standard Time", + "Arctic/Longyearbyen": "W. 
Europe Standard Time", + "Asia/Aden": "Arab Standard Time", + "Asia/Almaty": "Central Asia Standard Time", + "Asia/Amman": "Jordan Standard Time", + "Asia/Anadyr": "Russia Time Zone 11", + "Asia/Aqtau": "West Asia Standard Time", + "Asia/Aqtobe": "West Asia Standard Time", + "Asia/Ashgabat": "West Asia Standard Time", + "Asia/Ashkhabad": "West Asia Standard Time", + "Asia/Atyrau": "West Asia Standard Time", + "Asia/Baghdad": "Arabic Standard Time", + "Asia/Bahrain": "Arab Standard Time", + "Asia/Baku": "Azerbaijan Standard Time", + "Asia/Bangkok": "SE Asia Standard Time", + "Asia/Barnaul": "Altai Standard Time", + "Asia/Beirut": "Middle East Standard Time", + "Asia/Bishkek": "Central Asia Standard Time", + "Asia/Brunei": "Singapore Standard Time", + "Asia/Calcutta": "India Standard Time", + "Asia/Chita": "Transbaikal Standard Time", + "Asia/Choibalsan": "Ulaanbaatar Standard Time", + "Asia/Chongqing": "China Standard Time", + "Asia/Chungking": "China Standard Time", + "Asia/Colombo": "Sri Lanka Standard Time", + "Asia/Dacca": "Bangladesh Standard Time", + "Asia/Damascus": "Syria Standard Time", + "Asia/Dhaka": "Bangladesh Standard Time", + "Asia/Dili": "Tokyo Standard Time", + "Asia/Dubai": "Arabian Standard Time", + "Asia/Dushanbe": "West Asia Standard Time", + "Asia/Famagusta": "GTB Standard Time", + "Asia/Gaza": "West Bank Standard Time", + "Asia/Harbin": "China Standard Time", + "Asia/Hebron": "West Bank Standard Time", + "Asia/Ho_Chi_Minh": "SE Asia Standard Time", + "Asia/Hong_Kong": "China Standard Time", + "Asia/Hovd": "W. Mongolia Standard Time", + "Asia/Irkutsk": "North Asia East Standard Time", + "Asia/Istanbul": "Turkey Standard Time", + "Asia/Jakarta": "SE Asia Standard Time", + "Asia/Jayapura": "Tokyo Standard Time", + "Asia/Jerusalem": "Israel Standard Time", + "Asia/Kabul": "Afghanistan Standard Time", + "Asia/Kamchatka": "Russia Time Zone 11", + "Asia/Karachi": "Pakistan Standard Time", + "Asia/Kashgar": "Central Asia Standard Time", + "Asia/Kathmandu": "Nepal Standard Time", + "Asia/Katmandu": "Nepal Standard Time", + "Asia/Khandyga": "Yakutsk Standard Time", + "Asia/Kolkata": "India Standard Time", + "Asia/Krasnoyarsk": "North Asia Standard Time", + "Asia/Kuala_Lumpur": "Singapore Standard Time", + "Asia/Kuching": "Singapore Standard Time", + "Asia/Kuwait": "Arab Standard Time", + "Asia/Macao": "China Standard Time", + "Asia/Macau": "China Standard Time", + "Asia/Magadan": "Magadan Standard Time", + "Asia/Makassar": "Singapore Standard Time", + "Asia/Manila": "Singapore Standard Time", + "Asia/Muscat": "Arabian Standard Time", + "Asia/Nicosia": "GTB Standard Time", + "Asia/Novokuznetsk": "North Asia Standard Time", + "Asia/Novosibirsk": "N. 
Central Asia Standard Time", + "Asia/Omsk": "Omsk Standard Time", + "Asia/Oral": "West Asia Standard Time", + "Asia/Phnom_Penh": "SE Asia Standard Time", + "Asia/Pontianak": "SE Asia Standard Time", + "Asia/Pyongyang": "North Korea Standard Time", + "Asia/Qatar": "Arab Standard Time", + "Asia/Qostanay": "Central Asia Standard Time", + "Asia/Qyzylorda": "Qyzylorda Standard Time", + "Asia/Rangoon": "Myanmar Standard Time", + "Asia/Riyadh": "Arab Standard Time", + "Asia/Saigon": "SE Asia Standard Time", + "Asia/Sakhalin": "Sakhalin Standard Time", + "Asia/Samarkand": "West Asia Standard Time", + "Asia/Seoul": "Korea Standard Time", + "Asia/Shanghai": "China Standard Time", + "Asia/Singapore": "Singapore Standard Time", + "Asia/Srednekolymsk": "Russia Time Zone 10", + "Asia/Taipei": "Taipei Standard Time", + "Asia/Tashkent": "West Asia Standard Time", + "Asia/Tbilisi": "Georgian Standard Time", + "Asia/Tehran": "Iran Standard Time", + "Asia/Tel_Aviv": "Israel Standard Time", + "Asia/Thimbu": "Bangladesh Standard Time", + "Asia/Thimphu": "Bangladesh Standard Time", + "Asia/Tokyo": "Tokyo Standard Time", + "Asia/Tomsk": "Tomsk Standard Time", + "Asia/Ujung_Pandang": "Singapore Standard Time", + "Asia/Ulaanbaatar": "Ulaanbaatar Standard Time", + "Asia/Ulan_Bator": "Ulaanbaatar Standard Time", + "Asia/Urumqi": "Central Asia Standard Time", + "Asia/Ust-Nera": "Vladivostok Standard Time", + "Asia/Vientiane": "SE Asia Standard Time", + "Asia/Vladivostok": "Vladivostok Standard Time", + "Asia/Yakutsk": "Yakutsk Standard Time", + "Asia/Yangon": "Myanmar Standard Time", + "Asia/Yekaterinburg": "Ekaterinburg Standard Time", + "Asia/Yerevan": "Caucasus Standard Time", + "Atlantic/Azores": "Azores Standard Time", + "Atlantic/Bermuda": "Atlantic Standard Time", + "Atlantic/Canary": "GMT Standard Time", + "Atlantic/Cape_Verde": "Cape Verde Standard Time", + "Atlantic/Faeroe": "GMT Standard Time", + "Atlantic/Faroe": "GMT Standard Time", + "Atlantic/Jan_Mayen": "W. Europe Standard Time", + "Atlantic/Madeira": "GMT Standard Time", + "Atlantic/Reykjavik": "Greenwich Standard Time", + "Atlantic/South_Georgia": "UTC-02", + "Atlantic/St_Helena": "Greenwich Standard Time", + "Atlantic/Stanley": "SA Eastern Standard Time", + "Australia/ACT": "AUS Eastern Standard Time", + "Australia/Adelaide": "Cen. Australia Standard Time", + "Australia/Brisbane": "E. Australia Standard Time", + "Australia/Broken_Hill": "Cen. Australia Standard Time", + "Australia/Canberra": "AUS Eastern Standard Time", + "Australia/Currie": "Tasmania Standard Time", + "Australia/Darwin": "AUS Central Standard Time", + "Australia/Eucla": "Aus Central W. Standard Time", + "Australia/Hobart": "Tasmania Standard Time", + "Australia/LHI": "Lord Howe Standard Time", + "Australia/Lindeman": "E. Australia Standard Time", + "Australia/Lord_Howe": "Lord Howe Standard Time", + "Australia/Melbourne": "AUS Eastern Standard Time", + "Australia/NSW": "AUS Eastern Standard Time", + "Australia/North": "AUS Central Standard Time", + "Australia/Perth": "W. Australia Standard Time", + "Australia/Queensland": "E. Australia Standard Time", + "Australia/South": "Cen. Australia Standard Time", + "Australia/Sydney": "AUS Eastern Standard Time", + "Australia/Tasmania": "Tasmania Standard Time", + "Australia/Victoria": "AUS Eastern Standard Time", + "Australia/West": "W. Australia Standard Time", + "Australia/Yancowinna": "Cen. Australia Standard Time", + "Brazil/Acre": "SA Pacific Standard Time", + "Brazil/DeNoronha": "UTC-02", + "Brazil/East": "E. 
South America Standard Time", + "Brazil/West": "SA Western Standard Time", + "CST6CDT": "Central Standard Time", + "Canada/Atlantic": "Atlantic Standard Time", + "Canada/Central": "Central Standard Time", + "Canada/Eastern": "Eastern Standard Time", + "Canada/Mountain": "Mountain Standard Time", + "Canada/Newfoundland": "Newfoundland Standard Time", + "Canada/Pacific": "Pacific Standard Time", + "Canada/Saskatchewan": "Canada Central Standard Time", + "Canada/Yukon": "Yukon Standard Time", + "Chile/Continental": "Pacific SA Standard Time", + "Chile/EasterIsland": "Easter Island Standard Time", + "Cuba": "Cuba Standard Time", + "EST5EDT": "Eastern Standard Time", + "Egypt": "Egypt Standard Time", + "Eire": "GMT Standard Time", + "Etc/GMT": "UTC", + "Etc/GMT+0": "UTC", + "Etc/GMT+1": "Cape Verde Standard Time", + "Etc/GMT+10": "Hawaiian Standard Time", + "Etc/GMT+11": "UTC-11", + "Etc/GMT+12": "Dateline Standard Time", + "Etc/GMT+2": "UTC-02", + "Etc/GMT+3": "SA Eastern Standard Time", + "Etc/GMT+4": "SA Western Standard Time", + "Etc/GMT+5": "SA Pacific Standard Time", + "Etc/GMT+6": "Central America Standard Time", + "Etc/GMT+7": "US Mountain Standard Time", + "Etc/GMT+8": "UTC-08", + "Etc/GMT+9": "UTC-09", + "Etc/GMT-0": "UTC", + "Etc/GMT-1": "W. Central Africa Standard Time", + "Etc/GMT-10": "West Pacific Standard Time", + "Etc/GMT-11": "Central Pacific Standard Time", + "Etc/GMT-12": "UTC+12", + "Etc/GMT-13": "UTC+13", + "Etc/GMT-14": "Line Islands Standard Time", + "Etc/GMT-2": "South Africa Standard Time", + "Etc/GMT-3": "E. Africa Standard Time", + "Etc/GMT-4": "Arabian Standard Time", + "Etc/GMT-5": "West Asia Standard Time", + "Etc/GMT-6": "Central Asia Standard Time", + "Etc/GMT-7": "SE Asia Standard Time", + "Etc/GMT-8": "Singapore Standard Time", + "Etc/GMT-9": "Tokyo Standard Time", + "Etc/GMT0": "UTC", + "Etc/Greenwich": "UTC", + "Etc/UCT": "UTC", + "Etc/UTC": "UTC", + "Etc/Universal": "UTC", + "Etc/Zulu": "UTC", + "Europe/Amsterdam": "W. Europe Standard Time", + "Europe/Andorra": "W. Europe Standard Time", + "Europe/Astrakhan": "Astrakhan Standard Time", + "Europe/Athens": "GTB Standard Time", + "Europe/Belfast": "GMT Standard Time", + "Europe/Belgrade": "Central Europe Standard Time", + "Europe/Berlin": "W. Europe Standard Time", + "Europe/Bratislava": "Central Europe Standard Time", + "Europe/Brussels": "Romance Standard Time", + "Europe/Bucharest": "GTB Standard Time", + "Europe/Budapest": "Central Europe Standard Time", + "Europe/Busingen": "W. Europe Standard Time", + "Europe/Chisinau": "E. Europe Standard Time", + "Europe/Copenhagen": "Romance Standard Time", + "Europe/Dublin": "GMT Standard Time", + "Europe/Gibraltar": "W. Europe Standard Time", + "Europe/Guernsey": "GMT Standard Time", + "Europe/Helsinki": "FLE Standard Time", + "Europe/Isle_of_Man": "GMT Standard Time", + "Europe/Istanbul": "Turkey Standard Time", + "Europe/Jersey": "GMT Standard Time", + "Europe/Kaliningrad": "Kaliningrad Standard Time", + "Europe/Kiev": "FLE Standard Time", + "Europe/Kirov": "Russian Standard Time", + "Europe/Kyiv": "FLE Standard Time", + "Europe/Lisbon": "GMT Standard Time", + "Europe/Ljubljana": "Central Europe Standard Time", + "Europe/London": "GMT Standard Time", + "Europe/Luxembourg": "W. Europe Standard Time", + "Europe/Madrid": "Romance Standard Time", + "Europe/Malta": "W. Europe Standard Time", + "Europe/Mariehamn": "FLE Standard Time", + "Europe/Minsk": "Belarus Standard Time", + "Europe/Monaco": "W. 
Europe Standard Time", + "Europe/Moscow": "Russian Standard Time", + "Europe/Nicosia": "GTB Standard Time", + "Europe/Oslo": "W. Europe Standard Time", + "Europe/Paris": "Romance Standard Time", + "Europe/Podgorica": "Central Europe Standard Time", + "Europe/Prague": "Central Europe Standard Time", + "Europe/Riga": "FLE Standard Time", + "Europe/Rome": "W. Europe Standard Time", + "Europe/Samara": "Russia Time Zone 3", + "Europe/San_Marino": "W. Europe Standard Time", + "Europe/Sarajevo": "Central European Standard Time", + "Europe/Saratov": "Saratov Standard Time", + "Europe/Simferopol": "Russian Standard Time", + "Europe/Skopje": "Central European Standard Time", + "Europe/Sofia": "FLE Standard Time", + "Europe/Stockholm": "W. Europe Standard Time", + "Europe/Tallinn": "FLE Standard Time", + "Europe/Tirane": "Central Europe Standard Time", + "Europe/Tiraspol": "E. Europe Standard Time", + "Europe/Ulyanovsk": "Astrakhan Standard Time", + "Europe/Uzhgorod": "FLE Standard Time", + "Europe/Vaduz": "W. Europe Standard Time", + "Europe/Vatican": "W. Europe Standard Time", + "Europe/Vienna": "W. Europe Standard Time", + "Europe/Vilnius": "FLE Standard Time", + "Europe/Volgograd": "Volgograd Standard Time", + "Europe/Warsaw": "Central European Standard Time", + "Europe/Zagreb": "Central European Standard Time", + "Europe/Zaporozhye": "FLE Standard Time", + "Europe/Zurich": "W. Europe Standard Time", + "GB": "GMT Standard Time", + "GB-Eire": "GMT Standard Time", + "GMT+0": "UTC", + "GMT-0": "UTC", + "GMT0": "UTC", + "Greenwich": "UTC", + "Hongkong": "China Standard Time", + "Iceland": "Greenwich Standard Time", + "Indian/Antananarivo": "E. Africa Standard Time", + "Indian/Chagos": "Central Asia Standard Time", + "Indian/Christmas": "SE Asia Standard Time", + "Indian/Cocos": "Myanmar Standard Time", + "Indian/Comoro": "E. Africa Standard Time", + "Indian/Kerguelen": "West Asia Standard Time", + "Indian/Mahe": "Mauritius Standard Time", + "Indian/Maldives": "West Asia Standard Time", + "Indian/Mauritius": "Mauritius Standard Time", + "Indian/Mayotte": "E. 
Africa Standard Time", + "Indian/Reunion": "Mauritius Standard Time", + "Iran": "Iran Standard Time", + "Israel": "Israel Standard Time", + "Jamaica": "SA Pacific Standard Time", + "Japan": "Tokyo Standard Time", + "Kwajalein": "UTC+12", + "Libya": "Libya Standard Time", + "MST7MDT": "Mountain Standard Time", + "Mexico/BajaNorte": "Pacific Standard Time (Mexico)", + "Mexico/BajaSur": "Mountain Standard Time (Mexico)", + "Mexico/General": "Central Standard Time (Mexico)", + "NZ": "New Zealand Standard Time", + "NZ-CHAT": "Chatham Islands Standard Time", + "Navajo": "Mountain Standard Time", + "PRC": "China Standard Time", + "PST8PDT": "Pacific Standard Time", + "Pacific/Apia": "Samoa Standard Time", + "Pacific/Auckland": "New Zealand Standard Time", + "Pacific/Bougainville": "Bougainville Standard Time", + "Pacific/Chatham": "Chatham Islands Standard Time", + "Pacific/Chuuk": "West Pacific Standard Time", + "Pacific/Easter": "Easter Island Standard Time", + "Pacific/Efate": "Central Pacific Standard Time", + "Pacific/Enderbury": "UTC+13", + "Pacific/Fakaofo": "UTC+13", + "Pacific/Fiji": "Fiji Standard Time", + "Pacific/Funafuti": "UTC+12", + "Pacific/Galapagos": "Central America Standard Time", + "Pacific/Gambier": "UTC-09", + "Pacific/Guadalcanal": "Central Pacific Standard Time", + "Pacific/Guam": "West Pacific Standard Time", + "Pacific/Honolulu": "Hawaiian Standard Time", + "Pacific/Johnston": "Hawaiian Standard Time", + "Pacific/Kanton": "UTC+13", + "Pacific/Kiritimati": "Line Islands Standard Time", + "Pacific/Kosrae": "Central Pacific Standard Time", + "Pacific/Kwajalein": "UTC+12", + "Pacific/Majuro": "UTC+12", + "Pacific/Marquesas": "Marquesas Standard Time", + "Pacific/Midway": "UTC-11", + "Pacific/Nauru": "UTC+12", + "Pacific/Niue": "UTC-11", + "Pacific/Norfolk": "Norfolk Standard Time", + "Pacific/Noumea": "Central Pacific Standard Time", + "Pacific/Pago_Pago": "UTC-11", + "Pacific/Palau": "Tokyo Standard Time", + "Pacific/Pitcairn": "UTC-08", + "Pacific/Pohnpei": "Central Pacific Standard Time", + "Pacific/Ponape": "Central Pacific Standard Time", + "Pacific/Port_Moresby": "West Pacific Standard Time", + "Pacific/Rarotonga": "Hawaiian Standard Time", + "Pacific/Saipan": "West Pacific Standard Time", + "Pacific/Samoa": "UTC-11", + "Pacific/Tahiti": "Hawaiian Standard Time", + "Pacific/Tarawa": "UTC+12", + "Pacific/Tongatapu": "Tonga Standard Time", + "Pacific/Truk": "West Pacific Standard Time", + "Pacific/Wake": "UTC+12", + "Pacific/Wallis": "UTC+12", + "Pacific/Yap": "West Pacific Standard Time", + "Poland": "Central European Standard Time", + "Portugal": "GMT Standard Time", + "ROC": "Taipei Standard Time", + "ROK": "Korea Standard Time", + "Singapore": "Singapore Standard Time", + "Turkey": "Turkey Standard Time", + "UCT": "UTC", + "US/Alaska": "Alaskan Standard Time", + "US/Aleutian": "Aleutian Standard Time", + "US/Arizona": "US Mountain Standard Time", + "US/Central": "Central Standard Time", + "US/Eastern": "Eastern Standard Time", + "US/Hawaii": "Hawaiian Standard Time", + "US/Indiana-Starke": "Central Standard Time", + "US/Michigan": "Eastern Standard Time", + "US/Mountain": "Mountain Standard Time", + "US/Pacific": "Pacific Standard Time", + "US/Samoa": "UTC-11", + "UTC": "UTC", + "Universal": "UTC", + "W-SU": "Russian Standard Time", + "Zulu": "UTC", +}