From 1fac478bf3f31ceae184be8d0a32a6399384b6af Mon Sep 17 00:00:00 2001 From: ThioJoe <12518330+ThioJoe@users.noreply.github.com> Date: Thu, 16 Dec 2021 19:38:53 -0700 Subject: [PATCH] Update Script - New detection engine for more spammer types - Auto-Smart Mode: Now also detects sex spam bots with very high accuracy. Huge amount of filtering logic added to find the large variety of these types of bots - Auto-Smart mode can now detect bots with look-alike usernames - Sample list now shows number of comments by each author - Better detection of potential false positives from replies - Filter now ignores uploader's comments regardless of who is scanning --- YouTubeSpammerPurge.py | 313 +++- .../default_config.ini | 0 assets/rootZoneDomainList.txt | 1489 +++++++++++++++++ requirements.txt | 1 + 4 files changed, 1764 insertions(+), 39 deletions(-) rename default_config.ini => assets/default_config.ini (100%) create mode 100644 assets/rootZoneDomainList.txt diff --git a/YouTubeSpammerPurge.py b/YouTubeSpammerPurge.py index 762ada4..1fd6774 100644 --- a/YouTubeSpammerPurge.py +++ b/YouTubeSpammerPurge.py @@ -35,7 +35,7 @@ ### IMPORTANT: I OFFER NO WARRANTY OR GUARANTEE FOR THIS SCRIPT. USE AT YOUR OWN RISK. ### I tested it on my own and implemented some failsafes as best as I could, ### but there could always be some kind of bug. You should inspect the code yourself. -version = "2.0.0-Beta1" +version = "2.0.0-Beta2" #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~# # Try Imports @@ -57,6 +57,7 @@ try: from base64 import b85decode as b64decode from configparser import ConfigParser from pkg_resources import parse_version + from confusables import confusable_regex, normalize # Non Standard Modules import rtfunicode @@ -156,8 +157,8 @@ def print_comments(check_video_id_localprint, comments, logMode): valuesPreparedToPrint = "" print(f"{F.LIGHTMAGENTA_EX}---------------------------- Match Samples: One comment per matched-comment author ----------------------------{S.R}") for value in matchSamplesDict.values(): - valuesPreparedToWrite = valuesPreparedToWrite + f"{str(value['n'])}. {str(value['authorID'])} | {make_rtf_compatible(str(value['nameAndText']))} \\line \n" - valuesPreparedToPrint = valuesPreparedToPrint + f"{str(value['n'])}. {str(value['nameAndText'])}\n" + valuesPreparedToWrite = valuesPreparedToWrite + value['iString'] + value['cString'] + f"{str(value['authorID'])} | {make_rtf_compatible(str(value['nameAndText']))} \\line \n" + valuesPreparedToPrint = valuesPreparedToPrint + value['iString'] + value['cString'] + f"{str(value['nameAndText'])}\n" if logMode == True: write_rtf(logFileName, "-------------------- Match Samples: One comment per matched-comment author -------------------- \\line\\line \n") write_rtf(logFileName, valuesPreparedToWrite) @@ -241,7 +242,13 @@ def print_prepared_comments(check_video_id_localprep, comments, j, logMode): # Adds a sample to matchSamplesDict and preps formatting def add_sample(authorID, authorNameRaw, commentText): global matchSamplesDict - count = len(matchSamplesDict) + 1 + global authorMatchCountDict + + # Make index number and string formatted version + index = len(matchSamplesDict) + 1 + iString = f"{str(index)}. ".ljust(4) + authorNumComments = authorMatchCountDict[authorID] + cString = f"[x{str(authorNumComments)}] ".ljust(7) # Left Justify Author Name and Comment Text if len(authorNameRaw) > 20: @@ -255,7 +262,7 @@ def add_sample(authorID, authorNameRaw, commentText): commentText = commentText[0:85].ljust(85) # Add comment sample, author ID, name, and counter - matchSamplesDict[authorID] = {'n':count, 'authorID':authorID, 'authorName':authorNameRaw, 'nameAndText':authorName + commentText} + matchSamplesDict[authorID] = {'index':index, 'cString':cString, 'iString':iString, 'count':authorNumComments, 'authorID':authorID, 'authorName':authorNameRaw, 'nameAndText':authorName + commentText} ########################################################################################## @@ -263,7 +270,7 @@ def add_sample(authorID, authorNameRaw, commentText): ########################################################################################## # Call the API's commentThreads.list method to list the existing comments. -def get_comments(youtube, currentUser, filterMode, filterSubMode, check_video_id=None, check_channel_id=None, nextPageToken=None, inputtedSpammerChannelID=None, inputtedUsernameFilter=None, inputtedCommentTextFilter=None, regexPattern=None): # None are set as default if no parameters passed into function +def get_comments(youtube, miscData, currentUser, filterMode, filterSubMode, check_video_id=None, check_channel_id=None, nextPageToken=None, inputtedSpammerChannelID=None, inputtedUsernameFilter=None, inputtedCommentTextFilter=None, regexPattern=None): # None are set as default if no parameters passed into function global scannedCommentsCount # Initialize some variables authorChannelName = None @@ -330,13 +337,13 @@ def get_comments(youtube, currentUser, filterMode, filterSubMode, check_video_id commentText = "[Deleted/Missing Comment]" # Runs check against comment info for whichever filter data is relevant - check_against_filter(currentUser, filterMode=filterMode, filterSubMode=filterSubMode, commentID=parent_id, videoID=videoID, authorChannelID=parentAuthorChannelID, parentAuthorChannelID=None, inputtedSpammerChannelID=inputtedSpammerChannelID, inputtedUsernameFilter=inputtedUsernameFilter, inputtedCommentTextFilter=inputtedCommentTextFilter, authorChannelName=authorChannelName, commentText=commentText, regexPattern=regexPattern) + check_against_filter(currentUser, miscData, filterMode=filterMode, filterSubMode=filterSubMode, commentID=parent_id, videoID=videoID, authorChannelID=parentAuthorChannelID, parentAuthorChannelID=None, inputtedSpammerChannelID=inputtedSpammerChannelID, inputtedUsernameFilter=inputtedUsernameFilter, inputtedCommentTextFilter=inputtedCommentTextFilter, authorChannelName=authorChannelName, commentText=commentText, regexPattern=regexPattern) scannedCommentsCount += 1 # Counts number of comments scanned, add to global count if numReplies > 0 and len(limitedRepliesList) < numReplies: - get_replies(currentUser, filterMode, filterSubMode, parent_id, videoID, parentAuthorChannelID, inputtedSpammerChannelID, inputtedUsernameFilter, inputtedCommentTextFilter, regexPattern) + get_replies(currentUser, miscData, filterMode, filterSubMode, parent_id, videoID, parentAuthorChannelID, inputtedSpammerChannelID, inputtedUsernameFilter, inputtedCommentTextFilter, regexPattern) elif numReplies > 0 and len(limitedRepliesList) == numReplies: # limitedRepliesList can never be more than numReplies - get_replies(currentUser, filterMode, filterSubMode, parent_id, videoID, parentAuthorChannelID, inputtedSpammerChannelID, inputtedUsernameFilter, inputtedCommentTextFilter, regexPattern, limitedRepliesList) + get_replies(currentUser, miscData, filterMode, filterSubMode, parent_id, videoID, parentAuthorChannelID, inputtedSpammerChannelID, inputtedUsernameFilter, inputtedCommentTextFilter, regexPattern, limitedRepliesList) else: print_count_stats(final=False) # Updates displayed stats if no replies @@ -348,7 +355,7 @@ def get_comments(youtube, currentUser, filterMode, filterSubMode, check_video_id ########################################################################################## # Call the API's comments.list method to list the existing comment replies. -def get_replies(currentUser, filterMode, filterSubMode, parent_id, videoID, parentAuthorChannelID, inputtedSpammerChannelID=None, inputtedUsernameFilter=None, inputtedCommentTextFilter=None, regexPattern=None, repliesList=None): +def get_replies(currentUser, miscData, filterMode, filterSubMode, parent_id, videoID, parentAuthorChannelID, inputtedSpammerChannelID=None, inputtedUsernameFilter=None, inputtedCommentTextFilter=None, regexPattern=None, repliesList=None): global scannedRepliesCount # Initialize some variables authorChannelName = None @@ -403,7 +410,7 @@ def get_replies(currentUser, filterMode, filterSubMode, parent_id, videoID, pare commentText = "[Deleted/Missing Comment]" # Runs check against comment info for whichever filter data is relevant - check_against_filter(currentUser, filterMode=filterMode, filterSubMode=filterSubMode, commentID=replyID, videoID=videoID, authorChannelID=authorChannelID, parentAuthorChannelID=parentAuthorChannelID, inputtedSpammerChannelID=inputtedSpammerChannelID, inputtedUsernameFilter=inputtedUsernameFilter, inputtedCommentTextFilter=inputtedCommentTextFilter, authorChannelName=authorChannelName, commentText=commentText, regexPattern=regexPattern, allThreadAuthorNames=allThreadAuthorNames) + check_against_filter(currentUser, miscData, filterMode=filterMode, filterSubMode=filterSubMode, commentID=replyID, videoID=videoID, authorChannelID=authorChannelID, parentAuthorChannelID=parentAuthorChannelID, inputtedSpammerChannelID=inputtedSpammerChannelID, inputtedUsernameFilter=inputtedUsernameFilter, inputtedCommentTextFilter=inputtedCommentTextFilter, authorChannelName=authorChannelName, commentText=commentText, regexPattern=regexPattern, allThreadAuthorNames=allThreadAuthorNames) # Update latest stats scannedRepliesCount += 1 # Count number of replies scanned, add to global count @@ -413,25 +420,42 @@ def get_replies(currentUser, filterMode, filterSubMode, parent_id, videoID, pare ############################## CHECK AGAINST FILTER ###################################### # The basic logic that actually checks each comment against filter criteria -def check_against_filter(currentUser, filterMode, filterSubMode, commentID, videoID, authorChannelID, parentAuthorChannelID=None, inputtedSpammerChannelID=None, inputtedUsernameFilter=None, inputtedCommentTextFilter=None, authorChannelName=None, commentText=None, regexPattern=None, allThreadAuthorNames=None): +def check_against_filter(currentUser, miscData, filterMode, filterSubMode, commentID, videoID, authorChannelID, parentAuthorChannelID=None, inputtedSpammerChannelID=None, inputtedUsernameFilter=None, inputtedCommentTextFilter=None, authorChannelName=None, commentText=None, regexPattern=None, allThreadAuthorNames=None): global vidIdDict global matchedCommentsDict commentTextOriginal = str(commentText) # Do not even check comment if author ID matches currently logged in user's ID - if currentUser[0] != authorChannelID: - # Logic to avoid false positives from replies to spammers - if allThreadAuthorNames and (filterMode == "AutoSmart" or filterMode == "NameAndText"): - for name in allThreadAuthorNames: - if "@"+str(name) in commentText: - commentText = commentText.replace("@"+str(name), "") + if currentUser[0] != authorChannelID and miscData['channelOwnerID'] != authorChannelID: + if "@" in commentText: + # Logic to avoid false positives from replies to spammers + if allThreadAuthorNames and (filterMode == "AutoSmart" or filterMode == "NameAndText"): + for name in allThreadAuthorNames: + if "@"+str(name) in commentText: + commentText = commentText.replace("@"+str(name), "") + # Extra logic to detect false positive if spammer's comment already deleted, but someone replied + if matchedCommentsDict and filterMode == "AutoSmart": + for key, value in matchedCommentsDict.items(): + if "@"+str(value['authorName']) in commentText: + remove = True + for key2,value2 in matchedCommentsDict.items(): + if value2['authorID'] == authorChannelID: + remove = False + if remove == True: + commentText = commentText.replace("@"+str(value['authorName']), "") # If the comment/username matches criteria based on mode, add key/value pair of comment ID and author ID to matchedCommentsDict # Also add key-value pair of comment ID and video ID to dictionary + # Also count how many spam comments for each author def add_spam(commentID, videoID): global matchedCommentsDict + global authorMatchCountDict matchedCommentsDict[commentID] = {'text':commentTextOriginal, 'authorName':authorChannelName, 'authorID':authorChannelID, 'videoID':videoID} vidIdDict[commentID] = videoID # Probably remove this later, but still being used for now + if authorChannelID in authorMatchCountDict: + authorMatchCountDict[authorChannelID] += 1 + else: + authorMatchCountDict[authorChannelID] = 1 # Checks author of either parent comment or reply (both passed in as commentID) against channel ID inputted by user if filterMode == "ID": @@ -493,18 +517,105 @@ def check_against_filter(currentUser, filterMode, filterSubMode, commentID, vide # Here inputtedComment/Author Filters are tuples of, where 2nd element is list of char-sets to check against ## Also Check if reply author ID is same as parent comment author ID, if so, ignore (to account for users who reply to spammers) elif filterMode == "AutoSmart": + # Receive Variables numberFilterSet = inputtedUsernameFilter['spammerNumbersSet'] compiledRegex = inputtedUsernameFilter['compiledRegex'] minNumbersMatchCount = inputtedUsernameFilter['minNumbersMatchCount'] - + #usernameBlackCharsSet = inputtedUsernameFilter['usernameBlackCharsSet'] + spamGenEmojiSet = inputtedUsernameFilter['spamGenEmojiSet'] + redAdEmojiSet = inputtedUsernameFilter['redAdEmojiSet'] + yellowAdEmojiSet = inputtedUsernameFilter['yellowAdEmojiSet'] + hrtSet = inputtedUsernameFilter['hrtSet'] + domainRegex = inputtedUsernameFilter['domainRegex'] + compiledRegexDict = inputtedUsernameFilter['compiledRegexDict'] + languages = inputtedUsernameFilter['languages'] + + # Processed Variables combinedString = authorChannelName + commentText combinedSet = make_char_set(combinedString, stripLettersNumbers=True, stripPunctuation=True) + usernameSet = make_char_set(authorChannelName) + + # Functions + def findOnlyObfuscated(regexExpression, originalWord, stringToSearch): + # Confusable thinks s and f look similar, have to compensate to avoid false positive + ignoredConfusablesConverter = {ord('f'):ord('s'),ord('s'):ord('f')} + result = re.findall(regexExpression, stringToSearch.lower()) + if result == None: + return False + else: + for match in result: + lowerWord = originalWord.lower() + if match.lower() != lowerWord and match.lower() != lowerWord.translate(ignoredConfusablesConverter): + return True + + # Run Checks if authorChannelID == parentAuthorChannelID: pass elif len(numberFilterSet.intersection(combinedSet)) >= minNumbersMatchCount: add_spam(commentID, videoID) elif compiledRegex.search(combinedString): add_spam(commentID, videoID) + # Black Tests + #elif usernameBlackCharsSet.intersection(usernameSet): + # add_spam(commentID, videoID) + elif any(re.search(expression[1], authorChannelName) for expression in compiledRegexDict['usernameBlackWords']): + add_spam(commentID, videoID) + elif any(findOnlyObfuscated(expression[1], expression[0], combinedString) for expression in compiledRegexDict['blackAdWords']): + add_spam(commentID, videoID) + elif re.search(inputtedUsernameFilter['usernameConfuseRegex'], authorChannelName): + add_spam(commentID, videoID) + # Multi Criteria Tests + else: + # Defaults + yellowCount = 0 + redCount = 0 + + languageCount = 0 + for language in languages: + if language[2].intersection(combinedSet): + languageCount += 1 + + # Yellow Tests + if any(findOnlyObfuscated(expression[1], expression[0], combinedString) for expression in compiledRegexDict['yellowAdWords']): + yellowCount += 1 + + if len(hrtSet.intersection(combinedSet)) >= 2: + yellowCount += 1 + + if yellowAdEmojiSet.intersection(combinedSet): + yellowCount += 1 + + if spamGenEmojiSet.intersection(combinedSet): + yellowCount += 1 + + if combinedString.count('#') >= 5: + yellowCount += 1 + + if combinedString.count('\n') >= 10: + yellowCount += 1 + + if re.search(domainRegex, combinedString.lower()): + yellowCount += 1 + + if languageCount >= 2: + yellowCount += 1 + + # Red Tests + #if any(foundObfuscated(re.findall(expression[1], combinedString), expression[0]) for expression in compiledRegexDict['redAdWords']): + if any(findOnlyObfuscated(expression[1], expression[0], combinedString) for expression in compiledRegexDict['redAdWords']): + redCount += 1 + + if redAdEmojiSet.intersection(combinedSet): + redCount += 1 + + # Calculate Score + if yellowCount >= 3: + add_spam(commentID, videoID) + elif redCount >= 2: + add_spam(commentID, videoID) + elif redCount >= 1 and yellowCount >= 1: + add_spam(commentID, videoID) + else: pass @@ -685,7 +796,7 @@ def exclude_authors(inputtedString): # Get authorIDs for selected sample comments for authorID, info in matchSamplesDict.items(): - if str(info['n']) in SampleIDsToExclude: + if str(info['index']) in SampleIDsToExclude: authorIDsToExclude += [authorID] # Get comment IDs to be excluded @@ -751,13 +862,14 @@ def get_channel_id(video_id): results = youtube.videos().list( part="snippet", id=video_id, - fields="items/snippet/channelId", + fields="items/snippet/channelId,items/snippet/channelTitle", maxResults=1 ).execute() - channel_id = results["items"][0]["snippet"]["channelId"] + channelID = results["items"][0]["snippet"]["channelId"] + channelTitle = results["items"][0]["snippet"]["channelTitle"] - return channel_id + return channelID, channelTitle ############################# GET CURRENTLY LOGGED IN USER ##################################### # Class for custom exception to throw if a comment if invalid channel ID returned @@ -1137,9 +1249,25 @@ def check_for_update(currentVersion, silentCheck=False): elif silentCheck == True: return isUpdateAvailable +############################# Ingest Other Files ############################## +def ingest_domain_file(): + def assetFilesPath(relative_path): + if hasattr(sys, '_MEIPASS'): # If running as a pyinstaller bundle + return os.path.join(sys._MEIPASS, relative_path) + return os.path.join(os.path.abspath("assets"), relative_path) # If running as script, specifies resource folder as /assets + + # Open list of root zone domain extensions + with open(assetFilesPath("rootZoneDomainList.txt"), 'r', encoding="utf-8") as domainFile: + rootZoneData = domainFile.readlines() + rootZoneList = [] + for line in rootZoneData: + line = line.strip() + rootZoneList.append(line.lower()) + + return rootZoneList + ############################# CONFIG FILE FUNCTIONS ############################## def create_config_file(): - def config_path(relative_path): if hasattr(sys, '_MEIPASS'): # If running as a pyinstaller bundle #print("Test1") # For Debugging @@ -1147,7 +1275,7 @@ def create_config_file(): return os.path.join(sys._MEIPASS, relative_path) #print("Test2") # for Debugging #print(os.path.join(os.path.abspath("assets"), relative_path)) # For debugging - return os.path.join(os.path.abspath(""), relative_path) # If running as script, specifies resource folder as /assets + return os.path.join(os.path.abspath("assets"), relative_path) # If running as script, specifies resource folder as /assets configFileName = "SpamPurgeConfig.ini" @@ -1603,8 +1731,9 @@ def prepare_filter_mode_non_ascii(currentUser, scanMode, config): sys.exit() # Auto filter for pre-made list of common spammer-used characters in usernames -def prepare_filter_mode_smart_chars(currentUser, scanMode, config): +def prepare_filter_mode_smart(currentUser, scanMode, config, miscData): currentUserName = currentUser[1] + domainList = miscData['domainList'] utf_16 = "utf-8" if config and config['filter_mode'] == "autosmart": print("Using Auto Smart Mode - Set from config file.") @@ -1614,7 +1743,19 @@ def prepare_filter_mode_smart_chars(currentUser, scanMode, config): print(" > Specifically, unicode characters that look like numbers\n") input("Press Enter to continue...") - # Spam Criteria + # General Spammer Criteria + spamGenEmoji = 'đŸ‘‡đŸ‘†â˜đŸ‘ˆđŸ‘‰â€”ïžđŸ”ŒđŸ…„â™œ' + #usernameRedChars ="" + #usernameBlackChars = "" + usernameBlackWords_Raw = [b'aA|ICWn^M`', b'aA|ICWn>^?c>'] + usernameBlackWords = [] + for x in usernameBlackWords_Raw: usernameBlackWords.append(b64decode(x).decode(utf_16)) + + # Prepare General Filters + spamGenEmojiSet = make_char_set(spamGenEmoji) + #usernameBlackCharsSet = make_char_set(usernameBlackChars) + + # Type 1 Spammer Criteria minNumbersMatchCount = 3 # Choice of minimum number of matches from spamNums before considered spam spamNums = b'@4S%jypiv`lJC5e@4S@nyp`{~mhZfm@4T4ryqWL3kng;a@4S-lyp!*|l<&Ni@4S}pyqE91nD4xq-+|(hpyH9V;*yBsleOZVw&I?E;+~4|pM-+ovAy7_sN#{K;*quDl8NGzw&I<);+}!xo{R9GgoEI*sp65M;*qxEl8WM!x8j|+;+}%yo{aFHgoNO$sp65N;*q!Fl8fS#xZ<6;;+})zo{jLIgoWafq~ejd;*yNwleyxZy5gRM;+~G;o`m9_j_{v^hT@T>;*q)Hl8xe%y5gO?;+}=#o{#XKgoomhrs9#h;*yTyle^-byyBjQ;+~N3k%YbQpM;3vf|%lwr{a;j;*yWzlf2@cz2csS;+~Q4pM;6xk*MO4yyB9O;*-7Noxb9ph~l1-@SlW=;*+Z4lfUqvgp2T>gpBZ?gn{s%gn;m!pN{aIpP2BSpQ7-cpRDkmpO5gJpPBHTpRMqnpQG@dpSJLwpOEmKpPKNUpRVwopQP}epSSRxpONsLpPTTVpRe$ppQZ4fpSbXypOWyMpPcZWpRn+qpQiAgpSkdzpOf&NpPlfXpRw?rpQrGhpStj!pOo;OpPulYpR(|spQ!MipS$p#pOx^PpP%rZpR@3tpQ-SjpSUPz;;6aesj=dzvf`|=@42Gyyo=$Rt>S^4;+U!8n5g2IrsA2f;+e7Ho2cTPnc|$9;+&h}oSfpEo#LFH;+&u2oS^EOn(CUH@Sl}{@Sl}|@Sl}}@Sl~2@Sl~3@Sl~4@SmQc@SmQd@SmQe@SmQf@SmQg@SmQh@SmQi' @@ -1623,14 +1764,94 @@ def prepare_filter_mode_smart_chars(currentUser, scanMode, config): y = b64decode(spamPlus).decode(utf_16) z = b64decode(spamOne).decode(utf_16) - # Process / Repair for Filter Use - spammerNumbersSet = make_char_set(x, stripLettersNumbers=False, stripKeyboardSpecialChars=False, stripPunctuation=False) - regexTest1 = f"[{y}][1]" - regexTest2 = f"[+][{z}]" - regexTest3 = f"[{y}][{z}]" + # Prepare Filters for Type 1 Spammers + spammerNumbersSet = make_char_set(x) + regexTest1 = f"[{y}] ?[1]" + regexTest2 = f"[+] ?[{z}]" + regexTest3 = f"[{y}] ?[{z}]" compiledRegex = re.compile(f"({regexTest1}|{regexTest2}|{regexTest3})") - filterSettings = {'spammerNumbersSet': spammerNumbersSet, 'compiledRegex': compiledRegex, 'minNumbersMatchCount': minNumbersMatchCount} + # Type 2 Spammer Criteria + blackAdWords, redAdWords, yellowAdWords = [], [], [] + blackAdWords_Raw = [b'V`yb#YanfTAaHVTW@&5', b'Z*XO9AZ>XdaB^>EX>0', b'b7f^9ZFwMYa&Km7Yy', b'V`yb#YanfTAa-eFWp4', b'V`yb#YanoPZ)Rz1', b'V`yb#Yan)MWMyv'] + redAdWords_Raw = [b'W_4q0', b'b7gn', b'WNBk-', b'WFcc~', b'W-4QA', b'W-2OUYX', b'Zgpg3', b'b1HZ', b'F*qv', b'aBp&M'] + yellowAdWords_Raw = [b'Y;SgD', b'Vr5}4@Sl;H@Sly0').decode(utf_16) + hrt = b64decode(b';+duJpOTpHpOTjFpOTmGpOTaCpOTsIpOTvJpOTyKpOT#LpQoYlpOT&MpO&QJouu%el9lkElAZ').decode(utf_16) + for x in blackAdWords_Raw: blackAdWords.append(b64decode(x).decode(utf_16)) + for x in redAdWords_Raw: redAdWords.append(b64decode(x).decode(utf_16)) + for x in yellowAdWords_Raw: yellowAdWords.append(b64decode(x).decode(utf_16)) + + # Prepare Filters for Type 2 Spammers + redAdEmojiSet = make_char_set(redAdEmoji) + yellowAdEmojiSet = make_char_set(yellowAdEmoji) + hrtSet = make_char_set(hrt) + + # Prepare Regex for Type 2 and General Spammers + compiledRegexDict = { + 'usernameBlackWords': [], + 'blackAdWords': [], + 'redAdWords': [], + 'yellowAdWords': [] + } + # Compile regex with upper case, otherwise many false positive character matches + bufferMatch, addBuffers = "*_~|`", "\[\]\(\)" # Add 'buffer' chars to compensate for obfuscation + m = bufferMatch + a = addBuffers + for word in usernameBlackWords: + value = re.compile(confusable_regex(word.upper(), include_character_padding=True).replace(m, a)) + compiledRegexDict['usernameBlackWords'].append([word, value]) + for word in blackAdWords: + value = re.compile(confusable_regex(word.upper(), include_character_padding=True).replace(m, a)) + compiledRegexDict['blackAdWords'].append([word, value]) + for word in redAdWords: + value = re.compile(confusable_regex(word.upper(), include_character_padding=True).replace(m, a)) + compiledRegexDict['redAdWords'].append([word, value]) + for word in yellowAdWords: + value = re.compile(confusable_regex(word.upper(), include_character_padding=True).replace(m, a)) + compiledRegexDict['yellowAdWords'].append([word, value]) + usernameConfuseRegex = re.compile(confusable_regex(miscData['channelOwnerName'])) + + # Prepare All-domain Regex Expression + prepString = "\.(" + first = True + for extension in domainList: + if first == True: + prepString += extension + first = False + else: + prepString = prepString + "|" + extension + prepString = prepString + ")\/" + domainRegex = re.compile(prepString) + + # Prepare Multi Language Detection + turkish = 'ÇçƞƟĞğİ' + germanic = 'áșžĂŸĂ„Ă€' + cyrillic = "гЎжзĐșĐ»ĐŒĐœĐżŃ€ŃŃ‚Ń„Ń…Ń†Ń‡ŃˆŃ‰Ń‹ŃŃŽŃŃŠŃŒ" + japanese = 'ゥスィむやりェ゚ォă‚Șカわキゟクグケă‚Čコゎゔザシゞă‚čă‚șă‚»ă‚Œă‚œă‚Ÿă‚żăƒ€ăƒăƒ‚ăƒ†ăƒ‡ăƒˆăƒ‰ăƒŠăƒ‹ăƒŒăƒăƒŽăƒăƒăƒ‘ăƒ’ăƒ“ăƒ”ăƒ•ăƒ–ăƒ—ăƒ˜ăƒ™ăƒšăƒ›ăƒœăƒăƒžăƒŸăƒ ăƒĄăƒąăƒŁăƒ€ăƒ„ăƒŠăƒ§ăƒšăƒ©ăƒȘルハロミワヰヱăƒČンノピヶヷマăƒčăƒșăƒŒăƒœăƒŸăƒżăă‚ăƒă„ă…ă†ă‡ăˆă‰ăŠă‹ăŒăăŽăă‘ă’ă“ă”ă•ă–ă—ă˜ă™ăšă›ăœăăžăŸă ăĄăąăŁă€ă„ăŠă§ăšă©ăȘにくねたはばぱăČびぎごぶぷぞăčășă»ăŒăœăŸăżă‚€ă‚ă‚‚ă‚ƒă‚„ă‚…ă‚†ă‚‡ă‚ˆă‚‰ă‚Šă‚‹ă‚Œă‚ă‚Žă‚ă‚ă‚‘ă‚’ă‚“ă‚”ă‚•ă‚–ă‚ă‚žă‚Ÿ' + languages = [['turkish', turkish, []], ['germanic', germanic, []], ['cyrillic', cyrillic, []], ['japanese', japanese, []]] + for item in languages: + item[2] = make_char_set(item[1]) + + filterSettings = { + 'spammerNumbersSet': spammerNumbersSet, + 'compiledRegex': compiledRegex, + 'minNumbersMatchCount': minNumbersMatchCount, + 'blackAdWords': blackAdWords, + 'redAdWords': redAdWords, + 'yellowAdWords': yellowAdWords, + #'usernameBlackCharsSet': usernameBlackCharsSet, + 'spamGenEmojiSet': spamGenEmojiSet, + 'usernameBlackWords': usernameBlackWords, + 'redAdEmojiSet': redAdEmojiSet, + 'yellowAdEmojiSet': yellowAdEmojiSet, + 'hrtSet': hrtSet, + 'domainRegex': domainRegex, + 'compiledRegexDict': compiledRegexDict, + 'usernameConfuseRegex': usernameConfuseRegex, + 'languages': languages + } return filterSettings, None ########################################################################################## @@ -1654,9 +1875,11 @@ def main(): global scannedRepliesCount global scannedCommentsCount global matchSamplesDict + global authorMatchCountDict # Default values for global variables matchedCommentsDict = {} + authorMatchCountDict = {} vidIdDict = {} vidTitleDict = {} matchSamplesDict = {} @@ -1717,7 +1940,12 @@ def main(): input("Press Enter to exit...") sys.exit() + # Load any other data print("\n Loading...\n") + miscData = {} + domainList = ingest_domain_file() + miscData['domainList'] = domainList + # Check for program updates if not config or config['auto_check_update'] == True: try: @@ -1815,8 +2043,13 @@ def main(): check_video_id = str(validVideoID[1]) title = get_video_title(check_video_id) print("\nChosen Video: " + title) - if currentUser[0] != get_channel_id(check_video_id): + + channelOwner = get_channel_id(check_video_id) + if currentUser[0] != channelOwner[0]: userNotChannelOwner = True + miscData['channelOwnerID'] = channelOwner[0] + miscData['channelOwnerName'] = channelOwner[1] + # Ask if correct video, or skip if config if config and config['skip_confirm_video'] == True: confirm = True @@ -1849,6 +2082,8 @@ def main(): except: print("\nInvalid Input! - Must be a whole number.") validConfigSetting = False + miscData['channelOwnerID'] = currentUser[0] + miscData['channelOwnerName'] = currentUser[1] # Create config file elif scanMode == "makeConfig": @@ -1963,7 +2198,7 @@ def main(): regexPattern = filterSettings[0] elif filterMode == "AutoSmart": - filterSettings = prepare_filter_mode_smart_chars(currentUser, scanMode, config) + filterSettings = prepare_filter_mode_smart(currentUser, scanMode, config, miscData) inputtedUsernameFilter = filterSettings[0] inputtedCommentTextFilter = filterSettings[0] @@ -1991,15 +2226,15 @@ def main(): print("(Note: If the program appears to freeze, try right clicking within the window)\n") print(" --- Scanning --- \n") - def scan_all(youtube, currentUser, filterMode, filterSubMode, check_video_id, check_channel_id, inputtedSpammerChannelID, inputtedUsernameFilter, inputtedCommentTextFilter, regexPattern): - nextPageToken = get_comments(youtube, currentUser, filterMode, filterSubMode, check_video_id, check_channel_id, inputtedSpammerChannelID=inputtedSpammerChannelID, inputtedUsernameFilter=inputtedUsernameFilter, inputtedCommentTextFilter=inputtedCommentTextFilter, regexPattern=regexPattern) + def scan_all(youtube, miscData, currentUser, filterMode, filterSubMode, check_video_id, check_channel_id, inputtedSpammerChannelID, inputtedUsernameFilter, inputtedCommentTextFilter, regexPattern): + nextPageToken = get_comments(youtube, miscData, currentUser, filterMode, filterSubMode, check_video_id, check_channel_id, inputtedSpammerChannelID=inputtedSpammerChannelID, inputtedUsernameFilter=inputtedUsernameFilter, inputtedCommentTextFilter=inputtedCommentTextFilter, regexPattern=regexPattern) print_count_stats(final=False) # Prints comment scan stats, updates on same line # After getting first page, if there are more pages, goes to get comments for next page while nextPageToken != "End" and scannedCommentsCount < maxScanNumber: - nextPageToken = get_comments(youtube, currentUser, filterMode, filterSubMode, check_video_id, check_channel_id, nextPageToken, inputtedSpammerChannelID=inputtedSpammerChannelID, inputtedUsernameFilter=inputtedUsernameFilter, inputtedCommentTextFilter=inputtedCommentTextFilter, regexPattern=regexPattern) + nextPageToken = get_comments(youtube, miscData, currentUser, filterMode, filterSubMode, check_video_id, check_channel_id, nextPageToken, inputtedSpammerChannelID=inputtedSpammerChannelID, inputtedUsernameFilter=inputtedUsernameFilter, inputtedCommentTextFilter=inputtedCommentTextFilter, regexPattern=regexPattern) print_count_stats(final=True) # Prints comment scan stats, finalizes - params = [youtube, currentUser, filterMode, filterSubMode, check_video_id, check_channel_id, inputtedSpammerChannelID, inputtedUsernameFilter, inputtedCommentTextFilter, regexPattern] + params = [youtube, miscData, currentUser, filterMode, filterSubMode, check_video_id, check_channel_id, inputtedSpammerChannelID, inputtedUsernameFilter, inputtedCommentTextFilter, regexPattern] scan_all(*params) ########################################################## bypass = False diff --git a/default_config.ini b/assets/default_config.ini similarity index 100% rename from default_config.ini rename to assets/default_config.ini diff --git a/assets/rootZoneDomainList.txt b/assets/rootZoneDomainList.txt new file mode 100644 index 0000000..9c82cef --- /dev/null +++ b/assets/rootZoneDomainList.txt @@ -0,0 +1,1489 @@ +AAA +AARP +ABARTH +ABB +ABBOTT +ABBVIE +ABC +ABLE +ABOGADO +ABUDHABI +AC +ACADEMY +ACCENTURE +ACCOUNTANT +ACCOUNTANTS +ACO +ACTOR +AD +ADAC +ADS +ADULT +AE +AEG +AERO +AETNA +AF +AFL +AFRICA +AG +AGAKHAN +AGENCY +AI +AIG +AIRBUS +AIRFORCE +AIRTEL +AKDN +AL +ALFAROMEO +ALIBABA +ALIPAY +ALLFINANZ +ALLSTATE +ALLY +ALSACE +ALSTOM +AM +AMAZON +AMERICANEXPRESS +AMERICANFAMILY +AMEX +AMFAM +AMICA +AMSTERDAM +ANALYTICS +ANDROID +ANQUAN +ANZ +AO +AOL +APARTMENTS +APP +APPLE +AQ +AQUARELLE +AR +ARAB +ARAMCO +ARCHI +ARMY +ARPA +ART +ARTE +AS +ASDA +ASIA +ASSOCIATES +AT +ATHLETA +ATTORNEY +AU +AUCTION +AUDI +AUDIBLE +AUDIO +AUSPOST +AUTHOR +AUTO +AUTOS +AVIANCA +AW +AWS +AX +AXA +AZ +AZURE +BA +BABY +BAIDU +BANAMEX +BANANAREPUBLIC +BAND +BANK +BAR +BARCELONA +BARCLAYCARD +BARCLAYS +BAREFOOT +BARGAINS +BASEBALL +BASKETBALL +BAUHAUS +BAYERN +BB +BBC +BBT +BBVA +BCG +BCN +BD +BE +BEATS +BEAUTY +BEER +BENTLEY +BERLIN +BEST +BESTBUY +BET +BF +BG +BH +BHARTI +BI +BIBLE +BID +BIKE +BING +BINGO +BIO +BIZ +BJ +BLACK +BLACKFRIDAY +BLOCKBUSTER +BLOG +BLOOMBERG +BLUE +BM +BMS +BMW +BN +BNPPARIBAS +BO +BOATS +BOEHRINGER +BOFA +BOM +BOND +BOO +BOOK +BOOKING +BOSCH +BOSTIK +BOSTON +BOT +BOUTIQUE +BOX +BR +BRADESCO +BRIDGESTONE +BROADWAY +BROKER +BROTHER +BRUSSELS +BS +BT +BUDAPEST +BUGATTI +BUILD +BUILDERS +BUSINESS +BUY +BUZZ +BV +BW +BY +BZ +BZH +CA +CAB +CAFE +CAL +CALL +CALVINKLEIN +CAM +CAMERA +CAMP +CANCERRESEARCH +CANON +CAPETOWN +CAPITAL +CAPITALONE +CAR +CARAVAN +CARDS +CARE +CAREER +CAREERS +CARS +CASA +CASE +CASH +CASINO +CAT +CATERING +CATHOLIC +CBA +CBN +CBRE +CBS +CC +CD +CENTER +CEO +CERN +CF +CFA +CFD +CG +CH +CHANEL +CHANNEL +CHARITY +CHASE +CHAT +CHEAP +CHINTAI +CHRISTMAS +CHROME +CHURCH +CI +CIPRIANI +CIRCLE +CISCO +CITADEL +CITI +CITIC +CITY +CITYEATS +CK +CL +CLAIMS +CLEANING +CLICK +CLINIC +CLINIQUE +CLOTHING +CLOUD +CLUB +CLUBMED +CM +CN +CO +COACH +CODES +COFFEE +COLLEGE +COLOGNE +COM +COMCAST +COMMBANK +COMMUNITY +COMPANY +COMPARE +COMPUTER +COMSEC +CONDOS +CONSTRUCTION +CONSULTING +CONTACT +CONTRACTORS +COOKING +COOKINGCHANNEL +COOL +COOP +CORSICA +COUNTRY +COUPON +COUPONS +COURSES +CPA +CR +CREDIT +CREDITCARD +CREDITUNION +CRICKET +CROWN +CRS +CRUISE +CRUISES +CSC +CU +CUISINELLA +CV +CW +CX +CY +CYMRU +CYOU +CZ +DABUR +DAD +DANCE +DATA +DATE +DATING +DATSUN +DAY +DCLK +DDS +DE +DEAL +DEALER +DEALS +DEGREE +DELIVERY +DELL +DELOITTE +DELTA +DEMOCRAT +DENTAL +DENTIST +DESI +DESIGN +DEV +DHL +DIAMONDS +DIET +DIGITAL +DIRECT +DIRECTORY +DISCOUNT +DISCOVER +DISH +DIY +DJ +DK +DM +DNP +DO +DOCS +DOCTOR +DOG +DOMAINS +DOT +DOWNLOAD +DRIVE +DTV +DUBAI +DUNLOP +DUPONT +DURBAN +DVAG +DVR +DZ +EARTH +EAT +EC +ECO +EDEKA +EDU +EDUCATION +EE +EG +EMAIL +EMERCK +ENERGY +ENGINEER +ENGINEERING +ENTERPRISES +EPSON +EQUIPMENT +ER +ERICSSON +ERNI +ES +ESQ +ESTATE +ET +ETISALAT +EU +EUROVISION +EUS +EVENTS +EXCHANGE +EXPERT +EXPOSED +EXPRESS +EXTRASPACE +FAGE +FAIL +FAIRWINDS +FAITH +FAMILY +FAN +FANS +FARM +FARMERS +FASHION +FAST +FEDEX +FEEDBACK +FERRARI +FERRERO +FI +FIAT +FIDELITY +FIDO +FILM +FINAL +FINANCE +FINANCIAL +FIRE +FIRESTONE +FIRMDALE +FISH +FISHING +FIT +FITNESS +FJ +FK +FLICKR +FLIGHTS +FLIR +FLORIST +FLOWERS +FLY +FM +FO +FOO +FOOD +FOODNETWORK +FOOTBALL +FORD +FOREX +FORSALE +FORUM +FOUNDATION +FOX +FR +FREE +FRESENIUS +FRL +FROGANS +FRONTDOOR +FRONTIER +FTR +FUJITSU +FUN +FUND +FURNITURE +FUTBOL +FYI +GA +GAL +GALLERY +GALLO +GALLUP +GAME +GAMES +GAP +GARDEN +GAY +GB +GBIZ +GD +GDN +GE +GEA +GENT +GENTING +GEORGE +GF +GG +GGEE +GH +GI +GIFT +GIFTS +GIVES +GIVING +GL +GLASS +GLE +GLOBAL +GLOBO +GM +GMAIL +GMBH +GMO +GMX +GN +GODADDY +GOLD +GOLDPOINT +GOLF +GOO +GOODYEAR +GOOG +GOOGLE +GOP +GOT +GOV +GP +GQ +GR +GRAINGER +GRAPHICS +GRATIS +GREEN +GRIPE +GROCERY +GROUP +GS +GT +GU +GUARDIAN +GUCCI +GUGE +GUIDE +GUITARS +GURU +GW +GY +HAIR +HAMBURG +HANGOUT +HAUS +HBO +HDFC +HDFCBANK +HEALTH +HEALTHCARE +HELP +HELSINKI +HERE +HERMES +HGTV +HIPHOP +HISAMITSU +HITACHI +HIV +HK +HKT +HM +HN +HOCKEY +HOLDINGS +HOLIDAY +HOMEDEPOT +HOMEGOODS +HOMES +HOMESENSE +HONDA +HORSE +HOSPITAL +HOST +HOSTING +HOT +HOTELES +HOTELS +HOTMAIL +HOUSE +HOW +HR +HSBC +HT +HU +HUGHES +HYATT +HYUNDAI +IBM +ICBC +ICE +ICU +ID +IE +IEEE +IFM +IKANO +IL +IM +IMAMAT +IMDB +IMMO +IMMOBILIEN +IN +INC +INDUSTRIES +INFINITI +INFO +ING +INK +INSTITUTE +INSURANCE +INSURE +INT +INTERNATIONAL +INTUIT +INVESTMENTS +IO +IPIRANGA +IQ +IR +IRISH +IS +ISMAILI +IST +ISTANBUL +IT +ITAU +ITV +JAGUAR +JAVA +JCB +JE +JEEP +JETZT +JEWELRY +JIO +JLL +JM +JMP +JNJ +JO +JOBS +JOBURG +JOT +JOY +JP +JPMORGAN +JPRS +JUEGOS +JUNIPER +KAUFEN +KDDI +KE +KERRYHOTELS +KERRYLOGISTICS +KERRYPROPERTIES +KFH +KG +KH +KI +KIA +KIM +KINDER +KINDLE +KITCHEN +KIWI +KM +KN +KOELN +KOMATSU +KOSHER +KP +KPMG +KPN +KR +KRD +KRED +KUOKGROUP +KW +KY +KYOTO +KZ +LA +LACAIXA +LAMBORGHINI +LAMER +LANCASTER +LANCIA +LAND +LANDROVER +LANXESS +LASALLE +LAT +LATINO +LATROBE +LAW +LAWYER +LB +LC +LDS +LEASE +LECLERC +LEFRAK +LEGAL +LEGO +LEXUS +LGBT +LI +LIDL +LIFE +LIFEINSURANCE +LIFESTYLE +LIGHTING +LIKE +LILLY +LIMITED +LIMO +LINCOLN +LINDE +LINK +LIPSY +LIVE +LIVING +LIXIL +LK +LLC +LLP +LOAN +LOANS +LOCKER +LOCUS +LOFT +LOL +LONDON +LOTTE +LOTTO +LOVE +LPL +LPLFINANCIAL +LR +LS +LT +LTD +LTDA +LU +LUNDBECK +LUXE +LUXURY +LV +LY +MA +MACYS +MADRID +MAIF +MAISON +MAKEUP +MAN +MANAGEMENT +MANGO +MAP +MARKET +MARKETING +MARKETS +MARRIOTT +MARSHALLS +MASERATI +MATTEL +MBA +MC +MCKINSEY +MD +ME +MED +MEDIA +MEET +MELBOURNE +MEME +MEMORIAL +MEN +MENU +MERCKMSD +MG +MH +MIAMI +MICROSOFT +MIL +MINI +MINT +MIT +MITSUBISHI +MK +ML +MLB +MLS +MM +MMA +MN +MO +MOBI +MOBILE +MODA +MOE +MOI +MOM +MONASH +MONEY +MONSTER +MORMON +MORTGAGE +MOSCOW +MOTO +MOTORCYCLES +MOV +MOVIE +MP +MQ +MR +MS +MSD +MT +MTN +MTR +MU +MUSEUM +MUSIC +MUTUAL +MV +MW +MX +MY +MZ +NA +NAB +NAGOYA +NAME +NATURA +NAVY +NBA +NC +NE +NEC +NET +NETBANK +NETFLIX +NETWORK +NEUSTAR +NEW +NEWS +NEXT +NEXTDIRECT +NEXUS +NF +NFL +NG +NGO +NHK +NI +NICO +NIKE +NIKON +NINJA +NISSAN +NISSAY +NL +NO +NOKIA +NORTHWESTERNMUTUAL +NORTON +NOW +NOWRUZ +NOWTV +NP +NR +NRA +NRW +NTT +NU +NYC +NZ +OBI +OBSERVER +OFFICE +OKINAWA +OLAYAN +OLAYANGROUP +OLDNAVY +OLLO +OM +OMEGA +ONE +ONG +ONL +ONLINE +OOO +OPEN +ORACLE +ORANGE +ORG +ORGANIC +ORIGINS +OSAKA +OTSUKA +OTT +OVH +PA +PAGE +PANASONIC +PARIS +PARS +PARTNERS +PARTS +PARTY +PASSAGENS +PAY +PCCW +PE +PET +PF +PFIZER +PG +PH +PHARMACY +PHD +PHILIPS +PHONE +PHOTO +PHOTOGRAPHY +PHOTOS +PHYSIO +PICS +PICTET +PICTURES +PID +PIN +PING +PINK +PIONEER +PIZZA +PK +PL +PLACE +PLAY +PLAYSTATION +PLUMBING +PLUS +PM +PN +PNC +POHL +POKER +POLITIE +PORN +POST +PR +PRAMERICA +PRAXI +PRESS +PRIME +PRO +PROD +PRODUCTIONS +PROF +PROGRESSIVE +PROMO +PROPERTIES +PROPERTY +PROTECTION +PRU +PRUDENTIAL +PS +PT +PUB +PW +PWC +PY +QA +QPON +QUEBEC +QUEST +RACING +RADIO +RE +READ +REALESTATE +REALTOR +REALTY +RECIPES +RED +REDSTONE +REDUMBRELLA +REHAB +REISE +REISEN +REIT +RELIANCE +REN +RENT +RENTALS +REPAIR +REPORT +REPUBLICAN +REST +RESTAURANT +REVIEW +REVIEWS +REXROTH +RICH +RICHARDLI +RICOH +RIL +RIO +RIP +RO +ROCHER +ROCKS +RODEO +ROGERS +ROOM +RS +RSVP +RU +RUGBY +RUHR +RUN +RW +RWE +RYUKYU +SA +SAARLAND +SAFE +SAFETY +SAKURA +SALE +SALON +SAMSCLUB +SAMSUNG +SANDVIK +SANDVIKCOROMANT +SANOFI +SAP +SARL +SAS +SAVE +SAXO +SB +SBI +SBS +SC +SCA +SCB +SCHAEFFLER +SCHMIDT +SCHOLARSHIPS +SCHOOL +SCHULE +SCHWARZ +SCIENCE +SCOT +SD +SE +SEARCH +SEAT +SECURE +SECURITY +SEEK +SELECT +SENER +SERVICES +SES +SEVEN +SEW +SEX +SEXY +SFR +SG +SH +SHANGRILA +SHARP +SHAW +SHELL +SHIA +SHIKSHA +SHOES +SHOP +SHOPPING +SHOUJI +SHOW +SHOWTIME +SI +SILK +SINA +SINGLES +SITE +SJ +SK +SKI +SKIN +SKY +SKYPE +SL +SLING +SM +SMART +SMILE +SN +SNCF +SO +SOCCER +SOCIAL +SOFTBANK +SOFTWARE +SOHU +SOLAR +SOLUTIONS +SONG +SONY +SOY +SPA +SPACE +SPORT +SPOT +SR +SRL +SS +ST +STADA +STAPLES +STAR +STATEBANK +STATEFARM +STC +STCGROUP +STOCKHOLM +STORAGE +STORE +STREAM +STUDIO +STUDY +STYLE +SU +SUCKS +SUPPLIES +SUPPLY +SUPPORT +SURF +SURGERY +SUZUKI +SV +SWATCH +SWISS +SX +SY +SYDNEY +SYSTEMS +SZ +TAB +TAIPEI +TALK +TAOBAO +TARGET +TATAMOTORS +TATAR +TATTOO +TAX +TAXI +TC +TCI +TD +TDK +TEAM +TECH +TECHNOLOGY +TEL +TEMASEK +TENNIS +TEVA +TF +TG +TH +THD +THEATER +THEATRE +TIAA +TICKETS +TIENDA +TIFFANY +TIPS +TIRES +TIROL +TJ +TJMAXX +TJX +TK +TKMAXX +TL +TM +TMALL +TN +TO +TODAY +TOKYO +TOOLS +TOP +TORAY +TOSHIBA +TOTAL +TOURS +TOWN +TOYOTA +TOYS +TR +TRADE +TRADING +TRAINING +TRAVEL +TRAVELCHANNEL +TRAVELERS +TRAVELERSINSURANCE +TRUST +TRV +TT +TUBE +TUI +TUNES +TUSHU +TV +TVS +TW +TZ +UA +UBANK +UBS +UG +UK +UNICOM +UNIVERSITY +UNO +UOL +UPS +US +UY +UZ +VA +VACATIONS +VANA +VANGUARD +VC +VE +VEGAS +VENTURES +VERISIGN +VERSICHERUNG +VET +VG +VI +VIAJES +VIDEO +VIG +VIKING +VILLAS +VIN +VIP +VIRGIN +VISA +VISION +VIVA +VIVO +VLAANDEREN +VN +VODKA +VOLKSWAGEN +VOLVO +VOTE +VOTING +VOTO +VOYAGE +VU +VUELOS +WALES +WALMART +WALTER +WANG +WANGGOU +WATCH +WATCHES +WEATHER +WEATHERCHANNEL +WEBCAM +WEBER +WEBSITE +WED +WEDDING +WEIBO +WEIR +WF +WHOSWHO +WIEN +WIKI +WILLIAMHILL +WIN +WINDOWS +WINE +WINNERS +WME +WOLTERSKLUWER +WOODSIDE +WORK +WORKS +WORLD +WOW +WS +WTC +WTF +XBOX +XEROX +XFINITY +XIHUAN +XIN +XN--11B4C3D +XN--1CK2E1B +XN--1QQW23A +XN--2SCRJ9C +XN--30RR7Y +XN--3BST00M +XN--3DS443G +XN--3E0B707E +XN--3HCRJ9C +XN--3PXU8K +XN--42C2D9A +XN--45BR5CYL +XN--45BRJ9C +XN--45Q11C +XN--4DBRK0CE +XN--4GBRIM +XN--54B7FTA0CC +XN--55QW42G +XN--55QX5D +XN--5SU34J936BGSG +XN--5TZM5G +XN--6FRZ82G +XN--6QQ986B3XL +XN--80ADXHKS +XN--80AO21A +XN--80AQECDR1A +XN--80ASEHDB +XN--80ASWG +XN--8Y0A063A +XN--90A3AC +XN--90AE +XN--90AIS +XN--9DBQ2A +XN--9ET52U +XN--9KRT00A +XN--B4W605FERD +XN--BCK1B9A5DRE4C +XN--C1AVG +XN--C2BR7G +XN--CCK2B3B +XN--CCKWCXETD +XN--CG4BKI +XN--CLCHC0EA0B2G2A9GCD +XN--CZR694B +XN--CZRS0T +XN--CZRU2D +XN--D1ACJ3B +XN--D1ALF +XN--E1A4C +XN--ECKVDTC9D +XN--EFVY88H +XN--FCT429K +XN--FHBEI +XN--FIQ228C5HS +XN--FIQ64B +XN--FIQS8S +XN--FIQZ9S +XN--FJQ720A +XN--FLW351E +XN--FPCRJ9C3D +XN--FZC2C9E2C +XN--FZYS8D69UVGM +XN--G2XX48C +XN--GCKR3F0F +XN--GECRJ9C +XN--GK3AT1E +XN--H2BREG3EVE +XN--H2BRJ9C +XN--H2BRJ9C8C +XN--HXT814E +XN--I1B6B1A6A2E +XN--IMR513N +XN--IO0A7I +XN--J1AEF +XN--J1AMH +XN--J6W193G +XN--JLQ480N2RG +XN--JLQ61U9W7B +XN--JVR189M +XN--KCRX77D1X4A +XN--KPRW13D +XN--KPRY57D +XN--KPUT3I +XN--L1ACC +XN--LGBBAT1AD8J +XN--MGB9AWBF +XN--MGBA3A3EJT +XN--MGBA3A4F16A +XN--MGBA7C0BBN0A +XN--MGBAAKC7DVF +XN--MGBAAM7A8H +XN--MGBAB2BD +XN--MGBAH1A3HJKRD +XN--MGBAI9AZGQP6J +XN--MGBAYH7GPA +XN--MGBBH1A +XN--MGBBH1A71E +XN--MGBC0A9AZCG +XN--MGBCA7DZDO +XN--MGBCPQ6GPA1A +XN--MGBERP4A5D4AR +XN--MGBGU82A +XN--MGBI4ECEXP +XN--MGBPL2FH +XN--MGBT3DHD +XN--MGBTX2B +XN--MGBX4CD0AB +XN--MIX891F +XN--MK1BU44C +XN--MXTQ1M +XN--NGBC5AZD +XN--NGBE9E0A +XN--NGBRX +XN--NODE +XN--NQV7F +XN--NQV7FS00EMA +XN--NYQY26A +XN--O3CW4H +XN--OGBPF8FL +XN--OTU796D +XN--P1ACF +XN--P1AI +XN--PGBS0DH +XN--PSSY2U +XN--Q7CE6A +XN--Q9JYB4C +XN--QCKA1PMC +XN--QXA6A +XN--QXAM +XN--RHQV96G +XN--ROVU88B +XN--RVC1E0AM3E +XN--S9BRJ9C +XN--SES554G +XN--T60B56A +XN--TCKWE +XN--TIQ49XQYJ +XN--UNUP4Y +XN--VERMGENSBERATER-CTB +XN--VERMGENSBERATUNG-PWB +XN--VHQUV +XN--VUQ861B +XN--W4R85EL8FHU5DNRA +XN--W4RS40L +XN--WGBH1C +XN--WGBL6A +XN--XHQ521B +XN--XKC2AL3HYE2A +XN--XKC2DL3A5EE0H +XN--Y9A3AQ +XN--YFRO4I67O +XN--YGBI2AMMX +XN--ZFR164B +XXX +XYZ +YACHTS +YAHOO +YAMAXUN +YANDEX +YE +YODOBASHI +YOGA +YOKOHAMA +YOU +YOUTUBE +YT +YUN +ZA +ZAPPOS +ZARA +ZERO +ZIP +ZM +ZONE +ZUERICH +ZW diff --git a/requirements.txt b/requirements.txt index 7702a5e..0472c9b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ google_auth_oauthlib==0.4.6 protobuf==3.19.1 colorama==0.4.4 rtfunicode==2.0 +confusables==1.2.0 certifi