Merge branch 'edge'

fix(logging): Fix typo in error transform
docs: Add github sponsor link
2026-01-14 16:08:02 -05:00 · 2022-02-02 16:59:56 -05:00 · 2022-02-01 13:13:27 -05:00 · 2022-02-01 12:01:34 -05:00 · 2022-01-31 14:08:21 -05:00
9 changed files with 198 additions and 117 deletions
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@@ -0,0 +1,2 @@
+github: [FoxxMD]
+custom: ["bitcoincash:qqmpsh365r8n9jhp4p8ks7f7qdr7203cws4kmkmr8q"]
--- a/src/Common/interfaces.ts
+++ b/src/Common/interfaces.ts
@@ -2100,3 +2100,56 @@ export interface FilterResult<T> {
    join: JoinOperands
    passed: boolean
 }
+
+export interface TextTransformOptions {
+    /**
+     * A set of search-and-replace operations to perform on text values before performing a match. Transformations are performed in the order they are defined.
+     *
+     * * If `transformationsActivity` IS NOT defined then these transformations will be performed on BOTH the activity text (submission title or comment) AND the repost candidate text
+     * * If `transformationsActivity` IS defined then these transformations are only performed on repost candidate text
+     * */
+    transformations?: SearchAndReplaceRegExp[]
+
+    /**
+     * Specify a separate set of transformations for the activity text (submission title or comment)
+     *
+     * To perform no transformations when `transformations` is defined set this to an empty array (`[]`)
+     * */
+    transformationsActivity?: SearchAndReplaceRegExp[]
+}
+
+export interface TextMatchOptions {
+    /**
+     * The percentage, as a whole number, of a repost title/comment that must match the title/comment being checked in order to consider both a match
+     *
+     * Note: Setting to 0 will make every candidate considered a match -- useful if you want to match if the URL has been reposted anywhere
+     *
+     * Defaults to `85` (85%)
+     *
+     * @default 85
+     * @example [85]
+     * */
+    matchScore?: number
+
+    /**
+     * The minimum number of words in the activity being checked for which this rule will run on
+     *
+     * If the word count is below the minimum the rule fails
+     *
+     * Defaults to 2
+     *
+     * @default 2
+     * @example [2]
+     * */
+    minWordCount?: number
+
+    /**
+     * Should text matching be case sensitive?
+     *
+     * Defaults to false
+     *
+     * @default false
+     * @example [false]
+     **/
+    caseSensitive?: boolean
+}
--- a/src/Rule/RepeatActivityRule.ts
+++ b/src/Rule/RepeatActivityRule.ts
@@ -1,17 +1,28 @@
 import {Rule, RuleJSONConfig, RuleOptions, RuleResult} from "./index";
 import {Comment} from "snoowrap";
 import {
-    activityWindowText, asSubmission,
-    comparisonTextOp, FAIL, getActivitySubredditName, isExternalUrlSubmission, isRedditMedia,
-    parseGenericValueComparison, parseSubredditName,
-    parseUsableLinkIdentifier as linkParser, PASS, subredditStateIsNameOnly, toStrongSubredditState
+    activityWindowText,
+    asSubmission,
+    comparisonTextOp,
+    FAIL,
+    getActivitySubredditName,
+    isExternalUrlSubmission,
+    isRedditMedia,
+    parseGenericValueComparison,
+    parseSubredditName,
+    parseUsableLinkIdentifier as linkParser,
+    PASS,
+    searchAndReplace,
+    stringSameness,
+    subredditStateIsNameOnly,
+    toStrongSubredditState
 } from "../util";
 import {
    ActivityWindow,
    ActivityWindowType,
-    ReferenceSubmission,
+    ReferenceSubmission, SearchAndReplaceRegExp,
    StrongSubredditState,
-    SubredditState
+    SubredditState, TextMatchOptions, TextTransformOptions
 } from "../Common/interfaces";
 import Submission from "snoowrap/dist/objects/Submission";
 import dayjs from "dayjs";
@@ -29,27 +40,6 @@ interface RepeatActivityReducer {
    allSets: RepeatActivityData[]
 }

-const getActivityIdentifier = (activity: (Submission | Comment), length = 200) => {
-    let identifier: string;
-    if (asSubmission(activity)) {
-        if (activity.is_self) {
-            identifier = `${activity.title}${activity.selftext.slice(0, length)}`;
-        } else if(isRedditMedia(activity)) {
-            identifier = activity.title;
-        } else {
-            identifier = parseUsableLinkIdentifier(activity.url) as string;
-        }
-    } else {
-        identifier = activity.body.slice(0, length);
-    }
-    return identifier;
-}
-
-const fuzzyOptions = {
-    includeScore: true,
-    distance: 15
-};
-
 export class RepeatActivityRule extends Rule {
    threshold: string;
    window: ActivityWindowType;
@@ -62,6 +52,9 @@ export class RepeatActivityRule extends Rule {
    activityFilterFunc: (x: Submission|Comment) => Promise<boolean> = async (x) => true;
    keepRemoved: boolean;
    minWordCount: number;
+    transformations: SearchAndReplaceRegExp[]
+    caseSensitive: boolean
+    matchScore: number

    constructor(options: RepeatActivityOptions) {
        super(options);
@@ -75,7 +68,13 @@ export class RepeatActivityRule extends Rule {
            include = [],
            exclude = [],
            keepRemoved = false,
+            transformations = [],
+            caseSensitive = true,
+            matchScore = 85,
        } = options;
+        this.matchScore = matchScore;
+        this.transformations = transformations;
+        this.caseSensitive = caseSensitive;
        this.minWordCount = minWordCount;
        this.keepRemoved = keepRemoved;
        this.threshold = threshold;
@@ -136,6 +135,37 @@ export class RepeatActivityRule extends Rule {
        }
    }

+    getActivityIdentifier(activity: (Submission | Comment), length = 200, transform = true) {
+        let identifier: string;
+        if (asSubmission(activity)) {
+            if (activity.is_self) {
+                identifier = `${activity.title}${activity.selftext.slice(0, length)}`;
+            } else if(isRedditMedia(activity)) {
+                identifier = activity.title;
+            } else {
+                identifier = parseUsableLinkIdentifier(activity.url) as string;
+            }
+        } else {
+            identifier = activity.body.slice(0, length);
+        }
+
+        if(!transform) {
+            return identifier;
+        }
+
+        // apply any transforms
+        if (this.transformations.length > 0) {
+            identifier = searchAndReplace(identifier, this.transformations);
+        }
+
+        // lowercase only after transformations so that case-dependent regexes still run against the original casing
+        if(!this.caseSensitive) {
+            identifier = identifier.toLowerCase();
+        }
+
+        return identifier;
+    }
+
    async process(item: Submission|Comment): Promise<[boolean, RuleResult]> {
        let referenceUrl;
        if(asSubmission(item) && this.useSubmissionAsReference) {
@@ -162,9 +192,10 @@ export class RepeatActivityRule extends Rule {
            const acc = await accProm;
            const {openSets = [], allSets = []} = acc;

-            let identifier = getActivityIdentifier(activity);
+            let identifier = this.getActivityIdentifier(activity);
+
            const isUrl = isExternalUrlSubmission(activity);
-            let fu = new Fuse([identifier], !isUrl ? fuzzyOptions : {...fuzzyOptions, distance: 5});
+            //let fu = new Fuse([identifier], !isUrl ? fuzzyOptions : {...fuzzyOptions, distance: 5});
            const validSub = await this.activityFilterFunc(activity);
            let minMet = identifier.length >= this.minWordCount;

@@ -174,12 +205,15 @@ export class RepeatActivityRule extends Rule {
            let currIdentifierInOpen = false;
            const bufferedActivities = this.gapAllowance === undefined || this.gapAllowance === 0 ? [] : activities.slice(Math.max(0, index - this.gapAllowance), Math.max(0, index));
            for (const o of openSets) {
-                const res = fu.search(o.identifier);
-                const match = res.length > 0;
-                if (match && validSub && minMet) {
+                const strMatchResults = stringSameness(o.identifier, identifier);
+                if (strMatchResults.highScoreWeighted >= this.matchScore && minMet) {
                    updatedOpenSets.push({...o, sets: [...o.sets, activity]});
                    currIdentifierInOpen = true;
-                } else if (bufferedActivities.some(x => fu.search(getActivityIdentifier(x)).length > 0) && validSub && minMet) {
+                } else if (bufferedActivities.some(x => {
+                    let buffIdentifier = this.getActivityIdentifier(x);
+                    const buffMatch = stringSameness(identifier, buffIdentifier);
+                    return buffMatch.highScoreWeighted >= this.matchScore;
+                }) && validSub && minMet) {
                    updatedOpenSets.push(o);
                } else if(!currIdentifierInOpen && !isUrl) {
                    updatedAllSets.push(o);
@@ -193,15 +227,18 @@ export class RepeatActivityRule extends Rule {
                    // could be that a spammer is using different URLs for each submission but similar submission titles so search by title as well
                    const sub = activity as Submission;
                    identifier = sub.title;
-                    fu = new Fuse([identifier], !isUrl ? fuzzyOptions : {...fuzzyOptions, distance: 5});
+                    //fu = new Fuse([identifier], !isUrl ? fuzzyOptions : {...fuzzyOptions, distance: 5});
                    minMet = identifier.length >= this.minWordCount;
                    for (const o of openSets) {
-                        const res = fu.search(o.identifier);
-                        const match = res.length > 0;
-                        if (match && validSub && minMet) {
+                        const strMatchResults = stringSameness(o.identifier, identifier);
+                        if (strMatchResults.highScoreWeighted >= this.matchScore && minMet) {
                            updatedOpenSets.push({...o, sets: [...o.sets, activity]});
                            currIdentifierInOpen = true;
-                        } else if (bufferedActivities.some(x => fu.search(getActivityIdentifier(x)).length > 0) && validSub && minMet && !updatedOpenSets.includes(o)) {
+                        } else if (bufferedActivities.some(x => {
+                            let buffIdentifier = this.getActivityIdentifier(x);
+                            const buffMatch = stringSameness(identifier, buffIdentifier);
+                            return buffMatch.highScoreWeighted >= this.matchScore;
+                        }) && validSub && minMet && !updatedOpenSets.includes(o)) {
                            updatedOpenSets.push(o);
                        } else if(!updatedAllSets.includes(o)) {
                            updatedAllSets.push(o);
@@ -232,7 +269,7 @@ export class RepeatActivityRule extends Rule {
        let applicableGroupedActivities = identifierGroupedActivities;
        if (this.useSubmissionAsReference) {
            applicableGroupedActivities = new Map();
-            let identifier = getActivityIdentifier(item);
+            let identifier = this.getActivityIdentifier(item);
            let referenceSubmissions = identifierGroupedActivities.get(identifier);
            if(referenceSubmissions === undefined && isExternalUrlSubmission(item)) {
                // if external url sub then try by title
@@ -240,7 +277,7 @@ export class RepeatActivityRule extends Rule {
                referenceSubmissions = identifierGroupedActivities.get(identifier);
                if(referenceSubmissions === undefined) {
                    // didn't get by title so go back to url since that's the default
-                    identifier = getActivityIdentifier(item);
+                    identifier = this.getActivityIdentifier(item);
                }
            }

@@ -265,7 +302,7 @@ export class RepeatActivityRule extends Rule {
            };
            for (let set of value) {
                const test = comparisonTextOp(set.length, operator, thresholdValue);
-                const md = set.map((x: (Comment | Submission)) => `[${asSubmission(x) ? x.title : getActivityIdentifier(x, 50)}](https://reddit.com${x.permalink}) in ${x.subreddit_name_prefixed} on ${dayjs(x.created_utc * 1000).utc().format()}`);
+                const md = set.map((x: (Comment | Submission)) => `[${asSubmission(x) ? x.title : this.getActivityIdentifier(x, 50)}](https://reddit.com${x.permalink}) in ${x.subreddit_name_prefixed} on ${dayjs(x.created_utc * 1000).utc().format()}`);

                summaryData.sets.push(set);
                summaryData.largestTrigger = Math.max(summaryData.largestTrigger, set.length);
@@ -325,7 +362,7 @@ interface SummaryData {
    triggeringSetsMarkdown: string[]
 }

-interface RepeatActivityConfig extends ActivityWindow, ReferenceSubmission {
+interface RepeatActivityConfig extends ActivityWindow, ReferenceSubmission, TextMatchOptions {
    /**
     * The number of repeat submissions that will trigger the rule
     * @default ">= 5"
@@ -383,18 +420,9 @@ interface RepeatActivityConfig extends ActivityWindow, ReferenceSubmission {
    keepRemoved?: boolean

    /**
-     * For activities that are text-based this is the minimum number of words required for the activity to be considered for a repeat
-     *
-     * EX if `minimumWordCount=5` and a comment is `what about you` then it is ignored because `3 is less than 5`
-     *
-     * **For self-text submissions** -- title + body text
-     *
-     * **For comments* -- body text
-     *
-     * @default 1
-     * @example [1]
+     * A set of search-and-replace operations to perform on text values before performing a match. Transformations are performed in the order they are defined.
     * */
-    minWordCount?: number,
+    transformations?: SearchAndReplaceRegExp[]
 }

 export interface RepeatActivityOptions extends RepeatActivityConfig, RuleOptions {
--- a/src/Rule/RepostRule.ts
+++ b/src/Rule/RepostRule.ts
@@ -18,7 +18,7 @@ import {
    RepostItem,
    RepostItemResult,
    SearchAndReplaceRegExp,
-    SearchFacetType,
+    SearchFacetType, TextMatchOptions, TextTransformOptions,
 } from "../Common/interfaces";
 import objectHash from "object-hash";
 import {getActivities, getAttributionIdentifier} from "../Utils/SnoowrapUtils";
@@ -30,59 +30,6 @@ import {rest} from "lodash";

 const parseYtIdentifier = parseUsableLinkIdentifier();

-export interface TextMatchOptions {
-    /**
-     * The percentage, as a whole number, of a repost title/comment that must match the title/comment being checked in order to consider both a match
-     *
-     * Note: Setting to 0 will make every candidate considered a match -- useful if you want to match if the URL has been reposted anywhere
-     *
-     * Defaults to `85` (85%)
-     *
-     * @default 85
-     * @example [85]
-     * */
-    matchScore?: number
-
-    /**
-     * The minimum number of words in the activity being checked for which this rule will run on
-     *
-     * If the word count is below the minimum the rule fails
-     *
-     * Defaults to 2
-     *
-     * @default 2
-     * @example [2]
-     * */
-    minWordCount?: number
-
-    /**
-     * Should text matching be case sensitive?
-     *
-     * Defaults to false
-     *
-     * @default false
-     * @example [false]
-     **/
-    caseSensitive?: boolean
-}
-
-export interface TextTransformOptions {
-    /**
-     * A set of search-and-replace operations to perform on text values before performing a match. Transformations are performed in the order they are defined.
-     *
-     * * If `transformationsActivity` IS NOT defined then these transformations will be performed on BOTH the activity text (submission title or comment) AND the repost candidate text
-     * * If `transformationsActivity` IS defined then these transformations are only performed on repost candidate text
-     * */
-    transformations?: SearchAndReplaceRegExp[]
-
-    /**
-     * Specify a separate set of transformations for the activity text (submission title or comment)
-     *
-     * To perform no transformations when `transformations` is defined set this to an empty array (`[]`)
-     * */
-    transformationsActivity?: SearchAndReplaceRegExp[]
-}
-
 export interface SearchFacetJSONConfig extends TextMatchOptions, TextTransformOptions, ActivityWindow {
    kind: SearchFacetType | SearchFacetType[]
 }
--- a/src/Schema/App.json
+++ b/src/Schema/App.json
@@ -2667,6 +2667,11 @@
                        }
                    ]
                },
+                "caseSensitive": {
+                    "default": false,
+                    "description": "Should text matching be case sensitive?\n\nDefaults to false",
+                    "type": "boolean"
+                },
                "exclude": {
                    "description": "If present, activities will be counted only if they are **NOT** found in this list of Subreddits\n\nEach value in the list can be either:\n\n * string (name of subreddit)\n * regular expression to run on the subreddit name\n * `SubredditState`\n\nEX `[\"mealtimevideos\",\"askscience\", \"/onlyfans*\\/i\", {\"over18\": true}]`",
                    "examples": [
@@ -2757,9 +2762,14 @@
                    ],
                    "type": "string"
                },
+                "matchScore": {
+                    "default": 85,
+                    "description": "The percentage, as a whole number, of a repost title/comment that must match the title/comment being checked in order to consider both a match\n\nNote: Setting to 0 will make every candidate considered a match -- useful if you want to match if the URL has been reposted anywhere\n\nDefaults to `85` (85%)",
+                    "type": "number"
+                },
                "minWordCount": {
-                    "default": 1,
-                    "description": "For activities that are text-based this is the minimum number of words required for the activity to be considered for a repeat\n\nEX if `minimumWordCount=5` and a comment is `what about you` then it is ignored because `3 is less than 5`\n\n**For self-text submissions** -- title + body text\n\n**For comments* -- body text",
+                    "default": 2,
+                    "description": "The minimum number of words in the activity being checked for which this rule will run on\n\nIf the word count is below the minimum the rule fails\n\nDefaults to 2",
                    "type": "number"
                },
                "name": {
@@ -2775,6 +2785,13 @@
                    "description": "The number of repeat submissions that will trigger the rule",
                    "type": "string"
                },
+                "transformations": {
+                    "description": "A set of search-and-replace operations to perform on text values before performing a match. Transformations are performed in the order they are defined.",
+                    "items": {
+                        "$ref": "#/definitions/SearchAndReplaceRegExp"
+                    },
+                    "type": "array"
+                },
                "useSubmissionAsReference": {
                    "default": true,
                    "description": "If activity is a Submission and is a link (not self-post) then only look at Submissions that contain this link, otherwise consider all activities.",
--- a/src/Schema/OperatorConfig.json
+++ b/src/Schema/OperatorConfig.json
@@ -633,6 +633,9 @@
                },
                "file": {
                    "allOf": [
+                        {
+                            "$ref": "#/definitions/Omit<DailyRotateFileTransportOptions,\"stream\"|\"dirname\"|\"options\"|\"handleRejections\"|\"format\"|\"handleExceptions\"|\"log\"|\"logv\"|\"close\">"
+                        },
                        {
                            "properties": {
                                "dirname": {
@@ -658,9 +661,6 @@
                                }
                            },
                            "type": "object"
-                        },
-                        {
-                            "$ref": "#/definitions/Omit<DailyRotateFileTransportOptions,\"stream\"|\"dirname\"|\"options\"|\"handleRejections\"|\"format\"|\"handleExceptions\"|\"log\"|\"logv\"|\"close\">"
                        }
                    ],
                    "description": "Options for Rotating File logging"
--- a/src/Schema/Rule.json
+++ b/src/Schema/Rule.json
@@ -1460,6 +1460,11 @@
                        }
                    ]
                },
+                "caseSensitive": {
+                    "default": false,
+                    "description": "Should text matching be case sensitive?\n\nDefaults to false",
+                    "type": "boolean"
+                },
                "exclude": {
                    "description": "If present, activities will be counted only if they are **NOT** found in this list of Subreddits\n\nEach value in the list can be either:\n\n * string (name of subreddit)\n * regular expression to run on the subreddit name\n * `SubredditState`\n\nEX `[\"mealtimevideos\",\"askscience\", \"/onlyfans*\\/i\", {\"over18\": true}]`",
                    "examples": [
@@ -1550,9 +1555,14 @@
                    ],
                    "type": "string"
                },
+                "matchScore": {
+                    "default": 85,
+                    "description": "The percentage, as a whole number, of a repost title/comment that must match the title/comment being checked in order to consider both a match\n\nNote: Setting to 0 will make every candidate considered a match -- useful if you want to match if the URL has been reposted anywhere\n\nDefaults to `85` (85%)",
+                    "type": "number"
+                },
                "minWordCount": {
-                    "default": 1,
-                    "description": "For activities that are text-based this is the minimum number of words required for the activity to be considered for a repeat\n\nEX if `minimumWordCount=5` and a comment is `what about you` then it is ignored because `3 is less than 5`\n\n**For self-text submissions** -- title + body text\n\n**For comments* -- body text",
+                    "default": 2,
+                    "description": "The minimum number of words in the activity being checked for which this rule will run on\n\nIf the word count is below the minimum the rule fails\n\nDefaults to 2",
                    "type": "number"
                },
                "name": {
@@ -1568,6 +1578,13 @@
                    "description": "The number of repeat submissions that will trigger the rule",
                    "type": "string"
                },
+                "transformations": {
+                    "description": "A set of search-and-replace operations to perform on text values before performing a match. Transformations are performed in the order they are defined.",
+                    "items": {
+                        "$ref": "#/definitions/SearchAndReplaceRegExp"
+                    },
+                    "type": "array"
+                },
                "useSubmissionAsReference": {
                    "default": true,
                    "description": "If activity is a Submission and is a link (not self-post) then only look at Submissions that contain this link, otherwise consider all activities.",
--- a/src/Schema/RuleSet.json
+++ b/src/Schema/RuleSet.json
@@ -1434,6 +1434,11 @@
                        }
                    ]
                },
+                "caseSensitive": {
+                    "default": false,
+                    "description": "Should text matching be case sensitive?\n\nDefaults to false",
+                    "type": "boolean"
+                },
                "exclude": {
                    "description": "If present, activities will be counted only if they are **NOT** found in this list of Subreddits\n\nEach value in the list can be either:\n\n * string (name of subreddit)\n * regular expression to run on the subreddit name\n * `SubredditState`\n\nEX `[\"mealtimevideos\",\"askscience\", \"/onlyfans*\\/i\", {\"over18\": true}]`",
                    "examples": [
@@ -1524,9 +1529,14 @@
                    ],
                    "type": "string"
                },
+                "matchScore": {
+                    "default": 85,
+                    "description": "The percentage, as a whole number, of a repost title/comment that must match the title/comment being checked in order to consider both a match\n\nNote: Setting to 0 will make every candidate considered a match -- useful if you want to match if the URL has been reposted anywhere\n\nDefaults to `85` (85%)",
+                    "type": "number"
+                },
                "minWordCount": {
-                    "default": 1,
-                    "description": "For activities that are text-based this is the minimum number of words required for the activity to be considered for a repeat\n\nEX if `minimumWordCount=5` and a comment is `what about you` then it is ignored because `3 is less than 5`\n\n**For self-text submissions** -- title + body text\n\n**For comments* -- body text",
+                    "default": 2,
+                    "description": "The minimum number of words in the activity being checked for which this rule will run on\n\nIf the word count is below the minimum the rule fails\n\nDefaults to 2",
                    "type": "number"
                },
                "name": {
@@ -1542,6 +1552,13 @@
                    "description": "The number of repeat submissions that will trigger the rule",
                    "type": "string"
                },
+                "transformations": {
+                    "description": "A set of search-and-replace operations to perform on text values before performing a match. Transformations are performed in the order they are defined.",
+                    "items": {
+                        "$ref": "#/definitions/SearchAndReplaceRegExp"
+                    },
+                    "type": "array"
+                },
                "useSubmissionAsReference": {
                    "default": true,
                    "description": "If activity is a Submission and is a link (not self-post) then only look at Submissions that contain this link, otherwise consider all activities.",
--- a/src/util.ts
+++ b/src/util.ts
@@ -136,7 +136,7 @@ const errorAwareFormat = {
                for(const k in err) {
                    if(dummyErr.hasOwnProperty(k) || k === 'cause') {
                        // @ts-ignore
-                        dummyErr[k] = tinfo[k];
+                        dummyErr[k] = info[k];
                    }
                }
                // @ts-ignore
Author	SHA1	Message	Date
FoxxMD	fc51928054	Merge branch 'edge'	2022-02-02 16:59:56 -05:00
FoxxMD	c07276a3be	fix(logging): Fix typo in error transform	2022-02-01 13:13:27 -05:00
FoxxMD	4a2297f5cd	docs: Add github sponsor link	2022-02-01 12:01:34 -05:00
FoxxMD	f8967d55c4	feat(repeat): Use newer text comparison technique to improve repeat detection * Use same technique as repost rule, which has high accuracy and fewer false-positives * Implement ability to see similarity score, case sensitivity, and text transformations	2022-01-31 14:08:21 -05:00