Compare commits

..

1 Commits

Author SHA1 Message Date
FoxxMD
eee2a8e856 feat: Support multiple expressions for regex property 2022-11-22 16:03:50 -05:00
6 changed files with 155 additions and 96 deletions

27
package-lock.json generated
View File

@@ -46,7 +46,6 @@
"express-session-cache-manager": "^1.0.2", "express-session-cache-manager": "^1.0.2",
"express-socket.io-session": "^1.3.5", "express-socket.io-session": "^1.3.5",
"fast-deep-equal": "^3.1.3", "fast-deep-equal": "^3.1.3",
"fixed-size-list": "^0.3.0",
"globrex": "^0.1.2", "globrex": "^0.1.2",
"got": "^11.8.2", "got": "^11.8.2",
"he": "^1.2.0", "he": "^1.2.0",
@@ -3980,14 +3979,6 @@
"micromatch": "^4.0.2" "micromatch": "^4.0.2"
} }
}, },
"node_modules/fixed-size-list": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/fixed-size-list/-/fixed-size-list-0.3.0.tgz",
"integrity": "sha512-c6I8wEE4ZtjKz35BaodH7yWuWmcaUVQwgBeNcI3LxJu79YH+ezHvf1oS9VkgJmyVy5eQ8Wh6jNVcj2rB4rgVgA==",
"dependencies": {
"mitt": "^1.2.0"
}
},
"node_modules/flat": { "node_modules/flat": {
"version": "5.0.2", "version": "5.0.2",
"resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz", "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz",
@@ -6122,11 +6113,6 @@
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==" "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q=="
}, },
"node_modules/mitt": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/mitt/-/mitt-1.2.0.tgz",
"integrity": "sha512-r6lj77KlwqLhIUku9UWYes7KJtsczvolZkzp8hbaDPPaE24OmWl5s539Mytlj22siEQKosZ26qCBgda2PKwoJw=="
},
"node_modules/mkdirp": { "node_modules/mkdirp": {
"version": "0.5.6", "version": "0.5.6",
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz", "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz",
@@ -13602,14 +13588,6 @@
"micromatch": "^4.0.2" "micromatch": "^4.0.2"
} }
}, },
"fixed-size-list": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/fixed-size-list/-/fixed-size-list-0.3.0.tgz",
"integrity": "sha512-c6I8wEE4ZtjKz35BaodH7yWuWmcaUVQwgBeNcI3LxJu79YH+ezHvf1oS9VkgJmyVy5eQ8Wh6jNVcj2rB4rgVgA==",
"requires": {
"mitt": "^1.2.0"
}
},
"flat": { "flat": {
"version": "5.0.2", "version": "5.0.2",
"resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz", "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz",
@@ -15213,11 +15191,6 @@
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==" "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q=="
}, },
"mitt": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/mitt/-/mitt-1.2.0.tgz",
"integrity": "sha512-r6lj77KlwqLhIUku9UWYes7KJtsczvolZkzp8hbaDPPaE24OmWl5s539Mytlj22siEQKosZ26qCBgda2PKwoJw=="
},
"mkdirp": { "mkdirp": {
"version": "0.5.6", "version": "0.5.6",
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz", "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz",

View File

@@ -68,7 +68,6 @@
"express-session-cache-manager": "^1.0.2", "express-session-cache-manager": "^1.0.2",
"express-socket.io-session": "^1.3.5", "express-socket.io-session": "^1.3.5",
"fast-deep-equal": "^3.1.3", "fast-deep-equal": "^3.1.3",
"fixed-size-list": "^0.3.0",
"globrex": "^0.1.2", "globrex": "^0.1.2",
"got": "^11.8.2", "got": "^11.8.2",
"he": "^1.2.0", "he": "^1.2.0",

View File

@@ -572,7 +572,7 @@ class Bot implements BotInstanceFunctions {
if (stream !== undefined) { if (stream !== undefined) {
this.logger.info('Restarting SHARED COMMENT STREAM due to a subreddit config change'); this.logger.info('Restarting SHARED COMMENT STREAM due to a subreddit config change');
stream.end('Replacing with a new stream with updated subreddits'); stream.end('Replacing with a new stream with updated subreddits');
processed = stream.processedBuffer; processed = stream.processed;
} }
if (sharedCommentsSubreddits.length > 100) { if (sharedCommentsSubreddits.length > 100) {
this.logger.warn(`SHARED COMMENT STREAM => Reddit can only combine 100 subreddits for getting new Comments but this bot has ${sharedCommentsSubreddits.length}`); this.logger.warn(`SHARED COMMENT STREAM => Reddit can only combine 100 subreddits for getting new Comments but this bot has ${sharedCommentsSubreddits.length}`);
@@ -605,7 +605,7 @@ class Bot implements BotInstanceFunctions {
if (stream !== undefined) { if (stream !== undefined) {
this.logger.info('Restarting SHARED SUBMISSION STREAM due to a subreddit config change'); this.logger.info('Restarting SHARED SUBMISSION STREAM due to a subreddit config change');
stream.end('Replacing with a new stream with updated subreddits'); stream.end('Replacing with a new stream with updated subreddits');
processed = stream.processedBuffer; processed = stream.processed;
} }
if (sharedSubmissionsSubreddits.length > 100) { if (sharedSubmissionsSubreddits.length > 100) {
this.logger.warn(`SHARED SUBMISSION STREAM => Reddit can only combine 100 subreddits for getting new Submissions but this bot has ${sharedSubmissionsSubreddits.length}`); this.logger.warn(`SHARED SUBMISSION STREAM => Reddit can only combine 100 subreddits for getting new Submissions but this bot has ${sharedSubmissionsSubreddits.length}`);

View File

@@ -748,6 +748,10 @@ export interface RegExResult {
named: NamedGroup named: NamedGroup
} }
/**
 * A regex match result that additionally records which expression produced it.
 *
 * Needed once several expressions can be run for one criteria, so each match
 * can be attributed back to the specific regex that found it.
 * */
export interface RegExResultWithTest extends RegExResult {
/**
 * The regular expression that produced this match
 * */
test: RegExp
}
export type StrongCache = { export type StrongCache = {
authorTTL: number | boolean, authorTTL: number | boolean,
userNotesTTL: number | boolean, userNotesTTL: number | boolean,

View File

@@ -7,6 +7,7 @@ import {
PASS, triggeredIndicator, windowConfigToWindowCriteria PASS, triggeredIndicator, windowConfigToWindowCriteria
} from "../util"; } from "../util";
import { import {
RegExResultWithTest,
RuleResult, RuleResult,
} from "../Common/interfaces"; } from "../Common/interfaces";
import dayjs from 'dayjs'; import dayjs from 'dayjs';
@@ -14,10 +15,11 @@ import {SimpleError} from "../Utils/Errors";
import {JoinOperands} from "../Common/Infrastructure/Atomic"; import {JoinOperands} from "../Common/Infrastructure/Atomic";
import {ActivityWindowConfig} from "../Common/Infrastructure/ActivityWindow"; import {ActivityWindowConfig} from "../Common/Infrastructure/ActivityWindow";
import { import {
comparisonTextOp, comparisonTextOp, GenericComparison,
parseGenericValueComparison, parseGenericValueComparison,
parseGenericValueOrPercentComparison parseGenericValueOrPercentComparison
} from "../Common/Infrastructure/Comparisons"; } from "../Common/Infrastructure/Comparisons";
import {SnoowrapActivity} from "../Common/Infrastructure/Reddit";
export interface RegexCriteria { export interface RegexCriteria {
/** /**
@@ -27,13 +29,23 @@ export interface RegexCriteria {
* */ * */
name?: string name?: string
/** /**
* A valid Regular Expression to test content against * A valid Regular Expression, or list of expressions, to test content against
* *
* If no flags are specified then the **global** flag is used by default * If no flags are specified then the **global** flag is used by default
* *
* @examples ["/reddit|FoxxMD/ig"] * @examples ["/reddit|FoxxMD/ig"]
* */ * */
regex: string, regex: string | string[],
/**
* Determines if ALL regexes listed are run or if regexes are only run until one is matched.
*
* * `true` => all regexes are always run
* * `false` => regexes are run until one matches
*
* @default false
* */
exhaustive?: boolean
/** /**
* Which content from an Activity to test the regex against * Which content from an Activity to test the regex against
@@ -157,6 +169,7 @@ export class RegexRule extends Rule {
const { const {
name = (index + 1), name = (index + 1),
regex, regex,
exhaustive = false,
testOn: testOnVals = ['title', 'body'], testOn: testOnVals = ['title', 'body'],
lookAt = 'all', lookAt = 'all',
matchThreshold = '> 0', matchThreshold = '> 0',
@@ -174,13 +187,7 @@ export class RegexRule extends Rule {
return acc.concat(curr); return acc.concat(curr);
}, []); }, []);
// check regex const regexTests: RegExp[] = await this.convertToRegexArray(name, regex);
const regexContent = await this.resources.getContent(regex);
const reg = parseStringToRegex(regexContent, 'g');
if(reg === undefined) {
throw new SimpleError(`Value given for regex on Criteria ${name} was not valid: ${regex}`);
}
// ok cool its a valid regex
const matchComparison = parseGenericValueComparison(matchThreshold); const matchComparison = parseGenericValueComparison(matchThreshold);
const activityMatchComparison = activityMatchThreshold === null ? undefined : parseGenericValueOrPercentComparison(activityMatchThreshold); const activityMatchComparison = activityMatchThreshold === null ? undefined : parseGenericValueOrPercentComparison(activityMatchThreshold);
@@ -198,12 +205,13 @@ export class RegexRule extends Rule {
// first lets see if the activity we are checking satisfies thresholds // first lets see if the activity we are checking satisfies thresholds
// since we may be able to avoid api calls to get history // since we may be able to avoid api calls to get history
let actMatches = this.getMatchesFromActivity(item, testOn, reg); let actMatches = getMatchesFromActivity(item, testOn, regexTests, exhaustive);
matches = matches.concat(actMatches).slice(0, 100); const actMatchSummary = regexResultsSummary(actMatches);
matchCount += actMatches.length; matches = matches.concat(actMatchSummary.matches).slice(0, 100);
matchCount += actMatchSummary.matches.length;
activitiesTested++; activitiesTested++;
const singleMatched = comparisonTextOp(actMatches.length, matchComparison.operator, matchComparison.value); const singleMatched = comparisonTextOp(actMatchSummary.matches.length, matchComparison.operator, matchComparison.value);
if (singleMatched) { if (singleMatched) {
activitiesMatchedCount++; activitiesMatchedCount++;
} }
@@ -233,7 +241,7 @@ export class RegexRule extends Rule {
} }
history = await this.resources.getAuthorActivities(item.author, strongWindow); history = await this.resources.getAuthorActivities(item.author, strongWindow);
// remove current activity it exists in history so we don't count it twice // remove current activity if it exists in history so we don't count it twice
history = history.filter(x => x.id !== item.id); history = history.filter(x => x.id !== item.id);
const historyLength = history.length; const historyLength = history.length;
@@ -252,10 +260,12 @@ export class RegexRule extends Rule {
for (const h of history) { for (const h of history) {
activitiesTested++; activitiesTested++;
const aMatches = this.getMatchesFromActivity(h, testOn, reg); const aMatches = getMatchesFromActivity(h, testOn, regexTests, exhaustive);
matches = matches.concat(aMatches).slice(0, 100); actMatches = actMatches.concat(aMatches);
matchCount += aMatches.length; const actHistoryMatchSummary = regexResultsSummary(aMatches);
const matched = comparisonTextOp(aMatches.length, matchComparison.operator, matchComparison.value); matches = matches.concat(actHistoryMatchSummary.matches).slice(0, 100);
matchCount += actHistoryMatchSummary.matches.length;
const matched = comparisonTextOp(actHistoryMatchSummary.matches.length, matchComparison.operator, matchComparison.value);
if (matched) { if (matched) {
activitiesMatchedCount++; activitiesMatchedCount++;
} }
@@ -282,10 +292,19 @@ export class RegexRule extends Rule {
humanWindow = '1 Item'; humanWindow = '1 Item';
} }
// to provide at least one useful regex for this criteria
// use the first regex found by default
let relevantRegex: string = regexTests[0].toString();
// but if more than one regex was listed AND we did have matches
// then use the first regex that actually got a match
if(regexTests.length > 0 && actMatches.length > 0) {
relevantRegex = actMatches[0].test.toString();
}
const critResults = { const critResults = {
criteria: { criteria: {
name, name,
regex: regex !== regexContent ? `${regex} from ${regexContent}` : regex, regex: relevantRegex,
testOn, testOn,
matchThreshold, matchThreshold,
activityMatchThreshold, activityMatchThreshold,
@@ -352,44 +371,117 @@ export class RegexRule extends Rule {
return Promise.resolve([criteriaMet, this.getResult(criteriaMet, {result, data: {results: criteriaResults, matchSample }})]); return Promise.resolve([criteriaMet, this.getResult(criteriaMet, {result, data: {results: criteriaResults, matchSample }})]);
} }
protected getMatchesFromActivity(a: (Submission | Comment), testOn: string[], reg: RegExp): string[] { protected async convertToRegexArray(name: string | number, value: string | string[]): Promise<RegExp[]> {
let m: string[] = []; const regexTests: RegExp[] = [];
// determine what content we are testing const regexStringVals = typeof value === 'string' ? [value] : value;
let contents: string[] = []; for(const r of regexStringVals) {
if (asSubmission(a)) { // check regex
for (const l of testOn) { const regexContent = await this.resources.getContent(r);
switch (l) { const reg = parseStringToRegex(regexContent, 'ig');
case 'title': if (reg === undefined) {
contents.push(a.title); throw new SimpleError(`Value given for regex on Criteria ${name} was not valid: ${value}`);
break;
case 'body':
if (a.is_self) {
contents.push(a.selftext);
}
break;
case 'url':
if (isExternalUrlSubmission(a)) {
contents.push(a.url);
}
break;
}
} }
} else { // ok cool its a valid regex
contents.push(a.body) regexTests.push(reg);
} }
return regexTests;
for (const c of contents) {
const results = parseRegex(reg, c);
if(results !== undefined) {
for(const r of results) {
m.push(r.match);
}
}
}
return m;
} }
} }
/**
 * Runs one regular expression against every string in `contents` and gathers the matches.
 *
 * Every result returned by `parseRegex` is shallow-copied and tagged with the
 * expression that produced it (`test`) so the match can later be attributed to that regex.
 *
 * @param contents strings to search, tested in the order given
 * @param reg the expression to run against each string
 * @returns all tagged matches in content order, or an empty array when nothing matched
 * */
export const getMatchResultsFromContent = (contents: string[], reg: RegExp): RegExResultWithTest[] => {
    const tagged: RegExResultWithTest[] = [];
    for (const content of contents) {
        const found = parseRegex(reg, content);
        // parseRegex signals "no match" with undefined
        if (found === undefined) {
            continue;
        }
        for (const result of found) {
            tagged.push({...result, test: reg});
        }
    }
    return tagged;
}
/**
 * Collapses a list of tagged regex results into a summary.
 *
 * The summary contains:
 * * `matches` => every matched string, in the order the results were given
 * * `matchesByTest` => matched strings bucketed by the string form of the regex that produced them
 * * `groups` => values of named capture groups bucketed by group name
 *
 * Buckets are created with own-property checks rather than `=== undefined` so a named group
 * that shares a name with an inherited `Object.prototype` member (`constructor`, `toString`, ...)
 * gets its own array instead of throwing when `push` is called on the inherited value.
 *
 * @param results tagged match results to summarize
 * @returns the aggregated summary; all collections are empty when `results` is empty
 * */
export const regexResultsSummary = (results: RegExResultWithTest[]) => {
    const matchResults: ActivityMatchResults = {
        matches: [],
        matchesByTest: {},
        groups: {}
    }

    // appends value to the array stored under key, creating the array as an OWN property first if needed
    const appendTo = (bucket: Record<string, string[]>, key: string, value: string): void => {
        if (!Object.prototype.hasOwnProperty.call(bucket, key)) {
            // defineProperty (not assignment) so that even a key like "__proto__" becomes a plain own data property
            Object.defineProperty(bucket, key, {
                value: [],
                writable: true,
                enumerable: true,
                configurable: true
            });
        }
        bucket[key].push(value);
    }

    for (const r of results) {
        appendTo(matchResults.matchesByTest, r.test.toString(), r.match);
        matchResults.matches.push(r.match);
        if (r.named !== undefined) {
            for (const [key, val] of Object.entries(r.named)) {
                appendTo(matchResults.groups, key, val);
            }
        }
    }
    return matchResults;
}
/**
 * Tests the matchable content of an Activity against a list of regular expressions.
 *
 * Expressions are run in the order given. Expressions that produce no matches are skipped.
 *
 * * `exhaustive: true` => every expression is run and all matches are returned
 * * `exhaustive: false` => testing stops after the first expression that produces a match
 *
 * @param a the Submission or Comment to test
 * @param testOn which parts of the Activity to test (passed through to getMatchableContent)
 * @param regexes expressions to run, in priority order
 * @param exhaustive whether to keep testing after an expression has matched
 * @returns all matches found, each tagged with the expression that produced it
 * */
export const getMatchesFromActivity = (a: (Submission | Comment), testOn: string[], regexes: RegExp[], exhaustive: boolean): RegExResultWithTest[] => {
    // determine what content we are testing
    const contents: string[] = getMatchableContent(a, testOn);
    const collected: RegExResultWithTest[] = [];

    for (const reg of regexes) {
        const found = getMatchResultsFromContent(contents, reg);
        if (found.length === 0) {
            continue;
        }
        for (const hit of found) {
            collected.push(hit);
        }
        // a non-exhaustive check is satisfied by the first expression that matches
        if (!exhaustive) {
            break;
        }
    }
    return collected;
}
/**
 * Collects the strings from an Activity that regexes should be tested against.
 *
 * For a Submission each entry of `testOn` selects a piece of content, in the order listed:
 * * `title` => the submission title
 * * `body` => the selftext, only when the submission is a self post
 * * `url` => the url, only when isExternalUrlSubmission is true for the submission
 *
 * Unrecognized `testOn` values are ignored.
 *
 * For anything that is not a Submission `testOn` is not consulted and the body is always used.
 *
 * @param a the Activity to pull content from
 * @param testOn names of the Submission parts to include
 * @returns the content strings to test
 * */
const getMatchableContent = (a: SnoowrapActivity, testOn: string[]) => {
    const contents: string[] = [];

    if (!asSubmission(a)) {
        contents.push(a.body)
        return contents;
    }

    for (const part of testOn) {
        if (part === 'title') {
            contents.push(a.title);
        } else if (part === 'body') {
            if (a.is_self) {
                contents.push(a.selftext);
            }
        } else if (part === 'url') {
            if (isExternalUrlSubmission(a)) {
                contents.push(a.url);
            }
        }
    }
    return contents;
}
/**
 * Bundle of parsed comparisons for evaluating regex match counts.
 *
 * NOTE(review): nothing in the visible code consumes this interface yet -- the field
 * descriptions below are inferred from the names and should be confirmed against usage.
 * */
interface RegexMatchComparisonOptions {
// presumably the per-Activity match count comparison (cf. `matchThreshold`) -- TODO confirm
matchComparison: GenericComparison
// presumably the comparison for number of Activities matched (cf. `activityMatchThreshold`) -- TODO confirm
activityMatchComparison?: GenericComparison
// presumably the comparison for total matches across all Activities -- TODO confirm
totalMatchComparison?: GenericComparison
}
/**
 * Aggregated view of regex match results, as built by regexResultsSummary
 * */
interface ActivityMatchResults {
// every matched string, in the order the results were processed
matches: string[]
// matched strings keyed by the string form (`RegExp.toString()`) of the expression that produced them
matchesByTest: Record<string, string[]>
// values captured by named groups, keyed by group name
groups: Record<string, string[]>
}
interface RegexConfig { interface RegexConfig {
/** /**
* A list of Regular Expressions and conditions under which tested Activity(ies) are matched * A list of Regular Expressions and conditions under which tested Activity(ies) are matched

View File

@@ -7,7 +7,6 @@ import {mergeArr, parseDuration, random} from "../util";
import { Logger } from "winston"; import { Logger } from "winston";
import {ErrorWithCause} from "pony-cause"; import {ErrorWithCause} from "pony-cause";
import dayjs, {Dayjs as DayjsObj} from "dayjs"; import dayjs, {Dayjs as DayjsObj} from "dayjs";
import { FixedSizeList } from 'fixed-size-list'
type Awaitable<T> = Promise<T> | T; type Awaitable<T> = Promise<T> | T;
@@ -15,12 +14,10 @@ interface RCBPollingOptions<T> extends SnooStormOptions {
subreddit: string, subreddit: string,
enforceContinuity?: boolean enforceContinuity?: boolean
logger: Logger logger: Logger
sort?: string
name?: string, name?: string,
processed?: FixedSizeList<T[keyof T]> processed?: Set<T[keyof T]>
label?: string label?: string
dateCutoff?: boolean dateCutoff?: boolean
maxHistory?: number
} }
interface RCBPollConfiguration<T> extends PollConfiguration<T>,RCBPollingOptions<T> { interface RCBPollConfiguration<T> extends PollConfiguration<T>,RCBPollingOptions<T> {
@@ -43,9 +40,6 @@ export class SPoll<T extends RedditContent<object>> extends Poll<T> {
name: string = 'Reddit Stream'; name: string = 'Reddit Stream';
logger: Logger; logger: Logger;
subreddit: string; subreddit: string;
// using a fixed sized "regular" array means slightly more memory usage vs. a Set when holding N items
// BUT now we can limit N items to something reasonable instead of having a crazy big Set with all items seen since stream was started
processedBuffer: FixedSizeList<T[keyof T]>;
constructor(options: RCBPollConfiguration<T>) { constructor(options: RCBPollConfiguration<T>) {
super(options); super(options);
@@ -60,7 +54,6 @@ export class SPoll<T extends RedditContent<object>> extends Poll<T> {
label = 'Polling', label = 'Polling',
processed, processed,
dateCutoff, dateCutoff,
maxHistory = 300,
} = options; } = options;
this.subreddit = subreddit; this.subreddit = subreddit;
this.name = name !== undefined ? name : this.name; this.name = name !== undefined ? name : this.name;
@@ -74,10 +67,8 @@ export class SPoll<T extends RedditContent<object>> extends Poll<T> {
// if we pass in processed on init the intention is to "continue" from where the previous stream left off // if we pass in processed on init the intention is to "continue" from where the previous stream left off
// WITHOUT new start behavior // WITHOUT new start behavior
if (processed !== undefined) { if (processed !== undefined) {
this.processedBuffer = processed; this.processed = processed;
this.newStart = false; this.newStart = false;
} else {
this.processedBuffer = new FixedSizeList<T[keyof T]>(maxHistory);
} }
clearInterval(this.interval); clearInterval(this.interval);
@@ -106,14 +97,14 @@ export class SPoll<T extends RedditContent<object>> extends Poll<T> {
} }
for (const item of batch) { for (const item of batch) {
const id = item[self.identifier]; const id = item[self.identifier];
if (self.processedBuffer.data.some(x => x === id)) { if (self.processed.has(id)) {
anyAlreadySeen = true; anyAlreadySeen = true;
continue; continue;
} }
// add new item to list and set as processed // add new item to list and set as processed
newItems.push(item); newItems.push(item);
self.processedBuffer.add(id); self.processed.add(id);
} }
page++; page++;
} }