Compare commits

..

1 Commits

Author SHA1 Message Date
FoxxMD
eee2a8e856 feat: Support multiple expressions for regex property 2022-11-22 16:03:50 -05:00
6 changed files with 155 additions and 96 deletions

27
package-lock.json generated
View File

@@ -46,7 +46,6 @@
"express-session-cache-manager": "^1.0.2",
"express-socket.io-session": "^1.3.5",
"fast-deep-equal": "^3.1.3",
"fixed-size-list": "^0.3.0",
"globrex": "^0.1.2",
"got": "^11.8.2",
"he": "^1.2.0",
@@ -3980,14 +3979,6 @@
"micromatch": "^4.0.2"
}
},
"node_modules/fixed-size-list": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/fixed-size-list/-/fixed-size-list-0.3.0.tgz",
"integrity": "sha512-c6I8wEE4ZtjKz35BaodH7yWuWmcaUVQwgBeNcI3LxJu79YH+ezHvf1oS9VkgJmyVy5eQ8Wh6jNVcj2rB4rgVgA==",
"dependencies": {
"mitt": "^1.2.0"
}
},
"node_modules/flat": {
"version": "5.0.2",
"resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz",
@@ -6122,11 +6113,6 @@
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q=="
},
"node_modules/mitt": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/mitt/-/mitt-1.2.0.tgz",
"integrity": "sha512-r6lj77KlwqLhIUku9UWYes7KJtsczvolZkzp8hbaDPPaE24OmWl5s539Mytlj22siEQKosZ26qCBgda2PKwoJw=="
},
"node_modules/mkdirp": {
"version": "0.5.6",
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz",
@@ -13602,14 +13588,6 @@
"micromatch": "^4.0.2"
}
},
"fixed-size-list": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/fixed-size-list/-/fixed-size-list-0.3.0.tgz",
"integrity": "sha512-c6I8wEE4ZtjKz35BaodH7yWuWmcaUVQwgBeNcI3LxJu79YH+ezHvf1oS9VkgJmyVy5eQ8Wh6jNVcj2rB4rgVgA==",
"requires": {
"mitt": "^1.2.0"
}
},
"flat": {
"version": "5.0.2",
"resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz",
@@ -15213,11 +15191,6 @@
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q=="
},
"mitt": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/mitt/-/mitt-1.2.0.tgz",
"integrity": "sha512-r6lj77KlwqLhIUku9UWYes7KJtsczvolZkzp8hbaDPPaE24OmWl5s539Mytlj22siEQKosZ26qCBgda2PKwoJw=="
},
"mkdirp": {
"version": "0.5.6",
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz",

View File

@@ -68,7 +68,6 @@
"express-session-cache-manager": "^1.0.2",
"express-socket.io-session": "^1.3.5",
"fast-deep-equal": "^3.1.3",
"fixed-size-list": "^0.3.0",
"globrex": "^0.1.2",
"got": "^11.8.2",
"he": "^1.2.0",

View File

@@ -572,7 +572,7 @@ class Bot implements BotInstanceFunctions {
if (stream !== undefined) {
this.logger.info('Restarting SHARED COMMENT STREAM due to a subreddit config change');
stream.end('Replacing with a new stream with updated subreddits');
processed = stream.processedBuffer;
processed = stream.processed;
}
if (sharedCommentsSubreddits.length > 100) {
this.logger.warn(`SHARED COMMENT STREAM => Reddit can only combine 100 subreddits for getting new Comments but this bot has ${sharedCommentsSubreddits.length}`);
@@ -605,7 +605,7 @@ class Bot implements BotInstanceFunctions {
if (stream !== undefined) {
this.logger.info('Restarting SHARED SUBMISSION STREAM due to a subreddit config change');
stream.end('Replacing with a new stream with updated subreddits');
processed = stream.processedBuffer;
processed = stream.processed;
}
if (sharedSubmissionsSubreddits.length > 100) {
this.logger.warn(`SHARED SUBMISSION STREAM => Reddit can only combine 100 subreddits for getting new Submissions but this bot has ${sharedSubmissionsSubreddits.length}`);

View File

@@ -748,6 +748,10 @@ export interface RegExResult {
named: NamedGroup
}
export interface RegExResultWithTest extends RegExResult {
test: RegExp
}
export type StrongCache = {
authorTTL: number | boolean,
userNotesTTL: number | boolean,

View File

@@ -7,6 +7,7 @@ import {
PASS, triggeredIndicator, windowConfigToWindowCriteria
} from "../util";
import {
RegExResultWithTest,
RuleResult,
} from "../Common/interfaces";
import dayjs from 'dayjs';
@@ -14,10 +15,11 @@ import {SimpleError} from "../Utils/Errors";
import {JoinOperands} from "../Common/Infrastructure/Atomic";
import {ActivityWindowConfig} from "../Common/Infrastructure/ActivityWindow";
import {
comparisonTextOp,
comparisonTextOp, GenericComparison,
parseGenericValueComparison,
parseGenericValueOrPercentComparison
} from "../Common/Infrastructure/Comparisons";
import {SnoowrapActivity} from "../Common/Infrastructure/Reddit";
export interface RegexCriteria {
/**
@@ -27,13 +29,23 @@ export interface RegexCriteria {
* */
name?: string
/**
* A valid Regular Expression to test content against
* A valid Regular Expression, or list of expressions, to test content against
*
* If no flags are specified then the **global** flag is used by default
*
* @examples ["/reddit|FoxxMD/ig"]
* */
regex: string,
regex: string | string[],
/**
* Determines if ALL regexes listed are run or if regexes are only run until one is matched.
*
* * `true` => all regexes are always run
* * `false` => regexes are run until one matches
*
* @default false
* */
exhaustive?: boolean
/**
* Which content from an Activity to test the regex against
@@ -157,6 +169,7 @@ export class RegexRule extends Rule {
const {
name = (index + 1),
regex,
exhaustive = false,
testOn: testOnVals = ['title', 'body'],
lookAt = 'all',
matchThreshold = '> 0',
@@ -174,13 +187,7 @@ export class RegexRule extends Rule {
return acc.concat(curr);
}, []);
// check regex
const regexContent = await this.resources.getContent(regex);
const reg = parseStringToRegex(regexContent, 'g');
if(reg === undefined) {
throw new SimpleError(`Value given for regex on Criteria ${name} was not valid: ${regex}`);
}
// ok cool its a valid regex
const regexTests: RegExp[] = await this.convertToRegexArray(name, regex);
const matchComparison = parseGenericValueComparison(matchThreshold);
const activityMatchComparison = activityMatchThreshold === null ? undefined : parseGenericValueOrPercentComparison(activityMatchThreshold);
@@ -198,12 +205,13 @@ export class RegexRule extends Rule {
// first lets see if the activity we are checking satisfies thresholds
// since we may be able to avoid api calls to get history
let actMatches = this.getMatchesFromActivity(item, testOn, reg);
matches = matches.concat(actMatches).slice(0, 100);
matchCount += actMatches.length;
let actMatches = getMatchesFromActivity(item, testOn, regexTests, exhaustive);
const actMatchSummary = regexResultsSummary(actMatches);
matches = matches.concat(actMatchSummary.matches).slice(0, 100);
matchCount += actMatchSummary.matches.length;
activitiesTested++;
const singleMatched = comparisonTextOp(actMatches.length, matchComparison.operator, matchComparison.value);
const singleMatched = comparisonTextOp(actMatchSummary.matches.length, matchComparison.operator, matchComparison.value);
if (singleMatched) {
activitiesMatchedCount++;
}
@@ -233,7 +241,7 @@ export class RegexRule extends Rule {
}
history = await this.resources.getAuthorActivities(item.author, strongWindow);
// remove current activity it exists in history so we don't count it twice
// remove current activity if it exists in history so we don't count it twice
history = history.filter(x => x.id !== item.id);
const historyLength = history.length;
@@ -252,10 +260,12 @@ export class RegexRule extends Rule {
for (const h of history) {
activitiesTested++;
const aMatches = this.getMatchesFromActivity(h, testOn, reg);
matches = matches.concat(aMatches).slice(0, 100);
matchCount += aMatches.length;
const matched = comparisonTextOp(aMatches.length, matchComparison.operator, matchComparison.value);
const aMatches = getMatchesFromActivity(h, testOn, regexTests, exhaustive);
actMatches = actMatches.concat(aMatches);
const actHistoryMatchSummary = regexResultsSummary(aMatches);
matches = matches.concat(actHistoryMatchSummary.matches).slice(0, 100);
matchCount += actHistoryMatchSummary.matches.length;
const matched = comparisonTextOp(actHistoryMatchSummary.matches.length, matchComparison.operator, matchComparison.value);
if (matched) {
activitiesMatchedCount++;
}
@@ -282,10 +292,19 @@ export class RegexRule extends Rule {
humanWindow = '1 Item';
}
// to provide at least one useful regex for this criteria
// use the first regex found by default
let relevantRegex: string = regexTests[0].toString();
// but if more than one regex was listed AND we did have matches
// then use the first regex that actually got a match
if(regexTests.length > 0 && actMatches.length > 0) {
relevantRegex = actMatches[0].test.toString();
}
const critResults = {
criteria: {
name,
regex: regex !== regexContent ? `${regex} from ${regexContent}` : regex,
regex: relevantRegex,
testOn,
matchThreshold,
activityMatchThreshold,
@@ -352,44 +371,117 @@ export class RegexRule extends Rule {
return Promise.resolve([criteriaMet, this.getResult(criteriaMet, {result, data: {results: criteriaResults, matchSample }})]);
}
protected getMatchesFromActivity(a: (Submission | Comment), testOn: string[], reg: RegExp): string[] {
let m: string[] = [];
// determine what content we are testing
let contents: string[] = [];
if (asSubmission(a)) {
for (const l of testOn) {
switch (l) {
case 'title':
contents.push(a.title);
break;
case 'body':
if (a.is_self) {
contents.push(a.selftext);
}
break;
case 'url':
if (isExternalUrlSubmission(a)) {
contents.push(a.url);
}
break;
}
protected async convertToRegexArray(name: string | number, value: string | string[]): Promise<RegExp[]> {
const regexTests: RegExp[] = [];
const regexStringVals = typeof value === 'string' ? [value] : value;
for(const r of regexStringVals) {
// check regex
const regexContent = await this.resources.getContent(r);
const reg = parseStringToRegex(regexContent, 'ig');
if (reg === undefined) {
throw new SimpleError(`Value given for regex on Criteria ${name} was not valid: ${value}`);
}
} else {
contents.push(a.body)
// ok cool its a valid regex
regexTests.push(reg);
}
for (const c of contents) {
const results = parseRegex(reg, c);
if(results !== undefined) {
for(const r of results) {
m.push(r.match);
}
}
}
return m;
return regexTests;
}
}
/**
 * Runs a single regular expression against each content string and gathers the results.
 *
 * Every result produced by `parseRegex` is shallow-copied and tagged with the
 * expression that produced it (`test`) so callers can tell which regex a match came from.
 *
 * @param contents Strings to test, processed in order
 * @param reg Expression to run against each string
 * @returns Tagged results in content order; an empty array when `parseRegex` yields nothing
 */
export const getMatchResultsFromContent = (contents: string[], reg: RegExp): RegExResultWithTest[] => {
    const tagged: RegExResultWithTest[] = [];
    for (const content of contents) {
        const found = parseRegex(reg, content);
        // parseRegex signals "no results" with undefined
        if (found === undefined) {
            continue;
        }
        for (const result of found) {
            tagged.push({...result, test: reg});
        }
    }
    return tagged;
}
/**
 * Aggregates regex results into three views:
 *
 * * `matches` => every matched string, in input order
 * * `matchesByTest` => matched strings bucketed by the string form of the regex that produced them
 * * `groups` => named capture group values bucketed by group name
 *
 * Buckets are looked up with own-property checks so that a named group called
 * `constructor`, `toString`, `__proto__`, etc. gets its own bucket instead of
 * colliding with a member inherited from `Object.prototype`.
 *
 * @param results Results tagged with the regex (`test`) that produced them
 * @returns The aggregated match results
 */
export const regexResultsSummary = (results: RegExResultWithTest[]) => {
    const matchResults: ActivityMatchResults = {
        matches: [],
        matchesByTest: {},
        groups: {}
    }

    // returns the bucket stored under `key`, creating it as an own property when missing
    const bucketFor = (record: Record<string, string[]>, key: string): string[] => {
        if (!Object.prototype.hasOwnProperty.call(record, key)) {
            // defineProperty (rather than assignment) so that a key of `__proto__` creates a real own property
            Object.defineProperty(record, key, {value: [], writable: true, enumerable: true, configurable: true});
        }
        return record[key];
    };

    for (const r of results) {
        // string form of the regex (source + flags) identifies the test
        const testKey = r.test.toString();
        bucketFor(matchResults.matchesByTest, testKey).push(r.match);
        matchResults.matches.push(r.match);
        if (r.named !== undefined) {
            for (const [key, val] of Object.entries(r.named)) {
                bucketFor(matchResults.groups, key).push(val);
            }
        }
    }
    return matchResults;
}
/**
 * Tests an Activity's matchable content against a list of regular expressions.
 *
 * Expressions are tried in the order given. When `exhaustive` is `false` testing stops
 * after the first expression that produces any result; when `true` every expression
 * is run and all results are returned together.
 *
 * @param a Activity to test
 * @param testOn Names of the content fields to test (passed through to `getMatchableContent`)
 * @param regexes Expressions to run, in priority order
 * @param exhaustive Whether to keep testing after an expression has matched
 * @returns Results tagged with the expression that produced them
 */
export const getMatchesFromActivity = (a: (Submission | Comment), testOn: string[], regexes: RegExp[], exhaustive: boolean): RegExResultWithTest[] => {
    // determine what content we are testing
    const contents: string[] = getMatchableContent(a, testOn);
    const collected: RegExResultWithTest[] = [];

    for (const expression of regexes) {
        const found = getMatchResultsFromContent(contents, expression);
        if (found.length === 0) {
            continue;
        }
        for (const hit of found) {
            collected.push(hit);
        }
        // a non-exhaustive run is satisfied by the first expression that matches
        if (!exhaustive) {
            break;
        }
    }

    return collected;
}
/**
 * Collects the strings from an Activity that regexes should be tested against.
 *
 * For a submission each entry of `testOn` is considered in order:
 *
 * * `title` => the title is always included
 * * `body` => the selftext is included only when the submission is a self post
 * * `url` => the url is included only when `isExternalUrlSubmission` accepts the submission
 *
 * Any other `testOn` value is ignored. For anything that is not a submission the
 * `body` is returned regardless of `testOn`.
 *
 * @param a Activity to pull content from
 * @param testOn Names of the submission fields to include
 * @returns Content strings in the order they were requested
 */
const getMatchableContent = (a: SnoowrapActivity, testOn: string[]) => {
    const contents: string[] = [];

    if (!asSubmission(a)) {
        contents.push(a.body)
        return contents;
    }

    for (const field of testOn) {
        if (field === 'title') {
            contents.push(a.title);
        } else if (field === 'body') {
            if (a.is_self) {
                contents.push(a.selftext);
            }
        } else if (field === 'url') {
            if (isExternalUrlSubmission(a)) {
                contents.push(a.url);
            }
        }
    }

    return contents;
}
/**
 * A bundle of parsed comparisons used when evaluating regex matches.
 *
 * NOTE(review): not referenced in the visible portion of this file -- the field names suggest
 * they correspond to the `matchThreshold`, `activityMatchThreshold` and a total-match threshold
 * from the criteria; confirm against the caller.
 * */
interface RegexMatchComparisonOptions {
// presumably the comparison applied to the number of matches in a single Activity -- TODO confirm
matchComparison: GenericComparison
// presumably the comparison applied to the number of Activities that matched -- TODO confirm
activityMatchComparison?: GenericComparison
// presumably the comparison applied to the total number of matches across all Activities -- TODO confirm
totalMatchComparison?: GenericComparison
}
/**
 * Aggregated view of regex results, as built by `regexResultsSummary`
 * */
interface ActivityMatchResults {
// every matched string, in the order the results were given
matches: string[]
// matched strings keyed by the string form (`toString()`) of the regex that produced them
matchesByTest: Record<string, string[]>
// named capture group values keyed by group name
groups: Record<string, string[]>
}
interface RegexConfig {
/**
* A list of Regular Expressions and conditions under which tested Activity(ies) are matched

View File

@@ -7,7 +7,6 @@ import {mergeArr, parseDuration, random} from "../util";
import { Logger } from "winston";
import {ErrorWithCause} from "pony-cause";
import dayjs, {Dayjs as DayjsObj} from "dayjs";
import { FixedSizeList } from 'fixed-size-list'
type Awaitable<T> = Promise<T> | T;
@@ -15,12 +14,10 @@ interface RCBPollingOptions<T> extends SnooStormOptions {
subreddit: string,
enforceContinuity?: boolean
logger: Logger
sort?: string
name?: string,
processed?: FixedSizeList<T[keyof T]>
processed?: Set<T[keyof T]>
label?: string
dateCutoff?: boolean
maxHistory?: number
}
interface RCBPollConfiguration<T> extends PollConfiguration<T>,RCBPollingOptions<T> {
@@ -43,9 +40,6 @@ export class SPoll<T extends RedditContent<object>> extends Poll<T> {
name: string = 'Reddit Stream';
logger: Logger;
subreddit: string;
// using a fixed sized "regular" array means slightly more memory usage vs. a Set when holding N items
// BUT now we can limit N items to something reasonable instead of having a crazy big Set with all items seen since stream was started
processedBuffer: FixedSizeList<T[keyof T]>;
constructor(options: RCBPollConfiguration<T>) {
super(options);
@@ -60,7 +54,6 @@ export class SPoll<T extends RedditContent<object>> extends Poll<T> {
label = 'Polling',
processed,
dateCutoff,
maxHistory = 300,
} = options;
this.subreddit = subreddit;
this.name = name !== undefined ? name : this.name;
@@ -74,10 +67,8 @@ export class SPoll<T extends RedditContent<object>> extends Poll<T> {
// if we pass in processed on init the intention is to "continue" from where the previous stream left off
// WITHOUT new start behavior
if (processed !== undefined) {
this.processedBuffer = processed;
this.processed = processed;
this.newStart = false;
} else {
this.processedBuffer = new FixedSizeList<T[keyof T]>(maxHistory);
}
clearInterval(this.interval);
@@ -106,14 +97,14 @@ export class SPoll<T extends RedditContent<object>> extends Poll<T> {
}
for (const item of batch) {
const id = item[self.identifier];
if (self.processedBuffer.data.some(x => x === id)) {
if (self.processed.has(id)) {
anyAlreadySeen = true;
continue;
}
// add new item to list and set as processed
newItems.push(item);
self.processedBuffer.add(id);
self.processed.add(id);
}
page++;
}