Compare commits

..

1 Commits

Author SHA1 Message Date
FoxxMD
14a317ace4 Use fixed array instead of Set to maybe reduce memory usage 2022-11-21 15:56:56 -05:00
6 changed files with 93 additions and 152 deletions

27
package-lock.json generated
View File

@@ -46,6 +46,7 @@
"express-session-cache-manager": "^1.0.2", "express-session-cache-manager": "^1.0.2",
"express-socket.io-session": "^1.3.5", "express-socket.io-session": "^1.3.5",
"fast-deep-equal": "^3.1.3", "fast-deep-equal": "^3.1.3",
"fixed-size-list": "^0.3.0",
"globrex": "^0.1.2", "globrex": "^0.1.2",
"got": "^11.8.2", "got": "^11.8.2",
"he": "^1.2.0", "he": "^1.2.0",
@@ -3979,6 +3980,14 @@
"micromatch": "^4.0.2" "micromatch": "^4.0.2"
} }
}, },
"node_modules/fixed-size-list": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/fixed-size-list/-/fixed-size-list-0.3.0.tgz",
"integrity": "sha512-c6I8wEE4ZtjKz35BaodH7yWuWmcaUVQwgBeNcI3LxJu79YH+ezHvf1oS9VkgJmyVy5eQ8Wh6jNVcj2rB4rgVgA==",
"dependencies": {
"mitt": "^1.2.0"
}
},
"node_modules/flat": { "node_modules/flat": {
"version": "5.0.2", "version": "5.0.2",
"resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz", "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz",
@@ -6113,6 +6122,11 @@
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==" "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q=="
}, },
"node_modules/mitt": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/mitt/-/mitt-1.2.0.tgz",
"integrity": "sha512-r6lj77KlwqLhIUku9UWYes7KJtsczvolZkzp8hbaDPPaE24OmWl5s539Mytlj22siEQKosZ26qCBgda2PKwoJw=="
},
"node_modules/mkdirp": { "node_modules/mkdirp": {
"version": "0.5.6", "version": "0.5.6",
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz", "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz",
@@ -13588,6 +13602,14 @@
"micromatch": "^4.0.2" "micromatch": "^4.0.2"
} }
}, },
"fixed-size-list": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/fixed-size-list/-/fixed-size-list-0.3.0.tgz",
"integrity": "sha512-c6I8wEE4ZtjKz35BaodH7yWuWmcaUVQwgBeNcI3LxJu79YH+ezHvf1oS9VkgJmyVy5eQ8Wh6jNVcj2rB4rgVgA==",
"requires": {
"mitt": "^1.2.0"
}
},
"flat": { "flat": {
"version": "5.0.2", "version": "5.0.2",
"resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz", "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz",
@@ -15191,6 +15213,11 @@
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==" "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q=="
}, },
"mitt": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/mitt/-/mitt-1.2.0.tgz",
"integrity": "sha512-r6lj77KlwqLhIUku9UWYes7KJtsczvolZkzp8hbaDPPaE24OmWl5s539Mytlj22siEQKosZ26qCBgda2PKwoJw=="
},
"mkdirp": { "mkdirp": {
"version": "0.5.6", "version": "0.5.6",
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz", "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz",

View File

@@ -68,6 +68,7 @@
"express-session-cache-manager": "^1.0.2", "express-session-cache-manager": "^1.0.2",
"express-socket.io-session": "^1.3.5", "express-socket.io-session": "^1.3.5",
"fast-deep-equal": "^3.1.3", "fast-deep-equal": "^3.1.3",
"fixed-size-list": "^0.3.0",
"globrex": "^0.1.2", "globrex": "^0.1.2",
"got": "^11.8.2", "got": "^11.8.2",
"he": "^1.2.0", "he": "^1.2.0",

View File

@@ -572,7 +572,7 @@ class Bot implements BotInstanceFunctions {
if (stream !== undefined) { if (stream !== undefined) {
this.logger.info('Restarting SHARED COMMENT STREAM due to a subreddit config change'); this.logger.info('Restarting SHARED COMMENT STREAM due to a subreddit config change');
stream.end('Replacing with a new stream with updated subreddits'); stream.end('Replacing with a new stream with updated subreddits');
processed = stream.processed; processed = stream.processedBuffer;
} }
if (sharedCommentsSubreddits.length > 100) { if (sharedCommentsSubreddits.length > 100) {
this.logger.warn(`SHARED COMMENT STREAM => Reddit can only combine 100 subreddits for getting new Comments but this bot has ${sharedCommentsSubreddits.length}`); this.logger.warn(`SHARED COMMENT STREAM => Reddit can only combine 100 subreddits for getting new Comments but this bot has ${sharedCommentsSubreddits.length}`);
@@ -605,7 +605,7 @@ class Bot implements BotInstanceFunctions {
if (stream !== undefined) { if (stream !== undefined) {
this.logger.info('Restarting SHARED SUBMISSION STREAM due to a subreddit config change'); this.logger.info('Restarting SHARED SUBMISSION STREAM due to a subreddit config change');
stream.end('Replacing with a new stream with updated subreddits'); stream.end('Replacing with a new stream with updated subreddits');
processed = stream.processed; processed = stream.processedBuffer;
} }
if (sharedSubmissionsSubreddits.length > 100) { if (sharedSubmissionsSubreddits.length > 100) {
this.logger.warn(`SHARED SUBMISSION STREAM => Reddit can only combine 100 subreddits for getting new Submissions but this bot has ${sharedSubmissionsSubreddits.length}`); this.logger.warn(`SHARED SUBMISSION STREAM => Reddit can only combine 100 subreddits for getting new Submissions but this bot has ${sharedSubmissionsSubreddits.length}`);

View File

@@ -748,10 +748,6 @@ export interface RegExResult {
named: NamedGroup named: NamedGroup
} }
export interface RegExResultWithTest extends RegExResult {
test: RegExp
}
export type StrongCache = { export type StrongCache = {
authorTTL: number | boolean, authorTTL: number | boolean,
userNotesTTL: number | boolean, userNotesTTL: number | boolean,

View File

@@ -7,7 +7,6 @@ import {
PASS, triggeredIndicator, windowConfigToWindowCriteria PASS, triggeredIndicator, windowConfigToWindowCriteria
} from "../util"; } from "../util";
import { import {
RegExResultWithTest,
RuleResult, RuleResult,
} from "../Common/interfaces"; } from "../Common/interfaces";
import dayjs from 'dayjs'; import dayjs from 'dayjs';
@@ -15,11 +14,10 @@ import {SimpleError} from "../Utils/Errors";
import {JoinOperands} from "../Common/Infrastructure/Atomic"; import {JoinOperands} from "../Common/Infrastructure/Atomic";
import {ActivityWindowConfig} from "../Common/Infrastructure/ActivityWindow"; import {ActivityWindowConfig} from "../Common/Infrastructure/ActivityWindow";
import { import {
comparisonTextOp, GenericComparison, comparisonTextOp,
parseGenericValueComparison, parseGenericValueComparison,
parseGenericValueOrPercentComparison parseGenericValueOrPercentComparison
} from "../Common/Infrastructure/Comparisons"; } from "../Common/Infrastructure/Comparisons";
import {SnoowrapActivity} from "../Common/Infrastructure/Reddit";
export interface RegexCriteria { export interface RegexCriteria {
/** /**
@@ -29,23 +27,13 @@ export interface RegexCriteria {
* */ * */
name?: string name?: string
/** /**
* A valid Regular Expression, or list of expressions, to test content against * A valid Regular Expression to test content against
* *
* If no flags are specified then the **global** flag is used by default * If no flags are specified then the **global** flag is used by default
* *
* @examples ["/reddit|FoxxMD/ig"] * @examples ["/reddit|FoxxMD/ig"]
* */ * */
regex: string | string[], regex: string,
/**
* Determines if ALL regexes listed are run or if regexes are only run until one is matched.
*
* * `true` => all regexes are always run
* * `false` => regexes are run until one matches
*
* @default false
* */
exhaustive?: boolean
/** /**
* Which content from an Activity to test the regex against * Which content from an Activity to test the regex against
@@ -169,7 +157,6 @@ export class RegexRule extends Rule {
const { const {
name = (index + 1), name = (index + 1),
regex, regex,
exhaustive = false,
testOn: testOnVals = ['title', 'body'], testOn: testOnVals = ['title', 'body'],
lookAt = 'all', lookAt = 'all',
matchThreshold = '> 0', matchThreshold = '> 0',
@@ -187,7 +174,13 @@ export class RegexRule extends Rule {
return acc.concat(curr); return acc.concat(curr);
}, []); }, []);
const regexTests: RegExp[] = await this.convertToRegexArray(name, regex); // check regex
const regexContent = await this.resources.getContent(regex);
const reg = parseStringToRegex(regexContent, 'g');
if(reg === undefined) {
throw new SimpleError(`Value given for regex on Criteria ${name} was not valid: ${regex}`);
}
// ok cool its a valid regex
const matchComparison = parseGenericValueComparison(matchThreshold); const matchComparison = parseGenericValueComparison(matchThreshold);
const activityMatchComparison = activityMatchThreshold === null ? undefined : parseGenericValueOrPercentComparison(activityMatchThreshold); const activityMatchComparison = activityMatchThreshold === null ? undefined : parseGenericValueOrPercentComparison(activityMatchThreshold);
@@ -205,13 +198,12 @@ export class RegexRule extends Rule {
// first lets see if the activity we are checking satisfies thresholds // first lets see if the activity we are checking satisfies thresholds
// since we may be able to avoid api calls to get history // since we may be able to avoid api calls to get history
let actMatches = getMatchesFromActivity(item, testOn, regexTests, exhaustive); let actMatches = this.getMatchesFromActivity(item, testOn, reg);
const actMatchSummary = regexResultsSummary(actMatches); matches = matches.concat(actMatches).slice(0, 100);
matches = matches.concat(actMatchSummary.matches).slice(0, 100); matchCount += actMatches.length;
matchCount += actMatchSummary.matches.length;
activitiesTested++; activitiesTested++;
const singleMatched = comparisonTextOp(actMatchSummary.matches.length, matchComparison.operator, matchComparison.value); const singleMatched = comparisonTextOp(actMatches.length, matchComparison.operator, matchComparison.value);
if (singleMatched) { if (singleMatched) {
activitiesMatchedCount++; activitiesMatchedCount++;
} }
@@ -241,7 +233,7 @@ export class RegexRule extends Rule {
} }
history = await this.resources.getAuthorActivities(item.author, strongWindow); history = await this.resources.getAuthorActivities(item.author, strongWindow);
// remove current activity if it exists in history so we don't count it twice // remove current activity it exists in history so we don't count it twice
history = history.filter(x => x.id !== item.id); history = history.filter(x => x.id !== item.id);
const historyLength = history.length; const historyLength = history.length;
@@ -260,12 +252,10 @@ export class RegexRule extends Rule {
for (const h of history) { for (const h of history) {
activitiesTested++; activitiesTested++;
const aMatches = getMatchesFromActivity(h, testOn, regexTests, exhaustive); const aMatches = this.getMatchesFromActivity(h, testOn, reg);
actMatches = actMatches.concat(aMatches); matches = matches.concat(aMatches).slice(0, 100);
const actHistoryMatchSummary = regexResultsSummary(aMatches); matchCount += aMatches.length;
matches = matches.concat(actHistoryMatchSummary.matches).slice(0, 100); const matched = comparisonTextOp(aMatches.length, matchComparison.operator, matchComparison.value);
matchCount += actHistoryMatchSummary.matches.length;
const matched = comparisonTextOp(actHistoryMatchSummary.matches.length, matchComparison.operator, matchComparison.value);
if (matched) { if (matched) {
activitiesMatchedCount++; activitiesMatchedCount++;
} }
@@ -292,19 +282,10 @@ export class RegexRule extends Rule {
humanWindow = '1 Item'; humanWindow = '1 Item';
} }
// to provide at least one useful regex for this criteria
// use the first regex found by default
let relevantRegex: string = regexTests[0].toString();
// but if more than one regex was listed AND we did have matches
// then use the first regex that actually got a match
if(regexTests.length > 0 && actMatches.length > 0) {
relevantRegex = actMatches[0].test.toString();
}
const critResults = { const critResults = {
criteria: { criteria: {
name, name,
regex: relevantRegex, regex: regex !== regexContent ? `${regex} from ${regexContent}` : regex,
testOn, testOn,
matchThreshold, matchThreshold,
activityMatchThreshold, activityMatchThreshold,
@@ -371,115 +352,42 @@ export class RegexRule extends Rule {
return Promise.resolve([criteriaMet, this.getResult(criteriaMet, {result, data: {results: criteriaResults, matchSample }})]); return Promise.resolve([criteriaMet, this.getResult(criteriaMet, {result, data: {results: criteriaResults, matchSample }})]);
} }
protected async convertToRegexArray(name: string | number, value: string | string[]): Promise<RegExp[]> { protected getMatchesFromActivity(a: (Submission | Comment), testOn: string[], reg: RegExp): string[] {
const regexTests: RegExp[] = []; let m: string[] = [];
const regexStringVals = typeof value === 'string' ? [value] : value; // determine what content we are testing
for(const r of regexStringVals) { let contents: string[] = [];
// check regex if (asSubmission(a)) {
const regexContent = await this.resources.getContent(r); for (const l of testOn) {
const reg = parseStringToRegex(regexContent, 'ig'); switch (l) {
if (reg === undefined) { case 'title':
throw new SimpleError(`Value given for regex on Criteria ${name} was not valid: ${value}`); contents.push(a.title);
} break;
// ok cool its a valid regex case 'body':
regexTests.push(reg); if (a.is_self) {
} contents.push(a.selftext);
return regexTests; }
} break;
} case 'url':
if (isExternalUrlSubmission(a)) {
export const getMatchResultsFromContent = (contents: string[], reg: RegExp): RegExResultWithTest[] => { contents.push(a.url);
let m: RegExResultWithTest[] = []; }
for (const c of contents) { break;
const results = parseRegex(reg, c);
if(results !== undefined) {
for(const r of results) {
m.push({...r, test: reg});
}
}
}
return m;
}
export const regexResultsSummary = (results: RegExResultWithTest[]) => {
const matchResults: ActivityMatchResults = {
matches: [],
matchesByTest: {},
groups: {}
}
for (const r of results) {
if (matchResults.matchesByTest[r.test.toString()] === undefined) {
matchResults.matchesByTest[r.test.toString()] = [];
}
matchResults.matchesByTest[r.test.toString()].push(r.match);
matchResults.matches.push(r.match);
if (r.named !== undefined) {
Object.entries(r.named).forEach(([key, val]) => {
if (matchResults.groups[key] === undefined) {
matchResults.groups[key] = [];
} }
matchResults.groups[key].push(val); }
}); } else {
contents.push(a.body)
} }
}
return matchResults;
}
export const getMatchesFromActivity = (a: (Submission | Comment), testOn: string[], regexes: RegExp[], exhaustive: boolean): RegExResultWithTest[] => { for (const c of contents) {
// determine what content we are testing const results = parseRegex(reg, c);
let contents: string[] = getMatchableContent(a, testOn); if(results !== undefined) {
let results: RegExResultWithTest[] = []; for(const r of results) {
m.push(r.match);
for (const reg of regexes) { }
const res = getMatchResultsFromContent(contents, reg);
if(res.length > 0) {
results = results.concat(res);
// only continue testing if the user wants to exhaustively check all regexes (to get more matches?)
if(!exhaustive) {
return results;
} }
} }
return m;
} }
return results;
}
const getMatchableContent = (a: SnoowrapActivity, testOn: string[]) => {
let contents: string[] = [];
if (asSubmission(a)) {
for (const l of testOn) {
switch (l) {
case 'title':
contents.push(a.title);
break;
case 'body':
if (a.is_self) {
contents.push(a.selftext);
}
break;
case 'url':
if (isExternalUrlSubmission(a)) {
contents.push(a.url);
}
break;
}
}
} else {
contents.push(a.body)
}
return contents;
}
interface RegexMatchComparisonOptions {
matchComparison: GenericComparison
activityMatchComparison?: GenericComparison
totalMatchComparison?: GenericComparison
}
interface ActivityMatchResults {
matches: string[]
matchesByTest: Record<string, string[]>
groups: Record<string, string[]>
} }
interface RegexConfig { interface RegexConfig {

View File

@@ -7,6 +7,7 @@ import {mergeArr, parseDuration, random} from "../util";
import { Logger } from "winston"; import { Logger } from "winston";
import {ErrorWithCause} from "pony-cause"; import {ErrorWithCause} from "pony-cause";
import dayjs, {Dayjs as DayjsObj} from "dayjs"; import dayjs, {Dayjs as DayjsObj} from "dayjs";
import { FixedSizeList } from 'fixed-size-list'
type Awaitable<T> = Promise<T> | T; type Awaitable<T> = Promise<T> | T;
@@ -14,10 +15,12 @@ interface RCBPollingOptions<T> extends SnooStormOptions {
subreddit: string, subreddit: string,
enforceContinuity?: boolean enforceContinuity?: boolean
logger: Logger logger: Logger
sort?: string
name?: string, name?: string,
processed?: Set<T[keyof T]> processed?: FixedSizeList<T[keyof T]>
label?: string label?: string
dateCutoff?: boolean dateCutoff?: boolean
maxHistory?: number
} }
interface RCBPollConfiguration<T> extends PollConfiguration<T>,RCBPollingOptions<T> { interface RCBPollConfiguration<T> extends PollConfiguration<T>,RCBPollingOptions<T> {
@@ -40,6 +43,9 @@ export class SPoll<T extends RedditContent<object>> extends Poll<T> {
name: string = 'Reddit Stream'; name: string = 'Reddit Stream';
logger: Logger; logger: Logger;
subreddit: string; subreddit: string;
// using a fixed sized "regular" array means slightly more memory usage vs. a Set when holding N items
// BUT now we can limit N items to something reasonable instead of having a crazy big Set with all items seen since stream was started
processedBuffer: FixedSizeList<T[keyof T]>;
constructor(options: RCBPollConfiguration<T>) { constructor(options: RCBPollConfiguration<T>) {
super(options); super(options);
@@ -54,6 +60,7 @@ export class SPoll<T extends RedditContent<object>> extends Poll<T> {
label = 'Polling', label = 'Polling',
processed, processed,
dateCutoff, dateCutoff,
maxHistory = 300,
} = options; } = options;
this.subreddit = subreddit; this.subreddit = subreddit;
this.name = name !== undefined ? name : this.name; this.name = name !== undefined ? name : this.name;
@@ -67,8 +74,10 @@ export class SPoll<T extends RedditContent<object>> extends Poll<T> {
// if we pass in processed on init the intention is to "continue" from where the previous stream left off // if we pass in processed on init the intention is to "continue" from where the previous stream left off
// WITHOUT new start behavior // WITHOUT new start behavior
if (processed !== undefined) { if (processed !== undefined) {
this.processed = processed; this.processedBuffer = processed;
this.newStart = false; this.newStart = false;
} else {
this.processedBuffer = new FixedSizeList<T[keyof T]>(maxHistory);
} }
clearInterval(this.interval); clearInterval(this.interval);
@@ -97,14 +106,14 @@ export class SPoll<T extends RedditContent<object>> extends Poll<T> {
} }
for (const item of batch) { for (const item of batch) {
const id = item[self.identifier]; const id = item[self.identifier];
if (self.processed.has(id)) { if (self.processedBuffer.data.some(x => x === id)) {
anyAlreadySeen = true; anyAlreadySeen = true;
continue; continue;
} }
// add new item to list and set as processed // add new item to list and set as processed
newItems.push(item); newItems.push(item);
self.processed.add(id); self.processedBuffer.add(id);
} }
page++; page++;
} }