Compare commits

...

1 Commits

Author SHA1 Message Date
FoxxMD
eee2a8e856 feat: Support multiple expressions for regex property 2022-11-22 16:03:50 -05:00
2 changed files with 149 additions and 53 deletions

View File

@@ -748,6 +748,10 @@ export interface RegExResult {
named: NamedGroup named: NamedGroup
} }
export interface RegExResultWithTest extends RegExResult {
test: RegExp
}
export type StrongCache = { export type StrongCache = {
authorTTL: number | boolean, authorTTL: number | boolean,
userNotesTTL: number | boolean, userNotesTTL: number | boolean,

View File

@@ -7,6 +7,7 @@ import {
PASS, triggeredIndicator, windowConfigToWindowCriteria PASS, triggeredIndicator, windowConfigToWindowCriteria
} from "../util"; } from "../util";
import { import {
RegExResultWithTest,
RuleResult, RuleResult,
} from "../Common/interfaces"; } from "../Common/interfaces";
import dayjs from 'dayjs'; import dayjs from 'dayjs';
@@ -14,10 +15,11 @@ import {SimpleError} from "../Utils/Errors";
import {JoinOperands} from "../Common/Infrastructure/Atomic"; import {JoinOperands} from "../Common/Infrastructure/Atomic";
import {ActivityWindowConfig} from "../Common/Infrastructure/ActivityWindow"; import {ActivityWindowConfig} from "../Common/Infrastructure/ActivityWindow";
import { import {
comparisonTextOp, comparisonTextOp, GenericComparison,
parseGenericValueComparison, parseGenericValueComparison,
parseGenericValueOrPercentComparison parseGenericValueOrPercentComparison
} from "../Common/Infrastructure/Comparisons"; } from "../Common/Infrastructure/Comparisons";
import {SnoowrapActivity} from "../Common/Infrastructure/Reddit";
export interface RegexCriteria { export interface RegexCriteria {
/** /**
@@ -27,13 +29,23 @@ export interface RegexCriteria {
* */ * */
name?: string name?: string
/** /**
* A valid Regular Expression to test content against * A valid Regular Expression, or list of expressions, to test content against
* *
* If no flags are specified then the **global** flag is used by default * If no flags are specified then the **global** flag is used by default
* *
* @examples ["/reddit|FoxxMD/ig"] * @examples ["/reddit|FoxxMD/ig"]
* */ * */
regex: string, regex: string | string[],
/**
* Determines if ALL regexes listed are run or if regexes are only run until one is matched.
*
* * `true` => all regexes are always run
* * `false` => regexes are run until one matches
*
* @default false
* */
exhaustive?: boolean
/** /**
* Which content from an Activity to test the regex against * Which content from an Activity to test the regex against
@@ -157,6 +169,7 @@ export class RegexRule extends Rule {
const { const {
name = (index + 1), name = (index + 1),
regex, regex,
exhaustive = false,
testOn: testOnVals = ['title', 'body'], testOn: testOnVals = ['title', 'body'],
lookAt = 'all', lookAt = 'all',
matchThreshold = '> 0', matchThreshold = '> 0',
@@ -174,13 +187,7 @@ export class RegexRule extends Rule {
return acc.concat(curr); return acc.concat(curr);
}, []); }, []);
// check regex const regexTests: RegExp[] = await this.convertToRegexArray(name, regex);
const regexContent = await this.resources.getContent(regex);
const reg = parseStringToRegex(regexContent, 'g');
if(reg === undefined) {
throw new SimpleError(`Value given for regex on Criteria ${name} was not valid: ${regex}`);
}
// ok cool its a valid regex
const matchComparison = parseGenericValueComparison(matchThreshold); const matchComparison = parseGenericValueComparison(matchThreshold);
const activityMatchComparison = activityMatchThreshold === null ? undefined : parseGenericValueOrPercentComparison(activityMatchThreshold); const activityMatchComparison = activityMatchThreshold === null ? undefined : parseGenericValueOrPercentComparison(activityMatchThreshold);
@@ -198,12 +205,13 @@ export class RegexRule extends Rule {
// first lets see if the activity we are checking satisfies thresholds // first lets see if the activity we are checking satisfies thresholds
// since we may be able to avoid api calls to get history // since we may be able to avoid api calls to get history
let actMatches = this.getMatchesFromActivity(item, testOn, reg); let actMatches = getMatchesFromActivity(item, testOn, regexTests, exhaustive);
matches = matches.concat(actMatches).slice(0, 100); const actMatchSummary = regexResultsSummary(actMatches);
matchCount += actMatches.length; matches = matches.concat(actMatchSummary.matches).slice(0, 100);
matchCount += actMatchSummary.matches.length;
activitiesTested++; activitiesTested++;
const singleMatched = comparisonTextOp(actMatches.length, matchComparison.operator, matchComparison.value); const singleMatched = comparisonTextOp(actMatchSummary.matches.length, matchComparison.operator, matchComparison.value);
if (singleMatched) { if (singleMatched) {
activitiesMatchedCount++; activitiesMatchedCount++;
} }
@@ -233,7 +241,7 @@ export class RegexRule extends Rule {
} }
history = await this.resources.getAuthorActivities(item.author, strongWindow); history = await this.resources.getAuthorActivities(item.author, strongWindow);
// remove current activity it exists in history so we don't count it twice // remove current activity if it exists in history so we don't count it twice
history = history.filter(x => x.id !== item.id); history = history.filter(x => x.id !== item.id);
const historyLength = history.length; const historyLength = history.length;
@@ -252,10 +260,12 @@ export class RegexRule extends Rule {
for (const h of history) { for (const h of history) {
activitiesTested++; activitiesTested++;
const aMatches = this.getMatchesFromActivity(h, testOn, reg); const aMatches = getMatchesFromActivity(h, testOn, regexTests, exhaustive);
matches = matches.concat(aMatches).slice(0, 100); actMatches = actMatches.concat(aMatches);
matchCount += aMatches.length; const actHistoryMatchSummary = regexResultsSummary(aMatches);
const matched = comparisonTextOp(aMatches.length, matchComparison.operator, matchComparison.value); matches = matches.concat(actHistoryMatchSummary.matches).slice(0, 100);
matchCount += actHistoryMatchSummary.matches.length;
const matched = comparisonTextOp(actHistoryMatchSummary.matches.length, matchComparison.operator, matchComparison.value);
if (matched) { if (matched) {
activitiesMatchedCount++; activitiesMatchedCount++;
} }
@@ -282,10 +292,19 @@ export class RegexRule extends Rule {
humanWindow = '1 Item'; humanWindow = '1 Item';
} }
// to provide at least one useful regex for this criteria
// use the first regex found by default
let relevantRegex: string = regexTests[0].toString();
// but if more than one regex was listed AND we did have matches
// then use the first regex that actually got a match
if(regexTests.length > 0 && actMatches.length > 0) {
relevantRegex = actMatches[0].test.toString();
}
const critResults = { const critResults = {
criteria: { criteria: {
name, name,
regex: regex !== regexContent ? `${regex} from ${regexContent}` : regex, regex: relevantRegex,
testOn, testOn,
matchThreshold, matchThreshold,
activityMatchThreshold, activityMatchThreshold,
@@ -352,44 +371,117 @@ export class RegexRule extends Rule {
return Promise.resolve([criteriaMet, this.getResult(criteriaMet, {result, data: {results: criteriaResults, matchSample }})]); return Promise.resolve([criteriaMet, this.getResult(criteriaMet, {result, data: {results: criteriaResults, matchSample }})]);
} }
protected getMatchesFromActivity(a: (Submission | Comment), testOn: string[], reg: RegExp): string[] { protected async convertToRegexArray(name: string | number, value: string | string[]): Promise<RegExp[]> {
let m: string[] = []; const regexTests: RegExp[] = [];
// determine what content we are testing const regexStringVals = typeof value === 'string' ? [value] : value;
let contents: string[] = []; for(const r of regexStringVals) {
if (asSubmission(a)) { // check regex
for (const l of testOn) { const regexContent = await this.resources.getContent(r);
switch (l) { const reg = parseStringToRegex(regexContent, 'ig');
case 'title': if (reg === undefined) {
contents.push(a.title); throw new SimpleError(`Value given for regex on Criteria ${name} was not valid: ${value}`);
break;
case 'body':
if (a.is_self) {
contents.push(a.selftext);
}
break;
case 'url':
if (isExternalUrlSubmission(a)) {
contents.push(a.url);
}
break;
}
} }
} else { // ok cool its a valid regex
contents.push(a.body) regexTests.push(reg);
} }
return regexTests;
for (const c of contents) {
const results = parseRegex(reg, c);
if(results !== undefined) {
for(const r of results) {
m.push(r.match);
}
}
}
return m;
} }
} }
/**
 * Runs a single regex against every content string and gathers the results.
 *
 * Each result produced by `parseRegex` is copied and tagged with the regex that
 * produced it (as `test`) so callers can later tell which expression matched.
 *
 * @param contents Strings to test
 * @param reg Regular expression to test each string with
 * @returns All results, in content order; empty when nothing matched
 */
export const getMatchResultsFromContent = (contents: string[], reg: RegExp): RegExResultWithTest[] => {
    const tagged: RegExResultWithTest[] = [];
    contents.forEach((text) => {
        const parsed = parseRegex(reg, text);
        // parseRegex signals "no results" with undefined
        if (parsed === undefined) {
            return;
        }
        for (const hit of parsed) {
            tagged.push({...hit, test: reg});
        }
    });
    return tagged;
}
/**
 * Collapses a flat list of regex results into summary buckets.
 *
 * * `matches` => every matched string, in the order the results were given
 * * `matchesByTest` => matched strings keyed by the string form (`toString()`) of the regex that produced them
 * * `groups` => named capture group values keyed by group name
 *
 * Buckets are always created and looked up as OWN properties. A plain `=== undefined` probe is not
 * enough here because keys come from user-written regexes: a named group called `constructor`,
 * `toString` or `__proto__` would resolve to something inherited from `Object.prototype`, skip
 * bucket creation and then fail on `.push`.
 *
 * @param results Regex results, each tagged with the regex (`test`) that produced it
 * @returns Summary of all matches found in `results`
 */
export const regexResultsSummary = (results: RegExResultWithTest[]) => {
    const matchResults: ActivityMatchResults = {
        matches: [],
        matchesByTest: {},
        groups: {}
    }

    // Appends value to the list stored under key, creating the list as an own data property when missing.
    // defineProperty (rather than assignment) is used so that a `__proto__` key becomes a regular entry.
    const append = (bucket: Record<string, string[]>, key: string, value: string): void => {
        if (!Object.prototype.hasOwnProperty.call(bucket, key)) {
            Object.defineProperty(bucket, key, {
                value: [],
                writable: true,
                enumerable: true,
                configurable: true
            });
        }
        bucket[key].push(value);
    };

    for (const r of results) {
        append(matchResults.matchesByTest, r.test.toString(), r.match);
        matchResults.matches.push(r.match);

        if (r.named !== undefined) {
            for (const [key, val] of Object.entries(r.named)) {
                append(matchResults.groups, key, val);
            }
        }
    }
    return matchResults;
}
/**
 * Tests the matchable content of an Activity against a list of regexes.
 *
 * Regexes are tried in the order given. When `exhaustive` is `false` testing stops
 * after the first regex that produces at least one match; when `true` every regex
 * is run and all of their results are returned together.
 *
 * @param a Activity to pull content from
 * @param testOn Which parts of the Activity to test (passed through to getMatchableContent)
 * @param regexes Regular expressions to try, in order
 * @param exhaustive Whether to keep testing after a regex has matched
 * @returns Results from every regex that was run and matched; empty if none matched
 */
export const getMatchesFromActivity = (a: (Submission | Comment), testOn: string[], regexes: RegExp[], exhaustive: boolean): RegExResultWithTest[] => {
    // content is resolved once and shared by every regex
    const contents: string[] = getMatchableContent(a, testOn);
    const collected: RegExResultWithTest[] = [];

    for (const reg of regexes) {
        const found = getMatchResultsFromContent(contents, reg);
        if (found.length === 0) {
            continue;
        }
        for (const f of found) {
            collected.push(f);
        }
        // first regex with matches is enough unless the user asked for all of them
        if (!exhaustive) {
            break;
        }
    }
    return collected;
}
/**
 * Collects the strings from an Activity that regexes should be tested against.
 *
 * For a Submission each entry of `testOn` is handled in order:
 *
 * * `title` => the submission title
 * * `body` => the selftext, only if the submission is a self post
 * * `url` => the url, only if the submission links to an external url
 *
 * Unrecognized entries are ignored. For anything that is not a Submission the
 * `body` is always used and `testOn` is not consulted.
 */
const getMatchableContent = (a: SnoowrapActivity, testOn: string[]) => {
    const contents: string[] = [];

    if (!asSubmission(a)) {
        contents.push(a.body)
        return contents;
    }

    for (const target of testOn) {
        if (target === 'title') {
            contents.push(a.title);
        } else if (target === 'body') {
            if (a.is_self) {
                contents.push(a.selftext);
            }
        } else if (target === 'url') {
            if (isExternalUrlSubmission(a)) {
                contents.push(a.url);
            }
        }
    }
    return contents;
}
/**
 * Bundle of parsed comparisons for evaluating regex match counts.
 *
 * NOTE(review): not referenced in the visible part of this file -- field meanings below
 * are inferred from the similarly named thresholds on the criteria; confirm against usage.
 * */
interface RegexMatchComparisonOptions {
/** Required comparison -- presumably the parsed `matchThreshold` (TODO confirm) */
matchComparison: GenericComparison
/** Optional comparison -- presumably the parsed `activityMatchThreshold` (TODO confirm) */
activityMatchComparison?: GenericComparison
/** Optional comparison -- presumably applied to the total match count (TODO confirm) */
totalMatchComparison?: GenericComparison
}
/**
 * Summary of regex results as built by `regexResultsSummary`
 * */
interface ActivityMatchResults {
/** Every matched string, in the order the results were processed */
matches: string[]
/** Matched strings keyed by the string form (`toString()`) of the regex that produced them */
matchesByTest: Record<string, string[]>
/** Named capture group values keyed by group name */
groups: Record<string, string[]>
}
interface RegexConfig { interface RegexConfig {
/** /**
* A list of Regular Expressions and conditions under which tested Activity(ies) are matched * A list of Regular Expressions and conditions under which tested Activity(ies) are matched