diff --git a/src/Common/interfaces.ts b/src/Common/interfaces.ts index cb7c0aa..cb9d101 100644 --- a/src/Common/interfaces.ts +++ b/src/Common/interfaces.ts @@ -739,7 +739,7 @@ export interface RegExResult { match: string, groups: string[], index: number - named: NamedGroup | undefined + named: NamedGroup } export type StrongCache = { diff --git a/src/util.ts b/src/util.ts index f952f23..bc79c7e 100644 --- a/src/util.ts +++ b/src/util.ts @@ -859,62 +859,81 @@ export const dummyLogger = { info: (v: any) => null } -const GIST_REGEX = new RegExp(/.*gist\.github\.com\/.+\/(.+)/i) -const GH_BLOB_REGEX = new RegExp(/.*github\.com\/(.+)\/(.+)\/blob\/(.+)/i); -const REGEXR_REGEX = new RegExp(/^.*((regexr\.com)\/[\w\d]+).*$/i); -const REGEXR_PAGE_REGEX = new RegExp(/(.|[\n\r])+"expression":"(.+)","text"/g); +export const normalizeGistFileKey = (val: string) => val.replaceAll(/[^\w\d]/g, '').toLowerCase().trim(); +export const GIST_REGEX = new RegExp(/.*gist\.github\.com\/(?<user>.+)\/(?<gistId>[^#\/]+)(?:#file-(?<fileName>.+))?/i) +export const GIST_RAW_REGEX = new RegExp(/.*gist\.github\.com\/(?<user>.+)\/(?<gistId>[^#\/]+)\/raw\/.+/i) +export const GH_BLOB_REGEX = new RegExp(/.*github\.com\/(?<user>.+)\/(?<repo>.+)\/blob\/(?<path>.+)(?:#.+)?/i); +export const REGEXR_REGEX = new RegExp(/^.*((regexr\.com)\/[\w\d]+).*$/i); +export const REGEXR_PAGE_REGEX = new RegExp(/(.|[\n\r])+"expression":"(.+)","text"/g); export const fetchExternalResult = async (url: string, logger: (any) = dummyLogger): Promise<[string, Response]> => { let hadError = false; logger.debug(`Attempting to detect resolvable URL for ${url}`); - let match = url.match(GIST_REGEX); - if (match !== null) { - const gistApiUrl = `https://api.github.com/gists/${match[1]}`; - logger.debug(`Looks like a non-raw gist URL! 
Trying to resolve ${gistApiUrl}`); + let match = parseRegexSingleOrFail(GIST_RAW_REGEX, url); // check for raw gist url first and if found treat as normal URL + if(match === undefined) { + // if not raw then if its still a gist then we need to parse and use API + match = parseRegexSingleOrFail(GIST_REGEX, url); - try { - const response = await fetch(gistApiUrl); - if (!response.ok) { - logger.warn(`Response was not OK from Gist API (${response.statusText}) -- will return response from original URL instead`); - if (response.size > 0) { - logger.warn(await response.text()) - } - hadError = true; - } else { - const data = await response.json(); - // get first found file - const fileKeys = Object.keys(data.files); - if (fileKeys.length === 0) { - logger.error(`No files found in gist!`); + if (match !== undefined) { + const gistApiUrl = `https://api.github.com/gists/${match.named.gistId}`; + logger.debug(`Looks like a non-raw gist URL! Trying to resolve ${gistApiUrl} ${match.named.fileName !== undefined ? ` and find file ${match.named.fileName}` : ''}`); + + try { + const response = await fetch(gistApiUrl); + if (!response.ok) { + logger.warn(`Response was not OK from Gist API (${response.statusText}) -- will return response from original URL instead`); + if (response.size > 0) { + logger.warn(await response.text()) + } + hadError = true; } else { - if (fileKeys.length > 1) { - logger.warn(`More than one file found in gist! 
Using first found: ${fileKeys[0]}`); + const data = await response.json(); + // get first found file + const fileKeys = Object.keys(data.files); + if (fileKeys.length === 0) { + logger.error(`No files found in gist!`); } else { - logger.debug(`Using file ${fileKeys[0]}`); - } - const file = data.files[fileKeys[0]]; - if (file.truncated === false) { - return [file.content, response]; - } - const rawUrl = file.raw_url; - logger.debug(`File contents was truncated, retrieving full contents from ${rawUrl}`); - try { - const rawUrlResponse = await fetch(rawUrl); - return [await rawUrlResponse.text(), rawUrlResponse]; - } catch (err: any) { - logger.error('Gist Raw URL Response returned an error, will return response from original URL instead'); - logger.error(err); + let fileKey = fileKeys[0]; + if (fileKeys.length > 1) { + if(match.named.fileName !== undefined) { + //const normalizedFileName = normalizeGistFileKey(match.named.fileName.replace('/^file-/', '')); + const normalizedFileName = normalizeGistFileKey(match.named.fileName); + const matchingKey = fileKeys.find(x => normalizeGistFileKey(x) === normalizedFileName); + if(matchingKey === undefined) { + throw new SimpleError(`Found Gist ${match.named.gistId} but it did not contain a file named ${match.named.fileName}`); + } + fileKey = matchingKey; + } else { + logger.warn(`More than one file found in gist but URL did not specify a filename! 
Using first found: ${fileKey}`); + } + } else { + logger.debug(`Using file ${fileKey}`); + } + const file = data.files[fileKey]; + if (file.truncated === false) { + return [file.content, response]; + } + const rawUrl = file.raw_url; + logger.debug(`File contents was truncated, retrieving full contents from ${rawUrl}`); + try { + const rawUrlResponse = await fetch(rawUrl); + return [await rawUrlResponse.text(), rawUrlResponse]; + } catch (err: any) { + logger.error('Gist Raw URL Response returned an error, will return response from original URL instead'); + logger.error(err); + } } } + } catch (err: any) { + logger.error('Response returned an error, will return response from original URL instead'); + logger.error(err); } - } catch (err: any) { - logger.error('Response returned an error, will return response from original URL instead'); - logger.error(err); } } - match = url.match(GH_BLOB_REGEX); - if (match !== null) { - const rawUrl = `https://raw.githubusercontent.com/${match[1]}/${match[2]}/${match[3]}` + match = parseRegexSingleOrFail(GH_BLOB_REGEX, url) + + if (match !== undefined) { + const rawUrl = `https://raw.githubusercontent.com/${match.named.user}/${match.named.repo}/${match.named.path}` logger.debug(`Looks like a single file github URL! Resolving to ${rawUrl}`); try { const response = await fetch(rawUrl); @@ -933,8 +952,8 @@ export const fetchExternalResult = async (url: string, logger: (any) = dummyLogg } } - match = url.match(REGEXR_REGEX); - if(match !== null) { + match = parseRegexSingleOrFail(REGEXR_REGEX, url); + if(match !== undefined) { logger.debug(`Looks like a Regexr URL! 
Trying to get expression from page HTML`); try { const response = await fetch(url); @@ -1442,7 +1461,7 @@ export const parseRegex = (reg: RegExp, val: string): RegExResult[] | undefined match: x[0], index: x.index, groups: x.slice(1), - named: x.groups, + named: x.groups || {}, } as RegExResult; }); } @@ -1455,10 +1474,21 @@ export const parseRegex = (reg: RegExp, val: string): RegExResult[] | undefined match: m[0], index: m.index as number, groups: m.slice(1), - named: m.groups + named: m.groups || {} }]; } +export const parseRegexSingleOrFail = (reg: RegExp, val: string): RegExResult | undefined => { + const results = parseRegex(reg, val); + if(results !== undefined) { + if(results.length > 1) { + throw new SimpleError(`Expected Regex to match once but got ${results.length} results. Either Regex must NOT be global (using 'g' flag) or parsed value must only match regex once. Given: ${val} || Regex: ${reg.toString()}`); + } + return results[0]; + } + return undefined; +} + export const testMaybeStringRegex = (test: string, subject: string, defaultFlags: string = 'i'): [boolean, string] => { let reg = parseStringToRegex(test, defaultFlags); if (reg === undefined) { diff --git a/tests/utils.test.ts b/tests/utils.test.ts index ed36f2b..03923f3 100644 --- a/tests/utils.test.ts +++ b/tests/utils.test.ts @@ -1,10 +1,11 @@ import {describe, it} from 'mocha'; import {assert} from 'chai'; import { - COMMENT_URL_ID, + COMMENT_URL_ID, GH_BLOB_REGEX, GIST_RAW_REGEX, + GIST_REGEX, parseDurationFromString, parseLinkIdentifier, - parseRedditEntity, removeUndefinedKeys, SUBMISSION_URL_ID + parseRedditEntity, parseRegexSingleOrFail, REGEXR_REGEX, removeUndefinedKeys, SUBMISSION_URL_ID } from "../src/util"; import dayjs from "dayjs"; import dduration, {Duration, DurationUnitType} from 'dayjs/plugin/duration.js'; @@ -13,6 +14,7 @@ import { parseGenericValueComparison, parseGenericValueOrPercentComparison, parseReportComparison } from "../src/Common/Infrastructure/Comparisons"; 
+import {RegExResult} from "../src/Common/interfaces"; dayjs.extend(dduration); @@ -301,5 +303,57 @@ describe('Link Recognition', function () { // it('should recognize submission id from reddit shortlink') // https://redd.it/92dd8 + }); + + describe('External URL Parsing', function() { + + it('should recognize and parse raw gist URLs', function() { + const res = parseRegexSingleOrFail(GIST_RAW_REGEX, 'https://gist.github.com/FoxxMD/2b035429fbf326a00d9a6ca2a38011d9/raw/97076d52114eb17a8754384d95087e8a0a74cf88/file-with-symbols.test.yaml'); + assert.exists(res); + const rese = res as RegExResult; + assert.equal(rese.named.user, 'FoxxMD'); + assert.equal(rese.named.gistId, '2b035429fbf326a00d9a6ca2a38011d9'); + }); + + it('should not parse non-raw gist URLs with raw regex', function() { + for(const url of [ + 'https://gist.github.com/FoxxMD/2b035429fbf326a00d9a6ca2a38011d9', + 'https://gist.github.com/FoxxMD/2b035429fbf326a00d9a6ca2a38011d9#file-file-with-symbols-test-yaml' + ]) { + const res = parseRegexSingleOrFail(GIST_RAW_REGEX, url); + assert.notExists(res, `Should not have parsed ${url} as RAW gist`); + } + }); + + it('should recognize and parse gist URLs', function() { + const res = parseRegexSingleOrFail(GIST_REGEX, 'https://gist.github.com/FoxxMD/2b035429fbf326a00d9a6ca2a38011d9'); + assert.exists(res); + const rese = res as RegExResult; + assert.equal(rese.named.user, 'FoxxMD'); + assert.equal(rese.named.gistId, '2b035429fbf326a00d9a6ca2a38011d9'); + }); + + it('should recognize and parse gist URLs with filename hashes', function() { + const res = parseRegexSingleOrFail(GIST_REGEX, 'https://gist.github.com/FoxxMD/2b035429fbf326a00d9a6ca2a38011d9#file-file-with-symbols-test-yaml'); + assert.exists(res); + const rese = res as RegExResult; + assert.equal(rese.named.user, 'FoxxMD'); + assert.equal(rese.named.gistId, '2b035429fbf326a00d9a6ca2a38011d9'); + assert.equal(rese.named.fileName, 'file-with-symbols-test-yaml'); + }); + + it('should recognize and parse 
github blob URLs', function() { + const res = parseRegexSingleOrFail(GH_BLOB_REGEX, 'https://github.com/FoxxMD/context-mod/blob/master/src/util.ts'); + assert.exists(res); + const rese = res as RegExResult; + assert.equal(rese.named.user, 'FoxxMD'); + assert.equal(rese.named.repo, 'context-mod'); + assert.equal(rese.named.path, 'master/src/util.ts'); + }); + + it('should recognize regexr URLs', function() { + const res = parseRegexSingleOrFail(REGEXR_REGEX, 'https://regexr.com/6pomb'); + assert.exists(res); + }); }) })