From 84505ac4a0d9729c768327146afc296187b5c4cb Mon Sep 17 00:00:00 2001 From: kata Date: Sat, 25 Feb 2023 09:03:16 +0800 Subject: [PATCH] integrate eslint --- .eslintignore | 1 + .eslintrc.json | 40 +++++++ .github/workflows/test.yaml | 2 + compiler/cli.js | 24 ++-- compiler/gen.js | 105 +++++++++--------- compiler/lexical.js | 9 +- package.json | 4 +- test/regex-compiler.test.js | 214 ++++++++++++++++++++++++++++++++++++ 8 files changed, 328 insertions(+), 71 deletions(-) create mode 100644 .eslintignore create mode 100644 .eslintrc.json create mode 100644 test/regex-compiler.test.js diff --git a/.eslintignore b/.eslintignore new file mode 100644 index 0000000..c795b05 --- /dev/null +++ b/.eslintignore @@ -0,0 +1 @@ +build \ No newline at end of file diff --git a/.eslintrc.json b/.eslintrc.json new file mode 100644 index 0000000..0d5da31 --- /dev/null +++ b/.eslintrc.json @@ -0,0 +1,40 @@ +{ + "env": { + "node": true + }, + "extends": [ + "eslint:recommended" + ], + "overrides": [ + ], + "parserOptions": { + "ecmaVersion": 2020, + "sourceType": "script" + }, + "globals": { + "process": "readonly", + "__dirname": "readonly", + "before": "readonly", + "it": "readonly", + "describe": "readonly", + "BigInt": "readonly" + }, + "rules": { + "indent": [ + "error", + 4 + ], + "linebreak-style": [ + "error", + "unix" + ], + "quotes": [ + "error", + "single" + ], + "semi": [ + "error", + "always" + ] + } +} diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 31bda87..dfd4b4d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -23,5 +23,7 @@ jobs: run: yarn # - name: Create build folder # run: mkdir build + - name: Run Linters + run: yarn lint - name: Run tests run: yarn test diff --git a/compiler/cli.js b/compiler/cli.js index 8723b99..834d0e1 100644 --- a/compiler/cli.js +++ b/compiler/cli.js @@ -1,29 +1,27 @@ -const generator = require('../compiler/gen') +const generator = require('../compiler/gen'); -const program = 
require("commander"); +const program = require('commander'); const unescapeJs = require('unescape-js'); -program - .version("0.0.1") - .description("A sample CLI program") +program.version('0.0.1') + .description('A sample CLI program'); -program - .command("compile ") - .description("Compile a regular expression into circom circuits") +program.command('compile ') + .description('Compile a regular expression into circom circuits') .action((regex, circuit_name) => { - regex = unescapeJs(regex) - generator.generateCircuit(regex, undefined, circuit_name) + regex = unescapeJs(regex); + generator.generateCircuit(regex, undefined, circuit_name); }); -program.on("command:*", () => { +program.on('command:*', () => { console.error( - "Error: Invalid command. See --help for a list of available commands." + 'Error: Invalid command. See --help for a list of available commands.' ); process.exit(1); }); program.parse(process.argv); -if (!program.args.length) { +if (!program.args.length) { program.help(); } diff --git a/compiler/gen.js b/compiler/gen.js index c2f9bba..87f1544 100644 --- a/compiler/gen.js +++ b/compiler/gen.js @@ -1,20 +1,21 @@ -const fs = require("fs").promises; -const path = require("path") +/* eslint-disable no-undef */ +const fs = require('fs').promises; +const path = require('path'); const regexpTree = require('regexp-tree'); -const assert = require("assert") -const lexical = require('./lexical') +const assert = require('assert'); +const lexical = require('./lexical'); async function generateCircuit(regex, circuitLibPath, circuitName) { const ast = regexpTree.parse(`/${regex}/`); regexpTree.traverse(ast, { '*': function({node}) { - if (node.type === "CharacterClass") { - throw new Error('CharacterClass not supported') + if (node.type === 'CharacterClass') { + throw new Error('CharacterClass not supported'); } }, }); - const graph_json = lexical.compile(regex) + const graph_json = lexical.compile(regex); const N = graph_json.length; // Outgoing nodes @@ -28,14 
+29,14 @@ async function generateCircuit(regex, circuitLibPath, circuitName) { const accept_nodes = new Set(); for (let i = 0; i < N; i++) { - for (let k in graph_json[i]["edges"]) { + for (let k in graph_json[i]['edges']) { //assert len(k) == 1 //assert ord(k) < 128 - const v = graph_json[i]["edges"][k]; + const v = graph_json[i]['edges'][k]; graph[i][k] = v; rev_graph[v].push([k, i]); } - if (graph_json[i]["type"] === "accept") { + if (graph_json[i]['type'] === 'accept') { accept_nodes.add(i); } } @@ -48,7 +49,7 @@ async function generateCircuit(regex, circuitLibPath, circuitName) { let multi_or_i = 0; let lines = []; - lines.push("for (var i = 0; i < num_bytes; i++) {"); + lines.push('for (var i = 0; i < num_bytes; i++) {'); assert.strictEqual(accept_nodes.has(0), false); @@ -58,13 +59,13 @@ async function generateCircuit(regex, circuitLibPath, circuitName) { let vals = new Set(JSON.parse(k)); const eq_outputs = []; - const uppercase = new Set("ABCDEFGHIJKLMNOPQRSTUVWXYZ".split("")); - const lowercase = new Set("abcdefghijklmnopqrstuvwxyz".split("")); - const digits = new Set("0123456789".split("")); + const uppercase = new Set('ABCDEFGHIJKLMNOPQRSTUVWXYZ'.split('')); + const lowercase = new Set('abcdefghijklmnopqrstuvwxyz'.split('')); + const digits = new Set('0123456789'.split('')); if (new Set([...uppercase].filter((x) => vals.has(x))).size === uppercase.size) { vals = new Set([...vals].filter((x) => !uppercase.has(x))); - lines.push(`\t//UPPERCASE`); + lines.push('\t//UPPERCASE'); lines.push(`\tlt[${lt_i}][i] = LessThan(8);`); lines.push(`\tlt[${lt_i}][i].in[0] <== 64;`); lines.push(`\tlt[${lt_i}][i].in[1] <== in[i];`); @@ -77,13 +78,13 @@ async function generateCircuit(regex, circuitLibPath, circuitName) { lines.push(`\tand[${and_i}][i].a <== lt[${lt_i}][i].out;`); lines.push(`\tand[${and_i}][i].b <== lt[${lt_i + 1}][i].out;`); - eq_outputs.push(["and", and_i]); + eq_outputs.push(['and', and_i]); lt_i += 2; and_i += 1; } if (new 
Set([...lowercase].filter((x) => vals.has(x))).size === lowercase.size) { vals = new Set([...vals].filter((x) => !lowercase.has(x))); - lines.push(`\t//lowercase`); + lines.push('\t//lowercase'); lines.push(`\tlt[${lt_i}][i] = LessThan(8);`); lines.push(`\tlt[${lt_i}][i].in[0] <== 96;`); lines.push(`\tlt[${lt_i}][i].in[1] <== in[i];`); @@ -96,13 +97,13 @@ async function generateCircuit(regex, circuitLibPath, circuitName) { lines.push(`\tand[${and_i}][i].a <== lt[${lt_i}][i].out;`); lines.push(`\tand[${and_i}][i].b <== lt[${lt_i + 1}][i].out;`); - eq_outputs.push(["and", and_i]); + eq_outputs.push(['and', and_i]); lt_i += 2; and_i += 1; } if (new Set([...digits].filter((x) => vals.has(x))).size === digits.size) { vals = new Set([...vals].filter((x) => !digits.has(x))); - lines.push(`\t//digits`); + lines.push('\t//digits'); lines.push(`\tlt[${lt_i}][i] = LessThan(8);`); lines.push(`\tlt[${lt_i}][i].in[0] <== 47;`); lines.push(`\tlt[${lt_i}][i].in[1] <== in[i];`); @@ -115,7 +116,7 @@ async function generateCircuit(regex, circuitLibPath, circuitName) { lines.push(`\tand[${and_i}][i].a <== lt[${lt_i}][i].out;`); lines.push(`\tand[${and_i}][i].b <== lt[${lt_i + 1}][i].out;`); - eq_outputs.push(["and", and_i]); + eq_outputs.push(['and', and_i]); lt_i += 2; and_i += 1; } @@ -125,7 +126,7 @@ async function generateCircuit(regex, circuitLibPath, circuitName) { lines.push(`\teq[${eq_i}][i] = IsEqual();`); lines.push(`\teq[${eq_i}][i].in[0] <== in[i];`); lines.push(`\teq[${eq_i}][i].in[1] <== ${c.charCodeAt(0)};`); - eq_outputs.push(["eq", eq_i]); + eq_outputs.push(['eq', eq_i]); eq_i += 1; } @@ -158,14 +159,14 @@ async function generateCircuit(regex, circuitLibPath, circuitName) { } } - lines.push("}"); + lines.push('}'); - lines.push(`signal final_state_sum[num_bytes+1];`); + lines.push('signal final_state_sum[num_bytes+1];'); lines.push(`final_state_sum[0] <== states[0][${N-1}];`); - lines.push(`for (var i = 1; i <= num_bytes; i++) {`); + lines.push('for (var i = 1; i <= 
num_bytes; i++) {'); lines.push(`\tfinal_state_sum[i] <== final_state_sum[i-1] + states[i][${N-1}];`); - lines.push(`}`); - lines.push(`entire_count <== final_state_sum[num_bytes];`); + lines.push('}'); + lines.push('entire_count <== final_state_sum[num_bytes];'); let declarations = []; @@ -182,58 +183,58 @@ async function generateCircuit(regex, circuitLibPath, circuitName) { declarations.push(`component multi_or[${multi_or_i}][num_bytes];`); } declarations.push(`signal states[num_bytes+1][${N}];`); - declarations.push(""); + declarations.push(''); let init_code = []; - init_code.push("for (var i = 0; i < num_bytes; i++) {"); - init_code.push("\tstates[i][0] <== 1;"); - init_code.push("}"); + init_code.push('for (var i = 0; i < num_bytes; i++) {'); + init_code.push('\tstates[i][0] <== 1;'); + init_code.push('}'); init_code.push(`for (var i = 1; i < ${N}; i++) {`); - init_code.push("\tstates[0][i] <== 0;"); - init_code.push("}"); + init_code.push('\tstates[0][i] <== 0;'); + init_code.push('}'); - init_code.push(""); + init_code.push(''); // construct the match group indexes const node_edges = graph_json.map( node => Object.keys(node.edges).map(key => { - return {[key]: node.edges[key]} + return {[key]: node.edges[key]}; }) - ) - const node_edges_flat = node_edges.flat() + ); + const node_edges_flat = node_edges.flat(); - const node_edges_set = new Set() + const node_edges_set = new Set(); node_edges_flat.forEach(node => { if (JSON.parse(Object.keys(node)[0]).length > 1) { - node_edges_set.add(Object.values(node)[0]) + node_edges_set.add(Object.values(node)[0]); } - }) - const match_group_indexes = Array.from(node_edges_set).sort((a, b) => a - b) + }); + const match_group_indexes = Array.from(node_edges_set).sort((a, b) => a - b); init_code.push(`var match_group_indexes[${match_group_indexes.length}] = [${match_group_indexes.join(', ')}];`); const reveal_code = []; - reveal_code.push("signal output reveal[num_bytes];"); - reveal_code.push("for (var i = 0; i < 
num_bytes; i++) {"); - reveal_code.push(`\treveal[i] <== in[i] * states[i+1][match_group_indexes[group_idx]];`); - reveal_code.push("}"); - reveal_code.push(""); + reveal_code.push('signal output reveal[num_bytes];'); + reveal_code.push('for (var i = 0; i < num_bytes; i++) {'); + reveal_code.push('\treveal[i] <== in[i] * states[i+1][match_group_indexes[group_idx]];'); + reveal_code.push('}'); + reveal_code.push(''); lines = [...declarations, ...init_code, ...lines, ...reveal_code]; try { - let tpl = await (await fs.readFile(`${__dirname}/tpl.circom`)).toString() - tpl = tpl.replace('TEMPLATE_NAME_PLACEHOLDER', circuitName || 'Regex') - tpl = tpl.replace('COMPILED_CONTENT_PLACEHOLDER', lines.join('\n\t')) - tpl = tpl.replace(/CIRCUIT_FOLDER/g, circuitLibPath || `../circuits`) - tpl = tpl.replace(/\t/g, ' '.repeat(4)) + let tpl = await (await fs.readFile(`${__dirname}/tpl.circom`)).toString(); + tpl = tpl.replace('TEMPLATE_NAME_PLACEHOLDER', circuitName || 'Regex'); + tpl = tpl.replace('COMPILED_CONTENT_PLACEHOLDER', lines.join('\n\t')); + tpl = tpl.replace(/CIRCUIT_FOLDER/g, circuitLibPath || '../circuits'); + tpl = tpl.replace(/\t/g, ' '.repeat(4)); const outputPath = `${__dirname}/../build/${ circuitName || 'compiled'}.circom`; await fs.writeFile(outputPath, tpl); process.env.VERBOSE && console.log(`Circuit compiled to ${path.normalize(outputPath)}`); } catch (error) { - console.log(error) + console.log(error); } } @@ -241,4 +242,4 @@ async function generateCircuit(regex, circuitLibPath, circuitName) { module.exports = { generateCircuit, ...lexical -} +}; diff --git a/compiler/lexical.js b/compiler/lexical.js index 3194047..6cdbf46 100644 --- a/compiler/lexical.js +++ b/compiler/lexical.js @@ -1,5 +1,5 @@ +/* eslint-disable no-prototype-builtins */ /*jslint browser: true*/ -/*global require, exports*/ /** * Try parsing simple regular expression to syntax tree. 
@@ -141,7 +141,7 @@ function parseRegex(text) { let i = 0; while (i < text.length) { if (text[i] == '\\') { - new_text.push([text[i+1]]) + new_text.push([text[i+1]]); i += 2; } else { new_text.push(text[i]); @@ -550,7 +550,6 @@ function compile(regex) { let dfa = minDfa(nfaToDfa(nfa)); var i, - j, states = {}, nodes = [], stack = [dfa], @@ -589,7 +588,7 @@ function compile(regex) { graph[nodes[i].nature-1] = curr; } - return graph + return graph; } module.exports = { @@ -599,4 +598,4 @@ module.exports = { word_char, catch_all, catch_all_without_semicolon, -} \ No newline at end of file +}; \ No newline at end of file diff --git a/package.json b/package.json index ee3470d..ef1d1e8 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,8 @@ "test": "test" }, "scripts": { - "test": "NODE_OPTIONS=--max_old_space_size=56000 mocha --timeout 600000 -r ts-node/register 'test/**/*.ts'", + "test": "NODE_OPTIONS=--max_old_space_size=56000 mocha --timeout 600000 'test/**/*.js'", + "lint": "eslint ./", "compile": "VERBOSE=1 node compiler/cli.js compile" }, "repository": { @@ -30,6 +31,7 @@ "dependencies": { "circomlib": "^2.0.2", "commander": "^10.0.0", + "eslint": "^8.34.0", "regexp-tree": "^0.1.24", "unescape-js": "^1.1.4" }, diff --git a/test/regex-compiler.test.js b/test/regex-compiler.test.js new file mode 100644 index 0000000..173d902 --- /dev/null +++ b/test/regex-compiler.test.js @@ -0,0 +1,214 @@ +const fs = require('fs'); +const {expect} = require('chai'); +const path = require('path'); +const circom_tester = require('circom_tester'); +const generator = require('../compiler/gen'); +const wasm_tester = circom_tester.wasm; + +describe('regex compiler tests', function () { + [ + [ + ['1=(a|b) (2=(b|c)+ )+d', 0], + [ + [ + '1 entire match and 1st sub-group match', + convertMsg('1=a 2=b 2=bc 2=c d'), + 0, + (signals) => { + expect(signals.main.entire_count).to.equal(1n); + expect(signals.main.group_match_count).to.equal(1n); + expect(signals.main.start_idx).to.equal(2n); 
+ const expected_reveal = encodeString('a'); + assert_reveal(signals, expected_reveal); + } + ], + ] + ], + [ + ['1=(a|b) (2=(b|c)+ )+d', 1], + [ + [ + '1 entire match and 1st sub-group match', + convertMsg('1=a 2=b 2=bc 2=c d'), + 0, + (signals) => { + expect(signals.main.entire_count).to.equal(1n); + expect(signals.main.group_match_count).to.equal(3n); + expect(signals.main.start_idx).to.equal(6n); + const expected_reveal = encodeString('b'); + assert_reveal(signals, expected_reveal); + } + ], + [ + '1 entire match and 2nd sub-group match', + convertMsg('1=a 2=b 2=bc 2=c d'), + 1, + (signals) => { + expect(signals.main.entire_count).to.equal(1n); + expect(signals.main.group_match_count).to.equal(3n); + expect(signals.main.start_idx).to.equal(10n); + const expected_reveal = encodeString('bc'); + assert_reveal(signals, expected_reveal); + } + ], + [ + '1 entire match and 3rd sub-group match', + convertMsg('1=a 2=b 2=bc 2=c d'), + 2, + (signals) => { + expect(signals.main.entire_count).to.equal(1n); + expect(signals.main.group_match_count).to.equal(3n); + expect(signals.main.start_idx).to.equal(15n); + const expected_reveal = encodeString('c'); + assert_reveal(signals, expected_reveal); + } + ], + [ + '0 entire match and 2 group matches', + convertMsg('1=a 2=b 2=bc 2=e d'), + 1, + (signals) => { + expect(signals.main.entire_count).to.equal(0n); + expect(signals.main.group_match_count).to.equal(2n); + } + ], + [ + '2 entire match and 2nd sub-group match', + convertMsg('1=a 2=b 2=bc 2=c da 1=a 2=cb 2=c 2=b dd'), + 1, + (signals) => { + expect(signals.main.entire_count).to.equal(2n); + expect(signals.main.group_match_count).to.equal(6n); + expect(signals.main.start_idx).to.equal(10n); + const expected_reveal = encodeString('bc'); + assert_reveal(signals, expected_reveal); + } + ], + // todo TOFIX + // [ + // '1 entire match and 1+ group matches with no trails behind the last group', + // convertMsg(`1=a 2=b 2=bc 2=c `), + // [`1=(a|b) (2=(b|c)+ )+`, 1, 1], + // 
(signals) => { + // for (let i = 0; i < signals.main.states.length; i++) { + // console.log(signals.main.states[i][8]) + // } + // expect(signals.main.entire_count).to.equal(1n) + // expect(signals.main.group_match_count).to.equal(3n) + // expect(signals.main.start_idx).to.equal(10n) + // const expected_reveal = 'bc'.split('').map((x) => BigInt(x.charCodeAt(0))) + // assert_reveal(signals, expected_reveal); + // } + // ], + ] + ], + [ + ['(\r\n|\x80)(to|from):((a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9| |_|.|"|@|-)+<)?(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|_|.|-)+@(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|_|.|-)+>?\r\n', 2], + [ + [ + 'from to email header', + convertMsg(fs.readFileSync(path.join(__dirname, 'header.fixture.txt'), 'utf8')), + 0, + (signals) => { + expect(signals.main.entire_count).to.equal(2n); + expect(signals.main.group_match_count).to.equal(2n); + expect(signals.main.start_idx).to.equal(54n); + const expected_reveal = encodeString('verify'); + assert_reveal(signals, expected_reveal); + } + ], + ] + ], + [ + ['dkim-signature:((a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)=(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|!|"|#|$|%|&|\'|\\(|\\)|\\*|\\+|,|-|.|\\/|:|<|=|>|\\?|@|\\[|\\\\|\\]|^|_|`|{|\\||}|~| |\t|\n' + + '|\r|\x0B|\f)+; )+bh=(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|\\+|\\/|=)+; ', 2], + [ + [ + 'assert body hash', + convertMsg('\r\ndkim-signature:v=1; a=rsa-sha256; c=relaxed/relaxed; d=twitter.com; s=dkim-201406; t=1671865957; bh=hEMyi6n9V0N6aGtz3lEc6fQBlZRVUok/tkwpRCmrnaa=; 
h=Date:From:To:Subject:MIME-Version:Content-Type:Message-ID; b='), + 0, + (signals) => { + expect(signals.main.entire_count).to.equal(1n); + expect(signals.main.group_match_count).to.equal(1n); + const expected_reveal = encodeString('hEMyi6n9V0N6aGtz3lEc6fQBlZRVUok/tkwpRCmrnaa='); + assert_reveal(signals, expected_reveal); + } + ] + ] + ], + ] + .forEach((regexSuite) => { + const regex = regexSuite[0][0]; + const group_idx = regexSuite[0][1]; + const tests = regexSuite[1]; + + const testCircomFile = `test_regex_compiler_group_${group_idx}.circom`; + let circuit; + describe(`/${regex}/ > group idx: ${group_idx} > ${testCircomFile}`, () => { + before(async function () { + await generator.generateCircuit( + regex, + '../circuits' + ); + circuit = await wasm_tester( + path.join(__dirname, 'circuits', testCircomFile), + {recompile: process.env.NO_COMPILE ? false : true, output: `${__dirname}/../build/`, O: 0} + ); + }); + tests.forEach((test) => { + const name = test[0]; + const content = test[1]; + const match_idx = test[2]; + const checkSignals = test[3]; + + describe(name, () => { + it('checks witness', async function() { + let witness = await circuit.calculateWitness({msg: content, match_idx}); + const signals = await circuit.getJSONOutput('main', witness); + checkSignals(signals); + await circuit.checkConstraints(witness); + }); + }); + }); + }); + }); + + describe('exceptions', () => { + it('character class not supported', async () => { + try { + await generator.generateCircuit( + '[a-z]', + '../circuits' + ); + } + catch (e) { + expect(e.message).to.equal('CharacterClass not supported'); + return; + } + + expect.fail('should have thrown'); + }); + }); +}); + +function encodeString(str) { + return str.split('').map((x) => BigInt(x.charCodeAt(0))); +} + +function convertMsg(msg, maxLen = 1536) { + let msgEncoded = msg.split('').map((x) => x.charCodeAt(0)); + while (msgEncoded.length < maxLen) { + msgEncoded.push(0); + } + msgEncoded = msgEncoded.map((x) => `${x}`); 
+ return msgEncoded; +} + +function assert_reveal(signals, expected_reveal) { + for (let m in signals.main.reveal_shifted) { + const value = signals.main.reveal_shifted[m]; + if (expected_reveal[m]) { + expect(value).to.equal(expected_reveal[m]); + } + } +}