From c44592d074e2ff64369109302db1336d0de04de3 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Tue, 11 Sep 2012 21:00:01 -0700 Subject: [PATCH] and/not, more refinements --- packages/jsparse/parser.js | 155 +++++++++++++++++----------------- packages/jsparse/parserlib.js | 55 ++++++------ 2 files changed, 107 insertions(+), 103 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 35972fb4f8..1478d6fb22 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -2,20 +2,22 @@ // XXX unit tests -// XXX examine uses of lookAhead(...) // XXX SeqParser // XXX examine when revalue(...) takes a constant vs. func, break into two // XXX chain revalue(...)? Chain other things? +// XXX better way to declare parsers, including boolean flagged ones // What we don't have from ECMA-262 5.1: // - object literal trailing comma // - object literal get/set var parse = function (tokenizer) { - var noLineTerminatorHere = new Parser( - 'noLineTerminator', function (t) { - return t.isLineTerminatorHere ? null : []; - }); + var NIL = new ParseNode('nil', []); + + var noLineTerminatorHere = expecting( + 'noLineTerminator', assertion(function (t) { + return ! t.isLineTerminatorHere; + })); // Like token, but marks tokens that need to defy the lexer's // heuristic about whether the next '/' is a division or @@ -110,17 +112,17 @@ var parse = function (tokenizer) { var objectLiteral = node('object', - seq(token('{'), - opt(list(nameColonValue, - token(',')), lookAheadToken('}')), - token('}'))); + seq(token('{'), + opt(list(nameColonValue, + token(',')), lookAheadToken('}')), + token('}'))); // not memoized; only call at construction time var functionFunc = function (nameRequired) { return seq(token('function'), (nameRequired ? tokenClass('IDENTIFIER') : or(tokenClass('IDENTIFIER'), - lookAhead(lookAheadToken('('), constant(ParseNode.NIL)))), + and(lookAheadToken('('), constant(NIL)))), token('('), opt(list(tokenClass('IDENTIFIER'), token(',')), lookAheadToken(')')), @@ -130,7 +132,7 @@ var parse = function (tokenizer) { token('}')); }; var functionExpression = node('functionExpr', - functionFunc(false)); + functionFunc(false)); var primaryOrFunctionExpression = expecting('expression', @@ -228,9 +230,9 @@ var parse = function (tokenizer) { nodeIfMultipart( 'postfix', seq(lhsExpression, - opt(lookAhead(noLineTerminatorHere, - lookAhead(postfixLookahead, - postfixToken)))))); + opt(and(noLineTerminatorHere, + postfixLookahead, + postfixToken))))); var unaryList = opt(list(or(token('delete void typeof'), preSlashToken('++ -- + - ~ !', false)))); var unaryExpression = new Parser( @@ -277,7 +279,7 @@ var parse = function (tokenizer) { token('||')]; return expecting( 'expression', - binaryLeft(unaryExpression, binaryOps)); + binaryLeft('binary', unaryExpression, binaryOps)); }); var binaryExpression = binaryExpressionFunc(false); @@ -345,20 +347,19 @@ var parse = function (tokenizer) { var maybeSemicolon = expecting( 'semicolon', or(token(';'), - lookAhead( + and( or( lookAheadToken('}'), lookAheadTokenClass('EOF'), - new Parser("lineTerminator", - function (t) { - return t.isLineTerminatorHere ? [] : null; - })), + assertion(function (t) { + return t.isLineTerminatorHere; + })), constant(new ParseNode(';', []))))); var expressionStatement = node( 'expressionStmnt', - negLookAhead( - or(lookAheadToken('{'), lookAheadToken('function')), + and( + not(or(lookAheadToken('{'), lookAheadToken('function'))), seq(expression, expecting('semicolon', or(maybeSemicolon, @@ -368,7 +369,8 @@ var parse = function (tokenizer) { // an implicit semicolon. This // is safe because a colon can never legally // follow a semicolon anyway. - lookAhead(lookAheadToken(':'), constant(new ParseNode(';', [])))))))); + and(lookAheadToken(':'), + constant(new ParseNode(';', [])))))))); // it's hard to parse statement labels, as in // `foo: x = 1`, because we can't tell from the @@ -379,8 +381,7 @@ var parse = function (tokenizer) { // followed by a colon. var labelColonAndStatement = seq(token(':'), statementPtr); var noColon = expecting( - 'semicolon', - negLookAhead(lookAheadToken(':'))); + 'semicolon', not(lookAheadToken(':'))); var expressionOrLabelStatement = new Parser( null, function (t) { @@ -442,14 +443,14 @@ var parse = function (tokenizer) { var secondThirdClauses = expecting( 'semicolon', - lookAhead(lookAheadToken(';'), - seq( - expecting('semicolon', token(';')), - opt(expressionPtr, lookAhead(lookAheadToken(';'), - constant(ParseNode.NIL))), - expecting('semicolon', token(';')), - opt(expressionPtr, lookAhead(lookAheadToken(')'), - constant(ParseNode.NIL)))))); + and(lookAheadToken(';'), + seq( + expecting('semicolon', token(';')), + opt(expressionPtr, and(lookAheadToken(';'), + constant(NIL))), + expecting('semicolon', token(';')), + opt(expressionPtr, and(lookAheadToken(')'), + constant(NIL)))))); var inExpr = seq(token('in'), expression); var inExprExpectingSemi = expecting('semicolon', seq(token('in'), expression)); @@ -469,8 +470,8 @@ var parse = function (tokenizer) { // get the case where the first clause is empty out of the way. // the lookAhead's return value is the empty placeholder for the // missing expression. - seq(lookAhead(lookAheadToken(';'), - constant(ParseNode.NIL)), secondThirdClauses), + seq(and(lookAheadToken(';'), + constant(NIL)), secondThirdClauses), // custom parser the non-var case because we have to // read the first expression before we know if there's // an "in". @@ -515,10 +516,10 @@ var parse = function (tokenizer) { var iterationStatement = or( node('doStmnt', seq(token('do'), statementPtr, token('while'), - token('('), expression, token(')'), - maybeSemicolon)), + token('('), expression, token(')'), + maybeSemicolon)), node('whileStmnt', seq(token('while'), token('('), expression, - closeParenBeforeStatement, statementPtr)), + closeParenBeforeStatement, statementPtr)), // semicolons must be real, not maybeSemicolons node('forStmnt', seq( token('for'), token('('), forSpec, closeParenBeforeStatement, @@ -527,31 +528,31 @@ var parse = function (tokenizer) { var returnStatement = node( 'returnStmnt', seq(token('return'), or( - lookAhead(noLineTerminatorHere, expression), constant(ParseNode.NIL)), + and(noLineTerminatorHere, expression), constant(NIL)), maybeSemicolon)); var continueStatement = node( 'continueStmnt', seq(token('continue'), or( - lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(ParseNode.NIL)), + and(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(NIL)), maybeSemicolon)); var breakStatement = node( 'breakStmnt', seq(token('break'), or( - lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(ParseNode.NIL)), + and(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(NIL)), maybeSemicolon)); var throwStatement = node( 'throwStmnt', seq(token('throw'), - lookAhead(new Parser(null, - function (t) { - var v = noLineTerminatorHere.parse(t); - if (v) - return v; - if (t.peekText) - throw parseError(t, expression, 'end of line'); - // EOF: - return null; - }), expression), + and(new Parser(null, + function (t) { + var v = noLineTerminatorHere.parse(t); + if (v) + return v; + if (t.peekText) + throw parseError(t, expression, 'end of line'); + // EOF: + return null; + }), expression), maybeSemicolon)); var withStatement = node( @@ -582,17 +583,17 @@ var parse = function (tokenizer) { var catchFinally = expecting( 'catch', - lookAhead(lookAheadToken('catch finally'), - seq( - or(node( - 'catch', - seq(token('catch'), token('('), tokenClass('IDENTIFIER'), - token(')'), blockStatement)), - constant(ParseNode.NIL)), - or(node( - 'finally', - seq(token('finally'), blockStatement)), - constant(ParseNode.NIL))))); + and(lookAheadToken('catch finally'), + seq( + or(node( + 'catch', + seq(token('catch'), token('('), tokenClass('IDENTIFIER'), + token(')'), blockStatement)), + constant(NIL)), + or(node( + 'finally', + seq(token('finally'), blockStatement)), + constant(NIL))))); var tryStatement = node( 'tryStmnt', seq(token('try'), blockStatement, catchFinally)); @@ -618,7 +619,7 @@ var parse = function (tokenizer) { // PROGRAM var functionDecl = node('functionDecl', - functionFunc(true)); + functionFunc(true)); var sourceElement = or(statement, functionDecl); var sourceElements = list(sourceElement); @@ -628,20 +629,20 @@ var parse = function (tokenizer) { lookAheadToken('}'))); var program = node('program', - seq(opt(sourceElements), - // we rely on the fact that opt(sourceElements) - // will never fail, and non-first arguments - // to seq are required to succeed -- meaning - // this parser will never fail without throwing - // a parse error. - expecting('statement', - revalue(lookAheadTokenClass("EOF"), - function (v, t) { - // eat the ending "EOF" so that - // our position is updated - t.consume(); - return v; - })))); + seq(opt(sourceElements), + // we rely on the fact that opt(sourceElements) + // will never fail, and non-first arguments + // to seq are required to succeed -- meaning + // this parser will never fail without throwing + // a parse error. + expecting('statement', + revalue(lookAheadTokenClass("EOF"), + function (v, t) { + // eat the ending "EOF" so that + // our position is updated + t.consume(); + return v; + })))); return program.parse(tokenizer); }; diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 799e3dccc6..721229f0b4 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -1,6 +1,5 @@ ///// TOKENIZER AND PARSER COMBINATORS -// XXX make Parser object with parse method? // XXX track line/col position, for errors and maybe token info // XXX unit tests @@ -16,8 +15,6 @@ var ParseNode = function (name, children) { throw new Error("Expected array in new ParseNode(" + name + ", ...)"); }; -ParseNode.NIL = new ParseNode('nil', []); - var Parser = function (expecting, runFunc) { this.expecting = expecting; this._run = runFunc; @@ -152,6 +149,13 @@ var lookAheadToken = function (text) { return _tokenImpl(text, true); }; +var assertion = function (test) { + return new Parser( + null, function (t) { + return test(t) ? [] : null; + }); +}; + ///// NON-TERMINAL PARSER CONSTRUCTORS var node = function (name, childrenParser) { @@ -187,7 +191,7 @@ var or = function (/*parsers*/) { // // opParser can also be an array of op parsers from high to low // precedence (tightest-binding first) -var binaryLeft = function (termParser, opParser) { +var binaryLeft = function (name, termParser, opParser) { if (isArray(opParser)) { if (opParser.length === 1) { // take single opParser out of its array @@ -196,7 +200,7 @@ var binaryLeft = function (termParser, opParser) { // pop off last opParser (non-destructively) and replace // termParser with a recursive binaryLeft on the remaining // ops. - termParser = binaryLeft(termParser, opParser.slice(0, -1)); + termParser = binaryLeft(name, termParser, opParser.slice(0, -1)); opParser = opParser[opParser.length - 1]; } } @@ -211,7 +215,7 @@ var binaryLeft = function (termParser, opParser) { var op; while ((op = opParser.parse(t))) { result = new ParseNode( - 'binary', + name, [result, op, termParser.parse(t, {required: true})]); } return result; @@ -257,8 +261,7 @@ var list = function (itemParser, sepParser) { var seq = function (/*parsers*/) { var args = arguments; if (! args.length) - return new Parser("(empty)", - function (t) { return []; }); + return constant([]); return new Parser( args[0].expecting, @@ -280,31 +283,31 @@ var seq = function (/*parsers*/) { }); }; -// lookAhead parser must never consume -var lookAhead = function (lookAheadParser, nextParser) { +// parsers except last must never consume +var and = function (/*parsers*/) { + var args = arguments; + if (! args.length) + return constant([]); + return new Parser( - nextParser.expecting, + args[args.length - 1].expecting, function (t) { - if (! lookAheadParser.parse(t)) - return null; - return nextParser.parse(t); + var result; + for(var i = 0, N = args.length; i < N; i++) { + result = args[i].parse(t); + if (! result) + return null; + } + return result; }); }; -var negLookAhead = function (lookAheadParser, nextParser) { - if (! nextParser) - return new Parser( - null, - function (t) { - return lookAheadParser.parse(t) ? null : []; - }); - +// parser must not consume +var not = function (parser) { return new Parser( - nextParser.expecting, + null, function (t) { - if (lookAheadParser.parse(t)) - return null; - return nextParser.parse(t); + return parser.parse(t) ? null : []; }); };