mirror of
https://github.com/meteor/meteor.git
synced 2026-05-02 03:01:46 -04:00
Parser object
This commit is contained in:
@@ -2,90 +2,119 @@
|
||||
|
||||
// XXX unit tests
|
||||
|
||||
// XXX remove unnecessary ParseNode.NILs in lookaheads
|
||||
// XXX SeqParser
|
||||
// XXX find all revalues, see if constant ones are necessary.
|
||||
// API may be confusing if constant affects only non-null.
|
||||
|
||||
// What we don't have from ECMA-262 5.1:
|
||||
// - object literal trailing comma
|
||||
// - object literal get/set
|
||||
|
||||
var parse = function (tokenizer) {
|
||||
var noLineTerminatorHere = describe(
|
||||
var noLineTerminatorHere = new Parser(
|
||||
'noLineTerminator', function (t) {
|
||||
return t.isLineTerminatorHere ? null : [];
|
||||
});
|
||||
|
||||
// Like token, but marks tokens that need to defy the lexer's
|
||||
// heuristic about whether the next '/' is a division or
|
||||
// starts a regex.
|
||||
var preSlashToken = function (text, divisionNotRegex) {
|
||||
var inner = token(text);
|
||||
return new Parser(
|
||||
inner.expecting,
|
||||
function (t) {
|
||||
// temporarily set divisionPermitted,
|
||||
// restoring it if we don't match.
|
||||
var oldValue = t.lexer.divisionPermitted;
|
||||
var result;
|
||||
try {
|
||||
t.lexer.divisionPermitted = divisionNotRegex;
|
||||
result = inner.parse(t);
|
||||
return result;
|
||||
} finally {
|
||||
if (! result)
|
||||
t.lexer.divisionPermitted = oldValue;
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
// Function that takes one-item arrays to their single item and names other
|
||||
// arrays with `name`. Works on parsers too.
|
||||
var nameIfMultipart = function (name, parser) {
|
||||
var nodeIfMultipart = function (name, arrayParser) {
|
||||
return revalue(
|
||||
parser,
|
||||
arrayParser,
|
||||
function (parts) {
|
||||
if (! parts)
|
||||
return null;
|
||||
return (parts.length === 1) ?
|
||||
parts[0] : named(name, parts);
|
||||
parts[0] : new ParseNode(name, parts);
|
||||
});
|
||||
};
|
||||
|
||||
// These "pointers" allow grammar circularity, i.e. accessing
|
||||
// later parsers from earlier ones.
|
||||
var expressionPtrFunc = function (noIn) {
|
||||
return describe(
|
||||
return new Parser(
|
||||
"expression",
|
||||
function (t) {
|
||||
return expressionFunc(noIn)(t);
|
||||
return expressionFunc(noIn).parse(t);
|
||||
});
|
||||
};
|
||||
var expressionPtr = expressionPtrFunc(false);
|
||||
|
||||
var assignmentExpressionPtrFunc = function (noIn) {
|
||||
return describe(
|
||||
return new Parser(
|
||||
"expression",
|
||||
function (t) {
|
||||
return assignmentExpressionFunc(noIn)(t);
|
||||
return assignmentExpressionFunc(noIn).parse(t);
|
||||
});
|
||||
};
|
||||
var assignmentExpressionPtr = assignmentExpressionPtrFunc(false);
|
||||
|
||||
var functionBodyPtr = describe(
|
||||
var functionBodyPtr = new Parser(
|
||||
"functionBody", function (t) {
|
||||
return functionBody(t);
|
||||
return functionBody.parse(t);
|
||||
});
|
||||
|
||||
var statementPtr = describe(
|
||||
var statementPtr = new Parser(
|
||||
"statement", function (t) {
|
||||
return statement(t);
|
||||
return statement.parse(t);
|
||||
});
|
||||
|
||||
var arrayLiteral =
|
||||
named('array',
|
||||
seq(token('['),
|
||||
unpack(opt(list(token(',')))),
|
||||
unpack(
|
||||
opt(
|
||||
list(
|
||||
describe(
|
||||
'expression',
|
||||
or(assignmentExpressionPtr,
|
||||
// count a peeked-at ']' as an expression
|
||||
// to support elisions at end, e.g.
|
||||
// `[1,2,3,,,,,,]`. Because it's unpacked,
|
||||
// the look-ahead won't show up in the
|
||||
// parse tree.
|
||||
unpack(lookAheadToken(']')))),
|
||||
// list separator is one or more commas
|
||||
// to support elision
|
||||
unpack(list(token(',')))),
|
||||
lookAheadToken(']'))),
|
||||
token(']')));
|
||||
node('array',
|
||||
seq(token('['),
|
||||
unpack(opt(list(token(',')))),
|
||||
unpack(
|
||||
opt(
|
||||
list(
|
||||
expecting(
|
||||
'expression',
|
||||
or(assignmentExpressionPtr,
|
||||
// count a peeked-at ']' as an expression
|
||||
// to support elisions at end, e.g.
|
||||
// `[1,2,3,,,,,,]`. Because it's unpacked,
|
||||
// the look-ahead won't show up in the
|
||||
// parse tree.
|
||||
unpack(lookAheadToken(']')))),
|
||||
// list separator is one or more commas
|
||||
// to support elision
|
||||
unpack(list(token(',')))),
|
||||
lookAheadToken(']'))),
|
||||
token(']')));
|
||||
|
||||
var propertyName = describe('propertyName', or(
|
||||
named('idPropName', seq(tokenClass('IDENTIFIER'))),
|
||||
named('numPropName', seq(tokenClass('NUMBER'))),
|
||||
named('strPropName', seq(tokenClass('STRING')))));
|
||||
var nameColonValue = describe(
|
||||
var propertyName = expecting('propertyName', or(
|
||||
node('idPropName', seq(tokenClass('IDENTIFIER'))),
|
||||
node('numPropName', seq(tokenClass('NUMBER'))),
|
||||
node('strPropName', seq(tokenClass('STRING')))));
|
||||
var nameColonValue = expecting(
|
||||
'name:value',
|
||||
named('prop', seq(propertyName, token(':'), assignmentExpressionPtr)));
|
||||
node('prop', seq(propertyName, token(':'), assignmentExpressionPtr)));
|
||||
|
||||
var objectLiteral =
|
||||
named('object',
|
||||
node('object',
|
||||
seq(token('{'),
|
||||
unpack(opt(list(nameColonValue,
|
||||
token(',')), lookAheadToken('}'))),
|
||||
@@ -96,7 +125,7 @@ var parse = function (tokenizer) {
|
||||
return seq(token('function'),
|
||||
(nameRequired ? tokenClass('IDENTIFIER') :
|
||||
or(tokenClass('IDENTIFIER'),
|
||||
revalue(lookAheadToken('('), named('nil', [])))),
|
||||
revalue(lookAheadToken('('), ParseNode.NIL))),
|
||||
token('('),
|
||||
unpack(opt(list(tokenClass('IDENTIFIER'), token(',')),
|
||||
lookAheadToken(')'))),
|
||||
@@ -105,23 +134,23 @@ var parse = function (tokenizer) {
|
||||
unpack(functionBodyPtr),
|
||||
token('}'));
|
||||
};
|
||||
var functionExpression = named('functionExpr',
|
||||
var functionExpression = node('functionExpr',
|
||||
functionFunc(false));
|
||||
|
||||
var primaryOrFunctionExpression =
|
||||
describe('expression',
|
||||
or(named('this', seq(token('this'))),
|
||||
named('identifier', seq(tokenClass('IDENTIFIER'))),
|
||||
named('number', seq(tokenClass('NUMBER'))),
|
||||
named('boolean', seq(tokenClass('BOOLEAN'))),
|
||||
named('null', seq(tokenClass('NULL'))),
|
||||
named('regex', seq(tokenClass('REGEX'))),
|
||||
named('string', seq(tokenClass('STRING'))),
|
||||
named('parens',
|
||||
expecting('expression',
|
||||
or(node('this', seq(token('this'))),
|
||||
node('identifier', seq(tokenClass('IDENTIFIER'))),
|
||||
node('number', seq(tokenClass('NUMBER'))),
|
||||
node('boolean', seq(tokenClass('BOOLEAN'))),
|
||||
node('null', seq(tokenClass('NULL'))),
|
||||
node('regex', seq(tokenClass('REGEX'))),
|
||||
node('string', seq(tokenClass('STRING'))),
|
||||
node('parens',
|
||||
seq(token('('), expressionPtr, token(')'))),
|
||||
arrayLiteral,
|
||||
objectLiteral,
|
||||
functionExpression));
|
||||
arrayLiteral,
|
||||
objectLiteral,
|
||||
functionExpression));
|
||||
|
||||
var dotEnding = seq(token('.'), tokenClass('IDENTIFIER'));
|
||||
var bracketEnding = seq(token('['), expressionPtr, token(']'));
|
||||
@@ -142,14 +171,14 @@ var parse = function (tokenizer) {
|
||||
// call to "return" a valid l-value, as in `foo(bar) = baz`,
|
||||
// though no built-in or user-specifiable call has this property
|
||||
// (it would have to be defined by a browser or other "host").
|
||||
var lhsExpression = describe(
|
||||
var lhsExpression = new Parser(
|
||||
'expression',
|
||||
function (t) {
|
||||
// Accumulate all initial "new" keywords, not yet knowing
|
||||
// if they have a corresponding argument list later.
|
||||
var news = [];
|
||||
var n;
|
||||
while ((n = newKeyword(t)))
|
||||
while ((n = newKeyword.parse(t)))
|
||||
news.push(n);
|
||||
|
||||
// Read the primaryOrFunctionExpression that will be the "core"
|
||||
@@ -158,8 +187,8 @@ var parse = function (tokenizer) {
|
||||
// and .foo add-ons.
|
||||
// if we have 'new' keywords, we are committed and must
|
||||
// match an expression or error.
|
||||
var result = runMaybeRequired(
|
||||
news.length, primaryOrFunctionExpression, t);
|
||||
var result = primaryOrFunctionExpression.parse(
|
||||
t, {required: news.length});
|
||||
if (! result)
|
||||
return null;
|
||||
|
||||
@@ -170,15 +199,15 @@ var parse = function (tokenizer) {
|
||||
var done = false;
|
||||
while (! done) {
|
||||
var r;
|
||||
if ((r = dotEnding(t))) {
|
||||
result = named('dot', [result].concat(r));
|
||||
} else if ((r = bracketEnding(t))) {
|
||||
result = named('bracket', [result].concat(r));
|
||||
} else if ((r = callArgs(t))) {
|
||||
if ((r = dotEnding.parse(t))) {
|
||||
result = new ParseNode('dot', [result].concat(r));
|
||||
} else if ((r = bracketEnding.parse(t))) {
|
||||
result = new ParseNode('bracket', [result].concat(r));
|
||||
} else if ((r = callArgs.parse(t))) {
|
||||
if (news.length)
|
||||
result = named('newcall', [news.pop(), result].concat(r));
|
||||
result = new ParseNode('newcall', [news.pop(), result].concat(r));
|
||||
else
|
||||
result = named('call', [result].concat(r));
|
||||
result = new ParseNode('call', [result].concat(r));
|
||||
} else {
|
||||
done = true;
|
||||
}
|
||||
@@ -188,7 +217,7 @@ var parse = function (tokenizer) {
|
||||
// paren-less constructions (`new Date`) are parsed. We've
|
||||
// already handled `new foo().bar()`, now handle `new new foo().bar`.
|
||||
while (news.length)
|
||||
result = named('new', [news.pop(), result]);
|
||||
result = new ParseNode('new', [news.pop(), result]);
|
||||
|
||||
// mark any LeftHandSideExpression, for the benefit of
|
||||
// assignmentExpression
|
||||
@@ -199,9 +228,9 @@ var parse = function (tokenizer) {
|
||||
|
||||
var postfixToken = token('++ --');
|
||||
var postfixLookahead = lookAheadToken('++ --');
|
||||
var postfixExpression = describe(
|
||||
var postfixExpression = expecting(
|
||||
'expression',
|
||||
nameIfMultipart(
|
||||
nodeIfMultipart(
|
||||
'postfix',
|
||||
seq(lhsExpression,
|
||||
unpack(opt(lookAhead(noLineTerminatorHere,
|
||||
@@ -209,18 +238,19 @@ var parse = function (tokenizer) {
|
||||
postfixToken)))))));
|
||||
var unaryList = opt(list(or(token('delete void typeof'),
|
||||
preSlashToken('++ -- + - ~ !', false))));
|
||||
var unaryExpression = describe(
|
||||
var unaryExpression = new Parser(
|
||||
'expression',
|
||||
function (t) {
|
||||
var unaries = unaryList(t);
|
||||
var unaries = unaryList.parse(t);
|
||||
// if we have unaries, we are committed and
|
||||
// have to match an expression or error.
|
||||
var result = runMaybeRequired(unaries.length, postfixExpression, t);
|
||||
var result = postfixExpression.parse(
|
||||
t, {required: unaries.length});
|
||||
if (! result)
|
||||
return null;
|
||||
|
||||
while (unaries.length)
|
||||
result = named('unary', [unaries.pop(), result]);
|
||||
result = new ParseNode('unary', [unaries.pop(), result]);
|
||||
return result;
|
||||
});
|
||||
|
||||
@@ -250,7 +280,7 @@ var parse = function (tokenizer) {
|
||||
token('|'),
|
||||
token('&&'),
|
||||
token('||')];
|
||||
return describe(
|
||||
return expecting(
|
||||
'expression',
|
||||
binaryLeft(unaryExpression, binaryOps));
|
||||
});
|
||||
@@ -258,9 +288,9 @@ var parse = function (tokenizer) {
|
||||
|
||||
var conditionalExpressionFunc = memoizeBooleanFunc(
|
||||
function (noIn) {
|
||||
return describe(
|
||||
return expecting(
|
||||
'expression',
|
||||
nameIfMultipart(
|
||||
nodeIfMultipart(
|
||||
'ternary',
|
||||
seq(binaryExpressionFunc(noIn), unpack(opt(seq(
|
||||
token('?'),
|
||||
@@ -273,10 +303,10 @@ var parse = function (tokenizer) {
|
||||
|
||||
var assignmentExpressionFunc = memoizeBooleanFunc(
|
||||
function (noIn) {
|
||||
return describe(
|
||||
return new Parser(
|
||||
'expression',
|
||||
function (t) {
|
||||
var r = conditionalExpressionFunc(noIn)(t);
|
||||
var r = conditionalExpressionFunc(noIn).parse(t);
|
||||
if (! r)
|
||||
return null;
|
||||
|
||||
@@ -286,15 +316,16 @@ var parse = function (tokenizer) {
|
||||
// and then fold them up at the end.
|
||||
var parts = [r];
|
||||
var op;
|
||||
while (r.lhs && (op = assignOp(t)))
|
||||
while (r.lhs && (op = assignOp.parse(t)))
|
||||
parts.push(op,
|
||||
runRequired(conditionalExpressionFunc(noIn), t));
|
||||
conditionalExpressionFunc(noIn).parse(
|
||||
t, {required: true}));
|
||||
|
||||
var result = parts.pop();
|
||||
while (parts.length) {
|
||||
op = parts.pop();
|
||||
var lhs = parts.pop();
|
||||
result = named('assignment', [lhs, op, result]);
|
||||
result = new ParseNode('assignment', [lhs, op, result]);
|
||||
}
|
||||
return result;
|
||||
});
|
||||
@@ -303,9 +334,9 @@ var parse = function (tokenizer) {
|
||||
|
||||
var expressionFunc = memoizeBooleanFunc(
|
||||
function (noIn) {
|
||||
return describe(
|
||||
return expecting(
|
||||
'expression',
|
||||
nameIfMultipart(
|
||||
nodeIfMultipart(
|
||||
'comma',
|
||||
list(assignmentExpressionFunc(noIn), token(','))));
|
||||
});
|
||||
@@ -316,31 +347,32 @@ var parse = function (tokenizer) {
|
||||
var statements = list(statementPtr);
|
||||
|
||||
// implements JavaScript's semicolon "insertion" rules
|
||||
var maybeSemicolon = describe(
|
||||
var maybeSemicolon = expecting(
|
||||
'semicolon',
|
||||
or(token(';'),
|
||||
revalue(
|
||||
or(
|
||||
lookAheadToken('}'),
|
||||
lookAheadTokenClass('EOF'),
|
||||
function (t) {
|
||||
return t.isLineTerminatorHere ? [] : null;
|
||||
}), named(';', []))));
|
||||
new Parser(null,
|
||||
function (t) {
|
||||
return t.isLineTerminatorHere ? [] : null;
|
||||
})), new ParseNode(';', []))));
|
||||
|
||||
var expressionStatement = named(
|
||||
var expressionStatement = node(
|
||||
'expressionStmnt',
|
||||
negLookAhead(
|
||||
or(lookAheadToken('{'), lookAheadToken('function')),
|
||||
seq(expression,
|
||||
describe('semicolon',
|
||||
or(maybeSemicolon,
|
||||
// allow presence of colon to terminate
|
||||
// statement legally, for the benefit of
|
||||
// expressionOrLabelStatement. Basically assume
|
||||
// an implicit semicolon. This
|
||||
// is safe because a colon can never legally
|
||||
// follow a semicolon anyway.
|
||||
revalue(lookAheadToken(':'), named(';', [])))))));
|
||||
expecting('semicolon',
|
||||
or(maybeSemicolon,
|
||||
// allow presence of colon to terminate
|
||||
// statement legally, for the benefit of
|
||||
// expressionOrLabelStatement. Basically assume
|
||||
// an implicit semicolon. This
|
||||
// is safe because a colon can never legally
|
||||
// follow a semicolon anyway.
|
||||
revalue(lookAheadToken(':'), new ParseNode(';', [])))))));
|
||||
|
||||
// it's hard to parse statement labels, as in
|
||||
// `foo: x = 1`, because we can't tell from the
|
||||
@@ -350,44 +382,46 @@ var parse = function (tokenizer) {
|
||||
// then rewrites the result if it is an identifier
|
||||
// followed by a colon.
|
||||
var labelColonAndStatement = seq(token(':'), statementPtr);
|
||||
var noColon = describe(
|
||||
var noColon = expecting(
|
||||
'semicolon',
|
||||
negLookAhead(lookAheadToken(':')));
|
||||
var expressionOrLabelStatement = function (t) {
|
||||
var exprStmnt = expressionStatement(t);
|
||||
if (! exprStmnt)
|
||||
return null;
|
||||
var expressionOrLabelStatement = new Parser(
|
||||
null,
|
||||
function (t) {
|
||||
var exprStmnt = expressionStatement.parse(t);
|
||||
if (! exprStmnt)
|
||||
return null;
|
||||
|
||||
var expr = exprStmnt.children[0];
|
||||
var maybeSemi = exprStmnt.children[1];
|
||||
if (expr.name !== 'identifier' ||
|
||||
! (maybeSemi instanceof ParseNode)) {
|
||||
// We either have a non-identifier expression or a present
|
||||
// semicolon. This is not a label.
|
||||
//
|
||||
// Fail now if we are looking at a colon, causing an
|
||||
// error message on input like `1+1:` of the same kind
|
||||
// you'd get without statement label parsing.
|
||||
runRequired(noColon, t);
|
||||
return exprStmnt;
|
||||
}
|
||||
var expr = exprStmnt.children[0];
|
||||
var maybeSemi = exprStmnt.children[1];
|
||||
if (expr.name !== 'identifier' ||
|
||||
! (maybeSemi instanceof ParseNode)) {
|
||||
// We either have a non-identifier expression or a present
|
||||
// semicolon. This is not a label.
|
||||
//
|
||||
// Fail now if we are looking at a colon, causing an
|
||||
// error message on input like `1+1:` of the same kind
|
||||
// you'd get without statement label parsing.
|
||||
noColon.parse(t, {required: true});
|
||||
return exprStmnt;
|
||||
}
|
||||
|
||||
var rest = labelColonAndStatement(t);
|
||||
if (! rest)
|
||||
return exprStmnt;
|
||||
var rest = labelColonAndStatement.parse(t);
|
||||
if (! rest)
|
||||
return exprStmnt;
|
||||
|
||||
return named('labelStmnt',
|
||||
[expr.children[0]].concat(rest));
|
||||
};
|
||||
return new ParseNode('labelStmnt',
|
||||
[expr.children[0]].concat(rest));
|
||||
});
|
||||
|
||||
var emptyStatement = named('emptyStmnt', seq(token(';'))); // not maybeSemicolon
|
||||
var emptyStatement = node('emptyStmnt', seq(token(';'))); // not maybeSemicolon
|
||||
|
||||
var blockStatement = describe('block', named('blockStmnt', seq(
|
||||
var blockStatement = expecting('block', node('blockStmnt', seq(
|
||||
token('{'), unpack(opt(statements, lookAheadToken('}'))),
|
||||
token('}'))));
|
||||
|
||||
var varDeclFunc = memoizeBooleanFunc(function (noIn) {
|
||||
return named(
|
||||
return node(
|
||||
'varDecl',
|
||||
seq(tokenClass('IDENTIFIER'),
|
||||
unpack(opt(seq(token('='),
|
||||
@@ -395,7 +429,7 @@ var parse = function (tokenizer) {
|
||||
});
|
||||
var varDecl = varDeclFunc(false);
|
||||
|
||||
var variableStatement = named(
|
||||
var variableStatement = node(
|
||||
'varStmnt',
|
||||
seq(token('var'), unpack(list(varDecl, token(','))),
|
||||
maybeSemicolon));
|
||||
@@ -404,28 +438,28 @@ var parse = function (tokenizer) {
|
||||
// beginning with a regex literal.
|
||||
var closeParenBeforeStatement = preSlashToken(')', false);
|
||||
|
||||
var ifStatement = named(
|
||||
var ifStatement = node(
|
||||
'ifStmnt',
|
||||
seq(token('if'), token('('), expression,
|
||||
closeParenBeforeStatement, statementPtr,
|
||||
unpack(opt(seq(token('else'), statementPtr)))));
|
||||
|
||||
var secondThirdClauses = describe(
|
||||
var secondThirdClauses = expecting(
|
||||
'semicolon',
|
||||
lookAhead(lookAheadToken(';'),
|
||||
seq(
|
||||
describe('semicolon', token(';')),
|
||||
opt(expressionPtr, revalue(lookAheadToken(';'), named('nil', []))),
|
||||
describe('semicolon', token(';')),
|
||||
opt(expressionPtr, revalue(lookAheadToken(')'), named('nil', []))))));
|
||||
expecting('semicolon', token(';')),
|
||||
opt(expressionPtr, revalue(lookAheadToken(';'), ParseNode.NIL)),
|
||||
expecting('semicolon', token(';')),
|
||||
opt(expressionPtr, revalue(lookAheadToken(')'), ParseNode.NIL)))));
|
||||
var inExpr = seq(token('in'), expression);
|
||||
var inExprExpectingSemi = describe('semicolon',
|
||||
seq(token('in'), expression));
|
||||
var forSpec = revalue(named(
|
||||
var inExprExpectingSemi = expecting('semicolon',
|
||||
seq(token('in'), expression));
|
||||
var forSpec = revalue(node(
|
||||
'forSpec',
|
||||
or(seq(token('var'),
|
||||
varDeclFunc(true),
|
||||
describe(
|
||||
expecting(
|
||||
'commaOrIn',
|
||||
or(unpack(inExpr),
|
||||
unpack(seq(
|
||||
@@ -437,76 +471,78 @@ var parse = function (tokenizer) {
|
||||
// get the case where the first clause is empty out of the way.
|
||||
// the lookAhead's return value is the empty placeholder for the
|
||||
// missing expression.
|
||||
seq(revalue(lookAheadToken(';'), named('nil', [])), unpack(secondThirdClauses)),
|
||||
seq(revalue(lookAheadToken(';'), ParseNode.NIL), unpack(secondThirdClauses)),
|
||||
// custom parser for the non-var case because we have to
|
||||
// read the first expression before we know if there's
|
||||
// an "in".
|
||||
function (t) {
|
||||
var firstExpr = expressionFunc(true)(t);
|
||||
if (! firstExpr)
|
||||
return null;
|
||||
var rest = secondThirdClauses(t);
|
||||
if (! rest) {
|
||||
// we need a left-hand-side expression for a
|
||||
// `for (x in y)` loop.
|
||||
if (! firstExpr.lhs)
|
||||
throw parseError(t, secondThirdClauses);
|
||||
// if we don't see 'in' at this point, it's probably
|
||||
// a missing semicolon
|
||||
rest = runRequired(inExprExpectingSemi, t);
|
||||
}
|
||||
new Parser(
|
||||
null,
|
||||
function (t) {
|
||||
var firstExpr = expressionFunc(true).parse(t);
|
||||
if (! firstExpr)
|
||||
return null;
|
||||
var rest = secondThirdClauses.parse(t);
|
||||
if (! rest) {
|
||||
// we need a left-hand-side expression for a
|
||||
// `for (x in y)` loop.
|
||||
if (! firstExpr.lhs)
|
||||
throw parseError(t, secondThirdClauses);
|
||||
// if we don't see 'in' at this point, it's probably
|
||||
// a missing semicolon
|
||||
rest = inExprExpectingSemi.parse(t, {required: true});
|
||||
}
|
||||
|
||||
return [firstExpr].concat(rest);
|
||||
})),
|
||||
function (clauses) {
|
||||
// There are four kinds of for-loop, and we call the
|
||||
// part between the parens one of forSpec, forVarSpec,
|
||||
// forInSpec, and forVarInSpec. Having parsed it
|
||||
// already, we rewrite the node name based on how
|
||||
// many items came out. forIn and forVarIn always
|
||||
// have 3 and 4 items respectively. for has 5
|
||||
// (the optional expressions are present as nils).
|
||||
// forVar has 6 or more, because `for(var x;;);`
|
||||
// produces [`var` `x` `;` nil `;` nil].
|
||||
if (! clauses)
|
||||
return null;
|
||||
var numChildren = clauses.children.length;
|
||||
if (numChildren === 3)
|
||||
return new ParseNode('forInSpec', clauses.children);
|
||||
else if (numChildren === 4)
|
||||
return new ParseNode('forVarInSpec', clauses.children);
|
||||
else if (numChildren >= 6)
|
||||
return new ParseNode('forVarSpec', clauses.children);
|
||||
return clauses;
|
||||
});
|
||||
return [firstExpr].concat(rest);
|
||||
}))),
|
||||
function (clauses) {
|
||||
// There are four kinds of for-loop, and we call the
|
||||
// part between the parens one of forSpec, forVarSpec,
|
||||
// forInSpec, and forVarInSpec. Having parsed it
|
||||
// already, we rewrite the node name based on how
|
||||
// many items came out. forIn and forVarIn always
|
||||
// have 3 and 4 items respectively. for has 5
|
||||
// (the optional expressions are present as nils).
|
||||
// forVar has 6 or more, because `for(var x;;);`
|
||||
// produces [`var` `x` `;` nil `;` nil].
|
||||
if (! clauses)
|
||||
return null;
|
||||
var numChildren = clauses.children.length;
|
||||
if (numChildren === 3)
|
||||
return new ParseNode('forInSpec', clauses.children);
|
||||
else if (numChildren === 4)
|
||||
return new ParseNode('forVarInSpec', clauses.children);
|
||||
else if (numChildren >= 6)
|
||||
return new ParseNode('forVarSpec', clauses.children);
|
||||
return clauses;
|
||||
});
|
||||
|
||||
var iterationStatement = or(
|
||||
named('doStmnt', seq(token('do'), statementPtr, token('while'),
|
||||
node('doStmnt', seq(token('do'), statementPtr, token('while'),
|
||||
token('('), expression, token(')'),
|
||||
maybeSemicolon)),
|
||||
named('whileStmnt', seq(token('while'), token('('), expression,
|
||||
node('whileStmnt', seq(token('while'), token('('), expression,
|
||||
closeParenBeforeStatement, statementPtr)),
|
||||
// semicolons must be real, not maybeSemicolons
|
||||
named('forStmnt', seq(
|
||||
node('forStmnt', seq(
|
||||
token('for'), token('('), forSpec, closeParenBeforeStatement,
|
||||
statementPtr)));
|
||||
|
||||
var returnStatement = named(
|
||||
var returnStatement = node(
|
||||
'returnStmnt',
|
||||
seq(token('return'), or(
|
||||
lookAhead(noLineTerminatorHere, expression), constant(named('nil', []))),
|
||||
lookAhead(noLineTerminatorHere, expression), constant(ParseNode.NIL)),
|
||||
maybeSemicolon));
|
||||
var continueStatement = named(
|
||||
var continueStatement = node(
|
||||
'continueStmnt',
|
||||
seq(token('continue'), or(
|
||||
lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(named('nil', []))),
|
||||
lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(ParseNode.NIL)),
|
||||
maybeSemicolon));
|
||||
var breakStatement = named(
|
||||
var breakStatement = node(
|
||||
'breakStmnt',
|
||||
seq(token('break'), or(
|
||||
lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(named('nil', []))),
|
||||
lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(ParseNode.NIL)),
|
||||
maybeSemicolon));
|
||||
var throwStatement = named(
|
||||
var throwStatement = node(
|
||||
'throwStmnt',
|
||||
seq(token('throw'),
|
||||
lookAhead(revalue(noLineTerminatorHere,
|
||||
@@ -519,23 +555,23 @@ var parse = function (tokenizer) {
|
||||
}), expression),
|
||||
maybeSemicolon));
|
||||
|
||||
var withStatement = named(
|
||||
var withStatement = node(
|
||||
'withStmnt',
|
||||
seq(token('with'), token('('), expression, closeParenBeforeStatement,
|
||||
statementPtr));
|
||||
|
||||
var switchCase = named(
|
||||
var switchCase = node(
|
||||
'case',
|
||||
seq(token('case'), expression, token(':'),
|
||||
unpack(opt(statements, or(lookAheadToken('}'),
|
||||
lookAheadToken('case default'))))));
|
||||
var switchDefault = named(
|
||||
var switchDefault = node(
|
||||
'default',
|
||||
seq(token('default'), token(':'),
|
||||
unpack(opt(statements, or(lookAheadToken('}'),
|
||||
lookAheadToken('case'))))));
|
||||
|
||||
var switchStatement = named(
|
||||
var switchStatement = node(
|
||||
'switchStmnt',
|
||||
seq(token('switch'), token('('), expression, token(')'),
|
||||
token('{'), unpack(opt(list(switchCase),
|
||||
@@ -545,70 +581,70 @@ var parse = function (tokenizer) {
|
||||
unpack(opt(list(switchCase)))))),
|
||||
token('}')));
|
||||
|
||||
var catchFinally = describe(
|
||||
var catchFinally = expecting(
|
||||
'catch',
|
||||
lookAhead(lookAheadToken('catch finally'),
|
||||
seq(
|
||||
or(named(
|
||||
or(node(
|
||||
'catch',
|
||||
seq(token('catch'), token('('), tokenClass('IDENTIFIER'),
|
||||
token(')'), blockStatement)),
|
||||
constant(named('nil', []))),
|
||||
or(named(
|
||||
constant(ParseNode.NIL)),
|
||||
or(node(
|
||||
'finally',
|
||||
seq(token('finally'), blockStatement)),
|
||||
constant(named('nil', []))))));
|
||||
var tryStatement = named(
|
||||
constant(ParseNode.NIL)))));
|
||||
var tryStatement = node(
|
||||
'tryStmnt',
|
||||
seq(token('try'), blockStatement, unpack(catchFinally)));
|
||||
var debuggerStatement = named(
|
||||
var debuggerStatement = node(
|
||||
'debuggerStmnt', seq(token('debugger'), maybeSemicolon));
|
||||
|
||||
var statement = describe('statement',
|
||||
or(expressionOrLabelStatement,
|
||||
emptyStatement,
|
||||
blockStatement,
|
||||
variableStatement,
|
||||
ifStatement,
|
||||
iterationStatement,
|
||||
returnStatement,
|
||||
continueStatement,
|
||||
breakStatement,
|
||||
withStatement,
|
||||
switchStatement,
|
||||
throwStatement,
|
||||
tryStatement,
|
||||
debuggerStatement));
|
||||
var statement = expecting('statement',
|
||||
or(expressionOrLabelStatement,
|
||||
emptyStatement,
|
||||
blockStatement,
|
||||
variableStatement,
|
||||
ifStatement,
|
||||
iterationStatement,
|
||||
returnStatement,
|
||||
continueStatement,
|
||||
breakStatement,
|
||||
withStatement,
|
||||
switchStatement,
|
||||
throwStatement,
|
||||
tryStatement,
|
||||
debuggerStatement));
|
||||
|
||||
// PROGRAM
|
||||
|
||||
var functionDecl = named('functionDecl',
|
||||
var functionDecl = node('functionDecl',
|
||||
functionFunc(true));
|
||||
|
||||
var sourceElement = or(statement, functionDecl);
|
||||
var sourceElements = list(sourceElement);
|
||||
|
||||
var functionBody = describe('functionBody',
|
||||
opt(sourceElements,
|
||||
lookAheadToken('}')));
|
||||
var functionBody = expecting('functionBody',
|
||||
opt(sourceElements,
|
||||
lookAheadToken('}')));
|
||||
|
||||
var program = named('program',
|
||||
var program = node('program',
|
||||
seq(unpack(opt(sourceElements)),
|
||||
// we rely on the fact that opt(sourceElements)
|
||||
// will never fail, and non-first arguments
|
||||
// to seq are required to succeed -- meaning
|
||||
// this parser will never fail without throwing
|
||||
// a parse error.
|
||||
describe('statement',
|
||||
revalue(lookAheadTokenClass("EOF"),
|
||||
function (v, t) {
|
||||
if (! v)
|
||||
return null;
|
||||
// eat the ending "EOF" so that
|
||||
// our position is updated
|
||||
t.consume();
|
||||
return unpack([]);
|
||||
}))));
|
||||
expecting('statement',
|
||||
revalue(lookAheadTokenClass("EOF"),
|
||||
function (v, t) {
|
||||
if (! v)
|
||||
return null;
|
||||
// eat the ending "EOF" so that
|
||||
// our position is updated
|
||||
t.consume();
|
||||
return unpack([]);
|
||||
}))));
|
||||
|
||||
return program(tokenizer);
|
||||
return program.parse(tokenizer);
|
||||
};
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
///// TOKENIZER AND PARSER COMBINATORS
|
||||
|
||||
// XXX make Parser object with parse method?
|
||||
// XXX rework describe, call "expecting"?
|
||||
// XXX track line/col position, for errors and maybe token info
|
||||
// XXX unit tests
|
||||
|
||||
@@ -17,6 +16,26 @@ var ParseNode = function (name, children) {
|
||||
throw new Error("Expected array in new ParseNode(" + name + ", ...)");
|
||||
};
|
||||
|
||||
ParseNode.NIL = new ParseNode('nil', []);
|
||||
|
||||
var Parser = function (expecting, runFunc) {
|
||||
this.expecting = expecting;
|
||||
this._run = runFunc;
|
||||
};
|
||||
|
||||
_.extend(Parser.prototype, {
|
||||
parse: function (t, options) {
|
||||
var result = this._run(t);
|
||||
|
||||
if (options) {
|
||||
if (options.required && ! result)
|
||||
throw parseError(t, this);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
});
|
||||
|
||||
Tokenizer = function (codeOrLexer) {
|
||||
// XXX rethink codeOrLexer later
|
||||
this.lexer = (codeOrLexer instanceof Lexer ? codeOrLexer :
|
||||
@@ -66,16 +85,17 @@ _.extend(Tokenizer.prototype, {
|
||||
// A parser that consume()s has to succeed.
|
||||
// Similarly, a parser that fails can't have consumed.
|
||||
|
||||
// mutates the parser; don't describe an existing parser.
|
||||
var describe = function (description, parser) {
|
||||
parser.description = description;
|
||||
// mutates the parser
|
||||
var expecting = function (expecting, parser) {
|
||||
parser.expecting = expecting;
|
||||
return parser;
|
||||
};
|
||||
|
||||
// Call this as `throw parseError(...)`.
|
||||
// `expected` is a parser, `after` is a string.
|
||||
var parseError = function (t, expected, found) {
|
||||
var str = (expected.description ? "Expected " + expected.description :
|
||||
var parseError = function (t, expectedParser, found) {
|
||||
var str = (expectedParser.expecting ? "Expected " +
|
||||
expectedParser.expecting :
|
||||
// all parsers that might error should have descriptions,
|
||||
// but just in case:
|
||||
"Unexpected token");
|
||||
@@ -89,14 +109,14 @@ var parseError = function (t, expected, found) {
|
||||
|
||||
///// TERMINAL PARSER CONSTRUCTORS
|
||||
|
||||
var _tokenClassImpl = function (type, text, dontConsume) {
|
||||
var _tokenClassImpl = function (type, text, onlyLook) {
|
||||
var textSet = (text ? makeSet(text.split(' ')) : null);
|
||||
var description = (text ? text.split(' ').join(', ') : type);
|
||||
return describe(
|
||||
description,
|
||||
var expecting = (text ? text.split(' ').join(', ') : type);
|
||||
return new Parser(
|
||||
expecting,
|
||||
function (t) {
|
||||
if (t.peekType == type && (!text || textSet[t.peekText])) {
|
||||
if (dontConsume)
|
||||
if (onlyLook)
|
||||
return [];
|
||||
var ret = {text: t.peekText, pos: t.pos};
|
||||
t.consume();
|
||||
@@ -106,10 +126,10 @@ var _tokenClassImpl = function (type, text, dontConsume) {
|
||||
});
|
||||
};
|
||||
|
||||
// Builds a token parser for `text` by delegating to _tokenClassImpl. The token
// type is chosen from the text itself: 'KEYWORD' if it contains any word
// character (/\w/), 'PUNCTUATION' otherwise. The second argument is forwarded
// untouched; this span shows it under both of its names (`dontConsume` and
// `onlyLook`).
var _tokenImpl = function (text, dontConsume) {
|
||||
var _tokenImpl = function (text, onlyLook) {
|
||||
if (/\w/.test(text))
|
||||
return _tokenClassImpl('KEYWORD', text, dontConsume);
|
||||
return _tokenClassImpl('PUNCTUATION', text, dontConsume);
|
||||
return _tokenClassImpl('KEYWORD', text, onlyLook);
|
||||
return _tokenClassImpl('PUNCTUATION', text, onlyLook);
|
||||
};
|
||||
|
||||
var tokenClass = function (type, text) {
|
||||
@@ -122,28 +142,6 @@ var token = function (text) {
|
||||
return _tokenImpl(text);
|
||||
};
|
||||
|
||||
// Like token, but marks tokens that need to defy the lexer's
|
||||
// heuristic about whether the next '/' is a division or
|
||||
// starts a regex.
// Returns a parser carrying the same description as the plain token parser
// for `text`. `divisionNotRegex` is the value written to
// `t.lexer.divisionPermitted` while that inner parser runs.
|
||||
var preSlashToken = function (text, divisionNotRegex) {
|
||||
var impl = _tokenImpl(text);
|
||||
return describe(impl.description,
|
||||
function (t) {
|
||||
// temporarily set divisionPermitted,
|
||||
// restoring it if we don't match.
|
||||
var oldValue = t.lexer.divisionPermitted;
|
||||
var result;
|
||||
try {
|
||||
t.lexer.divisionPermitted = divisionNotRegex;
|
||||
result = impl(t);
|
||||
return result;
|
||||
} finally {
// `result` is still undefined if impl(t) threw, so the old flag is
// restored on an exception as well as on a falsy (non-matching) result.
// On a truthy result the new flag value is left in place.
|
||||
if (! result)
|
||||
t.lexer.divisionPermitted = oldValue;
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
// NON-CONSUMING PARSER CONSTRUCTORS
|
||||
|
||||
var lookAheadTokenClass = function (type, text) {
|
||||
@@ -156,45 +154,28 @@ var lookAheadToken = function (text) {
|
||||
|
||||
///// NON-TERMINAL PARSER CONSTRUCTORS
|
||||
|
||||
// run parser(tokenizer) and assert it matches
// With a `tokenizer`, the parser is invoked right away and its result is what
// gets handed to revalue; without one, the parser itself is handed to revalue.
// In both cases a falsy value makes the callback throw parseError for
// `parser`, and a truthy value is passed through unchanged.
|
||||
var runRequired = function (parser, tokenizer) {
|
||||
return revalue(
|
||||
tokenizer ? parser(tokenizer) : parser,
|
||||
function (v, t) {
|
||||
if (! v)
// `t` is only supplied when revalue wrapped a parser; when revalue was
// given a plain value, fall back to the `tokenizer` argument.
|
||||
throw parseError(t || tokenizer, parser);
|
||||
return v;
|
||||
});
|
||||
};
|
||||
|
||||
// Runs `parser` against `tokenizer`: through runRequired when `require` is
// truthy, otherwise by calling the parser directly and returning whatever it
// returns.
var runMaybeRequired = function (require, parser, tokenizer) {
|
||||
return require ? runRequired(parser, tokenizer) : parser(tokenizer);
|
||||
};
|
||||
|
||||
// Polymorphic in parsers and results; an experiment.
// `named` goes through revalue: a falsy value becomes null, and a truthy value
// is copied with Array.prototype.slice and wrapped in a ParseNode labelled
// `name`. The outer describe() call is given `name` as the description.
|
||||
var named = function (name, parserOrResult) {
|
||||
return describe(
|
||||
name,
|
||||
revalue(
|
||||
parserOrResult,
|
||||
function (value) {
|
||||
if (! value)
|
||||
return null;
|
||||
return new ParseNode(name, Array.prototype.slice.call(value));
|
||||
}));
// node(name, childrenParser): returns a Parser, constructed with `name` as its
// first argument, that runs childrenParser.parse(t). A falsy children value
// yields null; otherwise the children are passed as-is (no copy) to a new
// ParseNode labelled `name`.
|
||||
var node = function (name, childrenParser) {
|
||||
return new Parser(name, function (t) {
|
||||
var children = childrenParser.parse(t);
|
||||
if (! children)
|
||||
return null;
|
||||
return new ParseNode(name, children);
|
||||
});
|
||||
};
|
||||
|
||||
// or(p1, p2, ...): tries the given parsers in argument order on the same `t`
// and returns the first truthy result, or null when none produced one.
// This span shows two forms of the body: one returning a bare function that
// calls each argument directly (`args[i](t)`), and one returning a Parser
// (first constructor argument null) that calls `args[i].parse(t)`.
var or = function (/*parsers*/) {
|
||||
var args = arguments;
|
||||
return function (t) {
|
||||
var result;
|
||||
for(var i = 0, N = args.length; i < N; i++) {
|
||||
result = args[i](t);
|
||||
if (result)
|
||||
return result;
|
||||
}
|
||||
return null;
|
||||
};
|
||||
return new Parser(
|
||||
null,
|
||||
function (t) {
|
||||
var result;
|
||||
for(var i = 0, N = args.length; i < N; i++) {
|
||||
result = args[i].parse(t);
|
||||
if (result)
|
||||
return result;
|
||||
}
|
||||
return null;
|
||||
});
|
||||
};
|
||||
|
||||
// Parses a left-recursive expression with zero or more occurrences
|
||||
@@ -220,18 +201,18 @@ var binaryLeft = function (termParser, opParser) {
|
||||
}
|
||||
}
|
||||
|
||||
return describe(
|
||||
termParser.description,
|
||||
return new Parser(
|
||||
termParser.expecting,
|
||||
function (t) {
|
||||
var result = termParser(t);
|
||||
var result = termParser.parse(t);
|
||||
if (! result)
|
||||
return null;
|
||||
|
||||
var op;
|
||||
while ((op = opParser(t))) {
|
||||
result = named(
|
||||
while ((op = opParser.parse(t))) {
|
||||
result = new ParseNode(
|
||||
'binary',
|
||||
[result, op, runRequired(termParser, t, op)]);
|
||||
[result, op, termParser.parse(t, {required: true})]);
|
||||
}
|
||||
return result;
|
||||
});
|
||||
@@ -250,25 +231,24 @@ var list = function (itemParser, sepParser) {
|
||||
else
|
||||
array.push(newThing);
|
||||
};
|
||||
return describe(
|
||||
itemParser.description,
|
||||
return new Parser(
|
||||
itemParser.expecting,
|
||||
function (t) {
|
||||
var result = [];
|
||||
var firstItem = itemParser(t);
|
||||
var firstItem = itemParser.parse(t);
|
||||
if (! firstItem)
|
||||
return null;
|
||||
push(result, firstItem);
|
||||
|
||||
if (sepParser) {
|
||||
var sep;
|
||||
while ((sep = sepParser(t))) {
|
||||
while ((sep = sepParser.parse(t))) {
|
||||
push(result, sep);
|
||||
push(result, runRequired(itemParser, t,
|
||||
sep.unpack ? sep[sep.length - 1] : sep));
|
||||
push(result, itemParser.parse(t, {required: true}));
|
||||
}
|
||||
} else {
|
||||
var item;
|
||||
while ((item = itemParser(t)))
|
||||
while ((item = itemParser.parse(t)))
|
||||
push(result, item);
|
||||
}
|
||||
return result;
|
||||
@@ -278,20 +258,17 @@ var list = function (itemParser, sepParser) {
|
||||
var seq = function (/*parsers*/) {
|
||||
var args = arguments;
|
||||
if (! args.length)
|
||||
return describe("(empty)",
|
||||
function (t) { return []; });
|
||||
return new Parser("(empty)",
|
||||
function (t) { return []; });
|
||||
|
||||
var description = args[0].description;
|
||||
for (var i = 1; i < args.length; i++)
|
||||
description += " " + args[i].description;
|
||||
return describe(
|
||||
description,
|
||||
return new Parser(
|
||||
args[0].expecting,
|
||||
function (t) {
|
||||
var result = [];
|
||||
for (var i = 0, N = args.length; i < N; i++) {
|
||||
// first item in sequence can fail, and we
|
||||
// fail (without error); after that, error on failure
|
||||
var r = runMaybeRequired(i > 0, args[i], t);
|
||||
var r = args[i].parse(t, {required: i > 0});
|
||||
if (! r)
|
||||
return null;
|
||||
|
||||
@@ -304,8 +281,12 @@ var seq = function (/*parsers*/) {
|
||||
});
|
||||
};
|
||||
|
||||
var unpack = function (arrayParser) {
|
||||
return revalue(arrayParser, function (v) {
|
||||
var unpack = function (arrayOrParser) {
|
||||
if (isArray(arrayOrParser)) {
|
||||
arrayOrParser.unpack = true;
|
||||
return arrayOrParser;
|
||||
}
|
||||
return revalue(arrayOrParser, function (v) {
|
||||
if (v && isArray(v))
|
||||
v.unpack = true;
|
||||
return v;
|
||||
@@ -314,35 +295,36 @@ var unpack = function (arrayParser) {
|
||||
|
||||
// lookAhead parser must never consume
// Runs `nextParser` only when `lookAheadParser` returns a truthy value on the
// same `t`; otherwise the result is null. The returned parser takes its
// description / `expecting` value from `nextParser`, not from the lookahead.
// Both the function-call form and the `.parse(t)` form appear in this span.
|
||||
var lookAhead = function (lookAheadParser, nextParser) {
|
||||
return describe(
|
||||
nextParser.description,
|
||||
return new Parser(
|
||||
nextParser.expecting,
|
||||
function (t) {
|
||||
if (! lookAheadParser(t))
|
||||
if (! lookAheadParser.parse(t))
|
||||
return null;
|
||||
return nextParser(t);
|
||||
return nextParser.parse(t);
|
||||
});
|
||||
};
|
||||
|
||||
// Negated counterpart of lookAhead. With only `lookAheadParser`, the result is
// a parser that returns null when the lookahead matches and [] when it does
// not. With `nextParser` as well, `nextParser` runs only when the lookahead
// does not match, and the returned parser takes its description / `expecting`
// value from `nextParser`.
// Both the function-call form and the `.parse(t)` form appear in this span.
var negLookAhead = function (lookAheadParser, nextParser) {
|
||||
if (! nextParser)
|
||||
return function (t) {
|
||||
return lookAheadParser(t) ? null : [];
|
||||
};
|
||||
return new Parser(
|
||||
null,
|
||||
function (t) {
|
||||
return lookAheadParser.parse(t) ? null : [];
|
||||
});
|
||||
|
||||
return describe(
|
||||
nextParser.description,
|
||||
return new Parser(
|
||||
nextParser.expecting,
|
||||
function (t) {
|
||||
if (lookAheadParser(t))
|
||||
if (lookAheadParser.parse(t))
|
||||
return null;
|
||||
return nextParser(t);
|
||||
return nextParser.parse(t);
|
||||
});
|
||||
};
|
||||
|
||||
// parser that looks at nothing and returns result
// `t` is accepted but never read, and the same `result` value is returned on
// every call. This span shows two forms: a bare function, and a Parser
// constructed with null as its first argument.
|
||||
var constant = function (result) {
|
||||
// no description
|
||||
return function (t) {
|
||||
return result;
|
||||
};
|
||||
return new Parser(null,
|
||||
function (t) { return result; });
|
||||
};
|
||||
|
||||
// afterLookAhead allows the parser to fail rather than
|
||||
@@ -356,14 +338,13 @@ var constant = function (result) {
|
||||
// instead of "Expected ;" when the optional expression
|
||||
// turns out to be an illegal `var`.
|
||||
// Makes `parser` optional: the result is or(parser, fallback), where the
// fallback is `afterLookAhead` when one is given and seq() with no arguments
// (a parser that returns []) otherwise. The combined parser is labelled with
// `parser`'s own description / `expecting` value.
var opt = function (parser, afterLookAhead) {
|
||||
return describe(parser.description,
|
||||
or(parser, afterLookAhead ? afterLookAhead : seq()));
|
||||
return expecting(parser.expecting,
|
||||
or(parser, afterLookAhead ? afterLookAhead : seq()));
|
||||
};
|
||||
|
||||
// note: valueTransformFunc gets the tokenizer as a second argument
|
||||
// if it's called on a parser. This func is allowed to then
|
||||
// run more parsers.
|
||||
var revalue = function (parserOrValue, valueTransformFunc) {
|
||||
// note: valueTransformFunc gets the tokenizer as a second argument.
|
||||
// This func is allowed to then run more parsers.
|
||||
var revalue = function (parser, valueTransformFunc) {
|
||||
if (typeof valueTransformFunc !== 'function') {
|
||||
var value = valueTransformFunc;
|
||||
valueTransformFunc = function (v) {
|
||||
@@ -371,12 +352,9 @@ var revalue = function (parserOrValue, valueTransformFunc) {
|
||||
};
|
||||
}
|
||||
|
||||
if (typeof parserOrValue === 'function')
|
||||
// it's a parser
|
||||
return describe(parserOrValue.description,
|
||||
function (t) {
|
||||
return valueTransformFunc(parserOrValue(t), t);
|
||||
});
|
||||
else
|
||||
return valueTransformFunc(parserOrValue);
|
||||
return new Parser(
|
||||
parser.expecting,
|
||||
function (t) {
|
||||
return valueTransformFunc(parser.parse(t), t);
|
||||
});
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user