ParseNode.stringify and unstringify

This commit is contained in:
David Greenspan
2012-09-12 15:15:05 -07:00
parent f9dba29b2d
commit 4ee4e2451c
3 changed files with 102 additions and 80 deletions

View File

@@ -28,13 +28,26 @@ var makeSet = function (array) {
};
JSParser = function (code) {
// Constructs a parser over `code`.
//
// `options` is optional. Pass {tokens:'strings'} to have token parsers
// return the token's text (a string) instead of the token object itself;
// any other value (or no options at all) yields token objects.
JSParser = function (code, options) {
  this.lexer = new JSLexer(code);
  this.oldToken = null;
  this.newToken = null;
  this.pos = 0;
  this.isLineTerminatorHere = false;

  var wantStrings = ((options || {}).tokens === 'strings');
  // tokenFunc maps a consumed token to the value placed in the tree.
  this.tokenFunc = wantStrings ?
    function (tok) { return tok.text(); } :
    function (tok) { return tok; };

  // NOTE(review): presumably this loads the first token into newToken;
  // consumeNewToken is defined outside this view -- confirm.
  this.consumeNewToken();
};
@@ -71,6 +84,8 @@ JSParser.prototype.getParseError = function (expecting, found) {
};
JSParser.prototype.getSyntaxTree = function () {
var self = this;
var NIL = new ParseNode('nil', []);
var booleanFlaggedParser = function (parserConstructFunc) {
@@ -106,7 +121,7 @@ JSParser.prototype.getSyntaxTree = function () {
function (t) {
if (t.newToken.type() === type && textSet[t.newToken.text()]) {
t.consumeNewToken();
return t.oldToken;
return self.tokenFunc(t.oldToken);
}
return null;
});
@@ -116,7 +131,7 @@ JSParser.prototype.getSyntaxTree = function () {
return new Parser(type, function (t) {
if (t.newToken.type() === type) {
t.consumeNewToken();
return t.oldToken;
return self.tokenFunc(t.oldToken);
}
return null;
});

View File

@@ -61,83 +61,12 @@ var allNodeNames = [
var allNodeNamesSet = {};
_.each(allNodeNames, function (n) { allNodeNamesSet[n] = true; });
// The "tree string" format is a simple format for representing syntax trees.
//
// For example, the parse of `x++;` is written as:
// "program(expressionStmnt(postfix(identifier(x) ++) ;))"
//
// A Node is written as "name(item1 item2 item3)", with additional whitespace
// allowed anywhere between the name, parentheses, and items.
//
// Tokens don't need to be escaped unless they contain '(', ')', whitespace, or
// backticks. If they do, they can be written enclosed in backticks. To escape
// a backtick within backticks, double it.
//
// `stringifyTree` generates "canonical" tree strings, which have no extra escaping
// or whitespace, just one space between items in a Node.
// Parses a "tree string" (see the format description above) into a tree.
//
// Returns a ParseNode for "name(...)" input, a plain string when the input
// is a single token, and undefined when the input contains no tokens.
//
// Throws an Error (with the offending input in the message) for:
//   - a '(' that is not preceded by a node name,
//   - an unterminated backtick,
//   - more than one top-level item,
//   - unbalanced parentheses, in either direction.
var parseTreeString = function (str) {
  var results = [];
  var ptrStack = [];
  var ptr = results;
  // Pieces, in order of preference: '(' | ')' | a backtick-quoted token
  // (with `` standing for a literal backtick) | a lone, unterminated
  // backtick | a run of ordinary characters.
  // `match` returns null when there is nothing to tokenize.
  var pieces = str.match(/\(|\)|`(?:[^`]|``)*`|`|[^\s()`]+/g) || [];
  for (var i = 0; i < pieces.length; i++) {
    var txt = pieces[i];
    switch (txt.charAt(0)) {
    case '(':
      if (! ptr.length || (typeof ptr[ptr.length - 1] !== "string"))
        throw new Error("Nameless node in " + str);
      // The preceding string becomes the name, stored as element 0 of the
      // array that collects this node's children.
      var newArray = [ptr.pop()];
      ptr.push(newArray);
      ptrStack.push(ptr);
      ptr = newArray;
      break;
    case ')':
      // A ')' with no open node would otherwise pop `undefined` off the
      // stack and crash with an unhelpful TypeError.
      if (! ptrStack.length)
        throw new Error("Mismatched parentheses in " + str);
      ptr = ptrStack.pop();
      var nodeArray = ptr.pop();
      ptr.push(new ParseNode(nodeArray[0], nodeArray.slice(1)));
      break;
    case '`':
      if (txt.length === 1)
        throw new Error("Mismatched ` in " + str);
      ptr.push(txt.slice(1, -1).replace(/``/g, '`'));
      break;
    default:
      ptr.push(txt);
      break;
    }
    if (results.length > 1)
      throw new Error("Not expecting " + txt + " in " + str);
  }
  if (ptr !== results)
    throw new Error("Mismatched parentheses in " + str);
  return results[0];
};
// Escapes a token (or node name) for inclusion in a tree string.
//
// Strings containing whitespace, '(', ')' or a backtick are wrapped in
// backticks, with each embedded backtick doubled. The empty token is
// written as two backticks; emitted bare it would vanish from the output
// and the tree string would no longer parse back to the same tree.
// Anything else is returned unchanged.
var escapeTokenString = function (str) {
  if (! str)
    return '``';
  if (/[\s()`]/.test(str))
    return '`' + str.replace(/`/g, '``') + '`';
  return str;
};
// Serializes a tree to its canonical tree string: "name(child child ...)"
// for a ParseNode, and the (escaped) token text for a leaf.
var stringifyTree = function (tree) {
  if (! (tree instanceof ParseNode)) {
    // Leaf: either a token object exposing text(), or already a string.
    var token = tree.text ? tree.text() : tree;
    return escapeTokenString(token);
  }

  var head = escapeTokenString(tree.name);
  var items = _.map(tree.children, stringifyTree);
  return head + '(' + items.join(' ') + ')';
};
// Parses `code` with JSParser and returns the resulting syntax tree in
// tree-string form.
var parseToTreeString = function (code) {
  var parser = new JSParser(code);
  var syntaxTree = parser.getSyntaxTree();
  return stringifyTree(syntaxTree);
};
var makeTester = function (test) {
return {
// Parse code and make sure it matches expectedTreeString.
goodParse: function (code, expectedTreeString, regexTokenHints) {
var expectedTree = parseTreeString(expectedTreeString);
var expectedTree = ParseNode.unstringify(expectedTreeString);
// first use lexer to collect all tokens
var lexer = new JSLexer(code);
@@ -189,8 +118,8 @@ var makeTester = function (test) {
test.equal(parser.pos, code.length);
test.equal(stringifyTree(actualTree),
stringifyTree(expectedTree), code);
test.equal(ParseNode.stringify(actualTree),
ParseNode.stringify(expectedTree), code);
},
// Takes code with part of it surrounding with backticks.
// Removes the two backtick characters, tries to parse the code,
@@ -579,9 +508,6 @@ Tinytest.add("jsparse - syntax forms", function (test) {
});
});
// Generating a trial:
//(function (s) { return JSON.stringify([s, parseToTreeString(s)]); })('...')
Tinytest.add("jsparse - bad parses", function (test) {
var tester = makeTester(test);
var trials = [

View File

@@ -16,6 +16,87 @@ ParseNode = function (name, children) {
throw new Error("Expected array in new ParseNode(" + name + ", ...)");
};
// Instance-method convenience: returns the tree string for this node by
// delegating to the static ParseNode.stringify.
ParseNode.prototype.stringify = function () {
return ParseNode.stringify(this);
};
// Renders a token (or node name) for a tree string. Text containing
// whitespace, parentheses or a backtick is wrapped in backticks with any
// embedded backtick doubled; an empty (falsy) value becomes two backticks;
// everything else is passed through untouched.
var escapeTokenString = function (str) {
  var needsQuoting = /[\s()`]/.test(str);
  if (needsQuoting) {
    return '`' + str.replace(/`/g, '``') + '`';
  }
  // An empty token still has to be visible in the output.
  return str ? str : '``';
};
// The "tree string" format is a simple format for representing syntax trees.
//
// For example, the parse of `x++;` is written as:
// "program(expressionStmnt(postfix(identifier(x) ++) ;))"
//
// A Node is written as "name(item1 item2 item3)", with additional whitespace
// allowed anywhere between the name, parentheses, and items.
//
// Tokens don't need to be escaped unless they are empty or contain '(', ')',
// whitespace, or backticks. Such tokens are written enclosed in backticks
// (so the empty token is written as two backticks).
// To escape a backtick within backticks, double it.
//
// `stringify` generates "canonical" tree strings, which have no extra escaping
// or whitespace, just one space between items in a Node.
// Static serializer. `tree` may be a ParseNode, a token object, or a plain
// value; the result is the canonical tree string for it.
ParseNode.stringify = function (tree) {
  if (! (tree instanceof ParseNode)) {
    // Leaf. A token may expose its text through a text() method or a
    // truthy `text` property; anything else is stringified as-is.
    var leaf = tree;
    if (typeof tree.text === 'function') {
      leaf = tree.text();
    } else if (tree.text) {
      leaf = tree.text;
    }
    return escapeTokenString(String(leaf));
  }

  // Node: escaped name, then the space-separated children in parentheses.
  var head = escapeTokenString(tree.name);
  var items = _.map(tree.children, ParseNode.stringify);
  return head + '(' + items.join(' ') + ')';
};
// Parses a tree string (the format produced by ParseNode.stringify) back
// into a tree.
//
// Returns a ParseNode for "name(...)" input, a plain string when the input
// is a single token, and undefined when the input contains no tokens.
//
// Throws an Error (with the offending input in the message) for:
//   - a '(' that is not preceded by a node name,
//   - an unterminated backtick,
//   - more than one top-level item,
//   - unbalanced parentheses, in either direction.
ParseNode.unstringify = function (str) {
  var results = [];
  var ptrStack = [];
  var ptr = results;
  // Pieces, in order of preference: '(' | ')' | a backtick-quoted token
  // (with `` standing for a literal backtick) | a lone, unterminated
  // backtick | a run of ordinary characters.
  // `match` returns null when there is nothing to tokenize.
  var pieces = str.match(/\(|\)|`(?:[^`]|``)*`|`|[^\s()`]+/g) || [];
  for (var i = 0; i < pieces.length; i++) {
    var txt = pieces[i];
    switch (txt.charAt(0)) {
    case '(':
      if (! ptr.length || (typeof ptr[ptr.length - 1] !== "string"))
        throw new Error("Nameless node in " + str);
      // The preceding string becomes the name, stored as element 0 of the
      // array that collects this node's children.
      var newArray = [ptr.pop()];
      ptr.push(newArray);
      ptrStack.push(ptr);
      ptr = newArray;
      break;
    case ')':
      // A ')' with no open node would otherwise pop `undefined` off the
      // stack and crash with an unhelpful TypeError.
      if (! ptrStack.length)
        throw new Error("Mismatched parentheses in " + str);
      ptr = ptrStack.pop();
      var nodeArray = ptr.pop();
      ptr.push(new ParseNode(nodeArray[0], nodeArray.slice(1)));
      break;
    case '`':
      if (txt.length === 1)
        throw new Error("Mismatched ` in " + str);
      // Strip the enclosing backticks and un-double the escaped ones; two
      // bare backticks therefore yield the empty token.
      ptr.push(txt.slice(1, -1).replace(/``/g, '`'));
      break;
    default:
      ptr.push(txt);
      break;
    }
    if (results.length > 1)
      throw new Error("Not expecting " + txt + " in " + str);
  }
  if (ptr !== results)
    throw new Error("Mismatched parentheses in " + str);
  return results[0];
};
Parser = function (expecting, runFunc) {
this.expecting = expecting;
this._run = runFunc;