rewriter is halfway done, and working

This commit is contained in:
Jeremy Ashkenas
2010-01-30 17:24:48 -05:00
parent 84feab3492
commit 557cdbba71
5 changed files with 259 additions and 57 deletions

View File

@@ -49,12 +49,12 @@
re.prototype.rewrite = function rewrite(tokens) {
this.tokens = tokens;
this.adjust_comments();
// this.remove_leading_newlines()
// this.remove_mid_expression_newlines()
// this.move_commas_outside_outdents()
// this.close_open_calls_and_indexes()
this.remove_leading_newlines();
this.remove_mid_expression_newlines();
this.move_commas_outside_outdents();
this.close_open_calls_and_indexes();
// this.add_implicit_parentheses()
// this.add_implicit_indentation()
this.add_implicit_indentation();
// this.ensure_balance(BALANCED_PAIRS)
// this.rewrite_closing_parens()
return this.tokens;
@@ -78,27 +78,149 @@
// Massage newlines and indentations so that comments don't have to be
// correctly indented, or appear on their own line.
//
// Walks the token stream with scan_tokens and, for every COMMENT token:
//   * drops an INDENT/OUTDENT (or OUTDENT/INDENT) pair of equal size that
//     sits two tokens before and two tokens after the comment;
//   * when a newline precedes the comment and an INDENT sits two tokens
//     after it, moves that INDENT into the newline's slot;
//   * when the comment directly follows ordinary code, inserts a newline
//     token ahead of it.
// The number returned by the callback is handed back to scan_tokens (defined
// outside this view) -- presumably the count of tokens to advance; confirm
// against scan_tokens. Returns whatever scan_tokens returns.
re.prototype.adjust_comments = function adjust_comments() {
  var self = this;
  return this.scan_tokens(function(prev, token, post, i) {
    var after, before, wrapped;
    if (token[0] !== 'COMMENT') {
      return 1;
    }
    before = self.tokens[i - 2];
    after = self.tokens[i + 2];
    wrapped = before && after && before[1] === after[1] &&
      ((before[0] === 'INDENT' && after[0] === 'OUTDENT') ||
       (before[0] === 'OUTDENT' && after[0] === 'INDENT'));
    if (wrapped) {
      // Remove the later token first so the earlier index is still valid.
      self.tokens.splice(i + 2, 1);
      self.tokens.splice(i - 2, 1);
      return 0;
    }
    // prev / after are undefined when the comment sits at either end of the
    // stream, so check them before reading their type.
    if (prev && after && prev[0] === "\n" && after[0] === 'INDENT') {
      self.tokens.splice(i + 2, 1);
      self.tokens[i - 1] = after;
      return 1;
    }
    if (prev && prev[0] !== "\n" && prev[0] !== 'INDENT' && prev[0] !== 'OUTDENT') {
      self.tokens.splice(i, 0, ["\n", "\n"]);
      return 2;
    }
    return 1;
  });
};
// Leading newlines would introduce an ambiguity in the grammar, so we
// dispatch them here.
//
// Removes the first token when it is a newline and returns the removed
// token; returns undefined when nothing was removed. An empty token list is
// left alone instead of raising a TypeError.
re.prototype.remove_leading_newlines = function remove_leading_newlines() {
  var first = this.tokens[0];
  if (first && first[0] === "\n") {
    return this.tokens.shift();
  }
};
// Some blocks occur in the middle of expressions -- when we're expecting
// this, remove their trailing newlines.
//
// Deletes every newline token whose following token has a type listed in
// EXPRESSION_CLOSE (declared outside this view). After a deletion the
// callback returns 0, otherwise 1; that number goes back to scan_tokens --
// presumably the count of tokens to advance; confirm against scan_tokens.
// Returns whatever scan_tokens returns.
re.prototype.remove_mid_expression_newlines = function remove_mid_expression_newlines() {
  var self = this;
  return this.scan_tokens(function(prev, token, post, i) {
    // post is undefined on the last token of the stream.
    var closes_expression = post && EXPRESSION_CLOSE.indexOf(post[0]) >= 0;
    if (token[0] !== "\n" || !closes_expression) {
      return 1;
    }
    self.tokens.splice(i, 1);
    return 0;
  });
};
// Make sure that we don't accidentally break trailing commas, which need
// to go on the outside of expression closers.
//
// Whenever an OUTDENT token directly follows a ',' token, the two are
// swapped so the comma ends up after the OUTDENT. (Replacing the OUTDENT
// with itself, as this used to do, left the stream unchanged.)
// The callback always returns 1 to scan_tokens (defined outside this view);
// returns whatever scan_tokens returns.
re.prototype.move_commas_outside_outdents = function move_commas_outside_outdents() {
  var self = this;
  return this.scan_tokens(function(prev, token, post, i) {
    // prev is undefined when the OUTDENT is the first token of the stream.
    if (token[0] === 'OUTDENT' && prev && prev[0] === ',') {
      // [..., ',', OUTDENT, ...]  ->  [..., OUTDENT, ',', ...]
      self.tokens.splice(i - 1, 2, token, prev);
    }
    return 1;
  });
};
// We've tagged the opening parenthesis of a method call, and the opening
// bracket of an indexing operation. Match them with their close.
//
// `parens` and `brackets` are stacks of counters. A new counter is pushed
// for every CALL_START / INDEX_START; the top counter tracks how many plain
// '(' / '[' are still open inside it. A ')' or ']' seen while the top
// counter is zero closes the call / index itself: the counter is popped and
// the token's type is rewritten in place to CALL_END / INDEX_END. Otherwise
// the closer belongs to a plain '(' / '[' and only decrements the counter.
// The callback always returns 1 to scan_tokens (defined outside this view);
// returns whatever scan_tokens returns.
re.prototype.close_open_calls_and_indexes = function close_open_calls_and_indexes() {
  var parens = [0];
  var brackets = [0];
  return this.scan_tokens(function(prev, token, post, i) {
    switch (token[0]) {
      case 'CALL_START':
        parens.push(0);
        break;
      case 'INDEX_START':
        brackets.push(0);
        break;
      case '(':
        // JavaScript arrays have no negative indexing; address the top
        // of the stack explicitly.
        parens[parens.length - 1] += 1;
        break;
      case '[':
        brackets[brackets.length - 1] += 1;
        break;
      case ')':
        if (parens[parens.length - 1] === 0) {
          parens.pop();
          token[0] = 'CALL_END';
        } else {
          parens[parens.length - 1] -= 1;
        }
        break;
      case ']':
        if (brackets[brackets.length - 1] === 0) {
          brackets.pop();
          token[0] = 'INDEX_END';
        } else {
          brackets[brackets.length - 1] -= 1;
        }
        break;
    }
    return 1;
  });
};
// Because our grammar is LALR(1), it can't handle some single-line
// expressions that lack ending delimiters. Use the lexer to add the implicit
// blocks, so it doesn't need to.
// ')' can close a single-line block, but we need to make sure it's balanced.
//
// For every token whose type is in SINGLE_LINERS and that is not already
// followed by an INDENT (an ELSE directly followed by IF is also skipped):
//   1. insert ['INDENT', 2] right after it;
//   2. scan forward to the first token whose type is in SINGLE_CLOSERS, an
//      unbalanced ')', or the end of the stream, and insert ['OUTDENT', 2]
//      there -- before a trailing ',' if one immediately precedes that spot.
//      A block started by ELSE is not closed by another ELSE;
//   3. if the starting token was THEN, remove it and return 0, else return 1.
// SINGLE_LINERS and SINGLE_CLOSERS are declared outside this view. The
// callback's return value goes back to scan_tokens (also outside this view)
// -- presumably the count of tokens to advance; confirm against scan_tokens.
// Returns whatever scan_tokens returns.
re.prototype.add_implicit_indentation = function add_implicit_indentation() {
  var self = this;
  return this.scan_tokens(function(prev, token, post, i) {
    var ends_block, idx, insertion, parens, starter, tok;
    // Nothing follows the token (post is undefined): there is no body to
    // wrap, so leave the stream as it is.
    if (!post) {
      return 1;
    }
    if (SINGLE_LINERS.indexOf(token[0]) < 0 || post[0] === 'INDENT' ||
        (token[0] === 'ELSE' && post[0] === 'IF')) {
      return 1;
    }
    starter = token[0];
    self.tokens.splice(i + 1, 0, ['INDENT', 2]);
    idx = i + 1;
    parens = 0;
    while (true) {
      idx += 1;
      tok = self.tokens[idx];
      // Running off the end of the stream always closes the block; only
      // inspect tok's type once we know there is a token.
      ends_block = !tok ||
        ((SINGLE_CLOSERS.indexOf(tok[0]) >= 0 || (tok[0] === ')' && parens === 0)) &&
         !(starter === 'ELSE' && tok[0] === 'ELSE'));
      if (ends_block) {
        insertion = self.tokens[idx - 1][0] === "," ? idx - 1 : idx;
        self.tokens.splice(insertion, 0, ['OUTDENT', 2]);
        break;
      }
      if (tok[0] === '(') {
        parens += 1;
      }
      if (tok[0] === ')') {
        parens -= 1;
      }
    }
    if (token[0] !== 'THEN') {
      return 1;
    }
    // THEN has done its job as a block opener; drop it from the stream.
    self.tokens.splice(i, 1);
    return 0;
  });
};
})();