the rewriter is done

This commit is contained in:
Jeremy Ashkenas
2010-01-30 18:29:53 -05:00
parent c6457e010d
commit bad50c9aee
4 changed files with 193 additions and 55 deletions

View File

@@ -51,6 +51,8 @@
// The stack of all indent levels we are currently within. // The stack of all indent levels we are currently within.
this.tokens = []; this.tokens = [];
// Collection of all parsed tokens in the form [:TOKEN_TYPE, value] // Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
this.spaced = null;
// The last token that has a space following it.
while (this.i < this.code.length) { while (this.i < this.code.length) {
this.chunk = this.code.slice(this.i); this.chunk = this.code.slice(this.i);
this.extract_next_token(); this.extract_next_token();
@@ -235,7 +237,7 @@
if (!((space = this.match(WHITESPACE, 1)))) { if (!((space = this.match(WHITESPACE, 1)))) {
return false; return false;
} }
this.value().spaced = true; this.spaced = this.value();
this.i += space.length; this.i += space.length;
return true; return true;
}; };
@@ -266,7 +268,7 @@
} }
value = value || this.chunk.substr(0, 1); value = value || this.chunk.substr(0, 1);
tag = value.match(ASSIGNMENT) ? 'ASSIGN' : value; tag = value.match(ASSIGNMENT) ? 'ASSIGN' : value;
if (this.value() && !this.value().spaced && CALLABLE.indexOf(this.tag() >= 0)) { if (this.value() !== this.spaced && CALLABLE.indexOf(this.tag()) >= 0) {
if (value === '(') { if (value === '(') {
tag = 'CALL_START'; tag = 'CALL_START';
} }
@@ -331,12 +333,11 @@
// parameter identifiers in order to avoid this. Also, parameter lists can // parameter identifiers in order to avoid this. Also, parameter lists can
// make use of splats. // make use of splats.
lex.prototype.tag_parameters = function tag_parameters() { lex.prototype.tag_parameters = function tag_parameters() {
var __a, i, tok; var i, tok;
if (this.tag() !== ')') { if (this.tag() !== ')') {
return null; return null;
} }
i = 0; i = 0;
__a = [];
while (true) { while (true) {
i += 1; i += 1;
tok = this.tokens[this.tokens.length - i]; tok = this.tokens[this.tokens.length - i];
@@ -351,7 +352,7 @@
return (tok[0] = 'PARAM_START'); return (tok[0] = 'PARAM_START');
} }
} }
return __a; return true;
}; };
// Close up all remaining open blocks. IF the first token is an indent, // Close up all remaining open blocks. IF the first token is an indent,
// axe it. // axe it.

View File

@@ -1,5 +1,6 @@
(function(){ (function(){
var BALANCED_PAIRS, EXPRESSION_CLOSE, EXPRESSION_START, EXPRESSION_TAIL, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, SINGLE_CLOSERS, SINGLE_LINERS, __a, __b, __c, __d, __e, __f, __g, __h, pair, re; var BALANCED_PAIRS, EXPRESSION_CLOSE, EXPRESSION_START, EXPRESSION_TAIL, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, SINGLE_CLOSERS, SINGLE_LINERS, __a, __b, __c, __d, __e, __f, __g, __h, pair, re;
var __hasProp = Object.prototype.hasOwnProperty;
// In order to keep the grammar simple, the stream of tokens that the Lexer // In order to keep the grammar simple, the stream of tokens that the Lexer
// emits is rewritten by the Rewriter, smoothing out ambiguities, mis-nested // emits is rewritten by the Rewriter, smoothing out ambiguities, mis-nested
// indentation, and single-line flavors of expressions. // indentation, and single-line flavors of expressions.
@@ -55,8 +56,8 @@
this.close_open_calls_and_indexes(); this.close_open_calls_and_indexes();
this.add_implicit_parentheses(); this.add_implicit_parentheses();
this.add_implicit_indentation(); this.add_implicit_indentation();
// this.ensure_balance(BALANCED_PAIRS) this.ensure_balance(BALANCED_PAIRS);
// this.rewrite_closing_parens() this.rewrite_closing_parens();
return this.tokens; return this.tokens;
}; };
// Rewrite the token stream, looking one token ahead and behind. // Rewrite the token stream, looking one token ahead and behind.
@@ -157,12 +158,12 @@
} else if (token[0] === 'INDEX_START') { } else if (token[0] === 'INDEX_START') {
brackets.push(0); brackets.push(0);
} else if (token[0] === '(') { } else if (token[0] === '(') {
parens[-1] += 1; parens[parens.length - 1] += 1;
} else if (token[0] === '[') { } else if (token[0] === '[') {
brackets[-1] += 1; brackets[brackets.length - 1] += 1;
} else if (token[0] === ')') { } else if (token[0] === ')') {
if (parens[parens.length - 1] === 0) { if (parens[parens.length - 1] === 0) {
parens.pop; parens.pop();
token[0] = 'CALL_END'; token[0] = 'CALL_END';
} else { } else {
parens[parens.length - 1] -= 1; parens[parens.length - 1] -= 1;
@@ -261,4 +262,116 @@
}); });
})(this)); })(this));
}; };
// Ensure that all listed pairs of tokens are correctly balanced throughout
// the course of the token stream.
//
// pairs: a list of two-element arrays, each [open_tag, close_tag].
//
// Throws the string "too many <value>" as soon as a closing tag outnumbers
// its opening tag, and the string "unclosed <open_tag>" if any opening tag is
// still unmatched once the whole stream has been scanned. Returns nothing
// when the stream is balanced.
re.prototype.ensure_balance = function ensure_balance(pairs) {
  var key, levels, unclosed;
  // Running count of currently-open tokens, keyed by the opening tag.
  levels = {};
  this.scan_tokens(function(prev, token, post, i) {
    // All loop state is local here, so the module-level `pair` variable is
    // never overwritten while checking balance.
    var close, idx, open;
    for (idx = 0; idx < pairs.length; idx++) {
      open = pairs[idx][0];
      close = pairs[idx][1];
      levels[open] = levels[open] || 0;
      if (token[0] === open) {
        levels[open] += 1;
      }
      if (token[0] === close) {
        levels[open] -= 1;
      }
      // A negative level means a closer appeared before its opener.
      if (levels[open] < 0) {
        throw "too many " + token[1];
      }
    }
    // NOTE(review): presumably the number of tokens scan_tokens should
    // advance by -- confirm against scan_tokens.
    return 1;
  });
  // Collect every opening tag that is still waiting for its closer.
  unclosed = [];
  for (key in levels) {
    if (__hasProp.call(levels, key) && levels[key] > 0) {
      unclosed.push(key);
    }
  }
  if (unclosed.length) {
    throw "unclosed " + unclosed[0];
  }
};
// We'd like to support syntax like this:
//     el.click((event) ->
//       el.hide())
// To make that work, outdents that follow closing parens are moved inwards,
// safely:
//
// 1. Walk the stream keeping a stack of open delimiters: every token whose
//    tag is in EXPRESSION_START is pushed.
// 2. When a token whose tag is in EXPRESSION_TAIL arrives, pop the stack. If
//    the closer is not the inverse of what was popped, insert the correct
//    closer in front of it.
// 3. Every inserted (fake) closer is recorded as "debt" against its opener
//    tag; when the real closer shows up later it is removed from the stream
//    and the debt is paid off, so the stream ends up balanced.
//
// Edits this.tokens in place and returns whatever scan_tokens returns.
re.prototype.rewrite_closing_parens = function rewrite_closing_parens() {
  var self = this;
  var pending = [];
  var owed = {};
  var name;
  // Start with zero debt for every tag listed in INVERSES.
  for (name in INVERSES) {
    if (__hasProp.call(INVERSES, name)) {
      owed[name] = 0;
    }
  }
  return self.scan_tokens(function(prev, token, post, i) {
    var tag = token[0];
    var closerTag, closerValue, opener, openerTag;
    // Openers simply go onto the stack.
    if (EXPRESSION_START.indexOf(tag) >= 0) {
      pending.push(token);
      return 1;
    }
    // Anything that does not end an expression passes through untouched.
    if (EXPRESSION_TAIL.indexOf(tag) < 0) {
      return 1;
    }
    // A closer we already faked earlier: drop the real one, settle the debt.
    if (owed[INVERSES[tag]] > 0) {
      owed[INVERSES[tag]] -= 1;
      self.tokens.splice(i, 1);
      return 0;
    }
    // Otherwise match the closer against the most recent opener.
    opener = pending.pop();
    openerTag = opener[0];
    closerTag = INVERSES[openerTag];
    if (tag !== closerTag) {
      // Unexpected closer: insert the expected one before it and remember
      // that its real counterpart must be swallowed later.
      owed[openerTag] += 1;
      closerValue = openerTag === 'INDENT' ? opener[1] : closerTag;
      self.tokens.splice(i, 0, [closerTag, closerValue]);
    }
    return 1;
  });
};
})(); })();

View File

@@ -66,6 +66,7 @@ lex::tokenize: (code) ->
this.indent : 0 # The current indent level. this.indent : 0 # The current indent level.
this.indents : [] # The stack of all indent levels we are currently within. this.indents : [] # The stack of all indent levels we are currently within.
this.tokens : [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value] this.tokens : [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
this.spaced : null # The last token that has a space following it.
while this.i < this.code.length while this.i < this.code.length
this.chunk: this.code.slice(this.i) this.chunk: this.code.slice(this.i)
this.extract_next_token() this.extract_next_token()
@@ -192,7 +193,7 @@ lex::outdent_token: (move_out) ->
# Matches and consumes non-meaningful whitespace. # Matches and consumes non-meaningful whitespace.
lex::whitespace_token: -> lex::whitespace_token: ->
return false unless space: this.match WHITESPACE, 1 return false unless space: this.match WHITESPACE, 1
this.value().spaced: true this.spaced: this.value()
this.i += space.length this.i += space.length
true true
@@ -216,7 +217,7 @@ lex::literal_token: ->
this.tag_parameters() if value and value.match(CODE) this.tag_parameters() if value and value.match(CODE)
value ||= this.chunk.substr(0, 1) value ||= this.chunk.substr(0, 1)
tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value
if this.value() and !this.value().spaced and CALLABLE.indexOf(this.tag() >= 0) if this.value() isnt this.spaced and CALLABLE.indexOf(this.tag()) >= 0
tag: 'CALL_START' if value is '(' tag: 'CALL_START' if value is '('
tag: 'INDEX_START' if value is '[' tag: 'INDEX_START' if value is '['
this.token tag, value this.token tag, value
@@ -272,6 +273,7 @@ lex::tag_parameters: ->
when 'IDENTIFIER' then tok[0]: 'PARAM' when 'IDENTIFIER' then tok[0]: 'PARAM'
when ')' then tok[0]: 'PARAM_END' when ')' then tok[0]: 'PARAM_END'
when '(' then return tok[0]: 'PARAM_START' when '(' then return tok[0]: 'PARAM_START'
true
# Close up all remaining open blocks. IF the first token is an indent, # Close up all remaining open blocks. IF the first token is an indent,
# axe it. # axe it.

View File

@@ -47,8 +47,8 @@ re::rewrite: (tokens) ->
this.close_open_calls_and_indexes() this.close_open_calls_and_indexes()
this.add_implicit_parentheses() this.add_implicit_parentheses()
this.add_implicit_indentation() this.add_implicit_indentation()
# this.ensure_balance(BALANCED_PAIRS) this.ensure_balance(BALANCED_PAIRS)
# this.rewrite_closing_parens() this.rewrite_closing_parens()
this.tokens this.tokens
# Rewrite the token stream, looking one token ahead and behind. # Rewrite the token stream, looking one token ahead and behind.
@@ -116,11 +116,11 @@ re::close_open_calls_and_indexes: ->
switch token[0] switch token[0]
when 'CALL_START' then parens.push(0) when 'CALL_START' then parens.push(0)
when 'INDEX_START' then brackets.push(0) when 'INDEX_START' then brackets.push(0)
when '(' then parens[-1] += 1 when '(' then parens[parens.length - 1] += 1
when '[' then brackets[-1] += 1 when '[' then brackets[brackets.length - 1] += 1
when ')' when ')'
if parens[parens.length - 1] is 0 if parens[parens.length - 1] is 0
parens.pop parens.pop()
token[0]: 'CALL_END' token[0]: 'CALL_END'
else else
parens[parens.length - 1] -= 1 parens[parens.length - 1] -= 1
@@ -181,42 +181,64 @@ re::add_implicit_indentation: ->
this.tokens.splice(i, 1) this.tokens.splice(i, 1)
return 0 return 0
# Ensure that all listed pairs of tokens are correctly balanced throughout
# the course of the token stream.
#
# pairs is a list of [open, close] tag tuples. Throws the string
# "too many <value>" when a closing tag outnumbers its opening tag, and
# "unclosed <open>" when an opening tag is still unmatched after the scan.
re::ensure_balance: (pairs) ->
# Running count of currently-open tokens, keyed by the opening tag.
levels: {}
this.scan_tokens (prev, token, post, i) =>
for pair in pairs
[open, close]: pair
levels[open] ||= 0
levels[open] += 1 if token[0] is open
levels[open] -= 1 if token[0] is close
# A negative level means a closer appeared before its opener.
throw "too many " + token[1] if levels[open] < 0
# NOTE(review): presumably the number of tokens scan_tokens should advance by -- confirm.
return 1
# Every opening tag whose count is still positive was never closed.
unclosed: key for key, value of levels when value > 0
throw "unclosed " + unclosed[0] if unclosed.length
# We'd like to support syntax like this:
# el.click((event) ->
# el.hide())
# In order to accomplish this, move outdents that follow closing parens
# inwards, safely. The steps to accomplish this are:
#
# 1. Check that all paired tokens are balanced and in order.
# 2. Rewrite the stream with a stack: if you see an '(' or INDENT, add it
# to the stack. If you see an ')' or OUTDENT, pop the stack and replace
# it with the inverse of what we've just popped.
# 3. Keep track of "debt" for tokens that we fake, to make sure we end
# up balanced in the end.
#
# Edits this.tokens in place; the result of this.scan_tokens is the
# function's value.
re::rewrite_closing_parens: ->
# stack holds the opener tokens that have not been closed yet.
stack: []
# debt counts, per tag in INVERSES, how many closers were inserted ahead of
# time and therefore still have a real counterpart to be swallowed.
debt: {}
(debt[key]: 0) for key, val of INVERSES
this.scan_tokens (prev, token, post, i) =>
tag: token[0]
inv: INVERSES[token[0]]
# Push openers onto the stack.
if EXPRESSION_START.indexOf(tag) >= 0
stack.push(token)
return 1
# The end of an expression, check stack and debt for a pair.
else if EXPRESSION_TAIL.indexOf(tag) >= 0
# If the tag is already in our debt, swallow it.
if debt[inv] > 0
debt[inv] -= 1
this.tokens.splice(i, 1)
# The token at index i was removed, so 0 is returned instead of 1.
# NOTE(review): presumably this keeps scan_tokens on the same index -- confirm.
return 0
else
# Pop the stack of open delimiters.
match: stack.pop()
mtag: match[0]
# Continue onwards if it's the expected tag.
if tag is INVERSES[mtag]
return 1
else
# Unexpected close, insert correct close, adding to the debt.
debt[mtag] += 1
# A faked OUTDENT reuses the value of the INDENT it closes; any other
# faked closer uses its own tag as its value.
val: if mtag is 'INDENT' then match[1] else INVERSES[mtag]
this.tokens.splice(i, 0, [INVERSES[mtag], val])
return 1
else
return 1