diff --git a/lib/coffee_script/lexer.js b/lib/coffee_script/lexer.js index f8b25f1c..0b0ea180 100644 --- a/lib/coffee_script/lexer.js +++ b/lib/coffee_script/lexer.js @@ -51,6 +51,8 @@ // The stack of all indent levels we are currently within. this.tokens = []; // Collection of all parsed tokens in the form [:TOKEN_TYPE, value] + this.spaced = null; + // The last token that has a space following it. while (this.i < this.code.length) { this.chunk = this.code.slice(this.i); this.extract_next_token(); @@ -235,7 +237,7 @@ if (!((space = this.match(WHITESPACE, 1)))) { return false; } - this.value().spaced = true; + this.spaced = this.value(); this.i += space.length; return true; }; @@ -266,7 +268,7 @@ } value = value || this.chunk.substr(0, 1); tag = value.match(ASSIGNMENT) ? 'ASSIGN' : value; - if (this.value() && !this.value().spaced && CALLABLE.indexOf(this.tag() >= 0)) { + if (this.value() !== this.spaced && CALLABLE.indexOf(this.tag()) >= 0) { if (value === '(') { tag = 'CALL_START'; } @@ -331,12 +333,11 @@ // parameter identifiers in order to avoid this. Also, parameter lists can // make use of splats. lex.prototype.tag_parameters = function tag_parameters() { - var __a, i, tok; + var i, tok; if (this.tag() !== ')') { return null; } i = 0; - __a = []; while (true) { i += 1; tok = this.tokens[this.tokens.length - i]; @@ -351,7 +352,7 @@ return (tok[0] = 'PARAM_START'); } } - return __a; + return true; }; // Close up all remaining open blocks. IF the first token is an indent, // axe it. 
diff --git a/lib/coffee_script/rewriter.js b/lib/coffee_script/rewriter.js index 8f9a3720..2e274485 100644 --- a/lib/coffee_script/rewriter.js +++ b/lib/coffee_script/rewriter.js @@ -1,5 +1,6 @@ (function(){ var BALANCED_PAIRS, EXPRESSION_CLOSE, EXPRESSION_START, EXPRESSION_TAIL, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, SINGLE_CLOSERS, SINGLE_LINERS, __a, __b, __c, __d, __e, __f, __g, __h, pair, re; + var __hasProp = Object.prototype.hasOwnProperty; // In order to keep the grammar simple, the stream of tokens that the Lexer // emits is rewritten by the Rewriter, smoothing out ambiguities, mis-nested // indentation, and single-line flavors of expressions. @@ -55,8 +56,8 @@ this.close_open_calls_and_indexes(); this.add_implicit_parentheses(); this.add_implicit_indentation(); - // this.ensure_balance(BALANCED_PAIRS) - // this.rewrite_closing_parens() + this.ensure_balance(BALANCED_PAIRS); + this.rewrite_closing_parens(); return this.tokens; }; // Rewrite the token stream, looking one token ahead and behind. @@ -157,12 +158,12 @@ } else if (token[0] === 'INDEX_START') { brackets.push(0); } else if (token[0] === '(') { - parens[-1] += 1; + parens[parens.length - 1] += 1; } else if (token[0] === '[') { - brackets[-1] += 1; + brackets[brackets.length - 1] += 1; } else if (token[0] === ')') { if (parens[parens.length - 1] === 0) { - parens.pop; + parens.pop(); token[0] = 'CALL_END'; } else { parens[parens.length - 1] -= 1; @@ -261,4 +262,116 @@ }); })(this)); }; + // Ensure that all listed pairs of tokens are correctly balanced throughout + // the course of the token stream. 
// `pairs` is a list of `[open, close]` tag pairs. Every token is visited
// through `scan_tokens` while one running depth counter is kept per opening
// tag. Throws the string "too many <token value>" as soon as a closing tag
// shows up with no opener before it, and "unclosed <tag>" if any opener is
// still open once the stream has been scanned. Returns nothing otherwise.
re.prototype.ensure_balance = function ensure_balance(pairs) {
  var levels, tag;
  // Running nesting depth, keyed by each pair's opening tag.
  levels = {};
  this.scan_tokens(function(prev, token, post, i) {
    // Loop state is declared here so the callback never writes to variables
    // that belong to an outer scope.
    var close, k, open;
    for (k = 0; k < pairs.length; k++) {
      open = pairs[k][0];
      close = pairs[k][1];
      levels[open] = levels[open] || 0;
      if (token[0] === open) {
        levels[open] += 1;
      }
      if (token[0] === close) {
        levels[open] -= 1;
      }
      // A negative depth means this closer arrived before any opener.
      if (levels[open] < 0) {
        throw "too many " + token[1];
      }
    }
    // NOTE(review): presumably the number of tokens scan_tokens should
    // advance by -- confirm against scan_tokens.
    return 1;
  });
  // Report the first opening tag (in insertion order) that never closed.
  for (tag in levels) {
    if (Object.prototype.hasOwnProperty.call(levels, tag) && levels[tag] > 0) {
      throw "unclosed " + tag;
    }
  }
};
// We'd like to support syntax like this:
//    el.click((event) ->
//      el.hide())
// In order to accomplish this, move outdents that follow closing parens
// inwards, safely. The steps to accomplish this are:
//
// 1. Check that all paired tokens are balanced and in order.
// 2. Rewrite the stream with a stack: if you see an '(' or INDENT, add it
//    to the stack. If you see an ')' or OUTDENT, pop the stack and replace
//    it with the inverse of what we've just popped.
// 3. Keep track of "debt" for tokens that we fake, to make sure we end
//    up balanced in the end.
+ re.prototype.rewrite_closing_parens = function rewrite_closing_parens() { + var __i, debt, key, stack, val; + stack = []; + debt = { + }; + __i = INVERSES; + for (key in __i) { + val = __i[key]; + if (__hasProp.call(__i, key)) { + ((debt[key] = 0)); + } + } + return this.scan_tokens((function(__this) { + var __func = function(prev, token, post, i) { + var inv, match, mtag, tag; + tag = token[0]; + inv = INVERSES[token[0]]; + // Push openers onto the stack. + if (EXPRESSION_START.indexOf(tag) >= 0) { + stack.push(token); + return 1; + // The end of an expression, check stack and debt for a pair. + } else if (EXPRESSION_TAIL.indexOf(tag) >= 0) { + // If the tag is already in our debt, swallow it. + if (debt[inv] > 0) { + debt[inv] -= 1; + this.tokens.splice(i, 1); + return 0; + } else { + // Pop the stack of open delimiters. + match = stack.pop(); + mtag = match[0]; + // Continue onwards if it's the expected tag. + if (tag === INVERSES[mtag]) { + return 1; + } else { + // Unexpected close, insert correct close, adding to the debt. + debt[mtag] += 1; + val = mtag === 'INDENT' ? match[1] : INVERSES[mtag]; + this.tokens.splice(i, 0, [INVERSES[mtag], val]); + return 1; + } + } + } else { + return 1; + } + }; + return (function() { + return __func.apply(__this, arguments); + }); + })(this)); + }; })(); \ No newline at end of file diff --git a/src/lexer.coffee b/src/lexer.coffee index 500706a0..42021a3b 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -66,6 +66,7 @@ lex::tokenize: (code) -> this.indent : 0 # The current indent level. this.indents : [] # The stack of all indent levels we are currently within. this.tokens : [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value] + this.spaced : null # The last token that has a space following it. while this.i < this.code.length this.chunk: this.code.slice(this.i) this.extract_next_token() @@ -192,7 +193,7 @@ lex::outdent_token: (move_out) -> # Matches and consumes non-meaningful whitespace. 
lex::whitespace_token: -> return false unless space: this.match WHITESPACE, 1 - this.value().spaced: true + this.spaced: this.value() this.i += space.length true @@ -216,7 +217,7 @@ lex::literal_token: -> this.tag_parameters() if value and value.match(CODE) value ||= this.chunk.substr(0, 1) tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value - if this.value() and !this.value().spaced and CALLABLE.indexOf(this.tag() >= 0) + if this.value() isnt this.spaced and CALLABLE.indexOf(this.tag()) >= 0 tag: 'CALL_START' if value is '(' tag: 'INDEX_START' if value is '[' this.token tag, value @@ -272,6 +273,7 @@ lex::tag_parameters: -> when 'IDENTIFIER' then tok[0]: 'PARAM' when ')' then tok[0]: 'PARAM_END' when '(' then return tok[0]: 'PARAM_START' + true # Close up all remaining open blocks. IF the first token is an indent, # axe it. diff --git a/src/rewriter.coffee b/src/rewriter.coffee index 1e070340..79a74db8 100644 --- a/src/rewriter.coffee +++ b/src/rewriter.coffee @@ -47,8 +47,8 @@ re::rewrite: (tokens) -> this.close_open_calls_and_indexes() this.add_implicit_parentheses() this.add_implicit_indentation() - # this.ensure_balance(BALANCED_PAIRS) - # this.rewrite_closing_parens() + this.ensure_balance(BALANCED_PAIRS) + this.rewrite_closing_parens() this.tokens # Rewrite the token stream, looking one token ahead and behind. @@ -116,11 +116,11 @@ re::close_open_calls_and_indexes: -> switch token[0] when 'CALL_START' then parens.push(0) when 'INDEX_START' then brackets.push(0) - when '(' then parens[-1] += 1 - when '[' then brackets[-1] += 1 + when '(' then parens[parens.length - 1] += 1 + when '[' then brackets[brackets.length - 1] += 1 when ')' if parens[parens.length - 1] is 0 - parens.pop + parens.pop() token[0]: 'CALL_END' else parens[parens.length - 1] -= 1 @@ -181,42 +181,64 @@ re::add_implicit_indentation: -> this.tokens.splice(i, 1) return 0 +# Ensure that all listed pairs of tokens are correctly balanced throughout +# the course of the token stream. 
# `pairs` is a list of [open, close] tag pairs. Throws the string
# "too many <token value>" when a closing tag appears with no opener before
# it, and "unclosed <tag>" when an opener is still open after the scan.
re::ensure_balance: (pairs) ->
  # Running nesting depth, keyed by each pair's opening tag.
  levels: {}
  this.scan_tokens (prev, token, post, i) =>
    for pair in pairs
      [open, close]: pair
      # Start a pair's counter at zero the first time it is seen.
      levels[open] ||= 0
      levels[open] += 1 if token[0] is open
      levels[open] -= 1 if token[0] is close
      # A negative depth means this closer arrived before any opener.
      throw "too many " + token[1] if levels[open] < 0
    # NOTE(review): presumably the number of tokens scan_tokens should
    # advance by -- confirm against scan_tokens.
    return 1
  # Opening tags whose depth never returned to zero.
  unclosed: key for key, value of levels when value > 0
  throw "unclosed " + unclosed[0] if unclosed.length

# We'd like to support syntax like this:
#    el.click((event) ->
#      el.hide())
# In order to accomplish this, move outdents that follow closing parens
# inwards, safely. The steps to accomplish this are:
#
# 1. Check that all paired tokens are balanced and in order.
# 2. Rewrite the stream with a stack: if you see an '(' or INDENT, add it
#    to the stack. If you see an ')' or OUTDENT, pop the stack and replace
#    it with the inverse of what we've just popped.
# 3. Keep track of "debt" for tokens that we fake, to make sure we end
#    up balanced in the end.
#
# Returns whatever scan_tokens returns.
re::rewrite_closing_parens: ->
  # Tokens from EXPRESSION_START that are currently open, innermost last.
  stack: []
  # debt[tag]: how many closers for opener `tag` were inserted ahead of time
  # and must be swallowed when the stream's own closers turn up.
  debt: {}
  (debt[key]: 0) for key, val of INVERSES
  this.scan_tokens (prev, token, post, i) =>
    tag: token[0]
    inv: INVERSES[token[0]]
    # Push openers onto the stack.
    if EXPRESSION_START.indexOf(tag) >= 0
      stack.push(token)
      return 1
    # The end of an expression, check stack and debt for a pair.
    else if EXPRESSION_TAIL.indexOf(tag) >= 0
      # If the tag is already in our debt, swallow it.
      if debt[inv] > 0
        debt[inv] -= 1
        this.tokens.splice(i, 1)
        # The token at index i was removed, so do not advance.
        return 0
      else
        # Pop the stack of open delimiters.
        # NOTE(review): assumes the stack is non-empty here, presumably
        # because ensure_balance ran first -- confirm.
        match: stack.pop()
        mtag: match[0]
        # Continue onwards if it's the expected tag.
        if tag is INVERSES[mtag]
          return 1
        else
          # Unexpected close, insert correct close, adding to the debt.
          debt[mtag] += 1
          # The inserted closer for an INDENT carries that INDENT's own value;
          # any other inserted closer uses its tag as its value.
          val: if mtag is 'INDENT' then match[1] else INVERSES[mtag]
          this.tokens.splice(i, 0, [INVERSES[mtag], val])
          # NOTE(review): the new closer now sits at index i; stepping by 1
          # presumably lands back on the current token -- confirm.
          return 1
    else
      return 1