diff --git a/lib/coffee_script/lexer.js b/lib/coffee_script/lexer.js index 1028280b..f8b25f1c 100644 --- a/lib/coffee_script/lexer.js +++ b/lib/coffee_script/lexer.js @@ -266,7 +266,7 @@ } value = value || this.chunk.substr(0, 1); tag = value.match(ASSIGNMENT) ? 'ASSIGN' : value; - if (this.value() && this.value().spaced && CALLABLE.indexOf(this.tag() >= 0)) { + if (this.value() && !this.value().spaced && CALLABLE.indexOf(this.tag() >= 0)) { if (value === '(') { tag = 'CALL_START'; } diff --git a/lib/coffee_script/rewriter.js b/lib/coffee_script/rewriter.js index ef2c92c1..df985321 100644 --- a/lib/coffee_script/rewriter.js +++ b/lib/coffee_script/rewriter.js @@ -49,12 +49,12 @@ re.prototype.rewrite = function rewrite(tokens) { this.tokens = tokens; this.adjust_comments(); - // this.remove_leading_newlines() - // this.remove_mid_expression_newlines() - // this.move_commas_outside_outdents() - // this.close_open_calls_and_indexes() + this.remove_leading_newlines(); + this.remove_mid_expression_newlines(); + this.move_commas_outside_outdents(); + this.close_open_calls_and_indexes(); // this.add_implicit_parentheses() - // this.add_implicit_indentation() + this.add_implicit_indentation(); // this.ensure_balance(BALANCED_PAIRS) // this.rewrite_closing_parens() return this.tokens; @@ -78,27 +78,149 @@ // Massage newlines and indentations so that comments don't have to be // correctly indented, or appear on their own line. re.prototype.adjust_comments = function adjust_comments() { - return this.scan_tokens(function(prev, token, post, i) { - var after, before; - if (!(token[0] === 'COMMENT')) { - return 1; - } - before = this.tokens[i - 2]; - after = this.tokens[i + 2]; - if (before && after && ((before[0] === 'INDENT' && after[0] === 'OUTDENT') || (before[0] === 'OUTDENT' && after[0] === 'INDENT')) && before[1] === after[1]) { - this.tokens.splice(i + 2, 1); - this.tokens.splice(i - 2, 1); + return this.scan_tokens((function(__this) { + var __func = function(prev, token, post, i) { + var after, before; + if (!(token[0] === 'COMMENT')) { + return 1; + } + before = this.tokens[i - 2]; + after = this.tokens[i + 2]; + if (before && after && ((before[0] === 'INDENT' && after[0] === 'OUTDENT') || (before[0] === 'OUTDENT' && after[0] === 'INDENT')) && before[1] === after[1]) { + this.tokens.splice(i + 2, 1); + this.tokens.splice(i - 2, 1); + return 0; + } else if (prev[0] === "\n" && after[0] === 'INDENT') { + this.tokens.splice(i + 2, 1); + this.tokens[i - 1] = after; + return 1; + } else if (prev[0] !== "\n" && prev[0] !== 'INDENT' && prev[0] !== 'OUTDENT') { + this.tokens.splice(i, 0, ["\n", "\n"]); + return 2; + } else { + return 1; + } + }; + return (function() { + return __func.apply(__this, arguments); + }); + })(this)); + }; + // Leading newlines would introduce an ambiguity in the grammar, so we + // dispatch them here. + re.prototype.remove_leading_newlines = function remove_leading_newlines() { + if (this.tokens[0][0] === "\n") { + return this.tokens.shift(); + } + }; + // Some blocks occur in the middle of expressions -- when we're expecting + // this, remove their trailing newlines. + re.prototype.remove_mid_expression_newlines = function remove_mid_expression_newlines() { + return this.scan_tokens((function(__this) { + var __func = function(prev, token, post, i) { + if (!(post && EXPRESSION_CLOSE.indexOf(post[0]) >= 0 && token[0] === "\n")) { + return 1; + } + this.tokens.splice(i, 1); return 0; - } else if (prev[0] === "\n" && after[0] === 'INDENT') { - this.tokens.splice(i + 2, 1); - this.tokens[i - 1] = after; + }; + return (function() { + return __func.apply(__this, arguments); + }); + })(this)); + }; + // Make sure that we don't accidentally break trailing commas, which need + // to go on the outside of expression closers. + re.prototype.move_commas_outside_outdents = function move_commas_outside_outdents() { + return this.scan_tokens((function(__this) { + var __func = function(prev, token, post, i) { + if (token[0] === 'OUTDENT' && prev[0] === ',') { + this.tokens.splice(i, 1, token); + } return 1; - } else if (prev[0] !== "\n" && prev[0] !== 'INDENT' && prev[0] !== 'OUTDENT') { - this.tokens.splice(i, 0, ["\n", "\n"]); - return 2; - } else { + }; + return (function() { + return __func.apply(__this, arguments); + }); + })(this)); + }; + // We've tagged the opening parenthesis of a method call, and the opening + // bracket of an indexing operation. Match them with their close. + re.prototype.close_open_calls_and_indexes = function close_open_calls_and_indexes() { + var brackets, parens; + parens = [0]; + brackets = [0]; + return this.scan_tokens((function(__this) { + var __func = function(prev, token, post, i) { + if (token[0] === 'CALL_START') { + parens.push(0); + } else if (token[0] === 'INDEX_START') { + brackets.push(0); + } else if (token[0] === '(') { + parens[-1] += 1; + } else if (token[0] === '[') { + brackets[-1] += 1; + } else if (token[0] === ')') { + if (parens[parens.length - 1] === 0) { + parens.pop; + token[0] = 'CALL_END'; + } else { + parens[parens.length - 1] -= 1; + } + } else if (token[0] === ']') { + if (brackets[brackets.length - 1] === 0) { + brackets.pop(); + token[0] = 'INDEX_END'; + } else { + brackets[brackets.length - 1] -= 1; + } + } return 1; - } - }); + }; + return (function() { + return __func.apply(__this, arguments); + }); + })(this)); + }; + // Because our grammar is LALR(1), it can't handle some single-line + // expressions that lack ending delimiters. Use the lexer to add the implicit + // blocks, so it doesn't need to. + // ')' can close a single-line block, but we need to make sure it's balanced. + re.prototype.add_implicit_indentation = function add_implicit_indentation() { + return this.scan_tokens((function(__this) { + var __func = function(prev, token, post, i) { + var idx, insertion, parens, starter, tok; + if (!(SINGLE_LINERS.indexOf(token[0]) >= 0 && post[0] !== 'INDENT' && !(token[0] === 'ELSE' && post[0] === 'IF'))) { + return 1; + } + starter = token[0]; + this.tokens.splice(i + 1, 0, ['INDENT', 2]); + idx = i + 1; + parens = 0; + while (true) { + idx += 1; + tok = this.tokens[idx]; + if ((!tok || SINGLE_CLOSERS.indexOf(tok[0]) >= 0 || (tok[0] === ')' && parens === 0)) && !(starter === 'ELSE' && tok[0] === 'ELSE')) { + insertion = this.tokens[idx - 1][0] === "," ? idx - 1 : idx; + this.tokens.splice(insertion, 0, ['OUTDENT', 2]); + break; + } + if (tok[0] === '(') { + parens += 1; + } + if (tok[0] === ')') { + parens -= 1; + } + } + if (!(token[0] === 'THEN')) { + return 1; + } + this.tokens.splice(i, 1); + return 0; + }; + return (function() { + return __func.apply(__this, arguments); + }); + })(this)); }; })(); \ No newline at end of file diff --git a/lib/coffee_script/rewriter.rb b/lib/coffee_script/rewriter.rb index 287d11c2..4a3845fc 100644 --- a/lib/coffee_script/rewriter.rb +++ b/lib/coffee_script/rewriter.rb @@ -151,6 +151,30 @@ module CoffeeScript end end + # Methods may be optionally called without parentheses, for simple cases. + # Insert the implicit parentheses here, so that the parser doesn't have to + # deal with them. + def add_implicit_parentheses + stack = [0] + scan_tokens do |prev, token, post, i| + stack.push(0) if token[0] == :INDENT + if token[0] == :OUTDENT + last = stack.pop + stack[-1] += last + end + if stack.last > 0 && (IMPLICIT_END.include?(token[0]) || post.nil?) + idx = token[0] == :OUTDENT ? i + 1 : i + stack.last.times { @tokens.insert(idx, [:CALL_END, Value.new(')', token[1].line)]) } + size, stack[-1] = stack[-1] + 1, 0 + next size + end + next 1 unless IMPLICIT_FUNC.include?(prev[0]) && IMPLICIT_CALL.include?(token[0]) + @tokens.insert(i, [:CALL_START, Value.new('(', token[1].line)]) + stack[-1] += 1 + next 2 + end + end + # Because our grammar is LALR(1), it can't handle some single-line # expressions that lack ending delimiters. Use the lexer to add the implicit # blocks, so it doesn't need to. @@ -183,30 +207,6 @@ module CoffeeScript end end - # Methods may be optionally called without parentheses, for simple cases. - # Insert the implicit parentheses here, so that the parser doesn't have to - # deal with them. - def add_implicit_parentheses - stack = [0] - scan_tokens do |prev, token, post, i| - stack.push(0) if token[0] == :INDENT - if token[0] == :OUTDENT - last = stack.pop - stack[-1] += last - end - if stack.last > 0 && (IMPLICIT_END.include?(token[0]) || post.nil?) - idx = token[0] == :OUTDENT ? i + 1 : i - stack.last.times { @tokens.insert(idx, [:CALL_END, Value.new(')', token[1].line)]) } - size, stack[-1] = stack[-1] + 1, 0 - next size - end - next 1 unless IMPLICIT_FUNC.include?(prev[0]) && IMPLICIT_CALL.include?(token[0]) - @tokens.insert(i, [:CALL_START, Value.new('(', token[1].line)]) - stack[-1] += 1 - next 2 - end - end - # Ensure that all listed pairs of tokens are correctly balanced throughout # the course of the token stream. def ensure_balance(*pairs) diff --git a/src/lexer.coffee b/src/lexer.coffee index dbe14504..500706a0 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -216,7 +216,7 @@ lex::literal_token: -> this.tag_parameters() if value and value.match(CODE) value ||= this.chunk.substr(0, 1) tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value - if this.value() and this.value().spaced and CALLABLE.indexOf(this.tag() >= 0) + if this.value() and !this.value().spaced and CALLABLE.indexOf(this.tag() >= 0) tag: 'CALL_START' if value is '(' tag: 'INDEX_START' if value is '[' this.token tag, value diff --git a/src/rewriter.coffee b/src/rewriter.coffee index 21775b72..a2cb5d4e 100644 --- a/src/rewriter.coffee +++ b/src/rewriter.coffee @@ -41,12 +41,12 @@ SINGLE_CLOSERS: ["\n", 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN', 'P re::rewrite: (tokens) -> this.tokens: tokens this.adjust_comments() - # this.remove_leading_newlines() - # this.remove_mid_expression_newlines() - # this.move_commas_outside_outdents() - # this.close_open_calls_and_indexes() + this.remove_leading_newlines() + this.remove_mid_expression_newlines() + this.move_commas_outside_outdents() + this.close_open_calls_and_indexes() # this.add_implicit_parentheses() - # this.add_implicit_indentation() + this.add_implicit_indentation() # this.ensure_balance(BALANCED_PAIRS) # this.rewrite_closing_parens() this.tokens @@ -56,7 +56,7 @@ re::rewrite: (tokens) -> # forwards (or backwards) in the stream, to make sure we don't miss anything # as the stream changes length under our feet. re::scan_tokens: (yield) -> - i = 0 + i: 0 while true break unless this.tokens[i] move: yield(this.tokens[i - 1], this.tokens[i], this.tokens[i + 1], i) @@ -66,7 +66,7 @@ re::scan_tokens: (yield) -> # Massage newlines and indentations so that comments don't have to be # correctly indented, or appear on their own line. re::adjust_comments: -> - this.scan_tokens (prev, token, post, i) -> + this.scan_tokens (prev, token, post, i) => return 1 unless token[0] is 'COMMENT' before: this.tokens[i - 2] after: this.tokens[i + 2] @@ -87,6 +87,86 @@ re::adjust_comments: -> else return 1 +# Leading newlines would introduce an ambiguity in the grammar, so we +# dispatch them here. +re::remove_leading_newlines: -> + this.tokens.shift() if this.tokens[0][0] is "\n" + +# Some blocks occur in the middle of expressions -- when we're expecting +# this, remove their trailing newlines. +re::remove_mid_expression_newlines: -> + this.scan_tokens (prev, token, post, i) => + return 1 unless post and EXPRESSION_CLOSE.indexOf(post[0]) >= 0 and token[0] is "\n" + this.tokens.splice(i, 1) + return 0 + +# Make sure that we don't accidentally break trailing commas, which need +# to go on the outside of expression closers. +re::move_commas_outside_outdents: -> + this.scan_tokens (prev, token, post, i) => + this.tokens.splice(i, 1, token) if token[0] is 'OUTDENT' and prev[0] is ',' + return 1 + +# We've tagged the opening parenthesis of a method call, and the opening +# bracket of an indexing operation. Match them with their close. +re::close_open_calls_and_indexes: -> + parens: [0] + brackets: [0] + this.scan_tokens (prev, token, post, i) => + switch token[0] + when 'CALL_START' then parens.push(0) + when 'INDEX_START' then brackets.push(0) + when '(' then parens[-1] += 1 + when '[' then brackets[-1] += 1 + when ')' + if parens[parens.length - 1] is 0 + parens.pop + token[0]: 'CALL_END' + else + parens[parens.length - 1] -= 1 + when ']' + if brackets[brackets.length - 1] == 0 + brackets.pop() + token[0]: 'INDEX_END' + else + brackets[brackets.length - 1] -= 1 + return 1 + +# Because our grammar is LALR(1), it can't handle some single-line +# expressions that lack ending delimiters. Use the lexer to add the implicit +# blocks, so it doesn't need to. +# ')' can close a single-line block, but we need to make sure it's balanced. +re::add_implicit_indentation: -> + this.scan_tokens (prev, token, post, i) => + return 1 unless SINGLE_LINERS.indexOf(token[0]) >= 0 and post[0] isnt 'INDENT' and + not (token[0] is 'ELSE' and post[0] is 'IF') + starter: token[0] + this.tokens.splice(i + 1, 0, ['INDENT', 2]) + idx: i + 1 + parens: 0 + while true + idx += 1 + tok: this.tokens[idx] + if (not tok or SINGLE_CLOSERS.indexOf(tok[0]) >= 0 or + (tok[0] is ')' && parens is 0)) and + not (starter is 'ELSE' and tok[0] is 'ELSE') + insertion: if this.tokens[idx - 1][0] is "," then idx - 1 else idx + this.tokens.splice(insertion, 0, ['OUTDENT', 2]) + break + parens += 1 if tok[0] is '(' + parens -= 1 if tok[0] is ')' + return 1 unless token[0] is 'THEN' + this.tokens.splice(i, 1) + return 0 + + + + + + + + +