the rewriter is done

2026-04-11 03:00:13 -04:00 · 2010-01-30 18:29:53 -05:00
parent c6457e010d
commit bad50c9aee
4 changed files with 193 additions and 55 deletions
--- a/lib/coffee_script/lexer.js
+++ b/lib/coffee_script/lexer.js
@@ -51,6 +51,8 @@
    // The stack of all indent levels we are currently within.
    this.tokens = [];
    // Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
    this.spaced = null;
    // The last token that has a space following it.
    while (this.i < this.code.length) {
      this.chunk = this.code.slice(this.i);
      this.extract_next_token();
@@ -235,7 +237,7 @@
    if (!((space = this.match(WHITESPACE, 1)))) {
      return false;
    }
-    this.value().spaced = true;
+    this.spaced = this.value();
    this.i += space.length;
    return true;
  };
@@ -266,7 +268,7 @@
    }
    value = value || this.chunk.substr(0, 1);
    tag = value.match(ASSIGNMENT) ? 'ASSIGN' : value;
-    if (this.value() && !this.value().spaced && CALLABLE.indexOf(this.tag() >= 0)) {
+    if (this.value() !== this.spaced && CALLABLE.indexOf(this.tag()) >= 0) {
      if (value === '(') {
        tag = 'CALL_START';
      }
@@ -331,12 +333,11 @@
  // parameter identifiers in order to avoid this. Also, parameter lists can
  // make use of splats.
  lex.prototype.tag_parameters = function tag_parameters() {
-    var __a, i, tok;
+    var i, tok;
    if (this.tag() !== ')') {
      return null;
    }
    i = 0;
    __a = [];
    while (true) {
      i += 1;
      tok = this.tokens[this.tokens.length - i];
@@ -351,7 +352,7 @@
        return (tok[0] = 'PARAM_START');
      }
    }
-    return __a;
+    return true;
  };
  // Close up all remaining open blocks. If the first token is an indent,
  // axe it.
--- a/lib/coffee_script/rewriter.js
+++ b/lib/coffee_script/rewriter.js
@@ -1,5 +1,6 @@
 (function(){
  var BALANCED_PAIRS, EXPRESSION_CLOSE, EXPRESSION_START, EXPRESSION_TAIL, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, SINGLE_CLOSERS, SINGLE_LINERS, __a, __b, __c, __d, __e, __f, __g, __h, pair, re;
  var __hasProp = Object.prototype.hasOwnProperty;
  // In order to keep the grammar simple, the stream of tokens that the Lexer
  // emits is rewritten by the Rewriter, smoothing out ambiguities, mis-nested
  // indentation, and single-line flavors of expressions.
@@ -55,8 +56,8 @@
    this.close_open_calls_and_indexes();
    this.add_implicit_parentheses();
    this.add_implicit_indentation();
-    // this.ensure_balance(BALANCED_PAIRS)
+    this.ensure_balance(BALANCED_PAIRS);
-    // this.rewrite_closing_parens()
+    this.rewrite_closing_parens();
    return this.tokens;
  };
  // Rewrite the token stream, looking one token ahead and behind.
@@ -157,12 +158,12 @@
        } else if (token[0] === 'INDEX_START') {
          brackets.push(0);
        } else if (token[0] === '(') {
-          parens[-1] += 1;
+          parens[parens.length - 1] += 1;
        } else if (token[0] === '[') {
-          brackets[-1] += 1;
+          brackets[brackets.length - 1] += 1;
        } else if (token[0] === ')') {
          if (parens[parens.length - 1] === 0) {
-            parens.pop;
+            parens.pop();
            token[0] = 'CALL_END';
          } else {
            parens[parens.length - 1] -= 1;
@@ -261,4 +262,116 @@
      });
    })(this));
  };
  // Ensure that all listed pairs of tokens are correctly balanced throughout
  // the course of the token stream.
  //
  // pairs - an array of [open, close] tag pairs.
  //
  // Walks the stream with scan_tokens, keeping one nesting counter per opening
  // tag. Throws the string "too many <token value>" as soon as a counter goes
  // negative, and the string "unclosed <open tag>" if any counter is still
  // positive after the walk. Returns nothing when the stream is balanced.
  re.prototype.ensure_balance = function ensure_balance(pairs) {
    var key, levels, unclosed;
    // Nesting depth seen so far, keyed by opening tag.
    levels = {};
    this.scan_tokens(function(prev, token, post, i) {
      // Every loop variable is local to the callback, so nothing in an
      // enclosing scope is overwritten while scanning.
      var idx, pair, open, close;
      for (idx = 0; idx < pairs.length; idx++) {
        pair = pairs[idx];
        open = pair[0];
        close = pair[1];
        levels[open] = levels[open] || 0;
        if (token[0] === open) {
          levels[open] += 1;
        }
        if (token[0] === close) {
          levels[open] -= 1;
        }
        if (levels[open] < 0) {
          // Thrown as a plain string, as before, so callers see the same value.
          throw "too many " + token[1];
        }
      }
      // Presumably the number of tokens scan_tokens should advance by.
      return 1;
    });
    // Collect every opening tag that never got its closer.
    unclosed = [];
    for (key in levels) {
      if (__hasProp.call(levels, key) && levels[key] > 0) {
        unclosed.push(key);
      }
    }
    if (unclosed.length) {
      throw "unclosed " + unclosed[0];
    }
  };
  // Goal: accept code where an outdent trails a closing paren, such as
  //    el.click((event) ->
  //      el.hide())
  // by moving such outdents inwards so that closers come in the right order.
  //
  // How it works:
  // 1. Every opener (a tag in EXPRESSION_START) is pushed on a stack.
  // 2. For every closer (a tag in EXPRESSION_TAIL) the stack is popped. If the
  //    closer is not the inverse of the popped opener, the correct closer is
  //    inserted in front of it and recorded as "debt" for that opener.
  // 3. A later closer whose opener is in debt is removed from the stream,
  //    paying the debt back.
  //
  // Returns whatever scan_tokens returns.
  re.prototype.rewrite_closing_parens = function rewrite_closing_parens() {
    var self = this;
    var open_stack = [];
    var owed = {};
    var name;
    // Start every tag known to INVERSES with no debt.
    for (name in INVERSES) {
      if (__hasProp.call(INVERSES, name)) {
        owed[name] = 0;
      }
    }
    return this.scan_tokens(function(prev, token, post, i) {
      var tag = token[0];
      var wanted = INVERSES[tag];
      var opener, opener_tag, closer_tag, closer_value;
      // Openers just go on the stack.
      if (EXPRESSION_START.indexOf(tag) >= 0) {
        open_stack.push(token);
        return 1;
      }
      // Anything that is not a closer passes through untouched.
      if (EXPRESSION_TAIL.indexOf(tag) < 0) {
        return 1;
      }
      // A closer we already faked earlier: drop the real one.
      if (owed[wanted] > 0) {
        owed[wanted] -= 1;
        self.tokens.splice(i, 1);
        return 0;
      }
      // Otherwise compare against the most recent opener.
      opener = open_stack.pop();
      opener_tag = opener[0];
      closer_tag = INVERSES[opener_tag];
      if (tag === closer_tag) {
        return 1;
      }
      // Wrong closer: insert the expected one first and remember the debt.
      // The current token shifts one slot right and is visited again next.
      owed[opener_tag] += 1;
      closer_value = opener_tag === 'INDENT' ? opener[1] : closer_tag;
      self.tokens.splice(i, 0, [closer_tag, closer_value]);
      return 1;
    });
  };
 })();
--- a/src/lexer.coffee
+++ b/src/lexer.coffee
@@ -66,6 +66,7 @@ lex::tokenize: (code) ->
  this.indent  : 0          # The current indent level.
  this.indents : []         # The stack of all indent levels we are currently within.
  this.tokens  : []         # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
  this.spaced  : null       # The last token that has a space following it.
  while this.i < this.code.length
    this.chunk: this.code.slice(this.i)
    this.extract_next_token()
@@ -192,7 +193,7 @@ lex::outdent_token: (move_out) ->
 # Matches and consumes non-meaningful whitespace.
 lex::whitespace_token: ->
  return false unless space: this.match WHITESPACE, 1
-  this.value().spaced: true
+  this.spaced: this.value()
  this.i += space.length
  true
@@ -216,7 +217,7 @@ lex::literal_token: ->
  this.tag_parameters() if value and value.match(CODE)
  value ||= this.chunk.substr(0, 1)
  tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value
-  if this.value() and !this.value().spaced and CALLABLE.indexOf(this.tag() >= 0)
+  if this.value() isnt this.spaced and CALLABLE.indexOf(this.tag()) >= 0
    tag: 'CALL_START'  if value is '('
    tag: 'INDEX_START' if value is '['
  this.token tag, value
@@ -272,6 +273,7 @@ lex::tag_parameters: ->
      when 'IDENTIFIER' then tok[0]: 'PARAM'
      when ')'          then tok[0]: 'PARAM_END'
      when '('          then return tok[0]: 'PARAM_START'
  true
 # Close up all remaining open blocks. If the first token is an indent,
 # axe it.
--- a/src/rewriter.coffee
+++ b/src/rewriter.coffee
@@ -47,8 +47,8 @@ re::rewrite: (tokens) ->
  this.close_open_calls_and_indexes()
  this.add_implicit_parentheses()
  this.add_implicit_indentation()
-  # this.ensure_balance(BALANCED_PAIRS)
+  this.ensure_balance(BALANCED_PAIRS)
-  # this.rewrite_closing_parens()
+  this.rewrite_closing_parens()
  this.tokens
 # Rewrite the token stream, looking one token ahead and behind.
@@ -116,11 +116,11 @@ re::close_open_calls_and_indexes: ->
    switch token[0]
      when 'CALL_START'  then parens.push(0)
      when 'INDEX_START' then brackets.push(0)
-      when '('           then parens[-1] += 1
+      when '('           then parens[parens.length - 1] += 1
-      when '['           then brackets[-1] += 1
+      when '['           then brackets[brackets.length - 1] += 1
      when ')'
        if parens[parens.length - 1] is 0
-          parens.pop
+          parens.pop()
          token[0]: 'CALL_END'
        else
          parens[parens.length - 1] -= 1
@@ -181,42 +181,64 @@ re::add_implicit_indentation: ->
    this.tokens.splice(i, 1)
    return 0
 # Ensure that all listed pairs of tokens are correctly balanced throughout
 # the course of the token stream.
 # `pairs` is a list of [open, close] tag pairs. Throws a string, either
 # "too many <token value>" or "unclosed <open tag>", when the stream is
 # not balanced.
 re::ensure_balance: (pairs) ->
  # Nesting depth seen so far, keyed by opening tag.
  levels: {}
  this.scan_tokens (prev, token, post, i) =>
    for pair in pairs
      [open, close]: pair
      levels[open] ||= 0
      levels[open] += 1 if token[0] is open
      levels[open] -= 1 if token[0] is close
      # A negative depth means a closer showed up without a matching opener.
      throw "too many " + token[1] if levels[open] < 0
    # NOTE(review): presumably the number of tokens scan_tokens advances by — confirm.
    return 1
  # Opening tags whose depth never returned to zero.
  unclosed: key for key, value of levels when value > 0
  throw "unclosed " + unclosed[0] if unclosed.length
-
+# We'd like to support syntax like this:
-
+#    el.click((event) ->
-
+#      el.hide())
-
+# In order to accomplish this, move outdents that follow closing parens
-
+# inwards, safely. The steps to accomplish this are:
-
+#
-
+# 1. Check that all paired tokens are balanced and in order.
-
+# 2. Rewrite the stream with a stack: if you see an '(' or INDENT, add it
-
+#    to the stack. If you see an ')' or OUTDENT, pop the stack and replace
-
+#    it with the inverse of what we've just popped.
-
+# 3. Keep track of "debt" for tokens that we fake, to make sure we end
-
+#    up balanced in the end.
-
+#
-
+re::rewrite_closing_parens: ->
-
+  stack: []
-
+  debt:  {}
-
+  (debt[key]: 0) for key, val of INVERSES
-
+  this.scan_tokens (prev, token, post, i) =>
-
+    tag: token[0]
-
+    inv: INVERSES[token[0]]
-
+    # Push openers onto the stack.
-
+    if EXPRESSION_START.indexOf(tag) >= 0
-
+      stack.push(token)
-
+      return 1
-
+      # The end of an expression, check stack and debt for a pair.
-
+    else if EXPRESSION_TAIL.indexOf(tag) >= 0
-
+      # If the tag is already in our debt, swallow it.
-
+      if debt[inv] > 0
-
+        debt[inv] -= 1
-
+        this.tokens.splice(i, 1)
-
+        return 0
-
+      else
-
+        # Pop the stack of open delimiters.
-
+        match: stack.pop()
-
+        mtag:  match[0]
-
+        # Continue onwards if it's the expected tag.
-
+        if tag is INVERSES[mtag]
-
+          return 1
        else
          # Unexpected close, insert correct close, adding to the debt.
          debt[mtag] += 1
          val: if mtag is 'INDENT' then match[1] else INVERSES[mtag]
          this.tokens.splice(i, 0, [INVERSES[mtag], val])
          return 1
    else
      return 1