unary-new: merged master

2026-02-19 03:44:23 -05:00 · 2010-09-27 01:22:33 +09:00
parent 0b3b0ab68b ecb23d15c4
commit db531495b8
21 changed files with 383 additions and 344 deletions
--- a/lib/lexer.js
+++ b/lib/lexer.js
@@ -3,7 +3,7 @@
  var __slice = Array.prototype.slice;
  _ref = require('./rewriter');
  Rewriter = _ref.Rewriter;
-  _ref = require('./helpers').helpers;
+  _ref = require('./helpers');
  include = _ref.include;
  count = _ref.count;
  starts = _ref.starts;
@@ -23,7 +23,7 @@
      this.indents = [];
      this.tokens = [];
      while ((this.chunk = code.slice(this.i))) {
-        this.extractNextToken();
+        this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.heredocToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
      }
      this.closeIndentation();
      if (o.rewrite === false) {
@@ -31,20 +31,18 @@
      }
      return (new Rewriter).rewrite(this.tokens);
    };
-    Lexer.prototype.extractNextToken = function() {
-      return this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.heredocToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
-    };
    Lexer.prototype.identifierToken = function() {
-      var closeIndex, forcedIdentifier, id, tag;
-      if (!(id = this.match(IDENTIFIER))) {
+      var closeIndex, forcedIdentifier, id, match, tag;
+      if (!(match = IDENTIFIER.exec(this.chunk))) {
        return false;
      }
+      id = match[0];
      this.i += id.length;
      if (id === 'all' && this.tag() === 'FOR') {
        this.token('ALL', id);
        return true;
      }
-      forcedIdentifier = this.tagAccessor() || this.match(ASSIGNED, 1);
+      forcedIdentifier = this.tagAccessor() || ASSIGNED.test(this.chunk);
      tag = 'IDENTIFIER';
      if (include(JS_KEYWORDS, id) || !forcedIdentifier && include(COFFEE_KEYWORDS, id)) {
        tag = id.toUpperCase();
@@ -86,10 +84,11 @@
      return true;
    };
    Lexer.prototype.numberToken = function() {
-      var number;
-      if (!(number = this.match(NUMBER))) {
+      var match, number;
+      if (!(match = NUMBER.exec(this.chunk))) {
        return false;
      }
+      number = match[0];
      if (this.tag() === '.' && number.charAt(0) === '.') {
        return false;
      }
@@ -98,19 +97,19 @@
      return true;
    };
    Lexer.prototype.stringToken = function() {
-      var string;
+      var match, string;
      switch (this.chunk.charAt(0)) {
        case "'":
-          if (!(string = this.match(SIMPLESTR))) {
+          if (!(match = SIMPLESTR.exec(this.chunk))) {
            return false;
          }
-          this.token('STRING', string.replace(MULTILINER, '\\\n'));
+          this.token('STRING', (string = match[0]).replace(MULTILINER, '\\\n'));
          break;
        case '"':
          if (!(string = this.balancedToken(['"', '"'], ['#{', '}']))) {
            return false;
          }
-          this.interpolateString(string.replace(MULTILINER, '\\\n'));
+          this.interpolateString(string);
          break;
        default:
          return false;
@@ -127,7 +126,8 @@
      heredoc = match[0];
      quote = heredoc.charAt(0);
      doc = this.sanitizeHeredoc(match[2], {
-        quote: quote
+        quote: quote,
+        indent: null
      });
      this.interpolateString(quote + doc + quote, {
        heredoc: true
@@ -156,11 +156,11 @@
      return true;
    };
    Lexer.prototype.jsToken = function() {
-      var script;
-      if (!(this.chunk.charAt(0) === '`' && (script = this.match(JSTOKEN)))) {
+      var match, script;
+      if (!(this.chunk.charAt(0) === '`' && (match = JSTOKEN.exec(this.chunk)))) {
        return false;
      }
-      this.token('JS', script.slice(1, -1));
+      this.token('JS', (script = match[0]).slice(1, -1));
      this.i += script.length;
      return true;
    };
@@ -205,16 +205,17 @@
      return this.balancedString(this.chunk, delimited);
    };
    Lexer.prototype.lineToken = function() {
-      var diff, indent, nextCharacter, noNewlines, prev, size;
-      if (!(indent = this.match(MULTI_DENT))) {
+      var diff, indent, match, nextCharacter, noNewlines, prev, size;
+      if (!(match = MULTI_DENT.exec(this.chunk))) {
        return false;
      }
+      indent = match[0];
      this.line += count(indent, '\n');
      this.i += indent.length;
      prev = this.prev(2);
      size = indent.length - 1 - indent.lastIndexOf('\n');
-      nextCharacter = this.match(NEXT_CHARACTER, 1);
-      noNewlines = nextCharacter === '.' || nextCharacter === ',' || this.unfinished();
+      nextCharacter = NEXT_CHARACTER.exec(this.chunk)[1];
+      noNewlines = (('.' === nextCharacter || ',' === nextCharacter)) || this.unfinished();
      if (size - this.indebt === this.indent) {
        if (noNewlines) {
          return this.suppressNewlines();
@@ -265,15 +266,15 @@
      return true;
    };
    Lexer.prototype.whitespaceToken = function() {
-      var prev, space;
-      if (!(space = this.match(WHITESPACE))) {
+      var match, prev;
+      if (!(match = WHITESPACE.exec(this.chunk))) {
        return false;
      }
      prev = this.prev();
      if (prev) {
        prev.spaced = true;
      }
-      this.i += space.length;
+      this.i += match[0].length;
      return true;
    };
    Lexer.prototype.newlineToken = function(newlines) {
@@ -369,25 +370,32 @@
      return accessor ? 'accessor' : false;
    };
    Lexer.prototype.sanitizeHeredoc = function(doc, options) {
-      var _ref2, attempt, indent, match;
-      indent = options.indent;
-      if (options.herecomment && !include(doc, '\n')) {
+      var _ref2, attempt, herecomment, indent, match;
+      _ref2 = options;
+      indent = _ref2.indent;
+      herecomment = _ref2.herecomment;
+      if (herecomment && !include(doc, '\n')) {
        return doc;
      }
-      if (!(options.herecomment)) {
+      if (!(herecomment)) {
        while ((match = HEREDOC_INDENT.exec(doc))) {
-          attempt = (typeof (_ref2 = match[1]) !== "undefined" && _ref2 !== null) ? match[1] : match[2];
-          if (!(typeof indent !== "undefined" && indent !== null) || (0 < attempt.length) && (attempt.length < indent.length)) {
+          attempt = match[1];
+          if (indent === null || (0 < attempt.length) && (attempt.length < indent.length)) {
            indent = attempt;
          }
        }
      }
-      indent || (indent = '');
-      doc = doc.replace(new RegExp('^' + indent, 'gm'), '');
-      if (options.herecomment) {
+      if (indent) {
+        doc = doc.replace(RegExp("\\n" + (indent), "g"), '\n');
+      }
+      if (herecomment) {
        return doc;
      }
-      return doc.replace(/^\n/, '').replace(MULTILINER, '\\n').replace(new RegExp(options.quote, 'g'), "\\" + (options.quote));
+      doc = doc.replace(/^\n/, '').replace(RegExp("" + (options.quote), "g"), '\\$&');
+      if (options.quote === "'") {
+        doc = this.escapeLines(doc, true);
+      }
+      return doc;
    };
    Lexer.prototype.tagParameters = function() {
      var i, tok;
@@ -469,83 +477,86 @@
      return !i ? false : str.slice(0, i);
    };
    Lexer.prototype.interpolateString = function(str, options) {
-      var _len, _ref2, _ref3, end, escaped, expr, i, idx, inner, interpolated, lexer, nested, pi, quote, tag, tok, token, tokens, value;
-      options || (options = {});
-      if (str.length < 3 || str.charAt(0) !== '"') {
+      var _len, _ref2, _ref3, end, escapeQuotes, escaped, expr, heredoc, i, idx, inner, interpolated, lexer, nested, pi, push, quote, s, tag, tok, token, tokens, value;
+      _ref2 = options || {};
+      heredoc = _ref2.heredoc;
+      escapeQuotes = _ref2.escapeQuotes;
+      quote = str.charAt(0);
+      if (quote !== '"' || str.length < 3) {
        return this.token('STRING', str);
-      } else {
-        lexer = new Lexer;
-        tokens = [];
-        quote = str.charAt(0);
-        _ref2 = [1, 1];
-        i = _ref2[0];
-        pi = _ref2[1];
-        end = str.length - 1;
-        while (i < end) {
-          if (str.charAt(i) === '\\') {
-            i += 1;
-          } else if (expr = this.balancedString(str.slice(i), [['#{', '}']])) {
-            if (pi < i) {
-              tokens.push(['STRING', quote + str.slice(pi, i) + quote]);
-            }
-            inner = expr.slice(2, -1);
-            if (inner.length) {
-              if (options.heredoc) {
-                inner = inner.replace(new RegExp('\\\\' + quote, 'g'), quote);
-              }
-              nested = lexer.tokenize("(" + (inner) + ")", {
-                line: this.line
-              });
-              _ref2 = nested;
-              for (idx = 0, _len = _ref2.length; idx < _len; idx++) {
-                tok = _ref2[idx];
-                if (tok[0] === 'CALL_END') {
-                  (tok[0] = ')');
-                }
-              }
-              nested.pop();
-              tokens.push(['TOKENS', nested]);
-            } else {
-              tokens.push(['STRING', quote + quote]);
-            }
-            i += expr.length - 1;
-            pi = i + 1;
-          }
-          i += 1;
-        }
-        if ((i > pi) && (pi < str.length - 1)) {
-          tokens.push(['STRING', quote + str.slice(pi, i) + quote]);
-        }
-        if (tokens[0][0] !== 'STRING') {
-          tokens.unshift(['STRING', '""']);
-        }
-        interpolated = tokens.length > 1;
-        if (interpolated) {
-          this.token('(', '(');
-        }
-        _ref2 = tokens;
-        for (i = 0, _len = _ref2.length; i < _len; i++) {
-          token = _ref2[i];
-          _ref3 = token;
-          tag = _ref3[0];
-          value = _ref3[1];
-          if (tag === 'TOKENS') {
-            this.tokens = this.tokens.concat(value);
-          } else if (tag === 'STRING' && options.escapeQuotes) {
-            escaped = value.slice(1, -1).replace(/"/g, '\\"');
-            this.token(tag, "\"" + (escaped) + "\"");
-          } else {
-            this.token(tag, value);
-          }
-          if (i < tokens.length - 1) {
-            this.token('+', '+');
-          }
-        }
-        if (interpolated) {
-          this.token(')', ')');
-        }
-        return tokens;
      }
+      lexer = new Lexer;
+      tokens = [];
+      i = (pi = 1);
+      end = str.length - 1;
+      while (i < end) {
+        if (str.charAt(i) === '\\') {
+          i += 1;
+        } else if (expr = this.balancedString(str.slice(i), [['#{', '}']])) {
+          if (pi < i) {
+            s = quote + this.escapeLines(str.slice(pi, i), heredoc) + quote;
+            tokens.push(['STRING', s]);
+          }
+          inner = expr.slice(2, -1).replace(/^[ \t]*\n/, '');
+          if (inner.length) {
+            if (heredoc) {
+              inner = inner.replace(RegExp('\\\\' + quote, 'g'), quote);
+            }
+            nested = lexer.tokenize("(" + (inner) + ")", {
+              line: this.line
+            });
+            _ref2 = nested;
+            for (idx = 0, _len = _ref2.length; idx < _len; idx++) {
+              tok = _ref2[idx];
+              if (tok[0] === 'CALL_END') {
+                (tok[0] = ')');
+              }
+            }
+            nested.pop();
+            tokens.push(['TOKENS', nested]);
+          } else {
+            tokens.push(['STRING', quote + quote]);
+          }
+          i += expr.length - 1;
+          pi = i + 1;
+        }
+        i += 1;
+      }
+      if ((i > pi) && (pi < str.length - 1)) {
+        s = str.slice(pi, i).replace(MULTILINER, heredoc ? '\\n' : '');
+        tokens.push(['STRING', quote + s + quote]);
+      }
+      if (tokens[0][0] !== 'STRING') {
+        tokens.unshift(['STRING', '""']);
+      }
+      interpolated = tokens.length > 1;
+      if (interpolated) {
+        this.token('(', '(');
+      }
+      _ref2 = tokens;
+      push = _ref2.push;
+      _ref2 = tokens;
+      for (i = 0, _len = _ref2.length; i < _len; i++) {
+        token = _ref2[i];
+        _ref3 = token;
+        tag = _ref3[0];
+        value = _ref3[1];
+        if (tag === 'TOKENS') {
+          push.apply(this.tokens, value);
+        } else if (tag === 'STRING' && escapeQuotes) {
+          escaped = value.slice(1, -1).replace(/"/g, '\\"');
+          this.token(tag, "\"" + (escaped) + "\"");
+        } else {
+          this.token(tag, value);
+        }
+        if (i < tokens.length - 1) {
+          this.token('+', '+');
+        }
+      }
+      if (interpolated) {
+        this.token(')', ')');
+      }
+      return tokens;
    };
    Lexer.prototype.token = function(tag, value) {
      return this.tokens.push([tag, value, this.line]);
@@ -573,15 +584,12 @@
    Lexer.prototype.prev = function(index) {
      return this.tokens[this.tokens.length - (index || 1)];
    };
-    Lexer.prototype.match = function(regex, index) {
-      var m;
-      return (m = this.chunk.match(regex)) ? m[index || 0] : false;
-    };
    Lexer.prototype.unfinished = function() {
      var prev, value;
-      prev = this.prev(2);
-      value = this.value();
-      return value && NO_NEWLINE.test(value) && prev && prev[0] !== '.' && !CODE.test(value) && !ASSIGNED.test(this.chunk);
+      return (prev = this.prev(2)) && prev[0] !== '.' && (value = this.value()) && NO_NEWLINE.test(value) && !CODE.test(value) && !ASSIGNED.test(this.chunk);
+    };
+    Lexer.prototype.escapeLines = function(str, heredoc) {
+      return str.replace(MULTILINER, heredoc ? '\\n' : '');
    };
    return Lexer;
  })();
@@ -591,11 +599,11 @@
  RESERVED = ['case', 'default', 'do', 'function', 'var', 'void', 'with', 'const', 'let', 'enum', 'export', 'import', 'native', '__hasProp', '__extends', '__slice'];
  JS_FORBIDDEN = JS_KEYWORDS.concat(RESERVED);
  IDENTIFIER = /^[a-zA-Z_$][\w$]*/;
-  NUMBER = /^(?:0x[\da-f]+)|^(?:\d+(\.\d+)?|\.\d+)(?:e[+-]?\d+)?/i;
+  NUMBER = /^0x[\da-f]+|^(?:\d+(\.\d+)?|\.\d+)(?:e[+-]?\d+)?/i;
  HEREDOC = /^("""|''')([\s\S]*?)\n?[ \t]*\1/;
  OPERATOR = /^(?:-[-=>]?|\+[+=]?|[*&|\/%=<>^:!?]+)(?=([ \t]*))/;
  WHITESPACE = /^[ \t]+/;
-  COMMENT = /^###([^#][\s\S]*?)(?:###[ \t]*\n|(?:###)?$)|^(?:\s*#(?!##[^#])[^\n]*)+/;
+  COMMENT = /^###([^#][\s\S]*?)(?:###[ \t]*\n|(?:###)?$)|^(?:\s*#(?!##[^#]).*)+/;
  CODE = /^[-=]>/;
  MULTI_DENT = /^(?:\n[ \t]*)+/;
  SIMPLESTR = /^'[^\\']*(?:\\.[^\\']*)*'/;
@@ -606,9 +614,9 @@
  REGEX_ESCAPE = /\\[^#]/g;
  MULTILINER = /\n/g;
  NO_NEWLINE = /^(?:[-+*&|\/%=<>!.\\][<>=&|]*|and|or|is(?:nt)?|n(?:ot|ew)|delete|typeof|instanceof)$/;
-  HEREDOC_INDENT = /\n+([ \t]*)|^([ \t]+)/g;
-  ASSIGNED = /^\s*((?:[a-zA-Z$_@]\w*|["'][^\n]+?["']|\d+)[ \t]*?[:=][^:=])/;
-  NEXT_CHARACTER = /^\s*(\S)/;
+  HEREDOC_INDENT = /\n+([ \t]*)/g;
+  ASSIGNED = /^\s*@?[$A-Za-z_][$\w]*[ \t]*?[:=][^:=>]/;
+  NEXT_CHARACTER = /^\s*(\S?)/;
  COMPOUND_ASSIGN = ['-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=', '<<=', '>>=', '>>>=', '&=', '^=', '|='];
  UNARY = ['UMINUS', 'UPLUS', '!', '!!', '~', 'NEW', 'TYPEOF', 'DELETE'];
  LOGIC = ['&', '|', '^', '&&', '||'];