Minor cleanups: rename balanced_group to balanced_token, remove the unused optional-escaper parameter, and use balanced_token to implement interpolated JavaScript (backtick) matching.

This commit is contained in:
Jeremy Ashkenas
2010-03-06 16:06:47 -05:00
parent f9cde1b46d
commit c4ad6d1ee6
2 changed files with 38 additions and 46 deletions

View File

@@ -1,5 +1,5 @@
(function(){
var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, compact, count, include;
var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, compact, count, include;
// The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
// matches against the beginning of the source code. When a match is found,
// a token is produced, we consume the match, and start again. Tokens are in the
@@ -35,7 +35,6 @@
NUMBER = /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i;
HEREDOC = /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/;
INTERPOLATION = /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/;
JS = /^(``|`([\s\S]*?)([^\\]|\\\\)`)/;
OPERATOR = /^([+\*&|\/\-%=<>:!?]+)/;
WHITESPACE = /^([ \t]+)/;
COMMENT = /^(((\n?[ \t]*)?#[^\n]*)+)/;
@@ -77,12 +76,13 @@
Lexer = function Lexer() { };
// Scan by attempting to match tokens one at a time. Slow and steady.
Lexer.prototype.tokenize = function tokenize(code, options) {
options = options || {};
var o;
o = options || {};
this.code = code;
// The remainder of the source code.
this.i = 0;
// Current character position we're parsing.
this.line = 0;
this.line = o.line || 0;
// The current line.
this.indent = 0;
// The current indent level.
@@ -95,7 +95,7 @@
this.extract_next_token();
}
this.close_indentation();
if (options.rewrite === false) {
if (o.rewrite === false) {
return this.tokens;
}
return (new Rewriter()).rewrite(this.tokens);
@@ -168,9 +168,9 @@
// Matches strings, including multi-line strings.
Lexer.prototype.string_token = function string_token() {
var string;
string = this.balanced_group(['"'], ['${', '}']);
string = this.balanced_token(['"', '"'], ['${', '}']);
if (string === false) {
string = this.balanced_group(["'"]);
string = this.balanced_token(["'", "'"]);
}
if (!(string)) {
return false;
@@ -195,7 +195,7 @@
// Matches interpolated JavaScript.
Lexer.prototype.js_token = function js_token() {
var script;
if (!((script = this.match(JS, 1)))) {
if (!((script = this.balanced_token(['`', '`'])))) {
return false;
}
this.token('JS', script.replace(JS_CLEANER, ''));
@@ -215,28 +215,20 @@
this.i += regex.length;
return true;
};
// Matches a balanced group such as a single or double-quoted string.
Lexer.prototype.balanced_group = function balanced_group() {
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, delimited, each, i, levels, type;
// Matches a balanced group such as a single or double-quoted string. Pass in
// a series of delimiters, all of which must be balanced correctly within the
// token's contents.
Lexer.prototype.balanced_token = function balanced_token() {
var _a, _b, delimited, each, i, levels, type;
delimited = Array.prototype.slice.call(arguments, 0);
_a = delimited;
for (_b = 0, _c = _a.length; _b < _c; _b++) {
each = _a[_b];
!(typeof (_d = each[1]) !== "undefined" && _d !== null) ? ((each[1] = each[0])) : null;
}
_e = delimited;
for (_f = 0, _g = _e.length; _f < _g; _f++) {
each = _e[_f];
!(typeof (_h = each[2]) !== "undefined" && _h !== null) ? ((each[2] = '\\')) : null;
}
levels = [];
i = 0;
while (i < this.chunk.length) {
_i = delimited;
for (type = 0, _j = _i.length; type < _j; type++) {
each = _i[type];
if (each[2] !== false && this.chunk.substring(i, i + each[2].length) === each[2]) {
i += each[2].length;
_a = delimited;
for (type = 0, _b = _a.length; type < _b; type++) {
each = _a[type];
if (levels.length && this.chunk.substring(i, i + 1) === '\\') {
i += 1;
break;
} else if (levels.length && this.chunk.substring(i, i + each[1].length) === each[1] && levels[levels.length - 1] === type) {
levels.pop();
@@ -485,7 +477,8 @@
if (interp.substring(0, 1) === '{') {
inner = interp.substring(1, interp.length - 1);
nested = lexer.tokenize("(" + inner + ")", {
rewrite: false
rewrite: false,
line: this.line
});
nested.pop();
tokens.push(['TOKENS', nested]);

View File

@@ -60,7 +60,6 @@ IDENTIFIER : /^([a-zA-Z$_](\w|\$)*)/
NUMBER : /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
HEREDOC : /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/
INTERPOLATION : /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/
JS : /^(``|`([\s\S]*?)([^\\]|\\\\)`)/
OPERATOR : /^([+\*&|\/\-%=<>:!?]+)/
WHITESPACE : /^([ \t]+)/
COMMENT : /^(((\n?[ \t]*)?#[^\n]*)+)/
@@ -113,18 +112,18 @@ exports.Lexer: class Lexer
# Scan by attempting to match tokens one at a time. Slow and steady.
tokenize: (code, options) ->
options ||= {}
@code : code # The remainder of the source code.
@i : 0 # Current character position we're parsing.
@line : 0 # The current line.
@indent : 0 # The current indent level.
@indents : [] # The stack of all indent levels we are currently within.
@tokens : [] # Collection of all parsed tokens in the form ['TOKEN_TYPE', value, line]
o : options or {}
@code : code # The remainder of the source code.
@i : 0 # Current character position we're parsing.
@line : o.line or 0 # The current line.
@indent : 0 # The current indent level.
@indents : [] # The stack of all indent levels we are currently within.
@tokens : [] # Collection of all parsed tokens in the form ['TOKEN_TYPE', value, line]
while @i < @code.length
@chunk: @code.slice(@i)
@extract_next_token()
@close_indentation()
return @tokens if options.rewrite is no
return @tokens if o.rewrite is no
(new Rewriter()).rewrite @tokens
# At every position, run through this list of attempted matches,
@@ -166,8 +165,8 @@ exports.Lexer: class Lexer
# Matches strings, including multi-line strings.
string_token: ->
string: @balanced_group ['"'], ['${', '}']
string: @balanced_group ["'"] if string is false
string: @balanced_token ['"', '"'], ['${', '}']
string: @balanced_token ["'", "'"] if string is false
return false unless string
@interpolate_string string.replace STRING_NEWLINES, " \\\n"
@line += count string, "\n"
@@ -185,7 +184,7 @@ exports.Lexer: class Lexer
# Matches interpolated JavaScript.
js_token: ->
return false unless script: @match JS, 1
return false unless script: @balanced_token ['`', '`']
@token 'JS', script.replace(JS_CLEANER, '')
@i += script.length
true
@@ -198,16 +197,16 @@ exports.Lexer: class Lexer
@i += regex.length
true
# Matches a balanced group such as a single or double-quoted string.
balanced_group: (delimited...) ->
(each[1]: each[0]) for each in delimited when not each[1]?
(each[2]: '\\') for each in delimited when not each[2]?
# Matches a balanced group such as a single or double-quoted string. Pass in
# a series of delimiters, all of which must be balanced correctly within the
# token's contents.
balanced_token: (delimited...) ->
levels: []
i: 0
while i < @chunk.length
for each, type in delimited
if each[2] isnt false and @chunk.substring(i, i + each[2].length) is each[2]
i += each[2].length
if levels.length and @chunk.substring(i, i + 1) is '\\'
i += 1
break
else if levels.length and @chunk.substring(i, i + each[1].length) is each[1] and levels[levels.length - 1] is type
levels.pop()
@@ -394,7 +393,7 @@ exports.Lexer: class Lexer
tokens.push ['STRING', "$quote$before$quote"] if before.length
if interp.substring(0, 1) is '{'
inner: interp.substring(1, interp.length - 1)
nested: lexer.tokenize "($inner)", {rewrite: no}
nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
nested.pop()
tokens.push ['TOKENS', nested]
else