From 83fd84745d18f9ee9c902407ae821348d888f599 Mon Sep 17 00:00:00 2001
From: Stan Angeloff <insaned@abv.bg>
Date: Sat, 6 Mar 2010 22:16:37 +0200
Subject: [PATCH] Rewriting string tokenizer; allowing nested double-quoted
 strings inside expression interpolations.

---
 lib/lexer.js                          | 63 +++++++++++++++++++++++++--
 src/lexer.coffee                      | 32 +++++++++++++-
 test/test_string_interpolation.coffee |  5 +++
 3 files changed, 95 insertions(+), 5 deletions(-)

diff --git a/lib/lexer.js b/lib/lexer.js
index 35fe07d4..ef182c6a 100644
--- a/lib/lexer.js
+++ b/lib/lexer.js
@@ -1,5 +1,5 @@
 (function(){
-  var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, compact, count, include;
+  var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, compact, count, include;
   // The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
   // matches against the beginning of the source code. When a match is found,
   // a token is produced, we consume the match, and start again. Tokens are in the
@@ -33,7 +33,6 @@
   // Token matching regexes.
   IDENTIFIER = /^([a-zA-Z$_](\w|\$)*)/;
   NUMBER = /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i;
-  STRING = /^(""|''|"([\s\S]*?)([^\\]|\\\\)"|'([\s\S]*?)([^\\]|\\\\)')/;
   HEREDOC = /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/;
   INTERPOLATION = /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/;
   JS = /^(``|`([\s\S]*?)([^\\]|\\\\)`)/;
@@ -169,7 +168,11 @@
     // Matches strings, including multi-line strings.
     Lexer.prototype.string_token = function string_token() {
       var string;
-      if (!((string = this.match(STRING, 1)))) {
+      string = this.balanced_group(['"'], ['${', '}']);
+      if (string === false) {
+        string = this.balanced_group(["'"]);
+      }
+      if (!(string)) {
         return false;
       }
       this.interpolate_string(string.replace(STRING_NEWLINES, " \\\n"));
@@ -212,6 +215,60 @@
       this.i += regex.length;
       return true;
     };
+    // Matches a balanced group at the start of the chunk (e.g. a quoted string); each argument is an [opener, closer] pair. Returns the matched text, or false if the chunk does not start with an opener; throws if a group is left open.
+    Lexer.prototype.balanced_group = function balanced_group() {
+      var _a, _b, _c, _d, _e, _f, delimited, each, escaped, i, levels, next, type;
+      delimited = Array.prototype.slice.call(arguments, 0);
+      _a = delimited;
+      for (_b = 0, _c = _a.length; _b < _c; _b++) { // a pair given without a closer reuses its opener as the closer
+        each = _a[_b];
+        !(typeof (_d = each[1]) !== "undefined" && _d !== null) ? ((each[1] = each[0])) : null;
+      }
+      escaped = '\\';
+      next = (function(__this) {
+        var __func = function(length) {
+          return this.chunk.substring(i, i + length);
+        };
+        return (function next() {
+          return __func.apply(__this, arguments);
+        });
+      })(this);
+      levels = []; // stack of indexes into `delimited` for the groups currently open
+      i = 0;
+      while (i < this.chunk.length) {
+        if (levels.length && next(1) === escaped) { // escapes only count inside an open group; a leading backslash must not match
+          i += 1;
+        } else {
+          _e = delimited;
+          for (type = 0, _f = _e.length; type < _f; type++) {
+            each = _e[type];
+            if (levels.length && next(each[1].length) === each[1] && levels[levels.length - 1] === type) {
+              levels.pop();
+              i += each[1].length - 1;
+              if (!(levels.length)) { // outermost group closed: step past the closer so it is part of the match
+                i += 1;
+              }
+              break;
+            } else if (next(each[0].length) === each[0]) {
+              levels.push(type);
+              i += each[0].length - 1;
+              break;
+            }
+          }
+        }
+        if (!(levels.length)) {
+          break;
+        }
+        i += 1;
+      }
+      if (levels.length) {
+        throw new Error("SyntaxError: Unterminated " + (delimited[levels.pop()][0]) + " starting on line " + this.line);
+      }
+      if (i === 0) { // nothing was opened at the start of the chunk
+        return false;
+      }
+      return this.chunk.substring(0, i);
+    };
     // Matches and conumes comments.
     Lexer.prototype.comment_token = function comment_token() {
       var comment, lines;
diff --git a/src/lexer.coffee b/src/lexer.coffee
index e88db1a1..4fd3bb60 100644
--- a/src/lexer.coffee
+++ b/src/lexer.coffee
@@ -58,7 +58,6 @@ JS_FORBIDDEN: JS_KEYWORDS.concat RESERVED
 # Token matching regexes.
 IDENTIFIER    : /^([a-zA-Z$_](\w|\$)*)/
 NUMBER        : /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
-STRING        : /^(""|''|"([\s\S]*?)([^\\]|\\\\)"|'([\s\S]*?)([^\\]|\\\\)')/
 HEREDOC       : /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/
 INTERPOLATION : /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/
 JS            : /^(``|`([\s\S]*?)([^\\]|\\\\)`)/
@@ -167,7 +166,9 @@ exports.Lexer: class Lexer
 
   # Matches strings, including multi-line strings.
   string_token: ->
-    return false unless string: @match STRING, 1
+    string: @balanced_group ['"'], ['${', '}']
+    string: @balanced_group ["'"] if string is false
+    return false unless string
     @interpolate_string string.replace STRING_NEWLINES, " \\\n"
     @line += count string, "\n"
     @i += string.length
@@ -197,6 +198,33 @@ exports.Lexer: class Lexer
     @i += regex.length
     true
 
+  # Matches a balanced group at the start of the chunk (e.g. a quoted string). Each argument is an [opener, closer] pair; the closer defaults to the opener. Returns the matched text, or false if the chunk does not start with an opener; throws if a group is left open. Escapes are only honoured inside an open group, so a leading backslash never matches.
+  balanced_group: (delimited...) ->
+    (each[1]: each[0]) for each in delimited when not each[1]?
+    escaped: '\\'
+    next: (length) => @chunk.substring i, i + length
+    levels: []
+    i: 0
+    while i < @chunk.length
+      if levels.length and next(1) is escaped
+        i += 1
+      else
+        for each, type in delimited
+          if levels.length and next(each[1].length) is each[1] and levels[levels.length - 1] is type
+            levels.pop()
+            i += each[1].length - 1
+            i += 1 unless levels.length
+            break
+          else if next(each[0].length) is each[0]
+            levels.push(type)
+            i += each[0].length - 1
+            break
+      break unless levels.length
+      i += 1
+    throw new Error "SyntaxError: Unterminated ${delimited[levels.pop()][0]} starting on line $@line" if levels.length
+    return false if i is 0
+    return @chunk.substring(0, i)
+
   # Matches and conumes comments.
   comment_token: ->
     return false unless comment: @match COMMENT, 1
diff --git a/test/test_string_interpolation.coffee b/test/test_string_interpolation.coffee
index 526ca24d..bde36f56 100644
--- a/test/test_string_interpolation.coffee
+++ b/test/test_string_interpolation.coffee
@@ -48,3 +48,8 @@ obj: {
   hi: -> "Hello $@name."
 }
 ok obj.hi() is "Hello Joe."
+
+ok "I can has ${"cheeze"}" is 'I can has cheeze'
+ok 'I can has ${"cheeze"}' is 'I can has ${"cheeze"}'
+
+ok "Where is ${obj["name"] + '?'}" is 'Where is Joe?'