From f74fae58e3833d87a92ed642019425056f8c24f5 Mon Sep 17 00:00:00 2001
From: Stan Angeloff <insaned@abv.bg>
Date: Sun, 7 Mar 2010 14:56:27 +0200
Subject: [PATCH] Rewriting lexer.coffee to accept nested string
 interpolations.

---
 lib/lexer.js                          | 83 +++++++++++++++------------
 src/lexer.coffee                      | 60 ++++++++++---------
 test/test_string_interpolation.coffee | 30 ++++++----
 3 files changed, 99 insertions(+), 74 deletions(-)

diff --git a/lib/lexer.js b/lib/lexer.js
index 1d850d05..31ae0f6e 100644
--- a/lib/lexer.js
+++ b/lib/lexer.js
@@ -34,7 +34,7 @@
   IDENTIFIER = /^([a-zA-Z$_](\w|\$)*)/;
   NUMBER = /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i;
   HEREDOC = /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/;
-  INTERPOLATION = /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/;
+  INTERPOLATION = /^\$([a-zA-Z_@]\w*)/;
   OPERATOR = /^([+\*&|\/\-%=<>:!?]+)/;
   WHITESPACE = /^([ \t]+)/;
   COMMENT = /^(((\n?[ \t]*)?#[^\n]*)+)/;
@@ -217,30 +217,30 @@
     };
     // Matches a balanced group such as a single or double-quoted string. Pass in
     // a series of delimiters, all of which must be balanced correctly within the
-    // token's contents.
-    Lexer.prototype.balanced_token = function balanced_token() {
+    // string.
+    Lexer.prototype.balanced_string = function balanced_string(str) {
       var _a, _b, _c, _d, close, delimited, i, levels, open, pair;
-      delimited = Array.prototype.slice.call(arguments, 0);
+      delimited = Array.prototype.slice.call(arguments, 1);
       levels = [];
       i = 0;
-      while (i < this.chunk.length) {
+      while (i < str.length) {
         _a = delimited;
         for (_b = 0, _c = _a.length; _b < _c; _b++) {
           pair = _a[_b];
           _d = pair;
           open = _d[0];
           close = _d[1];
-          if (levels.length && starts(this.chunk, '\\', i)) {
+          if (levels.length && starts(str, '\\', i)) {
             i += 1;
             break;
-          } else if (levels.length && starts(this.chunk, close, i) && levels[levels.length - 1] === pair) {
+          } else if (levels.length && starts(str, close, i) && levels[levels.length - 1] === pair) {
             levels.pop();
             i += close.length - 1;
             if (!(levels.length)) {
               i += 1;
             }
             break;
-          } else if (starts(this.chunk, open, i)) {
+          } else if (starts(str, open, i)) {
             levels.push(pair);
             i += open.length - 1;
             break;
@@ -257,7 +257,13 @@
       if (i === 0) {
         return false;
       }
-      return this.chunk.substring(0, i);
+      return str.substring(0, i);
+    };
+    // Matches a balanced string within the token's contents.
+    Lexer.prototype.balanced_token = function balanced_token() {
+      var delimited;
+      delimited = Array.prototype.slice.call(arguments, 0);
+      return this.balanced_string.apply(this, [this.chunk].concat(delimited));
     };
     // Matches and conumes comments.
     Lexer.prototype.comment_token = function comment_token() {
@@ -453,50 +459,55 @@
     //     "Hello $name."
     //     "Hello ${name.capitalize()}."
     Lexer.prototype.interpolate_string = function interpolate_string(str) {
-      var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, before, each, group, i, inner, interp, lexer, match, nested, prev, quote, tok, tokens;
+      var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, each, expression, group, i, inner, interp, last_i, lexer, match, nested, prev, quote, tok, tokens;
       if (str.length < 3 || !starts(str, '"')) {
         return this.token('STRING', str);
       } else {
         lexer = new Lexer();
         tokens = [];
         quote = str.substring(0, 1);
-        str = str.substring(1, str.length - 1);
-        while (str.length) {
-          match = str.match(INTERPOLATION);
-          if (match) {
-            _a = match;
-            group = _a[0];
-            before = _a[1];
-            interp = _a[2];
-            if (starts(before, '\\', before.length - 1)) {
-              prev = before.substring(0, before.length - 1);
-              if (before.length) {
-                tokens.push(['STRING', quote + prev + "$" + interp + quote]);
+        i = 1;
+        last_i = i;
+        while (i < str.length - 1) {
+          if (starts(str, '\\', i)) {
+            i += 1;
+          } else {
+            match = str.substring(i).match(INTERPOLATION);
+            if (match) {
+              _a = match;
+              group = _a[0];
+              interp = _a[1];
+              if (starts(interp, '@')) {
+                interp = "this." + (interp.substring(1));
               }
+              if (last_i < i) {
+                tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
+              }
+              tokens.push(['IDENTIFIER', interp]);
+              i += group.length - 1;
+              last_i = i + 1;
             } else {
-              if (before.length) {
-                tokens.push(['STRING', quote + before + quote]);
-              }
-              if (starts(interp, '{')) {
-                inner = interp.substring(1, interp.length - 1);
+              expression = this.balanced_string(str.substring(i), ['${', '}']);
+              if (expression && expression.length > 3) {
+                inner = expression.substring(2, expression.length - 1);
                 nested = lexer.tokenize("(" + inner + ")", {
                   rewrite: false,
                   line: this.line
                 });
                 nested.pop();
-                tokens.push(['TOKENS', nested]);
-              } else {
-                if (starts(interp, '@')) {
-                  interp = "this." + (interp.substring(1));
+                if (last_i < i) {
+                  tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
                 }
-                tokens.push(['IDENTIFIER', interp]);
+                tokens.push(['TOKENS', nested]);
+                i += expression.length - 1;
+                last_i = i + 1;
               }
             }
-            str = str.substring(group.length);
-          } else {
-            tokens.push(['STRING', quote + str + quote]);
-            str = '';
           }
+          i += 1;
+        }
+        if (last_i < i && last_i < str.length - 1) {
+          tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]);
         }
         if (tokens.length > 1) {
           _d = tokens.length - 1; _e = 1;
diff --git a/src/lexer.coffee b/src/lexer.coffee
index fb6e4fb2..dec23165 100644
--- a/src/lexer.coffee
+++ b/src/lexer.coffee
@@ -59,7 +59,7 @@ JS_FORBIDDEN: JS_KEYWORDS.concat RESERVED
 IDENTIFIER    : /^([a-zA-Z$_](\w|\$)*)/
 NUMBER        : /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
 HEREDOC       : /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/
-INTERPOLATION : /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/
+INTERPOLATION : /^\$([a-zA-Z_@]\w*)/
 OPERATOR      : /^([+\*&|\/\-%=<>:!?]+)/
 WHITESPACE    : /^([ \t]+)/
 COMMENT       : /^(((\n?[ \t]*)?#[^\n]*)+)/
@@ -199,22 +199,22 @@ exports.Lexer: class Lexer
 
   # Matches a balanced group such as a single or double-quoted string. Pass in
   # a series of delimiters, all of which must be balanced correctly within the
-  # token's contents.
-  balanced_token: (delimited...) ->
+  # string.
+  balanced_string: (str, delimited...) ->
     levels: []
     i: 0
-    while i < @chunk.length
+    while i < str.length
       for pair in delimited
         [open, close]: pair
-        if levels.length and starts @chunk, '\\', i
+        if levels.length and starts str, '\\', i
           i += 1
           break
-        else if levels.length and starts(@chunk, close, i) and levels[levels.length - 1] is pair
+        else if levels.length and starts(str, close, i) and levels[levels.length - 1] is pair
           levels.pop()
           i += close.length - 1
           i += 1 unless levels.length
           break
-        else if starts @chunk, open, i
+        else if starts str, open, i
           levels.push(pair)
           i += open.length - 1
           break
@@ -222,7 +222,11 @@ exports.Lexer: class Lexer
       i += 1
     throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" if levels.length
     return false if i is 0
-    return @chunk.substring(0, i)
+    return str.substring(0, i)
+
+  # Matches a balanced string within the token's contents.
+  balanced_token: (delimited...) ->
+    @balanced_string @chunk, delimited...
 
   # Matches and conumes comments.
   comment_token: ->
@@ -382,28 +386,32 @@ exports.Lexer: class Lexer
       lexer:  new Lexer()
       tokens: []
       quote:  str.substring(0, 1)
-      str:    str.substring(1, str.length - 1)
-      while str.length
-        match: str.match INTERPOLATION
-        if match
-          [group, before, interp]: match
-          if starts before, '\\', before.length - 1
-            prev: before.substring(0, before.length - 1)
-            tokens.push ['STRING', "$quote$prev$$interp$quote"] if before.length
+      i:      1
+      last_i: i
+      while i < str.length - 1
+        if starts str, '\\', i
+          i += 1
+        else
+          match: str.substring(i).match INTERPOLATION
+          if match
+            [group, interp]: match
+            interp: "this.${ interp.substring(1) }" if starts interp, '@'
+            tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i
+            tokens.push ['IDENTIFIER', interp]
+            i += group.length - 1
+            last_i: i + 1
           else
-            tokens.push ['STRING', "$quote$before$quote"] if before.length
-            if starts interp, '{'
-              inner: interp.substring(1, interp.length - 1)
+            expression: @balanced_string str.substring(i), ['${', '}']
+            if expression and expression.length > 3
+              inner: expression.substring(2, expression.length - 1)
               nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
               nested.pop()
+              tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i
               tokens.push ['TOKENS', nested]
-            else
-              interp: "this.${ interp.substring(1) }" if starts interp, '@'
-              tokens.push ['IDENTIFIER', interp]
-          str: str.substring(group.length)
-        else
-          tokens.push ['STRING', "$quote$str$quote"]
-          str: ''
+              i += expression.length - 1
+              last_i: i + 1
+        i += 1
+      tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i and last_i < str.length - 1
       if tokens.length > 1
         for i in [tokens.length - 1..1]
           [prev, tok]: [tokens[i - 1], tokens[i]]
diff --git a/test/test_string_interpolation.coffee b/test/test_string_interpolation.coffee
index bde36f56..2356202b 100644
--- a/test/test_string_interpolation.coffee
+++ b/test/test_string_interpolation.coffee
@@ -14,22 +14,23 @@ ok "$hello ${ 1 + 2 } $world" is "Hello 3 World"
 [s, t, r, i, n, g]: ['s', 't', 'r', 'i', 'n', 'g']
 ok "$s$t$r$i$n$g" is 'string'
 ok "${s}${t}${r}${i}${n}${g}" is 'string'
-ok "\\$s\\$t\\$r\\$i\\$n\\$g" is '$s$t$r$i$n$g'
-ok "\\${s}\\${t}\\${r}\\${i}\\${n}\\${g}" is '${s}${t}${r}${i}${n}${g}'
-ok "\\$string" is '$string'
-ok "\\${string}" is '${string}'
+ok "\$s\$t\$r\$i\$n\$g" is '$s$t$r$i$n$g'
+ok "\\$s\\$t\\$r\\$i\\$n\\$g" is '\\s\\t\\r\\i\\n\\g'
+ok "\${s}\${t}\${r}\${i}\${n}\${g}" is '${s}${t}${r}${i}${n}${g}'
+ok "\$string" is '$string'
+ok "\${string}" is '${string}'
 
-ok "\\$Escaping first" is '$Escaping first'
-ok "\\${Escaping} first" is '${Escaping} first'
-ok "Escaping \\$in middle" is 'Escaping $in middle'
-ok "Escaping \\${in} middle" is 'Escaping ${in} middle'
-ok "Escaping \\$last" is 'Escaping $last'
-ok "Escaping \\${last}" is 'Escaping ${last}'
+ok "\$Escaping first" is '$Escaping first'
+ok "\${Escaping} first" is '${Escaping} first'
+ok "Escaping \$in middle" is 'Escaping $in middle'
+ok "Escaping \${in} middle" is 'Escaping ${in} middle'
+ok "Escaping \$last" is 'Escaping $last'
+ok "Escaping \${last}" is 'Escaping ${last}'
 
 ok "$$" is '$$'
 ok "${}" is '${}'
-ok "\\\\$$" is '\\\\$$'
-ok "\\\\${}" is '\\\\${}'
+ok "\\\\\$$" is '\\\\\$$'
+ok "\\\${}" is '\\${}'
 
 ok "I won $20 last night." is 'I won $20 last night.'
 ok "I won $${20} last night." is 'I won $20 last night.'
@@ -53,3 +54,8 @@ ok "I can has ${"cheeze"}" is 'I can has cheeze'
 ok 'I can has ${"cheeze"}' is 'I can has ${"cheeze"}'
 
 ok "Where is ${obj["name"] + '?'}" is 'Where is Joe?'
+
+ok "Where is ${"the new ${obj["name"]}"}?" is 'Where is the new Joe?'
+ok "Hello ${world ? "$hello"}" is 'Hello World'
+
+ok "Hello ${"${"${obj["name"]}" + '!'}"}" is 'Hello Joe!'