From f74fae58e3833d87a92ed642019425056f8c24f5 Mon Sep 17 00:00:00 2001 From: Stan Angeloff Date: Sun, 7 Mar 2010 14:56:27 +0200 Subject: [PATCH] Rewriting lexer.coffee to accept nested string interpolations. --- lib/lexer.js | 83 +++++++++++++++------------ src/lexer.coffee | 60 ++++++++++--------- test/test_string_interpolation.coffee | 30 ++++++---- 3 files changed, 99 insertions(+), 74 deletions(-) diff --git a/lib/lexer.js b/lib/lexer.js index 1d850d05..31ae0f6e 100644 --- a/lib/lexer.js +++ b/lib/lexer.js @@ -34,7 +34,7 @@ IDENTIFIER = /^([a-zA-Z$_](\w|\$)*)/; NUMBER = /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i; HEREDOC = /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/; - INTERPOLATION = /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/; + INTERPOLATION = /^\$([a-zA-Z_@]\w*)/; OPERATOR = /^([+\*&|\/\-%=<>:!?]+)/; WHITESPACE = /^([ \t]+)/; COMMENT = /^(((\n?[ \t]*)?#[^\n]*)+)/; @@ -217,30 +217,30 @@ }; // Matches a balanced group such as a single or double-quoted string. Pass in // a series of delimiters, all of which must be balanced correctly within the - // token's contents. - Lexer.prototype.balanced_token = function balanced_token() { + // string. 
+ Lexer.prototype.balanced_string = function balanced_string(str) { var _a, _b, _c, _d, close, delimited, i, levels, open, pair; - delimited = Array.prototype.slice.call(arguments, 0); + delimited = Array.prototype.slice.call(arguments, 1); levels = []; i = 0; - while (i < this.chunk.length) { + while (i < str.length) { _a = delimited; for (_b = 0, _c = _a.length; _b < _c; _b++) { pair = _a[_b]; _d = pair; open = _d[0]; close = _d[1]; - if (levels.length && starts(this.chunk, '\\', i)) { + if (levels.length && starts(str, '\\', i)) { i += 1; break; - } else if (levels.length && starts(this.chunk, close, i) && levels[levels.length - 1] === pair) { + } else if (levels.length && starts(str, close, i) && levels[levels.length - 1] === pair) { levels.pop(); i += close.length - 1; if (!(levels.length)) { i += 1; } break; - } else if (starts(this.chunk, open, i)) { + } else if (starts(str, open, i)) { levels.push(pair); i += open.length - 1; break; @@ -257,7 +257,13 @@ if (i === 0) { return false; } - return this.chunk.substring(0, i); + return str.substring(0, i); + }; + // Matches a balanced string within the token's contents. + Lexer.prototype.balanced_token = function balanced_token() { + var delimited; + delimited = Array.prototype.slice.call(arguments, 0); + return this.balanced_string.apply(this, [this.chunk].concat(delimited)); }; // Matches and conumes comments. Lexer.prototype.comment_token = function comment_token() { @@ -453,50 +459,55 @@ // "Hello $name." // "Hello ${name.capitalize()}." 
Lexer.prototype.interpolate_string = function interpolate_string(str) { - var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, before, each, group, i, inner, interp, lexer, match, nested, prev, quote, tok, tokens; + var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, each, expression, group, i, inner, interp, last_i, lexer, match, nested, prev, quote, tok, tokens; if (str.length < 3 || !starts(str, '"')) { return this.token('STRING', str); } else { lexer = new Lexer(); tokens = []; quote = str.substring(0, 1); - str = str.substring(1, str.length - 1); - while (str.length) { - match = str.match(INTERPOLATION); - if (match) { - _a = match; - group = _a[0]; - before = _a[1]; - interp = _a[2]; - if (starts(before, '\\', before.length - 1)) { - prev = before.substring(0, before.length - 1); - if (before.length) { - tokens.push(['STRING', quote + prev + "$" + interp + quote]); + i = 1; + last_i = i; + while (i < str.length - 1) { + if (starts(str, '\\', i)) { + i += 1; + } else { + match = str.substring(i).match(INTERPOLATION); + if (match) { + _a = match; + group = _a[0]; + interp = _a[1]; + if (starts(interp, '@')) { + interp = "this." + (interp.substring(1)); } + if (last_i < i) { + tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]); + } + tokens.push(['IDENTIFIER', interp]); + i += group.length - 1; + last_i = i + 1; } else { - if (before.length) { - tokens.push(['STRING', quote + before + quote]); - } - if (starts(interp, '{')) { - inner = interp.substring(1, interp.length - 1); + expression = this.balanced_string(str.substring(i), ['${', '}']); + if (expression && expression.length > 3) { + inner = expression.substring(2, expression.length - 1); nested = lexer.tokenize("(" + inner + ")", { rewrite: false, line: this.line }); nested.pop(); - tokens.push(['TOKENS', nested]); - } else { - if (starts(interp, '@')) { - interp = "this." 
+ (interp.substring(1)); + if (last_i < i) { + tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]); } - tokens.push(['IDENTIFIER', interp]); + tokens.push(['TOKENS', nested]); + i += expression.length - 1; + last_i = i + 1; } } - str = str.substring(group.length); - } else { - tokens.push(['STRING', quote + str + quote]); - str = ''; } + i += 1; + } + if (last_i < i && last_i < str.length - 1) { + tokens.push(['STRING', quote + (str.substring(last_i, i)) + quote]); } if (tokens.length > 1) { _d = tokens.length - 1; _e = 1; diff --git a/src/lexer.coffee b/src/lexer.coffee index fb6e4fb2..dec23165 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -59,7 +59,7 @@ JS_FORBIDDEN: JS_KEYWORDS.concat RESERVED IDENTIFIER : /^([a-zA-Z$_](\w|\$)*)/ NUMBER : /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i HEREDOC : /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/ -INTERPOLATION : /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/ +INTERPOLATION : /^\$([a-zA-Z_@]\w*)/ OPERATOR : /^([+\*&|\/\-%=<>:!?]+)/ WHITESPACE : /^([ \t]+)/ COMMENT : /^(((\n?[ \t]*)?#[^\n]*)+)/ @@ -199,22 +199,22 @@ exports.Lexer: class Lexer # Matches a balanced group such as a single or double-quoted string. Pass in # a series of delimiters, all of which must be balanced correctly within the - # token's contents. - balanced_token: (delimited...) -> + # string. + balanced_string: (str, delimited...) 
-> levels: [] i: 0 - while i < @chunk.length + while i < str.length for pair in delimited [open, close]: pair - if levels.length and starts @chunk, '\\', i + if levels.length and starts str, '\\', i i += 1 break - else if levels.length and starts(@chunk, close, i) and levels[levels.length - 1] is pair + else if levels.length and starts(str, close, i) and levels[levels.length - 1] is pair levels.pop() i += close.length - 1 i += 1 unless levels.length break - else if starts @chunk, open, i + else if starts str, open, i levels.push(pair) i += open.length - 1 break @@ -222,7 +222,11 @@ exports.Lexer: class Lexer i += 1 throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" if levels.length return false if i is 0 - return @chunk.substring(0, i) + return str.substring(0, i) + + # Matches a balanced string within the token's contents. + balanced_token: (delimited...) -> + @balanced_string @chunk, delimited... # Matches and conumes comments. comment_token: -> @@ -382,28 +386,32 @@ exports.Lexer: class Lexer lexer: new Lexer() tokens: [] quote: str.substring(0, 1) - str: str.substring(1, str.length - 1) - while str.length - match: str.match INTERPOLATION - if match - [group, before, interp]: match - if starts before, '\\', before.length - 1 - prev: before.substring(0, before.length - 1) - tokens.push ['STRING', "$quote$prev$$interp$quote"] if before.length + i: 1 + last_i: i + while i < str.length - 1 + if starts str, '\\', i + i += 1 + else + match: str.substring(i).match INTERPOLATION + if match + [group, interp]: match + interp: "this.${ interp.substring(1) }" if starts interp, '@' + tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i + tokens.push ['IDENTIFIER', interp] + i += group.length - 1 + last_i: i + 1 else - tokens.push ['STRING', "$quote$before$quote"] if before.length - if starts interp, '{' - inner: interp.substring(1, interp.length - 1) + expression: @balanced_string str.substring(i), ['${', 
'}'] + if expression and expression.length > 3 + inner: expression.substring(2, expression.length - 1) nested: lexer.tokenize "($inner)", {rewrite: no, line: @line} nested.pop() + tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i tokens.push ['TOKENS', nested] - else - interp: "this.${ interp.substring(1) }" if starts interp, '@' - tokens.push ['IDENTIFIER', interp] - str: str.substring(group.length) - else - tokens.push ['STRING', "$quote$str$quote"] - str: '' + i += expression.length - 1 + last_i: i + 1 + i += 1 + tokens.push ['STRING', "$quote${ str.substring(last_i, i) }$quote"] if last_i < i and last_i < str.length - 1 if tokens.length > 1 for i in [tokens.length - 1..1] [prev, tok]: [tokens[i - 1], tokens[i]] diff --git a/test/test_string_interpolation.coffee b/test/test_string_interpolation.coffee index bde36f56..2356202b 100644 --- a/test/test_string_interpolation.coffee +++ b/test/test_string_interpolation.coffee @@ -14,22 +14,23 @@ ok "$hello ${ 1 + 2 } $world" is "Hello 3 World" [s, t, r, i, n, g]: ['s', 't', 'r', 'i', 'n', 'g'] ok "$s$t$r$i$n$g" is 'string' ok "${s}${t}${r}${i}${n}${g}" is 'string' -ok "\\$s\\$t\\$r\\$i\\$n\\$g" is '$s$t$r$i$n$g' -ok "\\${s}\\${t}\\${r}\\${i}\\${n}\\${g}" is '${s}${t}${r}${i}${n}${g}' -ok "\\$string" is '$string' -ok "\\${string}" is '${string}' +ok "\$s\$t\$r\$i\$n\$g" is '$s$t$r$i$n$g' +ok "\\$s\\$t\\$r\\$i\\$n\\$g" is '\\s\\t\\r\\i\\n\\g' +ok "\${s}\${t}\${r}\${i}\${n}\${g}" is '${s}${t}${r}${i}${n}${g}' +ok "\$string" is '$string' +ok "\${string}" is '${string}' -ok "\\$Escaping first" is '$Escaping first' -ok "\\${Escaping} first" is '${Escaping} first' -ok "Escaping \\$in middle" is 'Escaping $in middle' -ok "Escaping \\${in} middle" is 'Escaping ${in} middle' -ok "Escaping \\$last" is 'Escaping $last' -ok "Escaping \\${last}" is 'Escaping ${last}' +ok "\$Escaping first" is '$Escaping first' +ok "\${Escaping} first" is '${Escaping} first' +ok "Escaping \$in middle" is 'Escaping $in 
middle' +ok "Escaping \${in} middle" is 'Escaping ${in} middle' +ok "Escaping \$last" is 'Escaping $last' +ok "Escaping \${last}" is 'Escaping ${last}' ok "$$" is '$$' ok "${}" is '${}' -ok "\\\\$$" is '\\\\$$' -ok "\\\\${}" is '\\\\${}' +ok "\\\\\$$" is '\\\\\$$' +ok "\\\${}" is '\\${}' ok "I won $20 last night." is 'I won $20 last night.' ok "I won $${20} last night." is 'I won $20 last night.' @@ -53,3 +54,8 @@ ok "I can has ${"cheeze"}" is 'I can has cheeze' ok 'I can has ${"cheeze"}' is 'I can has ${"cheeze"}' ok "Where is ${obj["name"] + '?'}" is 'Where is Joe?' + +ok "Where is ${"the new ${obj["name"]}"}?" is 'Where is the new Joe?' +ok "Hello ${world ? "$hello"}" is 'Hello World' + +ok "Hello ${"${"${obj["name"]}" + '!'}"}" is 'Hello Joe!'