From e2f86678a44793a47be4b0898ac2a6617c9f1427 Mon Sep 17 00:00:00 2001 From: Stan Angeloff Date: Sat, 6 Mar 2010 02:15:27 +0200 Subject: [PATCH] Allowing expressions to be used inside strings; syntax is $\{...\} --- lib/lexer.js | 76 ++++++++++++++++++++------- src/lexer.coffee | 40 ++++++++++---- test/test_string_interpolation.coffee | 24 +++++++++ 3 files changed, 109 insertions(+), 31 deletions(-) diff --git a/lib/lexer.js b/lib/lexer.js index 494d26b5..6f0f5ecc 100644 --- a/lib/lexer.js +++ b/lib/lexer.js @@ -76,7 +76,7 @@ exports.Lexer = (function() { Lexer = function Lexer() { }; // Scan by attempting to match tokens one at a time. Slow and steady. - Lexer.prototype.tokenize = function tokenize(code) { + Lexer.prototype.tokenize = function tokenize(code, rewrite) { this.code = code; // The remainder of the source code. this.i = 0; @@ -94,7 +94,10 @@ this.extract_next_token(); } this.close_indentation(); - return (new Rewriter()).rewrite(this.tokens); + if (((typeof rewrite !== "undefined" && rewrite !== null) ? rewrite : true)) { + return (new Rewriter()).rewrite(this.tokens); + } + return this.tokens; }; // At every position, run through this list of attempted matches, // short-circuiting if any of them succeed. @@ -397,55 +400,88 @@ Lexer.prototype.assignment_error = function assignment_error() { throw new Error('SyntaxError: Reserved word "' + this.value() + '" on line ' + this.line + ' can\'t be assigned'); }; - // Replace variables and block calls inside double-quoted strings. + // Replace variables and expressions inside double-quoted strings. Lexer.prototype.interpolate_string = function interpolate_string(escaped) { - var _a, _b, _c, _d, _e, _f, _g, _h, before, each, group, i, identifier, identifier_match, quote, tokens; + var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, before, each, expression, expression_match, group, i, identifier, identifier_match, lexer, nested, quote, rewrite, tokens; if (escaped.length < 3 || escaped.indexOf('"') !== 0) { return this.token('STRING', escaped); } else { + lexer = null; tokens = []; quote = escaped.substring(0, 1); escaped = escaped.substring(1, escaped.length - 1); while (escaped.length) { - identifier_match = escaped.match(/(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/); - if (identifier_match) { - _a = identifier_match; + expression_match = escaped.match(/(^|[\s\S]*?(?:[\\]|\\\\)?)(\${[\s\S]*?(?:[^\\]|\\\\)})/); + if (expression_match) { + _a = expression_match; group = _a[0]; before = _a[1]; - identifier = _a[2]; + expression = _a[2]; if (before.substring(before.length - 1) === '\\') { if (before.length) { - tokens.push(['STRING', quote + before.substring(0, before.length - 1) + identifier + quote]); + tokens.push(['STRING', quote + before.substring(0, before.length - 1) + expression + quote]); } } else { if (before.length) { tokens.push(['STRING', quote + before + quote]); } - tokens.push(['IDENTIFIER', identifier.substring(1)]); + if (!(typeof lexer !== "undefined" && lexer !== null)) { + lexer = new Lexer(); + } + nested = lexer.tokenize('(' + expression.substring(2, expression.length - 1) + ')', (rewrite = false)); + nested.pop(); + tokens.push(['TOKENS', nested]); } escaped = escaped.substring(group.length); } else { - tokens.push(['STRING', quote + escaped + quote]); - escaped = ''; + identifier_match = escaped.match(/(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/); + if (identifier_match) { + _b = identifier_match; + group = _b[0]; + before = _b[1]; + identifier = _b[2]; + if (before.substring(before.length - 1) === '\\') { + if (before.length) { + tokens.push(['STRING', quote + before.substring(0, before.length - 1) + identifier + quote]); + } + } else { + if (before.length) { + tokens.push(['STRING', quote + before + quote]); + } + tokens.push(['IDENTIFIER', identifier.substring(1)]); + } + escaped = escaped.substring(group.length); + } else { + tokens.push(['STRING', quote + escaped + quote]); + escaped = ''; + } } } if (tokens.length > 1) { - _d = tokens.length - 1; _e = 1; - for (_c = 0, i = _d; (_d <= _e ? i <= _e : i >= _e); (_d <= _e ? i += 1 : i -= 1), _c++) { + _e = tokens.length - 1; _f = 1; + for (_d = 0, i = _e; (_e <= _f ? i <= _f : i >= _f); (_e <= _f ? i += 1 : i -= 1), _d++) { tokens[i][0] === 'STRING' && tokens[i - 1][0] === 'STRING' ? tokens.splice(i - 1, 2, ['STRING', quote + tokens[i - 1][1].substring(1, tokens[i - 1][1].length - 1) + tokens[i][1].substring(1, tokens[i][1].length - 1) + quote]) : null; } } - _f = []; _g = tokens; - for (i = 0, _h = _g.length; i < _h; i++) { - each = _g[i]; - _f.push((function() { - this.token(each[0], each[1]); + _g = []; _h = tokens; + for (i = 0, _i = _h.length; i < _i; i++) { + each = _h[i]; + _g.push((function() { + if (each[0] === 'TOKENS') { + _j = each[1]; + for (_k = 0, _l = _j.length; _k < _l; _k++) { + nested = _j[_k]; + this.token(nested[0], nested[1]); + } + } else { + this.token(each[0], each[1]); + } if (i < tokens.length - 1) { return this.token('+', '+'); } }).call(this)); } - return _f; + return _g; } }; // Helpers diff --git a/src/lexer.coffee b/src/lexer.coffee index fb0102ef..1bc9aac6 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -112,7 +112,7 @@ BEFORE_WHEN: ['INDENT', 'OUTDENT', 'TERMINATOR'] exports.Lexer: class Lexer # Scan by attempting to match tokens one at a time. Slow and steady. - tokenize: (code) -> + tokenize: (code, rewrite) -> @code : code # The remainder of the source code. @i : 0 # Current character position we're parsing. @line : 0 # The current line. @@ -123,7 +123,8 @@ exports.Lexer: class Lexer @chunk: @code.slice(@i) @extract_next_token() @close_indentation() - (new Rewriter()).rewrite @tokens + return (new Rewriter()).rewrite @tokens if (rewrite ? true) + return @tokens # At every position, run through this list of attempted matches, # short-circuiting if any of them succeed. @@ -340,34 +341,51 @@ exports.Lexer: class Lexer assignment_error: -> throw new Error 'SyntaxError: Reserved word "' + @value() + '" on line ' + @line + ' can\'t be assigned' - # Replace variables and block calls inside double-quoted strings. + # Replace variables and expressions inside double-quoted strings. interpolate_string: (escaped) -> if escaped.length < 3 or escaped.indexOf('"') isnt 0 @token 'STRING', escaped else + lexer: null tokens: [] quote: escaped.substring(0, 1) escaped: escaped.substring(1, escaped.length - 1) while escaped.length - identifier_match: escaped.match /(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/ - if identifier_match - [group, before, identifier]: identifier_match + expression_match: escaped.match /(^|[\s\S]*?(?:[\\]|\\\\)?)(\${[\s\S]*?(?:[^\\]|\\\\)})/ + if expression_match + [group, before, expression]: expression_match if before.substring(before.length - 1) is '\\' - tokens.push ['STRING', quote + before.substring(0, before.length - 1) + identifier + quote] if before.length + tokens.push ['STRING', quote + before.substring(0, before.length - 1) + expression + quote] if before.length else tokens.push ['STRING', quote + before + quote] if before.length - tokens.push ['IDENTIFIER', identifier.substring(1)] + lexer: new Lexer() if not lexer? + nested: lexer.tokenize '(' + expression.substring(2, expression.length - 1) + ')', rewrite: no + nested.pop() + tokens.push ['TOKENS', nested] escaped: escaped.substring(group.length) else - tokens.push ['STRING', quote + escaped + quote] - escaped: '' + identifier_match: escaped.match /(^|[\s\S]*?(?:[\\]|\\\\)?)(\$([a-zA-Z_]\w*))/ + if identifier_match + [group, before, identifier]: identifier_match + if before.substring(before.length - 1) is '\\' + tokens.push ['STRING', quote + before.substring(0, before.length - 1) + identifier + quote] if before.length + else + tokens.push ['STRING', quote + before + quote] if before.length + tokens.push ['IDENTIFIER', identifier.substring(1)] + escaped: escaped.substring(group.length) + else + tokens.push ['STRING', quote + escaped + quote] + escaped: '' if tokens.length > 1 for i in [tokens.length - 1..1] if tokens[i][0] is 'STRING' and tokens[i - 1][0] is 'STRING' tokens.splice i - 1, 2, ['STRING', quote + tokens[i - 1][1].substring(1, tokens[i - 1][1].length - 1) + tokens[i][1].substring(1, tokens[i][1].length - 1) + quote] for each, i in tokens - @token each[0], each[1] + if each[0] is 'TOKENS' + @token nested[0], nested[1] for nested in each[1] + else + @token each[0], each[1] @token '+', '+' if i < tokens.length - 1 # Helpers diff --git a/test/test_string_interpolation.coffee b/test/test_string_interpolation.coffee index bccb712a..d62374b7 100644 --- a/test/test_string_interpolation.coffee +++ b/test/test_string_interpolation.coffee @@ -1,18 +1,42 @@ hello: 'Hello' world: 'World' ok '$hello $world!' is '$hello $world!' +ok '${hello} ${world}!' is '${hello} ${world}!' ok "$hello $world!" is 'Hello World!' +ok "${hello} ${world}!" is 'Hello World!' ok "[$hello$world]" is '[HelloWorld]' +ok "[${hello}${world}]" is '[HelloWorld]' ok "$hello$$world" is 'Hello$World' +# ok "${hello}$${world}" is 'Hello$World' [s, t, r, i, n, g]: ['s', 't', 'r', 'i', 'n', 'g'] ok "$s$t$r$i$n$g" is 'string' +ok "${s}${t}${r}${i}${n}${g}" is 'string' ok "\\$s\\$t\\$r\\$i\\$n\\$g" is '$s$t$r$i$n$g' +ok "\\${s}\\${t}\\${r}\\${i}\\${n}\\${g}" is '${s}${t}${r}${i}${n}${g}' ok "\\$string" is '$string' +ok "\\${string}" is '${string}' ok "\\$Escaping first" is '$Escaping first' +ok "\\${Escaping} first" is '${Escaping} first' ok "Escaping \\$in middle" is 'Escaping $in middle' +ok "Escaping \\${in} middle" is 'Escaping ${in} middle' ok "Escaping \\$last" is 'Escaping $last' +ok "Escaping \\${last}" is 'Escaping ${last}' ok "$$" is '$$' +ok "${}" is '${}' ok "\\\\$$" is '\\\\$$' +ok "\\\\${}" is '\\\\${}' + +ok "I won $20 last night." is 'I won $20 last night.' +ok "I won $${20} last night." is 'I won $20 last night.' +ok "I won $#20 last night." is 'I won $#20 last night.' +ok "I won $${'#20'} last night." is 'I won $#20 last night.' + +ok "${hello + world}" is 'HelloWorld' +ok "${hello + ' ' + world + '!'}" is 'Hello World!' + +list: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +ok "values: ${list.join(', ')}, length: ${list.length}." is 'values: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, length: 10.' +ok "values: ${list.join ' '}" is 'values: 0 1 2 3 4 5 6 7 8 9'