mirror of
https://github.com/jashkenas/coffeescript.git
synced 2026-05-03 03:00:14 -04:00
Minor cleanups: rename balanced_group to balanced_token, remove the optional escaper (unused), and use balanced_token to implement interpolated JavaScript.
This commit is contained in:
47
lib/lexer.js
47
lib/lexer.js
@@ -1,5 +1,5 @@
|
||||
(function(){
|
||||
var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, compact, count, include;
|
||||
var ACCESSORS, ASSIGNMENT, BEFORE_WHEN, CALLABLE, CODE, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, compact, count, include;
|
||||
// The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
|
||||
// matches against the beginning of the source code. When a match is found,
|
||||
// a token is produced, we consume the match, and start again. Tokens are in the
|
||||
@@ -35,7 +35,6 @@
|
||||
NUMBER = /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i;
|
||||
HEREDOC = /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/;
|
||||
INTERPOLATION = /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/;
|
||||
JS = /^(``|`([\s\S]*?)([^\\]|\\\\)`)/;
|
||||
OPERATOR = /^([+\*&|\/\-%=<>:!?]+)/;
|
||||
WHITESPACE = /^([ \t]+)/;
|
||||
COMMENT = /^(((\n?[ \t]*)?#[^\n]*)+)/;
|
||||
@@ -77,12 +76,13 @@
|
||||
Lexer = function Lexer() { };
|
||||
// Scan by attempting to match tokens one at a time. Slow and steady.
|
||||
Lexer.prototype.tokenize = function tokenize(code, options) {
|
||||
options = options || {};
|
||||
var o;
|
||||
o = options || {};
|
||||
this.code = code;
|
||||
// The remainder of the source code.
|
||||
this.i = 0;
|
||||
// Current character position we're parsing.
|
||||
this.line = 0;
|
||||
this.line = o.line || 0;
|
||||
// The current line.
|
||||
this.indent = 0;
|
||||
// The current indent level.
|
||||
@@ -95,7 +95,7 @@
|
||||
this.extract_next_token();
|
||||
}
|
||||
this.close_indentation();
|
||||
if (options.rewrite === false) {
|
||||
if (o.rewrite === false) {
|
||||
return this.tokens;
|
||||
}
|
||||
return (new Rewriter()).rewrite(this.tokens);
|
||||
@@ -168,9 +168,9 @@
|
||||
// Matches strings, including multi-line strings.
|
||||
Lexer.prototype.string_token = function string_token() {
|
||||
var string;
|
||||
string = this.balanced_group(['"'], ['${', '}']);
|
||||
string = this.balanced_token(['"', '"'], ['${', '}']);
|
||||
if (string === false) {
|
||||
string = this.balanced_group(["'"]);
|
||||
string = this.balanced_token(["'", "'"]);
|
||||
}
|
||||
if (!(string)) {
|
||||
return false;
|
||||
@@ -195,7 +195,7 @@
|
||||
// Matches interpolated JavaScript.
|
||||
Lexer.prototype.js_token = function js_token() {
|
||||
var script;
|
||||
if (!((script = this.match(JS, 1)))) {
|
||||
if (!((script = this.balanced_token(['`', '`'])))) {
|
||||
return false;
|
||||
}
|
||||
this.token('JS', script.replace(JS_CLEANER, ''));
|
||||
@@ -215,28 +215,20 @@
|
||||
this.i += regex.length;
|
||||
return true;
|
||||
};
|
||||
// Matches a balanced group such as a single or double-quoted string.
|
||||
Lexer.prototype.balanced_group = function balanced_group() {
|
||||
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, delimited, each, i, levels, type;
|
||||
// Matches a balanced group such as a single or double-quoted string. Pass in
|
||||
// a series of delimiters, all of which must be balanced correctly within the
|
||||
// token's contents.
|
||||
Lexer.prototype.balanced_token = function balanced_token() {
|
||||
var _a, _b, delimited, each, i, levels, type;
|
||||
delimited = Array.prototype.slice.call(arguments, 0);
|
||||
_a = delimited;
|
||||
for (_b = 0, _c = _a.length; _b < _c; _b++) {
|
||||
each = _a[_b];
|
||||
!(typeof (_d = each[1]) !== "undefined" && _d !== null) ? ((each[1] = each[0])) : null;
|
||||
}
|
||||
_e = delimited;
|
||||
for (_f = 0, _g = _e.length; _f < _g; _f++) {
|
||||
each = _e[_f];
|
||||
!(typeof (_h = each[2]) !== "undefined" && _h !== null) ? ((each[2] = '\\')) : null;
|
||||
}
|
||||
levels = [];
|
||||
i = 0;
|
||||
while (i < this.chunk.length) {
|
||||
_i = delimited;
|
||||
for (type = 0, _j = _i.length; type < _j; type++) {
|
||||
each = _i[type];
|
||||
if (each[2] !== false && this.chunk.substring(i, i + each[2].length) === each[2]) {
|
||||
i += each[2].length;
|
||||
_a = delimited;
|
||||
for (type = 0, _b = _a.length; type < _b; type++) {
|
||||
each = _a[type];
|
||||
if (levels.length && this.chunk.substring(i, i + 1) === '\\') {
|
||||
i += 1;
|
||||
break;
|
||||
} else if (levels.length && this.chunk.substring(i, i + each[1].length) === each[1] && levels[levels.length - 1] === type) {
|
||||
levels.pop();
|
||||
@@ -485,7 +477,8 @@
|
||||
if (interp.substring(0, 1) === '{') {
|
||||
inner = interp.substring(1, interp.length - 1);
|
||||
nested = lexer.tokenize("(" + inner + ")", {
|
||||
rewrite: false
|
||||
rewrite: false,
|
||||
line: this.line
|
||||
});
|
||||
nested.pop();
|
||||
tokens.push(['TOKENS', nested]);
|
||||
|
||||
@@ -60,7 +60,6 @@ IDENTIFIER : /^([a-zA-Z$_](\w|\$)*)/
|
||||
NUMBER : /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
|
||||
HEREDOC : /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/
|
||||
INTERPOLATION : /(^|[\s\S]*?(?:[\\]|\\\\)?)\$([a-zA-Z_@]\w*|{[\s\S]*?(?:[^\\]|\\\\)})/
|
||||
JS : /^(``|`([\s\S]*?)([^\\]|\\\\)`)/
|
||||
OPERATOR : /^([+\*&|\/\-%=<>:!?]+)/
|
||||
WHITESPACE : /^([ \t]+)/
|
||||
COMMENT : /^(((\n?[ \t]*)?#[^\n]*)+)/
|
||||
@@ -113,18 +112,18 @@ exports.Lexer: class Lexer
|
||||
|
||||
# Scan by attempting to match tokens one at a time. Slow and steady.
|
||||
tokenize: (code, options) ->
|
||||
options ||= {}
|
||||
@code : code # The remainder of the source code.
|
||||
@i : 0 # Current character position we're parsing.
|
||||
@line : 0 # The current line.
|
||||
@indent : 0 # The current indent level.
|
||||
@indents : [] # The stack of all indent levels we are currently within.
|
||||
@tokens : [] # Collection of all parsed tokens in the form ['TOKEN_TYPE', value, line]
|
||||
o : options or {}
|
||||
@code : code # The remainder of the source code.
|
||||
@i : 0 # Current character position we're parsing.
|
||||
@line : o.line or 0 # The current line.
|
||||
@indent : 0 # The current indent level.
|
||||
@indents : [] # The stack of all indent levels we are currently within.
|
||||
@tokens : [] # Collection of all parsed tokens in the form ['TOKEN_TYPE', value, line]
|
||||
while @i < @code.length
|
||||
@chunk: @code.slice(@i)
|
||||
@extract_next_token()
|
||||
@close_indentation()
|
||||
return @tokens if options.rewrite is no
|
||||
return @tokens if o.rewrite is no
|
||||
(new Rewriter()).rewrite @tokens
|
||||
|
||||
# At every position, run through this list of attempted matches,
|
||||
@@ -166,8 +165,8 @@ exports.Lexer: class Lexer
|
||||
|
||||
# Matches strings, including multi-line strings.
|
||||
string_token: ->
|
||||
string: @balanced_group ['"'], ['${', '}']
|
||||
string: @balanced_group ["'"] if string is false
|
||||
string: @balanced_token ['"', '"'], ['${', '}']
|
||||
string: @balanced_token ["'", "'"] if string is false
|
||||
return false unless string
|
||||
@interpolate_string string.replace STRING_NEWLINES, " \\\n"
|
||||
@line += count string, "\n"
|
||||
@@ -185,7 +184,7 @@ exports.Lexer: class Lexer
|
||||
|
||||
# Matches interpolated JavaScript.
|
||||
js_token: ->
|
||||
return false unless script: @match JS, 1
|
||||
return false unless script: @balanced_token ['`', '`']
|
||||
@token 'JS', script.replace(JS_CLEANER, '')
|
||||
@i += script.length
|
||||
true
|
||||
@@ -198,16 +197,16 @@ exports.Lexer: class Lexer
|
||||
@i += regex.length
|
||||
true
|
||||
|
||||
# Matches a balanced group such as a single or double-quoted string.
|
||||
balanced_group: (delimited...) ->
|
||||
(each[1]: each[0]) for each in delimited when not each[1]?
|
||||
(each[2]: '\\') for each in delimited when not each[2]?
|
||||
# Matches a balanced group such as a single or double-quoted string. Pass in
|
||||
# a series of delimiters, all of which must be balanced correctly within the
|
||||
# token's contents.
|
||||
balanced_token: (delimited...) ->
|
||||
levels: []
|
||||
i: 0
|
||||
while i < @chunk.length
|
||||
for each, type in delimited
|
||||
if each[2] isnt false and @chunk.substring(i, i + each[2].length) is each[2]
|
||||
i += each[2].length
|
||||
if levels.length and @chunk.substring(i, i + 1) is '\\'
|
||||
i += 1
|
||||
break
|
||||
else if levels.length and @chunk.substring(i, i + each[1].length) is each[1] and levels[levels.length - 1] is type
|
||||
levels.pop()
|
||||
@@ -394,7 +393,7 @@ exports.Lexer: class Lexer
|
||||
tokens.push ['STRING', "$quote$before$quote"] if before.length
|
||||
if interp.substring(0, 1) is '{'
|
||||
inner: interp.substring(1, interp.length - 1)
|
||||
nested: lexer.tokenize "($inner)", {rewrite: no}
|
||||
nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
|
||||
nested.pop()
|
||||
tokens.push ['TOKENS', nested]
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user