mirror of
https://github.com/jashkenas/coffeescript.git
synced 2026-05-03 03:00:14 -04:00
lexer: simplified tokenizers' responsibility
This commit is contained in:
111
lib/lexer.js
111
lib/lexer.js
@@ -14,11 +14,10 @@
|
||||
return Lexer;
|
||||
})();
|
||||
Lexer.prototype.tokenize = function(code, options) {
|
||||
var o;
|
||||
var i, o;
|
||||
code = code.replace(/\r/g, '').replace(TRAILING_SPACES, '');
|
||||
o = options || {};
|
||||
this.code = code;
|
||||
this.i = 0;
|
||||
this.line = o.line || 0;
|
||||
this.indent = 0;
|
||||
this.indebt = 0;
|
||||
@@ -26,8 +25,9 @@
|
||||
this.indents = [];
|
||||
this.tokens = [];
|
||||
this.seenFor = this.seenFrom = false;
|
||||
while (this.chunk = code.slice(this.i)) {
|
||||
this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.heredocToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
|
||||
i = 0;
|
||||
while (this.chunk = code.slice(i)) {
|
||||
i += this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.heredocToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
|
||||
}
|
||||
this.closeIndentation();
|
||||
if (o.rewrite === false) {
|
||||
@@ -38,24 +38,23 @@
|
||||
Lexer.prototype.identifierToken = function() {
|
||||
var _ref2, colon, forcedIdentifier, id, input, match, tag;
|
||||
if (!(match = IDENTIFIER.exec(this.chunk))) {
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
input = match[0], id = match[1], colon = match[2];
|
||||
this.i += input.length;
|
||||
if (id === 'all' && this.tag() === 'FOR') {
|
||||
this.token('ALL', id);
|
||||
return true;
|
||||
return 3;
|
||||
}
|
||||
if (id === 'from' && this.tag(1) === 'FOR') {
|
||||
this.seenFor = false;
|
||||
this.seenFrom = true;
|
||||
this.token('FROM', id);
|
||||
return true;
|
||||
return 4;
|
||||
}
|
||||
if (id === 'to' && this.seenFrom) {
|
||||
this.seenFrom = false;
|
||||
this.token('TO', id);
|
||||
return true;
|
||||
return 2;
|
||||
}
|
||||
forcedIdentifier = colon || this.tagAccessor();
|
||||
tag = 'IDENTIFIER';
|
||||
@@ -106,33 +105,32 @@
|
||||
if (colon) {
|
||||
this.token(':', ':');
|
||||
}
|
||||
return true;
|
||||
return input.length;
|
||||
};
|
||||
Lexer.prototype.numberToken = function() {
|
||||
var match, number;
|
||||
if (!(match = NUMBER.exec(this.chunk))) {
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
number = match[0];
|
||||
if (this.tag() === '.' && number.charAt(0) === '.') {
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
this.i += number.length;
|
||||
this.token('NUMBER', number);
|
||||
return true;
|
||||
return number.length;
|
||||
};
|
||||
Lexer.prototype.stringToken = function() {
|
||||
var match, string;
|
||||
switch (this.chunk.charAt(0)) {
|
||||
case "'":
|
||||
if (!(match = SIMPLESTR.exec(this.chunk))) {
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
this.token('STRING', (string = match[0]).replace(MULTILINER, '\\\n'));
|
||||
break;
|
||||
case '"':
|
||||
if (!(string = this.balancedString(this.chunk, [['"', '"'], ['#{', '}']]))) {
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
if (0 < string.indexOf('#{', 1)) {
|
||||
this.interpolateString(string.slice(1, -1));
|
||||
@@ -141,16 +139,15 @@
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
this.line += count(string, '\n');
|
||||
this.i += string.length;
|
||||
return true;
|
||||
return string.length;
|
||||
};
|
||||
Lexer.prototype.heredocToken = function() {
|
||||
var doc, heredoc, match, quote;
|
||||
if (!(match = HEREDOC.exec(this.chunk))) {
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
heredoc = match[0];
|
||||
quote = heredoc.charAt(0);
|
||||
@@ -166,17 +163,15 @@
|
||||
this.token('STRING', this.makeString(doc, quote, true));
|
||||
}
|
||||
this.line += count(heredoc, '\n');
|
||||
this.i += heredoc.length;
|
||||
return true;
|
||||
return heredoc.length;
|
||||
};
|
||||
Lexer.prototype.commentToken = function() {
|
||||
var comment, here, match;
|
||||
if (!(match = this.chunk.match(COMMENT))) {
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
comment = match[0], here = match[1];
|
||||
this.line += count(comment, '\n');
|
||||
this.i += comment.length;
|
||||
if (here) {
|
||||
this.token('HERECOMMENT', this.sanitizeHeredoc(here, {
|
||||
herecomment: true,
|
||||
@@ -184,44 +179,41 @@
|
||||
}));
|
||||
this.token('TERMINATOR', '\n');
|
||||
}
|
||||
return true;
|
||||
return comment.length;
|
||||
};
|
||||
Lexer.prototype.jsToken = function() {
|
||||
var match, script;
|
||||
if (!(this.chunk.charAt(0) === '`' && (match = JSTOKEN.exec(this.chunk)))) {
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
this.token('JS', (script = match[0]).slice(1, -1));
|
||||
this.i += script.length;
|
||||
return true;
|
||||
return script.length;
|
||||
};
|
||||
Lexer.prototype.regexToken = function() {
|
||||
var _ref2, match, regex;
|
||||
if (this.chunk.charAt(0) !== '/') {
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
if (match = HEREGEX.exec(this.chunk)) {
|
||||
return this.heregexToken(match);
|
||||
}
|
||||
if ((_ref2 = this.tag(), __indexOf.call(NOT_REGEX, _ref2) >= 0)) {
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
if (!(match = REGEX.exec(this.chunk))) {
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
regex = match[0];
|
||||
this.token('REGEX', regex === '//' ? '/(?:)/' : regex);
|
||||
this.i += regex.length;
|
||||
return true;
|
||||
return regex.length;
|
||||
};
|
||||
Lexer.prototype.heregexToken = function(match) {
|
||||
var _i, _len, _ref2, _ref3, _this, body, flags, heregex, re, tag, tokens, value;
|
||||
heregex = match[0], body = match[1], flags = match[2];
|
||||
this.i += heregex.length;
|
||||
if (0 > body.indexOf('#{')) {
|
||||
re = body.replace(HEREGEX_OMIT, '').replace(/\//g, '\\/');
|
||||
this.token('REGEX', "/" + (re || '(?:)') + "/" + flags);
|
||||
return true;
|
||||
return heregex.length;
|
||||
}
|
||||
this.token('IDENTIFIER', 'RegExp');
|
||||
this.tokens.push(['CALL_START', '(']);
|
||||
@@ -252,29 +244,32 @@
|
||||
this.tokens.push([',', ','], ['STRING', '"' + flags + '"']);
|
||||
}
|
||||
this.token(')', ')');
|
||||
return true;
|
||||
return heregex.length;
|
||||
};
|
||||
Lexer.prototype.lineToken = function() {
|
||||
var diff, indent, match, nextCharacter, noNewlines, prev, size;
|
||||
if (!(match = MULTI_DENT.exec(this.chunk))) {
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
indent = match[0];
|
||||
this.line += count(indent, '\n');
|
||||
this.i += indent.length;
|
||||
prev = last(this.tokens, 1);
|
||||
size = indent.length - 1 - indent.lastIndexOf('\n');
|
||||
nextCharacter = NEXT_CHARACTER.exec(this.chunk)[1];
|
||||
noNewlines = ((nextCharacter === '.' || nextCharacter === ',') && !NEXT_ELLIPSIS.test(this.chunk)) || this.unfinished();
|
||||
if (size - this.indebt === this.indent) {
|
||||
if (noNewlines) {
|
||||
return this.suppressNewlines();
|
||||
this.suppressNewlines();
|
||||
} else {
|
||||
this.newlineToken();
|
||||
}
|
||||
return this.newlineToken(indent);
|
||||
} else if (size > this.indent) {
|
||||
return indent.length;
|
||||
}
|
||||
if (size > this.indent) {
|
||||
if (noNewlines) {
|
||||
this.indebt = size - this.indent;
|
||||
return this.suppressNewlines();
|
||||
this.suppressNewlines();
|
||||
return indent.length;
|
||||
}
|
||||
diff = size - this.indent + this.outdebt;
|
||||
this.token('INDENT', diff);
|
||||
@@ -285,7 +280,7 @@
|
||||
this.outdentToken(this.indent - size, noNewlines);
|
||||
}
|
||||
this.indent = size;
|
||||
return true;
|
||||
return indent.length;
|
||||
};
|
||||
Lexer.prototype.outdentToken = function(moveOut, noNewlines, close) {
|
||||
var dent, len;
|
||||
@@ -312,33 +307,30 @@
|
||||
if (!(this.tag() === 'TERMINATOR' || noNewlines)) {
|
||||
this.token('TERMINATOR', '\n');
|
||||
}
|
||||
return true;
|
||||
return this;
|
||||
};
|
||||
Lexer.prototype.whitespaceToken = function() {
|
||||
var match, nline, prev;
|
||||
if (!((match = WHITESPACE.exec(this.chunk)) || (nline = this.chunk.substring(0, 1) === '\n'))) {
|
||||
return false;
|
||||
if (!((match = WHITESPACE.exec(this.chunk)) || (nline = this.chunk.charAt(0) === '\n'))) {
|
||||
return 0;
|
||||
}
|
||||
prev = last(this.tokens);
|
||||
if (prev) {
|
||||
prev[match ? 'spaced' : 'newLine'] = true;
|
||||
}
|
||||
if (match) {
|
||||
this.i += match[0].length;
|
||||
}
|
||||
return !!match;
|
||||
return match ? match[0].length : 0;
|
||||
};
|
||||
Lexer.prototype.newlineToken = function(newlines) {
|
||||
Lexer.prototype.newlineToken = function() {
|
||||
if (this.tag() !== 'TERMINATOR') {
|
||||
this.token('TERMINATOR', '\n');
|
||||
}
|
||||
return true;
|
||||
return this;
|
||||
};
|
||||
Lexer.prototype.suppressNewlines = function() {
|
||||
if (this.value() === '\\') {
|
||||
this.tokens.pop();
|
||||
}
|
||||
return true;
|
||||
return this;
|
||||
};
|
||||
Lexer.prototype.literalToken = function() {
|
||||
var _ref2, _ref3, _ref4, _ref5, match, prev, tag, value;
|
||||
@@ -350,7 +342,6 @@
|
||||
} else {
|
||||
value = this.chunk.charAt(0);
|
||||
}
|
||||
this.i += value.length;
|
||||
tag = value;
|
||||
prev = last(this.tokens);
|
||||
if (value === '=' && prev) {
|
||||
@@ -360,13 +351,11 @@
|
||||
if ((_ref3 = prev[1]) === '||' || _ref3 === '&&') {
|
||||
prev[0] = 'COMPOUND_ASSIGN';
|
||||
prev[1] += '=';
|
||||
return true;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if (value === ';') {
|
||||
tag = 'TERMINATOR';
|
||||
} else if (__indexOf.call(LOGIC, value) >= 0) {
|
||||
tag = 'LOGIC';
|
||||
} else if (__indexOf.call(MATH, value) >= 0) {
|
||||
tag = 'MATH';
|
||||
} else if (__indexOf.call(COMPARE, value) >= 0) {
|
||||
@@ -377,7 +366,7 @@
|
||||
tag = 'UNARY';
|
||||
} else if (__indexOf.call(SHIFT, value) >= 0) {
|
||||
tag = 'SHIFT';
|
||||
} else if (value === '?' && ((prev != null) ? prev.spaced : undefined)) {
|
||||
} else if (__indexOf.call(LOGIC, value) >= 0 || value === '?' && ((prev != null) ? prev.spaced : undefined)) {
|
||||
tag = 'LOGIC';
|
||||
} else if (prev && !prev.spaced) {
|
||||
if (value === '(' && (_ref4 = prev[0], __indexOf.call(CALLABLE, _ref4) >= 0)) {
|
||||
@@ -398,7 +387,7 @@
|
||||
}
|
||||
}
|
||||
this.token(tag, value);
|
||||
return true;
|
||||
return value.length;
|
||||
};
|
||||
Lexer.prototype.tagAccessor = function() {
|
||||
var prev;
|
||||
@@ -444,7 +433,7 @@
|
||||
Lexer.prototype.tagParameters = function() {
|
||||
var i, tok;
|
||||
if (this.tag() !== ')') {
|
||||
return;
|
||||
return this;
|
||||
}
|
||||
i = this.tokens.length;
|
||||
while (tok = this.tokens[--i]) {
|
||||
@@ -461,7 +450,7 @@
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return this;
|
||||
};
|
||||
Lexer.prototype.closeIndentation = function() {
|
||||
return this.outdentToken(this.indent);
|
||||
|
||||
Reference in New Issue
Block a user