lexer: simplified tokenizers' responsibility

This commit is contained in:
satyr
2010-10-22 14:48:26 +09:00
parent c92fd79f35
commit a9e95fa43b
2 changed files with 109 additions and 131 deletions

View File

@@ -14,11 +14,10 @@
return Lexer;
})();
Lexer.prototype.tokenize = function(code, options) {
var o;
var i, o;
code = code.replace(/\r/g, '').replace(TRAILING_SPACES, '');
o = options || {};
this.code = code;
this.i = 0;
this.line = o.line || 0;
this.indent = 0;
this.indebt = 0;
@@ -26,8 +25,9 @@
this.indents = [];
this.tokens = [];
this.seenFor = this.seenFrom = false;
while (this.chunk = code.slice(this.i)) {
this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.heredocToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
i = 0;
while (this.chunk = code.slice(i)) {
i += this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.heredocToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
}
this.closeIndentation();
if (o.rewrite === false) {
@@ -38,24 +38,23 @@
Lexer.prototype.identifierToken = function() {
var _ref2, colon, forcedIdentifier, id, input, match, tag;
if (!(match = IDENTIFIER.exec(this.chunk))) {
return false;
return 0;
}
input = match[0], id = match[1], colon = match[2];
this.i += input.length;
if (id === 'all' && this.tag() === 'FOR') {
this.token('ALL', id);
return true;
return 3;
}
if (id === 'from' && this.tag(1) === 'FOR') {
this.seenFor = false;
this.seenFrom = true;
this.token('FROM', id);
return true;
return 4;
}
if (id === 'to' && this.seenFrom) {
this.seenFrom = false;
this.token('TO', id);
return true;
return 2;
}
forcedIdentifier = colon || this.tagAccessor();
tag = 'IDENTIFIER';
@@ -106,33 +105,32 @@
if (colon) {
this.token(':', ':');
}
return true;
return input.length;
};
// Tokenizer for number literals: runs NUMBER against the start of `this.chunk`
// and, when it matches, emits a 'NUMBER' token carrying the matched text.
// NOTE(review): this listing looks like a diff whose +/- markers were lost, so
// several statements appear in two forms back to back (`return false;` then
// `return 0;`, `return true;` then `return number.length;`). Read literally,
// only the first `return` of each pair can execute — confirm which form is the
// intended one before relying on the return type.
Lexer.prototype.numberToken = function() {
var match, number;
// No number at the head of the chunk: report that nothing was consumed.
if (!(match = NUMBER.exec(this.chunk))) {
return false;
return 0;
}
number = match[0];
// Decline when `this.tag()` is '.' and the matched text itself starts with '.'.
// presumably this keeps a dotted access from being lexed as a fraction — TODO confirm
if (this.tag() === '.' && number.charAt(0) === '.') {
return false;
return 0;
}
// Advances the lexer's own cursor by the matched length; looks like the
// pre-change bookkeeping that the length-returning form replaces — verify.
this.i += number.length;
this.token('NUMBER', number);
return true;
return number.length;
};
Lexer.prototype.stringToken = function() {
var match, string;
switch (this.chunk.charAt(0)) {
case "'":
if (!(match = SIMPLESTR.exec(this.chunk))) {
return false;
return 0;
}
this.token('STRING', (string = match[0]).replace(MULTILINER, '\\\n'));
break;
case '"':
if (!(string = this.balancedString(this.chunk, [['"', '"'], ['#{', '}']]))) {
return false;
return 0;
}
if (0 < string.indexOf('#{', 1)) {
this.interpolateString(string.slice(1, -1));
@@ -141,16 +139,15 @@
}
break;
default:
return false;
return 0;
}
this.line += count(string, '\n');
this.i += string.length;
return true;
return string.length;
};
Lexer.prototype.heredocToken = function() {
var doc, heredoc, match, quote;
if (!(match = HEREDOC.exec(this.chunk))) {
return false;
return 0;
}
heredoc = match[0];
quote = heredoc.charAt(0);
@@ -166,17 +163,15 @@
this.token('STRING', this.makeString(doc, quote, true));
}
this.line += count(heredoc, '\n');
this.i += heredoc.length;
return true;
return heredoc.length;
};
Lexer.prototype.commentToken = function() {
var comment, here, match;
if (!(match = this.chunk.match(COMMENT))) {
return false;
return 0;
}
comment = match[0], here = match[1];
this.line += count(comment, '\n');
this.i += comment.length;
if (here) {
this.token('HERECOMMENT', this.sanitizeHeredoc(here, {
herecomment: true,
@@ -184,44 +179,41 @@
}));
this.token('TERMINATOR', '\n');
}
return true;
return comment.length;
};
// Tokenizer for backtick-delimited spans: requires `this.chunk` to begin with
// a backtick and JSTOKEN to match, then emits a 'JS' token.
// NOTE(review): the text appears to interleave removed and added diff lines
// (`return false;`/`return 0;`, `return true;`/`return script.length;`); taken
// literally, the second `return` of each pair is unreachable — confirm which
// form is intended.
Lexer.prototype.jsToken = function() {
var match, script;
// The cheap first-character test short-circuits before the regex is run.
if (!(this.chunk.charAt(0) === '`' && (match = JSTOKEN.exec(this.chunk)))) {
return false;
return 0;
}
// The token value is the matched text without its first and last character.
this.token('JS', (script = match[0]).slice(1, -1));
// Cursor advance by the full matched length (delimiters included); looks like
// the pre-change bookkeeping — verify.
this.i += script.length;
return true;
return script.length;
};
// Tokenizer for regex literals. Only considers chunks starting with '/'.
// A HEREGEX match is handed to `this.heregexToken`; otherwise REGEX must match
// and a 'REGEX' token is emitted.
// NOTE(review): the text appears to interleave removed and added diff lines
// (each `return false;`/`return 0;` pair and the final `return true;`/
// `return regex.length;`); taken literally, the second `return` of each pair
// is unreachable — confirm which form is intended.
Lexer.prototype.regexToken = function() {
var _ref2, match, regex;
if (this.chunk.charAt(0) !== '/') {
return false;
return 0;
}
// HEREGEX is tried first and its result is returned as-is from heregexToken.
if (match = HEREGEX.exec(this.chunk)) {
return this.heregexToken(match);
}
// Decline when `this.tag()` is listed in NOT_REGEX.
// presumably those are tags after which '/' means division — TODO confirm
if ((_ref2 = this.tag(), __indexOf.call(NOT_REGEX, _ref2) >= 0)) {
return false;
return 0;
}
if (!(match = REGEX.exec(this.chunk))) {
return false;
return 0;
}
regex = match[0];
// An empty literal `//` is emitted as '/(?:)/' instead of verbatim.
// presumably because a bare `//` would read as a line comment in the output — verify
this.token('REGEX', regex === '//' ? '/(?:)/' : regex);
// Cursor advance by the matched length; looks like the pre-change bookkeeping — verify.
this.i += regex.length;
return true;
return regex.length;
};
Lexer.prototype.heregexToken = function(match) {
var _i, _len, _ref2, _ref3, _this, body, flags, heregex, re, tag, tokens, value;
heregex = match[0], body = match[1], flags = match[2];
this.i += heregex.length;
if (0 > body.indexOf('#{')) {
re = body.replace(HEREGEX_OMIT, '').replace(/\//g, '\\/');
this.token('REGEX', "/" + (re || '(?:)') + "/" + flags);
return true;
return heregex.length;
}
this.token('IDENTIFIER', 'RegExp');
this.tokens.push(['CALL_START', '(']);
@@ -252,29 +244,32 @@
this.tokens.push([',', ','], ['STRING', '"' + flags + '"']);
}
this.token(')', ')');
return true;
return heregex.length;
};
Lexer.prototype.lineToken = function() {
var diff, indent, match, nextCharacter, noNewlines, prev, size;
if (!(match = MULTI_DENT.exec(this.chunk))) {
return false;
return 0;
}
indent = match[0];
this.line += count(indent, '\n');
this.i += indent.length;
prev = last(this.tokens, 1);
size = indent.length - 1 - indent.lastIndexOf('\n');
nextCharacter = NEXT_CHARACTER.exec(this.chunk)[1];
noNewlines = ((nextCharacter === '.' || nextCharacter === ',') && !NEXT_ELLIPSIS.test(this.chunk)) || this.unfinished();
if (size - this.indebt === this.indent) {
if (noNewlines) {
return this.suppressNewlines();
this.suppressNewlines();
} else {
this.newlineToken();
}
return this.newlineToken(indent);
} else if (size > this.indent) {
return indent.length;
}
if (size > this.indent) {
if (noNewlines) {
this.indebt = size - this.indent;
return this.suppressNewlines();
this.suppressNewlines();
return indent.length;
}
diff = size - this.indent + this.outdebt;
this.token('INDENT', diff);
@@ -285,7 +280,7 @@
this.outdentToken(this.indent - size, noNewlines);
}
this.indent = size;
return true;
return indent.length;
};
Lexer.prototype.outdentToken = function(moveOut, noNewlines, close) {
var dent, len;
@@ -312,33 +307,30 @@
if (!(this.tag() === 'TERMINATOR' || noNewlines)) {
this.token('TERMINATOR', '\n');
}
return true;
return this;
};
// Handles whitespace at the start of `this.chunk`. Emits no token; instead it
// flags the most recent entry of `this.tokens` as `spaced` (WHITESPACE matched)
// or `newLine` (chunk starts with '\n').
// NOTE(review): this span appears to interleave removed and added diff lines.
// The guard `if` is present in two forms (`substring(0, 1)` and `charAt(0)`),
// each opening a brace, so the text is not brace-balanced when read literally.
// The returns also come in pairs (`return false;`/`return 0;`, `return !!match;`/
// `return match ? match[0].length : 0;`). Confirm the intended form.
Lexer.prototype.whitespaceToken = function() {
var match, nline, prev;
// `nline` is only assigned when WHITESPACE did not match (short-circuit ||).
if (!((match = WHITESPACE.exec(this.chunk)) || (nline = this.chunk.substring(0, 1) === '\n'))) {
return false;
if (!((match = WHITESPACE.exec(this.chunk)) || (nline = this.chunk.charAt(0) === '\n'))) {
return 0;
}
// presumably `last` returns the final element of the array — TODO confirm
prev = last(this.tokens);
if (prev) {
// 'spaced' when whitespace matched, 'newLine' when only a leading '\n' was seen.
prev[match ? 'spaced' : 'newLine'] = true;
}
// Cursor advance happens only for a WHITESPACE match; the bare '\n' case
// consumes nothing here. Looks like the pre-change bookkeeping — verify.
if (match) {
this.i += match[0].length;
}
return !!match;
return match ? match[0].length : 0;
};
// Emits a 'TERMINATOR' token with value '\n' unless `this.tag()` is already
// 'TERMINATOR', so consecutive terminators are not stacked.
// NOTE(review): two function headers follow, one taking `newlines` and one
// taking no parameters; this looks like the removed and added line of a diff
// whose +/- markers were lost, and the text is not valid JavaScript if both
// are kept. The body never reads `newlines`. The `return true;`/`return this;`
// pair is the same kind of duplication — confirm the intended form.
Lexer.prototype.newlineToken = function(newlines) {
Lexer.prototype.newlineToken = function() {
if (this.tag() !== 'TERMINATOR') {
this.token('TERMINATOR', '\n');
}
return true;
return this;
};
// Drops the last entry of `this.tokens` when `this.value()` is a single
// backslash; otherwise does nothing.
// presumably `this.value()` reports the value of that last token, making this
// the removal of a line-continuation marker — TODO confirm
// NOTE(review): `return true;` followed by `return this;` looks like the
// removed and added line of a diff with lost +/- markers; read literally, only
// the first can execute — confirm the intended return value.
Lexer.prototype.suppressNewlines = function() {
if (this.value() === '\\') {
this.tokens.pop();
}
return true;
return this;
};
Lexer.prototype.literalToken = function() {
var _ref2, _ref3, _ref4, _ref5, match, prev, tag, value;
@@ -350,7 +342,6 @@
} else {
value = this.chunk.charAt(0);
}
this.i += value.length;
tag = value;
prev = last(this.tokens);
if (value === '=' && prev) {
@@ -360,13 +351,11 @@
if ((_ref3 = prev[1]) === '||' || _ref3 === '&&') {
prev[0] = 'COMPOUND_ASSIGN';
prev[1] += '=';
return true;
return 1;
}
}
if (value === ';') {
tag = 'TERMINATOR';
} else if (__indexOf.call(LOGIC, value) >= 0) {
tag = 'LOGIC';
} else if (__indexOf.call(MATH, value) >= 0) {
tag = 'MATH';
} else if (__indexOf.call(COMPARE, value) >= 0) {
@@ -377,7 +366,7 @@
tag = 'UNARY';
} else if (__indexOf.call(SHIFT, value) >= 0) {
tag = 'SHIFT';
} else if (value === '?' && ((prev != null) ? prev.spaced : undefined)) {
} else if (__indexOf.call(LOGIC, value) >= 0 || value === '?' && ((prev != null) ? prev.spaced : undefined)) {
tag = 'LOGIC';
} else if (prev && !prev.spaced) {
if (value === '(' && (_ref4 = prev[0], __indexOf.call(CALLABLE, _ref4) >= 0)) {
@@ -398,7 +387,7 @@
}
}
this.token(tag, value);
return true;
return value.length;
};
Lexer.prototype.tagAccessor = function() {
var prev;
@@ -444,7 +433,7 @@
Lexer.prototype.tagParameters = function() {
var i, tok;
if (this.tag() !== ')') {
return;
return this;
}
i = this.tokens.length;
while (tok = this.tokens[--i]) {
@@ -461,7 +450,7 @@
return true;
}
}
return true;
return this;
};
Lexer.prototype.closeIndentation = function() {
return this.outdentToken(this.indent);