mirror of
https://github.com/jashkenas/coffeescript.git
synced 2026-01-14 09:17:55 -05:00
Revert "lexer: simplified tokenizers' responsibility"
This reverts commit a9e95fa43b.
This commit is contained in:
111
lib/lexer.js
111
lib/lexer.js
@@ -14,10 +14,11 @@
|
||||
return Lexer;
|
||||
})();
|
||||
Lexer.prototype.tokenize = function(code, options) {
|
||||
var i, o;
|
||||
var o;
|
||||
code = code.replace(/\r/g, '').replace(TRAILING_SPACES, '');
|
||||
o = options || {};
|
||||
this.code = code;
|
||||
this.i = 0;
|
||||
this.line = o.line || 0;
|
||||
this.indent = 0;
|
||||
this.indebt = 0;
|
||||
@@ -25,9 +26,8 @@
|
||||
this.indents = [];
|
||||
this.tokens = [];
|
||||
this.seenFor = this.seenFrom = false;
|
||||
i = 0;
|
||||
while (this.chunk = code.slice(i)) {
|
||||
i += this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.heredocToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
|
||||
while (this.chunk = code.slice(this.i)) {
|
||||
this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.heredocToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
|
||||
}
|
||||
this.closeIndentation();
|
||||
if (o.rewrite === false) {
|
||||
@@ -38,23 +38,24 @@
|
||||
Lexer.prototype.identifierToken = function() {
|
||||
var _ref2, colon, forcedIdentifier, id, input, match, tag;
|
||||
if (!(match = IDENTIFIER.exec(this.chunk))) {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
input = match[0], id = match[1], colon = match[2];
|
||||
this.i += input.length;
|
||||
if (id === 'all' && this.tag() === 'FOR') {
|
||||
this.token('ALL', id);
|
||||
return 3;
|
||||
return true;
|
||||
}
|
||||
if (id === 'from' && this.tag(1) === 'FOR') {
|
||||
this.seenFor = false;
|
||||
this.seenFrom = true;
|
||||
this.token('FROM', id);
|
||||
return 4;
|
||||
return true;
|
||||
}
|
||||
if (id === 'to' && this.seenFrom) {
|
||||
this.seenFrom = false;
|
||||
this.token('TO', id);
|
||||
return 2;
|
||||
return true;
|
||||
}
|
||||
forcedIdentifier = colon || this.tagAccessor();
|
||||
tag = 'IDENTIFIER';
|
||||
@@ -105,32 +106,33 @@
|
||||
if (colon) {
|
||||
this.token(':', ':');
|
||||
}
|
||||
return input.length;
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.numberToken = function() {
|
||||
var match, number;
|
||||
if (!(match = NUMBER.exec(this.chunk))) {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
number = match[0];
|
||||
if (this.tag() === '.' && number.charAt(0) === '.') {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
this.i += number.length;
|
||||
this.token('NUMBER', number);
|
||||
return number.length;
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.stringToken = function() {
|
||||
var match, string;
|
||||
switch (this.chunk.charAt(0)) {
|
||||
case "'":
|
||||
if (!(match = SIMPLESTR.exec(this.chunk))) {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
this.token('STRING', (string = match[0]).replace(MULTILINER, '\\\n'));
|
||||
break;
|
||||
case '"':
|
||||
if (!(string = this.balancedString(this.chunk, [['"', '"'], ['#{', '}']]))) {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
if (0 < string.indexOf('#{', 1)) {
|
||||
this.interpolateString(string.slice(1, -1));
|
||||
@@ -139,15 +141,16 @@
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
this.line += count(string, '\n');
|
||||
return string.length;
|
||||
this.i += string.length;
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.heredocToken = function() {
|
||||
var doc, heredoc, match, quote;
|
||||
if (!(match = HEREDOC.exec(this.chunk))) {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
heredoc = match[0];
|
||||
quote = heredoc.charAt(0);
|
||||
@@ -163,15 +166,17 @@
|
||||
this.token('STRING', this.makeString(doc, quote, true));
|
||||
}
|
||||
this.line += count(heredoc, '\n');
|
||||
return heredoc.length;
|
||||
this.i += heredoc.length;
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.commentToken = function() {
|
||||
var comment, here, match;
|
||||
if (!(match = this.chunk.match(COMMENT))) {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
comment = match[0], here = match[1];
|
||||
this.line += count(comment, '\n');
|
||||
this.i += comment.length;
|
||||
if (here) {
|
||||
this.token('HERECOMMENT', this.sanitizeHeredoc(here, {
|
||||
herecomment: true,
|
||||
@@ -179,41 +184,44 @@
|
||||
}));
|
||||
this.token('TERMINATOR', '\n');
|
||||
}
|
||||
return comment.length;
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.jsToken = function() {
|
||||
var match, script;
|
||||
if (!(this.chunk.charAt(0) === '`' && (match = JSTOKEN.exec(this.chunk)))) {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
this.token('JS', (script = match[0]).slice(1, -1));
|
||||
return script.length;
|
||||
this.i += script.length;
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.regexToken = function() {
|
||||
var _ref2, match, regex;
|
||||
if (this.chunk.charAt(0) !== '/') {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
if (match = HEREGEX.exec(this.chunk)) {
|
||||
return this.heregexToken(match);
|
||||
}
|
||||
if ((_ref2 = this.tag(), __indexOf.call(NOT_REGEX, _ref2) >= 0)) {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
if (!(match = REGEX.exec(this.chunk))) {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
regex = match[0];
|
||||
this.token('REGEX', regex === '//' ? '/(?:)/' : regex);
|
||||
return regex.length;
|
||||
this.i += regex.length;
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.heregexToken = function(match) {
|
||||
var _i, _len, _ref2, _ref3, _this, body, flags, heregex, re, tag, tokens, value;
|
||||
heregex = match[0], body = match[1], flags = match[2];
|
||||
this.i += heregex.length;
|
||||
if (0 > body.indexOf('#{')) {
|
||||
re = body.replace(HEREGEX_OMIT, '').replace(/\//g, '\\/');
|
||||
this.token('REGEX', "/" + (re || '(?:)') + "/" + flags);
|
||||
return heregex.length;
|
||||
return true;
|
||||
}
|
||||
this.token('IDENTIFIER', 'RegExp');
|
||||
this.tokens.push(['CALL_START', '(']);
|
||||
@@ -244,32 +252,29 @@
|
||||
this.tokens.push([',', ','], ['STRING', '"' + flags + '"']);
|
||||
}
|
||||
this.token(')', ')');
|
||||
return heregex.length;
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.lineToken = function() {
|
||||
var diff, indent, match, nextCharacter, noNewlines, prev, size;
|
||||
if (!(match = MULTI_DENT.exec(this.chunk))) {
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
indent = match[0];
|
||||
this.line += count(indent, '\n');
|
||||
this.i += indent.length;
|
||||
prev = last(this.tokens, 1);
|
||||
size = indent.length - 1 - indent.lastIndexOf('\n');
|
||||
nextCharacter = NEXT_CHARACTER.exec(this.chunk)[1];
|
||||
noNewlines = ((nextCharacter === '.' || nextCharacter === ',') && !NEXT_ELLIPSIS.test(this.chunk)) || this.unfinished();
|
||||
if (size - this.indebt === this.indent) {
|
||||
if (noNewlines) {
|
||||
this.suppressNewlines();
|
||||
} else {
|
||||
this.newlineToken();
|
||||
return this.suppressNewlines();
|
||||
}
|
||||
return indent.length;
|
||||
}
|
||||
if (size > this.indent) {
|
||||
return this.newlineToken(indent);
|
||||
} else if (size > this.indent) {
|
||||
if (noNewlines) {
|
||||
this.indebt = size - this.indent;
|
||||
this.suppressNewlines();
|
||||
return indent.length;
|
||||
return this.suppressNewlines();
|
||||
}
|
||||
diff = size - this.indent + this.outdebt;
|
||||
this.token('INDENT', diff);
|
||||
@@ -280,7 +285,7 @@
|
||||
this.outdentToken(this.indent - size, noNewlines);
|
||||
}
|
||||
this.indent = size;
|
||||
return indent.length;
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.outdentToken = function(moveOut, noNewlines, close) {
|
||||
var dent, len;
|
||||
@@ -307,30 +312,33 @@
|
||||
if (!(this.tag() === 'TERMINATOR' || noNewlines)) {
|
||||
this.token('TERMINATOR', '\n');
|
||||
}
|
||||
return this;
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.whitespaceToken = function() {
|
||||
var match, nline, prev;
|
||||
if (!((match = WHITESPACE.exec(this.chunk)) || (nline = this.chunk.charAt(0) === '\n'))) {
|
||||
return 0;
|
||||
if (!((match = WHITESPACE.exec(this.chunk)) || (nline = this.chunk.substring(0, 1) === '\n'))) {
|
||||
return false;
|
||||
}
|
||||
prev = last(this.tokens);
|
||||
if (prev) {
|
||||
prev[match ? 'spaced' : 'newLine'] = true;
|
||||
}
|
||||
return match ? match[0].length : 0;
|
||||
if (match) {
|
||||
this.i += match[0].length;
|
||||
}
|
||||
return !!match;
|
||||
};
|
||||
Lexer.prototype.newlineToken = function() {
|
||||
Lexer.prototype.newlineToken = function(newlines) {
|
||||
if (this.tag() !== 'TERMINATOR') {
|
||||
this.token('TERMINATOR', '\n');
|
||||
}
|
||||
return this;
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.suppressNewlines = function() {
|
||||
if (this.value() === '\\') {
|
||||
this.tokens.pop();
|
||||
}
|
||||
return this;
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.literalToken = function() {
|
||||
var _ref2, _ref3, _ref4, _ref5, match, prev, tag, value;
|
||||
@@ -342,6 +350,7 @@
|
||||
} else {
|
||||
value = this.chunk.charAt(0);
|
||||
}
|
||||
this.i += value.length;
|
||||
tag = value;
|
||||
prev = last(this.tokens);
|
||||
if (value === '=' && prev) {
|
||||
@@ -351,11 +360,13 @@
|
||||
if ((_ref3 = prev[1]) === '||' || _ref3 === '&&') {
|
||||
prev[0] = 'COMPOUND_ASSIGN';
|
||||
prev[1] += '=';
|
||||
return 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (value === ';') {
|
||||
tag = 'TERMINATOR';
|
||||
} else if (__indexOf.call(LOGIC, value) >= 0) {
|
||||
tag = 'LOGIC';
|
||||
} else if (__indexOf.call(MATH, value) >= 0) {
|
||||
tag = 'MATH';
|
||||
} else if (__indexOf.call(COMPARE, value) >= 0) {
|
||||
@@ -366,7 +377,7 @@
|
||||
tag = 'UNARY';
|
||||
} else if (__indexOf.call(SHIFT, value) >= 0) {
|
||||
tag = 'SHIFT';
|
||||
} else if (__indexOf.call(LOGIC, value) >= 0 || value === '?' && ((prev != null) ? prev.spaced : undefined)) {
|
||||
} else if (value === '?' && ((prev != null) ? prev.spaced : undefined)) {
|
||||
tag = 'LOGIC';
|
||||
} else if (prev && !prev.spaced) {
|
||||
if (value === '(' && (_ref4 = prev[0], __indexOf.call(CALLABLE, _ref4) >= 0)) {
|
||||
@@ -387,7 +398,7 @@
|
||||
}
|
||||
}
|
||||
this.token(tag, value);
|
||||
return value.length;
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.tagAccessor = function() {
|
||||
var prev;
|
||||
@@ -433,7 +444,7 @@
|
||||
Lexer.prototype.tagParameters = function() {
|
||||
var i, tok;
|
||||
if (this.tag() !== ')') {
|
||||
return this;
|
||||
return;
|
||||
}
|
||||
i = this.tokens.length;
|
||||
while (tok = this.tokens[--i]) {
|
||||
@@ -450,7 +461,7 @@
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return this;
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.closeIndentation = function() {
|
||||
return this.outdentToken(this.indent);
|
||||
|
||||
129
src/lexer.coffee
129
src/lexer.coffee
@@ -26,8 +26,9 @@ exports.Lexer = class Lexer
|
||||
# (for interpolations). When the next token has been recorded, we move forward
|
||||
# within the code past the token, and begin again.
|
||||
#
|
||||
# Each tokenizing method is responsible for returning the number of characters
|
||||
# it has consumed.
|
||||
# Each tokenizing method is responsible for incrementing `@i` by the number of
|
||||
# characters it has consumed. `@i` can be thought of as our finger on the page
|
||||
# of source.
|
||||
#
|
||||
# Before returning the token stream, run it through the [Rewriter](rewriter.html)
|
||||
# unless explicitly asked not to.
|
||||
@@ -35,6 +36,7 @@ exports.Lexer = class Lexer
|
||||
code = code.replace(/\r/g, '').replace TRAILING_SPACES, ''
|
||||
o = options or {}
|
||||
@code = code # The remainder of the source code.
|
||||
@i = 0 # Current character position we're parsing.
|
||||
@line = o.line or 0 # The current line.
|
||||
@indent = 0 # The current indentation level.
|
||||
@indebt = 0 # The over-indentation at the current level.
|
||||
@@ -46,18 +48,17 @@ exports.Lexer = class Lexer
|
||||
# At every position, run through this list of attempted matches,
|
||||
# short-circuiting if any of them succeed. Their order determines precedence:
|
||||
# `@literalToken` is the fallback catch-all.
|
||||
i = 0
|
||||
while @chunk = code.slice i
|
||||
i += @identifierToken() or
|
||||
@commentToken() or
|
||||
@whitespaceToken() or
|
||||
@lineToken() or
|
||||
@heredocToken() or
|
||||
@stringToken() or
|
||||
@numberToken() or
|
||||
@regexToken() or
|
||||
@jsToken() or
|
||||
@literalToken()
|
||||
while @chunk = code.slice @i
|
||||
@identifierToken() or
|
||||
@commentToken() or
|
||||
@whitespaceToken() or
|
||||
@lineToken() or
|
||||
@heredocToken() or
|
||||
@stringToken() or
|
||||
@numberToken() or
|
||||
@regexToken() or
|
||||
@jsToken() or
|
||||
@literalToken()
|
||||
@closeIndentation()
|
||||
return @tokens if o.rewrite is off
|
||||
(new Rewriter).rewrite @tokens
|
||||
@@ -72,20 +73,21 @@ exports.Lexer = class Lexer
|
||||
# referenced as property names here, so you can still do `jQuery.is()` even
|
||||
# though `is` means `===` otherwise.
|
||||
identifierToken: ->
|
||||
return 0 unless match = IDENTIFIER.exec @chunk
|
||||
return false unless match = IDENTIFIER.exec @chunk
|
||||
[input, id, colon] = match
|
||||
@i += input.length
|
||||
if id is 'all' and @tag() is 'FOR'
|
||||
@token 'ALL', id
|
||||
return 3
|
||||
return true
|
||||
if id is 'from' and @tag(1) is 'FOR'
|
||||
@seenFor = no
|
||||
@seenFrom = yes
|
||||
@token 'FROM', id
|
||||
return 4
|
||||
return true
|
||||
if id is 'to' and @seenFrom
|
||||
@seenFrom = no
|
||||
@token 'TO', id
|
||||
return 2
|
||||
return true
|
||||
forcedIdentifier = colon or @tagAccessor()
|
||||
tag = 'IDENTIFIER'
|
||||
if id in JS_KEYWORDS or
|
||||
@@ -124,39 +126,41 @@ exports.Lexer = class Lexer
|
||||
tag = 'BOOL'
|
||||
@token tag, id
|
||||
@token ':', ':' if colon
|
||||
input.length
|
||||
true
|
||||
|
||||
# Matches numbers, including decimals, hex, and exponential notation.
|
||||
# Be careful not to interfere with ranges-in-progress.
|
||||
numberToken: ->
|
||||
return 0 unless match = NUMBER.exec @chunk
|
||||
return false unless match = NUMBER.exec @chunk
|
||||
number = match[0]
|
||||
return 0 if @tag() is '.' and number.charAt(0) is '.'
|
||||
return false if @tag() is '.' and number.charAt(0) is '.'
|
||||
@i += number.length
|
||||
@token 'NUMBER', number
|
||||
number.length
|
||||
true
|
||||
|
||||
# Matches strings, including multi-line strings. Ensures that quotation marks
|
||||
# are balanced within the string's contents, and within nested interpolations.
|
||||
stringToken: ->
|
||||
switch @chunk.charAt 0
|
||||
when "'"
|
||||
return 0 unless match = SIMPLESTR.exec @chunk
|
||||
return false unless match = SIMPLESTR.exec @chunk
|
||||
@token 'STRING', (string = match[0]).replace MULTILINER, '\\\n'
|
||||
when '"'
|
||||
return 0 unless string = @balancedString @chunk, [['"', '"'], ['#{', '}']]
|
||||
return false unless string = @balancedString @chunk, [['"', '"'], ['#{', '}']]
|
||||
if 0 < string.indexOf '#{', 1
|
||||
@interpolateString string.slice 1, -1
|
||||
else
|
||||
@token 'STRING', @escapeLines string
|
||||
else
|
||||
return 0
|
||||
return false
|
||||
@line += count string, '\n'
|
||||
string.length
|
||||
@i += string.length
|
||||
true
|
||||
|
||||
# Matches heredocs, adjusting indentation to the correct level, as heredocs
|
||||
# preserve whitespace, but ignore indentation to the left.
|
||||
heredocToken: ->
|
||||
return 0 unless match = HEREDOC.exec @chunk
|
||||
return false unless match = HEREDOC.exec @chunk
|
||||
heredoc = match[0]
|
||||
quote = heredoc.charAt 0
|
||||
doc = @sanitizeHeredoc match[2], {quote, indent: null}
|
||||
@@ -165,44 +169,49 @@ exports.Lexer = class Lexer
|
||||
else
|
||||
@token 'STRING', @makeString doc, quote, yes
|
||||
@line += count heredoc, '\n'
|
||||
heredoc.length
|
||||
@i += heredoc.length
|
||||
true
|
||||
|
||||
# Matches and consumes comments.
|
||||
commentToken: ->
|
||||
return 0 unless match = @chunk.match COMMENT
|
||||
return false unless match = @chunk.match COMMENT
|
||||
[comment, here] = match
|
||||
@line += count comment, '\n'
|
||||
@i += comment.length
|
||||
if here
|
||||
@token 'HERECOMMENT', @sanitizeHeredoc here,
|
||||
herecomment: true, indent: Array(@indent + 1).join(' ')
|
||||
@token 'TERMINATOR', '\n'
|
||||
comment.length
|
||||
true
|
||||
|
||||
# Matches JavaScript interpolated directly into the source via backticks.
|
||||
jsToken: ->
|
||||
return 0 unless @chunk.charAt(0) is '`' and match = JSTOKEN.exec @chunk
|
||||
return false unless @chunk.charAt(0) is '`' and match = JSTOKEN.exec @chunk
|
||||
@token 'JS', (script = match[0]).slice 1, -1
|
||||
script.length
|
||||
@i += script.length
|
||||
true
|
||||
|
||||
# Matches regular expression literals. Regular expressions are difficult
|
||||
# to distinguish from division while lexing, so we borrow some basic heuristics from
|
||||
# JavaScript and Ruby.
|
||||
regexToken: ->
|
||||
return 0 if @chunk.charAt(0) isnt '/'
|
||||
return false if @chunk.charAt(0) isnt '/'
|
||||
return @heregexToken match if match = HEREGEX.exec @chunk
|
||||
return 0 if @tag() in NOT_REGEX
|
||||
return 0 unless match = REGEX.exec @chunk
|
||||
return false if @tag() in NOT_REGEX
|
||||
return false unless match = REGEX.exec @chunk
|
||||
[regex] = match
|
||||
@token 'REGEX', if regex is '//' then '/(?:)/' else regex
|
||||
regex.length
|
||||
@i += regex.length
|
||||
true
|
||||
|
||||
# Matches experimental, multiline and extended regular expression literals.
|
||||
heregexToken: (match) ->
|
||||
[heregex, body, flags] = match
|
||||
@i += heregex.length
|
||||
if 0 > body.indexOf '#{'
|
||||
re = body.replace(HEREGEX_OMIT, '').replace(/\//g, '\\/')
|
||||
@token 'REGEX', "/#{ re or '(?:)' }/#{flags}"
|
||||
return heregex.length
|
||||
return true
|
||||
@token 'IDENTIFIER', 'RegExp'
|
||||
@tokens.push ['CALL_START', '(']
|
||||
tokens = []
|
||||
@@ -219,7 +228,7 @@ exports.Lexer = class Lexer
|
||||
@tokens.push tokens...
|
||||
@tokens.push [',', ','], ['STRING', '"' + flags + '"'] if flags
|
||||
@token ')', ')'
|
||||
heregex.length
|
||||
true
|
||||
|
||||
# Matches newlines, indents, and outdents, and determines which is which.
|
||||
# If we can detect that the current line is continued onto the next line,
|
||||
@@ -232,21 +241,21 @@ exports.Lexer = class Lexer
|
||||
# Keeps track of the level of indentation, because a single outdent token
|
||||
# can close multiple indents, so we need to know how far in we happen to be.
|
||||
lineToken: ->
|
||||
return 0 unless match = MULTI_DENT.exec @chunk
|
||||
return false unless match = MULTI_DENT.exec @chunk
|
||||
indent = match[0]
|
||||
@line += count indent, '\n'
|
||||
@i += indent.length
|
||||
prev = last @tokens, 1
|
||||
size = indent.length - 1 - indent.lastIndexOf '\n'
|
||||
nextCharacter = NEXT_CHARACTER.exec(@chunk)[1]
|
||||
noNewlines = (nextCharacter in ['.', ','] and not NEXT_ELLIPSIS.test(@chunk)) or @unfinished()
|
||||
if size - @indebt is @indent
|
||||
if noNewlines then @suppressNewlines() else @newlineToken()
|
||||
return indent.length
|
||||
if size > @indent
|
||||
return @suppressNewlines() if noNewlines
|
||||
return @newlineToken indent
|
||||
else if size > @indent
|
||||
if noNewlines
|
||||
@indebt = size - @indent
|
||||
@suppressNewlines()
|
||||
return indent.length
|
||||
return @suppressNewlines()
|
||||
diff = size - @indent + @outdebt
|
||||
@token 'INDENT', diff
|
||||
@indents.push diff
|
||||
@@ -255,7 +264,7 @@ exports.Lexer = class Lexer
|
||||
@indebt = 0
|
||||
@outdentToken @indent - size, noNewlines
|
||||
@indent = size
|
||||
indent.length
|
||||
true
|
||||
|
||||
# Record an outdent token or multiple tokens, if we happen to be moving back
|
||||
# inwards past several recorded indents.
|
||||
@@ -277,27 +286,27 @@ exports.Lexer = class Lexer
|
||||
@token 'OUTDENT', dent
|
||||
@outdebt -= moveOut if dent
|
||||
@token 'TERMINATOR', '\n' unless @tag() is 'TERMINATOR' or noNewlines
|
||||
this
|
||||
true
|
||||
|
||||
# Matches and consumes non-meaningful whitespace. Tag the previous token
|
||||
# as being "spaced", because there are some cases where it makes a difference.
|
||||
whitespaceToken: ->
|
||||
return 0 unless (match = WHITESPACE.exec @chunk) or
|
||||
(nline = @chunk.charAt(0) is '\n')
|
||||
return false unless (match = WHITESPACE.exec @chunk) or nline = @chunk.substring(0, 1) is '\n'
|
||||
prev = last @tokens
|
||||
prev[if match then 'spaced' else 'newLine'] = true if prev
|
||||
if match then match[0].length else 0
|
||||
@i += match[0].length if match
|
||||
!!match
|
||||
|
||||
# Generate a newline token. Consecutive newlines get merged together.
|
||||
newlineToken: ->
|
||||
newlineToken: (newlines) ->
|
||||
@token 'TERMINATOR', '\n' unless @tag() is 'TERMINATOR'
|
||||
this
|
||||
true
|
||||
|
||||
# Use a `\` at a line-ending to suppress the newline.
|
||||
# The slash is removed here once its job is done.
|
||||
suppressNewlines: ->
|
||||
@tokens.pop() if @value() is '\\'
|
||||
this
|
||||
true
|
||||
|
||||
# We treat all other single characters as a token. Eg.: `( ) , . !`
|
||||
# Multi-character operators are also literal tokens, so that Jison can assign
|
||||
@@ -310,21 +319,23 @@ exports.Lexer = class Lexer
|
||||
@tagParameters() if CODE.test value
|
||||
else
|
||||
value = @chunk.charAt 0
|
||||
tag = value
|
||||
@i += value.length
|
||||
tag = value
|
||||
prev = last @tokens
|
||||
if value is '=' and prev
|
||||
@assignmentError() if not prev[1].reserved and prev[1] in JS_FORBIDDEN
|
||||
if prev[1] in ['||', '&&']
|
||||
prev[0] = 'COMPOUND_ASSIGN'
|
||||
prev[1] += '='
|
||||
return 1
|
||||
return true
|
||||
if value is ';' then tag = 'TERMINATOR'
|
||||
else if value in LOGIC then tag = 'LOGIC'
|
||||
else if value in MATH then tag = 'MATH'
|
||||
else if value in COMPARE then tag = 'COMPARE'
|
||||
else if value in COMPOUND_ASSIGN then tag = 'COMPOUND_ASSIGN'
|
||||
else if value in UNARY then tag = 'UNARY'
|
||||
else if value in SHIFT then tag = 'SHIFT'
|
||||
else if value in LOGIC or value is '?' and prev?.spaced then tag = 'LOGIC'
|
||||
else if value is '?' and prev?.spaced then tag = 'LOGIC'
|
||||
else if prev and not prev.spaced
|
||||
if value is '(' and prev[0] in CALLABLE
|
||||
prev[0] = 'FUNC_EXIST' if prev[0] is '?'
|
||||
@@ -335,7 +346,7 @@ exports.Lexer = class Lexer
|
||||
when '?' then prev[0] = 'INDEX_SOAK'
|
||||
when '::' then prev[0] = 'INDEX_PROTO'
|
||||
@token tag, value
|
||||
value.length
|
||||
true
|
||||
|
||||
# Token Manipulators
|
||||
# ------------------
|
||||
@@ -350,7 +361,7 @@ exports.Lexer = class Lexer
|
||||
else if prev[1] is '.' and @value(1) isnt '.'
|
||||
if @tag(1) is '?'
|
||||
@tag 0, 'SOAK_ACCESS'
|
||||
@tokens.splice -2, 1
|
||||
@tokens.splice(-2, 1)
|
||||
else
|
||||
@tag 0, 'PROPERTY_ACCESS'
|
||||
else
|
||||
@@ -374,14 +385,14 @@ exports.Lexer = class Lexer
|
||||
# definitions versus argument lists in function calls. Walk backwards, tagging
|
||||
# parameters specially in order to make things easier for the parser.
|
||||
tagParameters: ->
|
||||
return this if @tag() isnt ')'
|
||||
return if @tag() isnt ')'
|
||||
i = @tokens.length
|
||||
while tok = @tokens[--i]
|
||||
switch tok[0]
|
||||
when 'IDENTIFIER' then tok[0] = 'PARAM'
|
||||
when ')' then tok[0] = 'PARAM_END'
|
||||
when '(', 'CALL_START' then tok[0] = 'PARAM_START'; return true
|
||||
this
|
||||
true
|
||||
|
||||
# Close up all remaining open blocks at the end of the file.
|
||||
closeIndentation: ->
|
||||
|
||||
Reference in New Issue
Block a user