mirror of
https://github.com/jashkenas/coffeescript.git
synced 2026-02-19 03:44:23 -05:00
first stub at heregex
This commit is contained in:
175
lib/lexer.js
175
lib/lexer.js
@@ -1,13 +1,12 @@
|
|||||||
(function() {
|
(function() {
|
||||||
var ASSIGNED, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, CONVERSIONS, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LINE_BREAK, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NEXT_CHARACTER, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_END, REGEX_ESCAPE, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, SHIFT, SIMPLESTR, UNARY, WHITESPACE, _ref, compact, count, include, last, starts;
|
var ASSIGNED, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, CONVERSIONS, HEREDOC, HEREDOC_INDENT, HEREGEX, HEREGEX_OMIT, IDENTIFIER, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_SPACES, LINE_BREAK, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NEXT_CHARACTER, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_END, REGEX_ESCAPE, REGEX_START, RESERVED, Rewriter, SHIFT, SIMPLESTR, TRAILING_SPACES, UNARY, WHITESPACE, _ref, compact, count, include, last, starts;
|
||||||
var __slice = Array.prototype.slice;
|
|
||||||
Rewriter = require('./rewriter').Rewriter;
|
Rewriter = require('./rewriter').Rewriter;
|
||||||
_ref = require('./helpers'), include = _ref.include, count = _ref.count, starts = _ref.starts, compact = _ref.compact, last = _ref.last;
|
_ref = require('./helpers'), include = _ref.include, count = _ref.count, starts = _ref.starts, compact = _ref.compact, last = _ref.last;
|
||||||
exports.Lexer = (function() {
|
exports.Lexer = (function() {
|
||||||
Lexer = function() {};
|
Lexer = function() {};
|
||||||
Lexer.prototype.tokenize = function(code, options) {
|
Lexer.prototype.tokenize = function(code, options) {
|
||||||
var o;
|
var o;
|
||||||
code = code.replace(/\r/g, '').replace(/\s+$/, '');
|
code = code.replace(/\r/g, '').replace(TRAILING_SPACES, '');
|
||||||
o = options || {};
|
o = options || {};
|
||||||
this.code = code;
|
this.code = code;
|
||||||
this.i = 0;
|
this.i = 0;
|
||||||
@@ -101,10 +100,14 @@
|
|||||||
this.token('STRING', (string = match[0]).replace(MULTILINER, '\\\n'));
|
this.token('STRING', (string = match[0]).replace(MULTILINER, '\\\n'));
|
||||||
break;
|
break;
|
||||||
case '"':
|
case '"':
|
||||||
if (!(string = this.balancedToken(['"', '"'], ['#{', '}']))) {
|
if (!(string = this.balancedString(this.chunk, [['"', '"'], ['#{', '}']]))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
this.interpolateString(string);
|
if (~string.indexOf('#{')) {
|
||||||
|
this.interpolateString(string);
|
||||||
|
} else {
|
||||||
|
this.token('STRING', this.escapeLines(string));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
@@ -115,7 +118,7 @@
|
|||||||
};
|
};
|
||||||
Lexer.prototype.heredocToken = function() {
|
Lexer.prototype.heredocToken = function() {
|
||||||
var doc, heredoc, match, quote;
|
var doc, heredoc, match, quote;
|
||||||
if (!(match = this.chunk.match(HEREDOC))) {
|
if (!(match = HEREDOC.exec(this.chunk))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
heredoc = match[0];
|
heredoc = match[0];
|
||||||
@@ -124,12 +127,12 @@
|
|||||||
quote: quote,
|
quote: quote,
|
||||||
indent: null
|
indent: null
|
||||||
});
|
});
|
||||||
if (quote === '"') {
|
if (quote === '"' && ~doc.indexOf('#{')) {
|
||||||
this.interpolateString(quote + doc + quote, {
|
this.interpolateString(quote + doc + quote, {
|
||||||
heredoc: true
|
heredoc: true
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
this.token('STRING', quote + doc + quote);
|
this.token('STRING', quote + this.escapeLines(doc, true) + quote);
|
||||||
}
|
}
|
||||||
this.line += count(heredoc, '\n');
|
this.line += count(heredoc, '\n');
|
||||||
this.i += heredoc.length;
|
this.i += heredoc.length;
|
||||||
@@ -162,8 +165,14 @@
|
|||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
Lexer.prototype.regexToken = function() {
|
Lexer.prototype.regexToken = function() {
|
||||||
var _ref2, end, first, flags, regex, str;
|
var _ref2, end, first, flags, match, regex, str;
|
||||||
if (!(first = this.chunk.match(REGEX_START))) {
|
if (this.chunk.charAt(0) !== '/') {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (match = HEREGEX.exec(this.chunk)) {
|
||||||
|
return this.heregexToken(match);
|
||||||
|
}
|
||||||
|
if (!(first = REGEX_START.exec(this.chunk))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (first[1] === ' ' && !('CALL_START' === (_ref2 = this.tag()) || '=' === _ref2)) {
|
if (first[1] === ' ' && !('CALL_START' === (_ref2 = this.tag()) || '=' === _ref2)) {
|
||||||
@@ -172,34 +181,48 @@
|
|||||||
if (include(NOT_REGEX, this.tag())) {
|
if (include(NOT_REGEX, this.tag())) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!(regex = this.balancedToken(['/', '/']))) {
|
if (!(regex = this.balancedString(this.chunk, [['/', '/']]))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!(end = this.chunk.slice(regex.length).match(REGEX_END))) {
|
if (!(end = this.chunk.slice(regex.length).match(REGEX_END))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
flags = end[0];
|
flags = end[0];
|
||||||
if (REGEX_INTERPOLATION.test(regex)) {
|
if (~regex.indexOf('#{')) {
|
||||||
str = regex.slice(1, -1);
|
str = regex.slice(1, -1);
|
||||||
str = str.replace(REGEX_ESCAPE, '\\$&');
|
this.tokens.push(['IDENTIFIER', 'RegExp'], ['CALL_START', '(']);
|
||||||
this.tokens.push(['(', '('], ['IDENTIFIER', 'RegExp'], ['CALL_START', '(']);
|
|
||||||
this.interpolateString("\"" + (str) + "\"", {
|
this.interpolateString("\"" + (str) + "\"", {
|
||||||
escapeQuotes: true
|
regex: true
|
||||||
});
|
});
|
||||||
if (flags) {
|
if (flags) {
|
||||||
this.tokens.push([',', ','], ['STRING', ("\"" + (flags) + "\"")]);
|
this.tokens.push([',', ','], ['STRING', ("\"" + (flags) + "\"")]);
|
||||||
}
|
}
|
||||||
this.tokens.push([')', ')'], [')', ')']);
|
this.tokens.push(['CALL_END', ')']);
|
||||||
} else {
|
} else {
|
||||||
this.token('REGEX', regex + flags);
|
this.token('REGEX', regex + flags);
|
||||||
}
|
}
|
||||||
this.i += regex.length + flags.length;
|
this.i += regex.length + flags.length;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
Lexer.prototype.balancedToken = function() {
|
Lexer.prototype.heregexToken = function(match) {
|
||||||
var delimited;
|
var _ref2, body, flags, heregex;
|
||||||
delimited = __slice.call(arguments, 0);
|
_ref2 = match, heregex = _ref2[0], body = _ref2[1], flags = _ref2[2];
|
||||||
return this.balancedString(this.chunk, delimited);
|
this.i += heregex.length;
|
||||||
|
if (!(~body.indexOf('#{'))) {
|
||||||
|
this.token('REGEX', '/' + body.replace(HEREGEX_OMIT, '') + '/' + flags);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
this.token('IDENTIFIER', 'RegExp');
|
||||||
|
this.tokens.push(['CALL_START', '(']);
|
||||||
|
this.interpolateString("\"" + (body) + "\"", {
|
||||||
|
regex: true,
|
||||||
|
heregex: true
|
||||||
|
});
|
||||||
|
if (flags) {
|
||||||
|
this.tokens.push([',', ','], ['STRING', '"' + flags + '"']);
|
||||||
|
}
|
||||||
|
this.tokens.push(['CALL_END', ')']);
|
||||||
|
return true;
|
||||||
};
|
};
|
||||||
Lexer.prototype.lineToken = function() {
|
Lexer.prototype.lineToken = function() {
|
||||||
var diff, indent, match, nextCharacter, noNewlines, prev, size;
|
var diff, indent, match, nextCharacter, noNewlines, prev, size;
|
||||||
@@ -450,7 +473,8 @@
|
|||||||
i += 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
} else if (starts(str, open, i)) {
|
}
|
||||||
|
if (starts(str, open, i)) {
|
||||||
levels.push(pair);
|
levels.push(pair);
|
||||||
i += open.length - 1;
|
i += open.length - 1;
|
||||||
break;
|
break;
|
||||||
@@ -471,74 +495,78 @@
|
|||||||
return !i ? false : str.slice(0, i);
|
return !i ? false : str.slice(0, i);
|
||||||
};
|
};
|
||||||
Lexer.prototype.interpolateString = function(str, options) {
|
Lexer.prototype.interpolateString = function(str, options) {
|
||||||
var _len, _ref2, end, escapeQuotes, escaped, expr, heredoc, i, idx, inner, interpolated, lexer, nested, pi, push, quote, s, tag, tok, token, tokens, value;
|
var _i, _len, _ref2, char, expr, heredoc, i, inner, interpolated, lexer, nested, pi, push, regex, s, tag, tok, tokens, value;
|
||||||
_ref2 = options || {}, heredoc = _ref2.heredoc, escapeQuotes = _ref2.escapeQuotes;
|
if (str.length < 5) {
|
||||||
quote = str.charAt(0);
|
return this.token('STRING', this.escapeLines(str, heredoc));
|
||||||
if (quote !== '"' || str.length < 3) {
|
|
||||||
return this.token('STRING', str);
|
|
||||||
}
|
}
|
||||||
|
_ref2 = options || (options = {}), heredoc = _ref2.heredoc, regex = _ref2.regex;
|
||||||
lexer = new Lexer;
|
lexer = new Lexer;
|
||||||
tokens = [];
|
tokens = [];
|
||||||
i = (pi = 1);
|
pi = 1;
|
||||||
end = str.length - 1;
|
i = 0;
|
||||||
while (i < end) {
|
while (char = str.charAt(i += 1)) {
|
||||||
if (str.charAt(i) === '\\') {
|
if (char === '\\') {
|
||||||
i += 1;
|
i += 1;
|
||||||
} else if (expr = this.balancedString(str.slice(i), [['#{', '}']])) {
|
continue;
|
||||||
if (pi < i) {
|
|
||||||
s = quote + this.escapeLines(str.slice(pi, i), heredoc) + quote;
|
|
||||||
tokens.push(['STRING', s]);
|
|
||||||
}
|
|
||||||
inner = expr.slice(2, -1).replace(/^[ \t]*\n/, '');
|
|
||||||
if (inner.length) {
|
|
||||||
if (heredoc) {
|
|
||||||
inner = inner.replace(RegExp('\\\\' + quote, 'g'), quote);
|
|
||||||
}
|
|
||||||
nested = lexer.tokenize("(" + (inner) + ")", {
|
|
||||||
line: this.line
|
|
||||||
});
|
|
||||||
for (idx = 0, _len = nested.length; idx < _len; idx++) {
|
|
||||||
tok = nested[idx];
|
|
||||||
if (tok[0] === 'CALL_END') {
|
|
||||||
(tok[0] = ')');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
nested.pop();
|
|
||||||
tokens.push(['TOKENS', nested]);
|
|
||||||
} else {
|
|
||||||
tokens.push(['STRING', quote + quote]);
|
|
||||||
}
|
|
||||||
i += expr.length - 1;
|
|
||||||
pi = i + 1;
|
|
||||||
}
|
}
|
||||||
i += 1;
|
if (!(char === '#' && str.charAt(i + 1) === '{' && (expr = this.balancedString(str.slice(i + 1), [['{', '}']])))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (pi < i) {
|
||||||
|
tokens.push(['STRING', '"' + this.escapeLines(str.slice(pi, i), heredoc) + '"']);
|
||||||
|
}
|
||||||
|
inner = expr.slice(1, -1).replace(LEADING_SPACES, '').replace(TRAILING_SPACES, '');
|
||||||
|
if (inner.length) {
|
||||||
|
if (heredoc) {
|
||||||
|
inner = inner.replace(/\\\"/g, '"');
|
||||||
|
}
|
||||||
|
nested = lexer.tokenize("(" + (inner) + ")", {
|
||||||
|
line: this.line
|
||||||
|
});
|
||||||
|
for (_i = 0, _len = nested.length; _i < _len; _i++) {
|
||||||
|
tok = nested[_i];
|
||||||
|
if (tok[0] === 'CALL_END') {
|
||||||
|
(tok[0] = ')');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nested.pop();
|
||||||
|
tokens.push(['TOKENS', nested]);
|
||||||
|
} else {
|
||||||
|
tokens.push(['STRING', '""']);
|
||||||
|
}
|
||||||
|
i += expr.length;
|
||||||
|
pi = i + 1;
|
||||||
}
|
}
|
||||||
if ((i > pi) && (pi < str.length - 1)) {
|
if ((i > pi) && (pi < str.length - 1)) {
|
||||||
s = str.slice(pi, i).replace(MULTILINER, heredoc ? '\\n' : '');
|
s = this.escapeLines(str.slice(pi, -1), heredoc);
|
||||||
tokens.push(['STRING', quote + s + quote]);
|
tokens.push(['STRING', '"' + s + '"']);
|
||||||
}
|
}
|
||||||
if (tokens[0][0] !== 'STRING') {
|
if (tokens[0][0] !== 'STRING') {
|
||||||
tokens.unshift(['STRING', '""']);
|
tokens.unshift(['STRING', '""']);
|
||||||
}
|
}
|
||||||
interpolated = tokens.length > 1;
|
interpolated = !regex && tokens.length > 1;
|
||||||
if (interpolated) {
|
if (interpolated) {
|
||||||
this.token('(', '(');
|
this.token('(', '(');
|
||||||
}
|
}
|
||||||
push = tokens.push;
|
push = tokens.push;
|
||||||
for (i = 0, _len = tokens.length; i < _len; i++) {
|
for (i = 0, _len = tokens.length; i < _len; i++) {
|
||||||
token = tokens[i];
|
_ref2 = tokens[i], tag = _ref2[0], value = _ref2[1];
|
||||||
_ref2 = token, tag = _ref2[0], value = _ref2[1];
|
if (i) {
|
||||||
if (tag === 'TOKENS') {
|
|
||||||
push.apply(this.tokens, value);
|
|
||||||
} else if (tag === 'STRING' && escapeQuotes) {
|
|
||||||
escaped = value.slice(1, -1).replace(/"/g, '\\"');
|
|
||||||
this.token(tag, "\"" + (escaped) + "\"");
|
|
||||||
} else {
|
|
||||||
this.token(tag, value);
|
|
||||||
}
|
|
||||||
if (i < tokens.length - 1) {
|
|
||||||
this.token('+', '+');
|
this.token('+', '+');
|
||||||
}
|
}
|
||||||
|
if (tag === 'TOKENS') {
|
||||||
|
push.apply(this.tokens, value);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (regex) {
|
||||||
|
value = value.slice(1, -1);
|
||||||
|
value = value.replace(/[\\\"]/g, '\\$&');
|
||||||
|
if (options.heregex) {
|
||||||
|
value = value.replace(HEREGEX_OMIT, '');
|
||||||
|
}
|
||||||
|
value = '"' + value + '"';
|
||||||
|
}
|
||||||
|
this.token(tag, value);
|
||||||
}
|
}
|
||||||
if (interpolated) {
|
if (interpolated) {
|
||||||
this.token(')', ')');
|
this.token(')', ')');
|
||||||
@@ -587,14 +615,17 @@
|
|||||||
SIMPLESTR = /^'[^\\']*(?:\\.[^\\']*)*'/;
|
SIMPLESTR = /^'[^\\']*(?:\\.[^\\']*)*'/;
|
||||||
JSTOKEN = /^`[^\\`]*(?:\\.[^\\`]*)*`/;
|
JSTOKEN = /^`[^\\`]*(?:\\.[^\\`]*)*`/;
|
||||||
REGEX_START = /^\/([^\/])/;
|
REGEX_START = /^\/([^\/])/;
|
||||||
REGEX_INTERPOLATION = /[^\\]#\{.*[^\\]\}/;
|
|
||||||
REGEX_END = /^[imgy]{0,4}(?![a-zA-Z])/;
|
REGEX_END = /^[imgy]{0,4}(?![a-zA-Z])/;
|
||||||
REGEX_ESCAPE = /\\[^#]/g;
|
REGEX_ESCAPE = /\\[^#]/g;
|
||||||
|
HEREGEX = /^\/{3}([\s\S]+?)\/{3}([imgy]{0,4})(?![A-Za-z])/;
|
||||||
|
HEREGEX_OMIT = /\s+(?:#.*)?/g;
|
||||||
MULTILINER = /\n/g;
|
MULTILINER = /\n/g;
|
||||||
NO_NEWLINE = /^(?:[-+*&|\/%=<>!.\\][<>=&|]*|and|or|is(?:nt)?|n(?:ot|ew)|delete|typeof|instanceof)$/;
|
NO_NEWLINE = /^(?:[-+*&|\/%=<>!.\\][<>=&|]*|and|or|is(?:nt)?|n(?:ot|ew)|delete|typeof|instanceof)$/;
|
||||||
HEREDOC_INDENT = /\n+([ \t]*)/g;
|
HEREDOC_INDENT = /\n+([ \t]*)/g;
|
||||||
ASSIGNED = /^\s*@?[$A-Za-z_][$\w]*[ \t]*?[:=][^:=>]/;
|
ASSIGNED = /^\s*@?[$A-Za-z_][$\w]*[ \t]*?[:=][^:=>]/;
|
||||||
NEXT_CHARACTER = /^\s*(\S?)/;
|
NEXT_CHARACTER = /^\s*(\S?)/;
|
||||||
|
LEADING_SPACES = /^\s+/;
|
||||||
|
TRAILING_SPACES = /\s+$/;
|
||||||
COMPOUND_ASSIGN = ['-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=', '<<=', '>>=', '>>>=', '&=', '^=', '|='];
|
COMPOUND_ASSIGN = ['-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=', '<<=', '>>=', '>>>=', '&=', '^=', '|='];
|
||||||
UNARY = ['UMINUS', 'UPLUS', '!', '!!', '~', 'NEW', 'TYPEOF', 'DELETE'];
|
UNARY = ['UMINUS', 'UPLUS', '!', '!!', '~', 'NEW', 'TYPEOF', 'DELETE'];
|
||||||
LOGIC = ['&', '|', '^', '&&', '||'];
|
LOGIC = ['&', '|', '^', '&&', '||'];
|
||||||
|
|||||||
126
src/lexer.coffee
126
src/lexer.coffee
@@ -33,7 +33,7 @@ exports.Lexer = class Lexer
|
|||||||
# Before returning the token stream, run it through the [Rewriter](rewriter.html)
|
# Before returning the token stream, run it through the [Rewriter](rewriter.html)
|
||||||
# unless explicitly asked not to.
|
# unless explicitly asked not to.
|
||||||
tokenize: (code, options) ->
|
tokenize: (code, options) ->
|
||||||
code = code.replace(/\r/g, '').replace /\s+$/, ''
|
code = code.replace(/\r/g, '').replace TRAILING_SPACES, ''
|
||||||
o = options or {}
|
o = options or {}
|
||||||
@code = code # The remainder of the source code.
|
@code = code # The remainder of the source code.
|
||||||
@i = 0 # Current character position we're parsing.
|
@i = 0 # Current character position we're parsing.
|
||||||
@@ -124,8 +124,11 @@ exports.Lexer = class Lexer
|
|||||||
return false unless match = SIMPLESTR.exec @chunk
|
return false unless match = SIMPLESTR.exec @chunk
|
||||||
@token 'STRING', (string = match[0]).replace MULTILINER, '\\\n'
|
@token 'STRING', (string = match[0]).replace MULTILINER, '\\\n'
|
||||||
when '"'
|
when '"'
|
||||||
return false unless string = @balancedToken ['"', '"'], ['#{', '}']
|
return false unless string = @balancedString @chunk, [['"', '"'], ['#{', '}']]
|
||||||
@interpolateString string
|
if ~string.indexOf '#{'
|
||||||
|
@interpolateString string
|
||||||
|
else
|
||||||
|
@token 'STRING', @escapeLines string
|
||||||
else
|
else
|
||||||
return false
|
return false
|
||||||
@line += count string, '\n'
|
@line += count string, '\n'
|
||||||
@@ -135,14 +138,14 @@ exports.Lexer = class Lexer
|
|||||||
# Matches heredocs, adjusting indentation to the correct level, as heredocs
|
# Matches heredocs, adjusting indentation to the correct level, as heredocs
|
||||||
# preserve whitespace, but ignore indentation to the left.
|
# preserve whitespace, but ignore indentation to the left.
|
||||||
heredocToken: ->
|
heredocToken: ->
|
||||||
return false unless match = @chunk.match HEREDOC
|
return false unless match = HEREDOC.exec @chunk
|
||||||
heredoc = match[0]
|
heredoc = match[0]
|
||||||
quote = heredoc.charAt 0
|
quote = heredoc.charAt 0
|
||||||
doc = @sanitizeHeredoc match[2], {quote, indent: null}
|
doc = @sanitizeHeredoc match[2], {quote, indent: null}
|
||||||
if quote is '"'
|
if quote is '"' and ~doc.indexOf '#{'
|
||||||
@interpolateString quote + doc + quote, heredoc: yes
|
@interpolateString quote + doc + quote, heredoc: yes
|
||||||
else
|
else
|
||||||
@token 'STRING', quote + doc + quote
|
@token 'STRING', quote + @escapeLines(doc, yes) + quote
|
||||||
@line += count heredoc, '\n'
|
@line += count heredoc, '\n'
|
||||||
@i += heredoc.length
|
@i += heredoc.length
|
||||||
true
|
true
|
||||||
@@ -168,31 +171,41 @@ exports.Lexer = class Lexer
|
|||||||
|
|
||||||
# Matches regular expression literals. Lexing regular expressions is difficult
|
# Matches regular expression literals. Lexing regular expressions is difficult
|
||||||
# to distinguish from division, so we borrow some basic heuristics from
|
# to distinguish from division, so we borrow some basic heuristics from
|
||||||
# JavaScript and Ruby, borrow slash balancing from `@balancedToken`, and
|
# JavaScript and Ruby, borrow slash balancing from `@balancedString`, and
|
||||||
# borrow interpolation from `@interpolateString`.
|
# borrow interpolation from `@interpolateString`.
|
||||||
regexToken: ->
|
regexToken: ->
|
||||||
return false unless first = @chunk.match REGEX_START
|
return false if @chunk.charAt(0) isnt '/'
|
||||||
|
return @heregexToken match if match = HEREGEX.exec @chunk
|
||||||
|
return false unless first = REGEX_START.exec @chunk
|
||||||
return false if first[1] is ' ' and @tag() not in ['CALL_START', '=']
|
return false if first[1] is ' ' and @tag() not in ['CALL_START', '=']
|
||||||
return false if include NOT_REGEX, @tag()
|
return false if include NOT_REGEX, @tag()
|
||||||
return false unless regex = @balancedToken ['/', '/']
|
return false unless regex = @balancedString @chunk, [['/', '/']]
|
||||||
return false unless end = @chunk[regex.length..].match REGEX_END
|
return false unless end = @chunk[regex.length..].match REGEX_END
|
||||||
flags = end[0]
|
flags = end[0]
|
||||||
if REGEX_INTERPOLATION.test regex
|
if ~regex.indexOf '#{'
|
||||||
str = regex.slice 1, -1
|
str = regex.slice 1, -1
|
||||||
str = str.replace REGEX_ESCAPE, '\\$&'
|
@tokens.push ['IDENTIFIER', 'RegExp'], ['CALL_START', '(']
|
||||||
@tokens.push ['(', '('], ['IDENTIFIER', 'RegExp'], ['CALL_START', '(']
|
@interpolateString "\"#{str}\"", regex: yes
|
||||||
@interpolateString "\"#{str}\"", escapeQuotes: yes
|
|
||||||
@tokens.push [',', ','], ['STRING', "\"#{flags}\""] if flags
|
@tokens.push [',', ','], ['STRING', "\"#{flags}\""] if flags
|
||||||
@tokens.push [')', ')'], [')', ')']
|
@tokens.push ['CALL_END', ')']
|
||||||
else
|
else
|
||||||
@token 'REGEX', regex + flags
|
@token 'REGEX', regex + flags
|
||||||
@i += regex.length + flags.length
|
@i += regex.length + flags.length
|
||||||
true
|
true
|
||||||
|
|
||||||
# Matches a token in which the passed delimiter pairs must be correctly
|
# Matches experimental, multiline and extended regular expression literals.
|
||||||
# balanced (ie. strings, JS literals).
|
heregexToken: (match) ->
|
||||||
balancedToken: (delimited...) ->
|
[heregex, body, flags] = match
|
||||||
@balancedString @chunk, delimited
|
@i += heregex.length
|
||||||
|
unless ~body.indexOf '#{'
|
||||||
|
@token 'REGEX', '/' + body.replace(HEREGEX_OMIT, '') + '/' + flags
|
||||||
|
return true
|
||||||
|
@token 'IDENTIFIER', 'RegExp'
|
||||||
|
@tokens.push ['CALL_START', '(']
|
||||||
|
@interpolateString "\"#{body}\"", regex: yes, heregex: yes
|
||||||
|
@tokens.push [',', ','], ['STRING', '"' + flags + '"'] if flags
|
||||||
|
@tokens.push ['CALL_END', ')']
|
||||||
|
true
|
||||||
|
|
||||||
# Matches newlines, indents, and outdents, and determines which is which.
|
# Matches newlines, indents, and outdents, and determines which is which.
|
||||||
# If we can detect that the current line is continued onto the the next line,
|
# If we can detect that the current line is continued onto the the next line,
|
||||||
@@ -399,7 +412,7 @@ exports.Lexer = class Lexer
|
|||||||
i += close.length - 1
|
i += close.length - 1
|
||||||
i += 1 unless levels.length
|
i += 1 unless levels.length
|
||||||
break
|
break
|
||||||
else if starts str, open, i
|
if starts str, open, i
|
||||||
levels.push(pair)
|
levels.push(pair)
|
||||||
i += open.length - 1
|
i += open.length - 1
|
||||||
break
|
break
|
||||||
@@ -419,49 +432,50 @@ exports.Lexer = class Lexer
|
|||||||
# new Lexer, tokenize the interpolated contents, and merge them into the
|
# new Lexer, tokenize the interpolated contents, and merge them into the
|
||||||
# token stream.
|
# token stream.
|
||||||
interpolateString: (str, options) ->
|
interpolateString: (str, options) ->
|
||||||
{heredoc, escapeQuotes} = options or {}
|
return @token 'STRING', @escapeLines(str, heredoc) if str.length < 5 # "#{}"
|
||||||
quote = str.charAt 0
|
{heredoc, regex} = options or= {}
|
||||||
return @token 'STRING', str if quote isnt '"' or str.length < 3
|
|
||||||
lexer = new Lexer
|
lexer = new Lexer
|
||||||
tokens = []
|
tokens = []
|
||||||
i = pi = 1
|
pi = 1
|
||||||
end = str.length - 1
|
i = 0
|
||||||
while i < end
|
while char = str.charAt i += 1
|
||||||
if str.charAt(i) is '\\'
|
if char is '\\'
|
||||||
i += 1
|
i += 1
|
||||||
else if expr = @balancedString str[i..], [['#{', '}']]
|
continue
|
||||||
if pi < i
|
unless char is '#' and str.charAt(i+1) is '{' and
|
||||||
s = quote + @escapeLines(str[pi...i], heredoc) + quote
|
(expr = @balancedString str[i+1..], [['{', '}']])
|
||||||
tokens.push ['STRING', s]
|
continue
|
||||||
inner = expr.slice(2, -1).replace /^[ \t]*\n/, ''
|
if pi < i
|
||||||
if inner.length
|
tokens.push ['STRING', '"' + @escapeLines(str[pi...i], heredoc) + '"']
|
||||||
inner = inner.replace RegExp('\\\\' + quote, 'g'), quote if heredoc
|
inner = expr.slice(1, -1).replace(LEADING_SPACES, '').replace(TRAILING_SPACES, '')
|
||||||
nested = lexer.tokenize "(#{inner})", line: @line
|
if inner.length
|
||||||
(tok[0] = ')') for tok, idx in nested when tok[0] is 'CALL_END'
|
inner = inner.replace /\\\"/g, '"' if heredoc
|
||||||
nested.pop()
|
nested = lexer.tokenize "(#{inner})", line: @line
|
||||||
tokens.push ['TOKENS', nested]
|
(tok[0] = ')') for tok in nested when tok[0] is 'CALL_END'
|
||||||
else
|
nested.pop()
|
||||||
tokens.push ['STRING', quote + quote]
|
tokens.push ['TOKENS', nested]
|
||||||
i += expr.length - 1
|
else
|
||||||
pi = i + 1
|
tokens.push ['STRING', '""']
|
||||||
i += 1
|
i += expr.length
|
||||||
|
pi = i + 1
|
||||||
if i > pi < str.length - 1
|
if i > pi < str.length - 1
|
||||||
s = str[pi...i].replace MULTILINER, if heredoc then '\\n' else ''
|
s = @escapeLines str.slice(pi, -1), heredoc
|
||||||
tokens.push ['STRING', quote + s + quote]
|
tokens.push ['STRING', '"' + s + '"']
|
||||||
tokens.unshift ['STRING', '""'] unless tokens[0][0] is 'STRING'
|
tokens.unshift ['STRING', '""'] unless tokens[0][0] is 'STRING'
|
||||||
interpolated = tokens.length > 1
|
interpolated = not regex and tokens.length > 1
|
||||||
@token '(', '(' if interpolated
|
@token '(', '(' if interpolated
|
||||||
{push} = tokens
|
{push} = tokens
|
||||||
for token, i in tokens
|
for [tag, value], i in tokens
|
||||||
[tag, value] = token
|
@token '+', '+' if i
|
||||||
if tag is 'TOKENS'
|
if tag is 'TOKENS'
|
||||||
push.apply @tokens, value
|
push.apply @tokens, value
|
||||||
else if tag is 'STRING' and escapeQuotes
|
continue
|
||||||
escaped = value.slice(1, -1).replace(/"/g, '\\"')
|
if regex
|
||||||
@token tag, "\"#{escaped}\""
|
value = value.slice 1, -1
|
||||||
else
|
value = value.replace /[\\\"]/g, '\\$&'
|
||||||
@token tag, value
|
value = value.replace HEREGEX_OMIT, '' if options.heregex
|
||||||
@token '+', '+' if i < tokens.length - 1
|
value = '"' + value + '"'
|
||||||
|
@token tag, value
|
||||||
@token ')', ')' if interpolated
|
@token ')', ')' if interpolated
|
||||||
tokens
|
tokens
|
||||||
|
|
||||||
@@ -544,16 +558,20 @@ JSTOKEN = /^`[^\\`]*(?:\\.[^\\`]*)*`/
|
|||||||
|
|
||||||
# Regex-matching-regexes.
|
# Regex-matching-regexes.
|
||||||
REGEX_START = /^\/([^\/])/
|
REGEX_START = /^\/([^\/])/
|
||||||
REGEX_INTERPOLATION = /[^\\]#\{.*[^\\]\}/
|
|
||||||
REGEX_END = /^[imgy]{0,4}(?![a-zA-Z])/
|
REGEX_END = /^[imgy]{0,4}(?![a-zA-Z])/
|
||||||
REGEX_ESCAPE = /\\[^#]/g
|
REGEX_ESCAPE = /\\[^#]/g
|
||||||
|
|
||||||
|
HEREGEX = /^\/{3}([\s\S]+?)\/{3}([imgy]{0,4})(?![A-Za-z])/
|
||||||
|
HEREGEX_OMIT = /\s+(?:#.*)?/g
|
||||||
|
|
||||||
# Token cleaning regexes.
|
# Token cleaning regexes.
|
||||||
MULTILINER = /\n/g
|
MULTILINER = /\n/g
|
||||||
NO_NEWLINE = /^(?:[-+*&|\/%=<>!.\\][<>=&|]*|and|or|is(?:nt)?|n(?:ot|ew)|delete|typeof|instanceof)$/
|
NO_NEWLINE = /^(?:[-+*&|\/%=<>!.\\][<>=&|]*|and|or|is(?:nt)?|n(?:ot|ew)|delete|typeof|instanceof)$/
|
||||||
HEREDOC_INDENT = /\n+([ \t]*)/g
|
HEREDOC_INDENT = /\n+([ \t]*)/g
|
||||||
ASSIGNED = /^\s*@?[$A-Za-z_][$\w]*[ \t]*?[:=][^:=>]/
|
ASSIGNED = /^\s*@?[$A-Za-z_][$\w]*[ \t]*?[:=][^:=>]/
|
||||||
NEXT_CHARACTER = /^\s*(\S?)/
|
NEXT_CHARACTER = /^\s*(\S?)/
|
||||||
|
LEADING_SPACES = /^\s+/
|
||||||
|
TRAILING_SPACES = /\s+$/
|
||||||
|
|
||||||
# Compound assignment tokens.
|
# Compound assignment tokens.
|
||||||
COMPOUND_ASSIGN = ['-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=', '<<=', '>>=', '>>>=', '&=', '^=', '|=']
|
COMPOUND_ASSIGN = ['-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=', '<<=', '>>=', '>>>=', '&=', '^=', '|=']
|
||||||
|
|||||||
@@ -26,3 +26,13 @@ regexp = / /
|
|||||||
ok ' '.match regexp
|
ok ' '.match regexp
|
||||||
|
|
||||||
ok (obj.width()/id - obj.height()/id) is -5
|
ok (obj.width()/id - obj.height()/id) is -5
|
||||||
|
|
||||||
|
eq /^I'm\s+Heregex?/gim + '', ///
|
||||||
|
^ I'm \s+ Heregex? # or not
|
||||||
|
///gim + ''
|
||||||
|
eq '\\\\#{}', ///
|
||||||
|
#{
|
||||||
|
"#{ '\\' }"
|
||||||
|
}
|
||||||
|
\#{}
|
||||||
|
///.source
|
||||||
|
|||||||
Reference in New Issue
Block a user