mirror of
https://github.com/jashkenas/coffeescript.git
synced 2026-02-19 03:44:23 -05:00
first stub at heregex
This commit is contained in:
175
lib/lexer.js
175
lib/lexer.js
@@ -1,13 +1,12 @@
|
||||
(function() {
|
||||
var ASSIGNED, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, CONVERSIONS, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LINE_BREAK, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NEXT_CHARACTER, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_END, REGEX_ESCAPE, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, SHIFT, SIMPLESTR, UNARY, WHITESPACE, _ref, compact, count, include, last, starts;
|
||||
var __slice = Array.prototype.slice;
|
||||
var ASSIGNED, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, CONVERSIONS, HEREDOC, HEREDOC_INDENT, HEREGEX, HEREGEX_OMIT, IDENTIFIER, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_SPACES, LINE_BREAK, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NEXT_CHARACTER, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_END, REGEX_ESCAPE, REGEX_START, RESERVED, Rewriter, SHIFT, SIMPLESTR, TRAILING_SPACES, UNARY, WHITESPACE, _ref, compact, count, include, last, starts;
|
||||
Rewriter = require('./rewriter').Rewriter;
|
||||
_ref = require('./helpers'), include = _ref.include, count = _ref.count, starts = _ref.starts, compact = _ref.compact, last = _ref.last;
|
||||
exports.Lexer = (function() {
|
||||
Lexer = function() {};
|
||||
Lexer.prototype.tokenize = function(code, options) {
|
||||
var o;
|
||||
code = code.replace(/\r/g, '').replace(/\s+$/, '');
|
||||
code = code.replace(/\r/g, '').replace(TRAILING_SPACES, '');
|
||||
o = options || {};
|
||||
this.code = code;
|
||||
this.i = 0;
|
||||
@@ -101,10 +100,14 @@
|
||||
this.token('STRING', (string = match[0]).replace(MULTILINER, '\\\n'));
|
||||
break;
|
||||
case '"':
|
||||
if (!(string = this.balancedToken(['"', '"'], ['#{', '}']))) {
|
||||
if (!(string = this.balancedString(this.chunk, [['"', '"'], ['#{', '}']]))) {
|
||||
return false;
|
||||
}
|
||||
this.interpolateString(string);
|
||||
if (~string.indexOf('#{')) {
|
||||
this.interpolateString(string);
|
||||
} else {
|
||||
this.token('STRING', this.escapeLines(string));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
@@ -115,7 +118,7 @@
|
||||
};
|
||||
Lexer.prototype.heredocToken = function() {
|
||||
var doc, heredoc, match, quote;
|
||||
if (!(match = this.chunk.match(HEREDOC))) {
|
||||
if (!(match = HEREDOC.exec(this.chunk))) {
|
||||
return false;
|
||||
}
|
||||
heredoc = match[0];
|
||||
@@ -124,12 +127,12 @@
|
||||
quote: quote,
|
||||
indent: null
|
||||
});
|
||||
if (quote === '"') {
|
||||
if (quote === '"' && ~doc.indexOf('#{')) {
|
||||
this.interpolateString(quote + doc + quote, {
|
||||
heredoc: true
|
||||
});
|
||||
} else {
|
||||
this.token('STRING', quote + doc + quote);
|
||||
this.token('STRING', quote + this.escapeLines(doc, true) + quote);
|
||||
}
|
||||
this.line += count(heredoc, '\n');
|
||||
this.i += heredoc.length;
|
||||
@@ -162,8 +165,14 @@
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.regexToken = function() {
|
||||
var _ref2, end, first, flags, regex, str;
|
||||
if (!(first = this.chunk.match(REGEX_START))) {
|
||||
var _ref2, end, first, flags, match, regex, str;
|
||||
if (this.chunk.charAt(0) !== '/') {
|
||||
return false;
|
||||
}
|
||||
if (match = HEREGEX.exec(this.chunk)) {
|
||||
return this.heregexToken(match);
|
||||
}
|
||||
if (!(first = REGEX_START.exec(this.chunk))) {
|
||||
return false;
|
||||
}
|
||||
if (first[1] === ' ' && !('CALL_START' === (_ref2 = this.tag()) || '=' === _ref2)) {
|
||||
@@ -172,34 +181,48 @@
|
||||
if (include(NOT_REGEX, this.tag())) {
|
||||
return false;
|
||||
}
|
||||
if (!(regex = this.balancedToken(['/', '/']))) {
|
||||
if (!(regex = this.balancedString(this.chunk, [['/', '/']]))) {
|
||||
return false;
|
||||
}
|
||||
if (!(end = this.chunk.slice(regex.length).match(REGEX_END))) {
|
||||
return false;
|
||||
}
|
||||
flags = end[0];
|
||||
if (REGEX_INTERPOLATION.test(regex)) {
|
||||
if (~regex.indexOf('#{')) {
|
||||
str = regex.slice(1, -1);
|
||||
str = str.replace(REGEX_ESCAPE, '\\$&');
|
||||
this.tokens.push(['(', '('], ['IDENTIFIER', 'RegExp'], ['CALL_START', '(']);
|
||||
this.tokens.push(['IDENTIFIER', 'RegExp'], ['CALL_START', '(']);
|
||||
this.interpolateString("\"" + (str) + "\"", {
|
||||
escapeQuotes: true
|
||||
regex: true
|
||||
});
|
||||
if (flags) {
|
||||
this.tokens.push([',', ','], ['STRING', ("\"" + (flags) + "\"")]);
|
||||
}
|
||||
this.tokens.push([')', ')'], [')', ')']);
|
||||
this.tokens.push(['CALL_END', ')']);
|
||||
} else {
|
||||
this.token('REGEX', regex + flags);
|
||||
}
|
||||
this.i += regex.length + flags.length;
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.balancedToken = function() {
|
||||
var delimited;
|
||||
delimited = __slice.call(arguments, 0);
|
||||
return this.balancedString(this.chunk, delimited);
|
||||
Lexer.prototype.heregexToken = function(match) {
|
||||
var _ref2, body, flags, heregex;
|
||||
_ref2 = match, heregex = _ref2[0], body = _ref2[1], flags = _ref2[2];
|
||||
this.i += heregex.length;
|
||||
if (!(~body.indexOf('#{'))) {
|
||||
this.token('REGEX', '/' + body.replace(HEREGEX_OMIT, '') + '/' + flags);
|
||||
return true;
|
||||
}
|
||||
this.token('IDENTIFIER', 'RegExp');
|
||||
this.tokens.push(['CALL_START', '(']);
|
||||
this.interpolateString("\"" + (body) + "\"", {
|
||||
regex: true,
|
||||
heregex: true
|
||||
});
|
||||
if (flags) {
|
||||
this.tokens.push([',', ','], ['STRING', '"' + flags + '"']);
|
||||
}
|
||||
this.tokens.push(['CALL_END', ')']);
|
||||
return true;
|
||||
};
|
||||
Lexer.prototype.lineToken = function() {
|
||||
var diff, indent, match, nextCharacter, noNewlines, prev, size;
|
||||
@@ -450,7 +473,8 @@
|
||||
i += 1;
|
||||
}
|
||||
break;
|
||||
} else if (starts(str, open, i)) {
|
||||
}
|
||||
if (starts(str, open, i)) {
|
||||
levels.push(pair);
|
||||
i += open.length - 1;
|
||||
break;
|
||||
@@ -471,74 +495,78 @@
|
||||
return !i ? false : str.slice(0, i);
|
||||
};
|
||||
Lexer.prototype.interpolateString = function(str, options) {
|
||||
var _len, _ref2, end, escapeQuotes, escaped, expr, heredoc, i, idx, inner, interpolated, lexer, nested, pi, push, quote, s, tag, tok, token, tokens, value;
|
||||
_ref2 = options || {}, heredoc = _ref2.heredoc, escapeQuotes = _ref2.escapeQuotes;
|
||||
quote = str.charAt(0);
|
||||
if (quote !== '"' || str.length < 3) {
|
||||
return this.token('STRING', str);
|
||||
var _i, _len, _ref2, char, expr, heredoc, i, inner, interpolated, lexer, nested, pi, push, regex, s, tag, tok, tokens, value;
|
||||
if (str.length < 5) {
|
||||
return this.token('STRING', this.escapeLines(str, heredoc));
|
||||
}
|
||||
_ref2 = options || (options = {}), heredoc = _ref2.heredoc, regex = _ref2.regex;
|
||||
lexer = new Lexer;
|
||||
tokens = [];
|
||||
i = (pi = 1);
|
||||
end = str.length - 1;
|
||||
while (i < end) {
|
||||
if (str.charAt(i) === '\\') {
|
||||
pi = 1;
|
||||
i = 0;
|
||||
while (char = str.charAt(i += 1)) {
|
||||
if (char === '\\') {
|
||||
i += 1;
|
||||
} else if (expr = this.balancedString(str.slice(i), [['#{', '}']])) {
|
||||
if (pi < i) {
|
||||
s = quote + this.escapeLines(str.slice(pi, i), heredoc) + quote;
|
||||
tokens.push(['STRING', s]);
|
||||
}
|
||||
inner = expr.slice(2, -1).replace(/^[ \t]*\n/, '');
|
||||
if (inner.length) {
|
||||
if (heredoc) {
|
||||
inner = inner.replace(RegExp('\\\\' + quote, 'g'), quote);
|
||||
}
|
||||
nested = lexer.tokenize("(" + (inner) + ")", {
|
||||
line: this.line
|
||||
});
|
||||
for (idx = 0, _len = nested.length; idx < _len; idx++) {
|
||||
tok = nested[idx];
|
||||
if (tok[0] === 'CALL_END') {
|
||||
(tok[0] = ')');
|
||||
}
|
||||
}
|
||||
nested.pop();
|
||||
tokens.push(['TOKENS', nested]);
|
||||
} else {
|
||||
tokens.push(['STRING', quote + quote]);
|
||||
}
|
||||
i += expr.length - 1;
|
||||
pi = i + 1;
|
||||
continue;
|
||||
}
|
||||
i += 1;
|
||||
if (!(char === '#' && str.charAt(i + 1) === '{' && (expr = this.balancedString(str.slice(i + 1), [['{', '}']])))) {
|
||||
continue;
|
||||
}
|
||||
if (pi < i) {
|
||||
tokens.push(['STRING', '"' + this.escapeLines(str.slice(pi, i), heredoc) + '"']);
|
||||
}
|
||||
inner = expr.slice(1, -1).replace(LEADING_SPACES, '').replace(TRAILING_SPACES, '');
|
||||
if (inner.length) {
|
||||
if (heredoc) {
|
||||
inner = inner.replace(/\\\"/g, '"');
|
||||
}
|
||||
nested = lexer.tokenize("(" + (inner) + ")", {
|
||||
line: this.line
|
||||
});
|
||||
for (_i = 0, _len = nested.length; _i < _len; _i++) {
|
||||
tok = nested[_i];
|
||||
if (tok[0] === 'CALL_END') {
|
||||
(tok[0] = ')');
|
||||
}
|
||||
}
|
||||
nested.pop();
|
||||
tokens.push(['TOKENS', nested]);
|
||||
} else {
|
||||
tokens.push(['STRING', '""']);
|
||||
}
|
||||
i += expr.length;
|
||||
pi = i + 1;
|
||||
}
|
||||
if ((i > pi) && (pi < str.length - 1)) {
|
||||
s = str.slice(pi, i).replace(MULTILINER, heredoc ? '\\n' : '');
|
||||
tokens.push(['STRING', quote + s + quote]);
|
||||
s = this.escapeLines(str.slice(pi, -1), heredoc);
|
||||
tokens.push(['STRING', '"' + s + '"']);
|
||||
}
|
||||
if (tokens[0][0] !== 'STRING') {
|
||||
tokens.unshift(['STRING', '""']);
|
||||
}
|
||||
interpolated = tokens.length > 1;
|
||||
interpolated = !regex && tokens.length > 1;
|
||||
if (interpolated) {
|
||||
this.token('(', '(');
|
||||
}
|
||||
push = tokens.push;
|
||||
for (i = 0, _len = tokens.length; i < _len; i++) {
|
||||
token = tokens[i];
|
||||
_ref2 = token, tag = _ref2[0], value = _ref2[1];
|
||||
if (tag === 'TOKENS') {
|
||||
push.apply(this.tokens, value);
|
||||
} else if (tag === 'STRING' && escapeQuotes) {
|
||||
escaped = value.slice(1, -1).replace(/"/g, '\\"');
|
||||
this.token(tag, "\"" + (escaped) + "\"");
|
||||
} else {
|
||||
this.token(tag, value);
|
||||
}
|
||||
if (i < tokens.length - 1) {
|
||||
_ref2 = tokens[i], tag = _ref2[0], value = _ref2[1];
|
||||
if (i) {
|
||||
this.token('+', '+');
|
||||
}
|
||||
if (tag === 'TOKENS') {
|
||||
push.apply(this.tokens, value);
|
||||
continue;
|
||||
}
|
||||
if (regex) {
|
||||
value = value.slice(1, -1);
|
||||
value = value.replace(/[\\\"]/g, '\\$&');
|
||||
if (options.heregex) {
|
||||
value = value.replace(HEREGEX_OMIT, '');
|
||||
}
|
||||
value = '"' + value + '"';
|
||||
}
|
||||
this.token(tag, value);
|
||||
}
|
||||
if (interpolated) {
|
||||
this.token(')', ')');
|
||||
@@ -587,14 +615,17 @@
|
||||
SIMPLESTR = /^'[^\\']*(?:\\.[^\\']*)*'/;
|
||||
JSTOKEN = /^`[^\\`]*(?:\\.[^\\`]*)*`/;
|
||||
REGEX_START = /^\/([^\/])/;
|
||||
REGEX_INTERPOLATION = /[^\\]#\{.*[^\\]\}/;
|
||||
REGEX_END = /^[imgy]{0,4}(?![a-zA-Z])/;
|
||||
REGEX_ESCAPE = /\\[^#]/g;
|
||||
HEREGEX = /^\/{3}([\s\S]+?)\/{3}([imgy]{0,4})(?![A-Za-z])/;
|
||||
HEREGEX_OMIT = /\s+(?:#.*)?/g;
|
||||
MULTILINER = /\n/g;
|
||||
NO_NEWLINE = /^(?:[-+*&|\/%=<>!.\\][<>=&|]*|and|or|is(?:nt)?|n(?:ot|ew)|delete|typeof|instanceof)$/;
|
||||
HEREDOC_INDENT = /\n+([ \t]*)/g;
|
||||
ASSIGNED = /^\s*@?[$A-Za-z_][$\w]*[ \t]*?[:=][^:=>]/;
|
||||
NEXT_CHARACTER = /^\s*(\S?)/;
|
||||
LEADING_SPACES = /^\s+/;
|
||||
TRAILING_SPACES = /\s+$/;
|
||||
COMPOUND_ASSIGN = ['-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=', '<<=', '>>=', '>>>=', '&=', '^=', '|='];
|
||||
UNARY = ['UMINUS', 'UPLUS', '!', '!!', '~', 'NEW', 'TYPEOF', 'DELETE'];
|
||||
LOGIC = ['&', '|', '^', '&&', '||'];
|
||||
|
||||
Reference in New Issue
Block a user