first stub at heregex

This commit is contained in:
satyr
2010-10-04 08:22:42 +09:00
parent ae55c70ac5
commit c605b3e232
3 changed files with 185 additions and 126 deletions

View File

@@ -1,13 +1,12 @@
(function() {
var ASSIGNED, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, CONVERSIONS, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LINE_BREAK, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NEXT_CHARACTER, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_END, REGEX_ESCAPE, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, SHIFT, SIMPLESTR, UNARY, WHITESPACE, _ref, compact, count, include, last, starts;
var __slice = Array.prototype.slice;
var ASSIGNED, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, CONVERSIONS, HEREDOC, HEREDOC_INDENT, HEREGEX, HEREGEX_OMIT, IDENTIFIER, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_SPACES, LINE_BREAK, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NEXT_CHARACTER, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_END, REGEX_ESCAPE, REGEX_START, RESERVED, Rewriter, SHIFT, SIMPLESTR, TRAILING_SPACES, UNARY, WHITESPACE, _ref, compact, count, include, last, starts;
Rewriter = require('./rewriter').Rewriter;
_ref = require('./helpers'), include = _ref.include, count = _ref.count, starts = _ref.starts, compact = _ref.compact, last = _ref.last;
exports.Lexer = (function() {
Lexer = function() {};
Lexer.prototype.tokenize = function(code, options) {
var o;
code = code.replace(/\r/g, '').replace(/\s+$/, '');
code = code.replace(/\r/g, '').replace(TRAILING_SPACES, '');
o = options || {};
this.code = code;
this.i = 0;
@@ -101,10 +100,14 @@
this.token('STRING', (string = match[0]).replace(MULTILINER, '\\\n'));
break;
case '"':
if (!(string = this.balancedToken(['"', '"'], ['#{', '}']))) {
if (!(string = this.balancedString(this.chunk, [['"', '"'], ['#{', '}']]))) {
return false;
}
this.interpolateString(string);
if (~string.indexOf('#{')) {
this.interpolateString(string);
} else {
this.token('STRING', this.escapeLines(string));
}
break;
default:
return false;
@@ -115,7 +118,7 @@
};
Lexer.prototype.heredocToken = function() {
var doc, heredoc, match, quote;
if (!(match = this.chunk.match(HEREDOC))) {
if (!(match = HEREDOC.exec(this.chunk))) {
return false;
}
heredoc = match[0];
@@ -124,12 +127,12 @@
quote: quote,
indent: null
});
if (quote === '"') {
if (quote === '"' && ~doc.indexOf('#{')) {
this.interpolateString(quote + doc + quote, {
heredoc: true
});
} else {
this.token('STRING', quote + doc + quote);
this.token('STRING', quote + this.escapeLines(doc, true) + quote);
}
this.line += count(heredoc, '\n');
this.i += heredoc.length;
@@ -162,8 +165,14 @@
return true;
};
Lexer.prototype.regexToken = function() {
var _ref2, end, first, flags, regex, str;
if (!(first = this.chunk.match(REGEX_START))) {
var _ref2, end, first, flags, match, regex, str;
if (this.chunk.charAt(0) !== '/') {
return false;
}
if (match = HEREGEX.exec(this.chunk)) {
return this.heregexToken(match);
}
if (!(first = REGEX_START.exec(this.chunk))) {
return false;
}
if (first[1] === ' ' && !('CALL_START' === (_ref2 = this.tag()) || '=' === _ref2)) {
@@ -172,34 +181,48 @@
if (include(NOT_REGEX, this.tag())) {
return false;
}
if (!(regex = this.balancedToken(['/', '/']))) {
if (!(regex = this.balancedString(this.chunk, [['/', '/']]))) {
return false;
}
if (!(end = this.chunk.slice(regex.length).match(REGEX_END))) {
return false;
}
flags = end[0];
if (REGEX_INTERPOLATION.test(regex)) {
if (~regex.indexOf('#{')) {
str = regex.slice(1, -1);
str = str.replace(REGEX_ESCAPE, '\\$&');
this.tokens.push(['(', '('], ['IDENTIFIER', 'RegExp'], ['CALL_START', '(']);
this.tokens.push(['IDENTIFIER', 'RegExp'], ['CALL_START', '(']);
this.interpolateString("\"" + (str) + "\"", {
escapeQuotes: true
regex: true
});
if (flags) {
this.tokens.push([',', ','], ['STRING', ("\"" + (flags) + "\"")]);
}
this.tokens.push([')', ')'], [')', ')']);
this.tokens.push(['CALL_END', ')']);
} else {
this.token('REGEX', regex + flags);
}
this.i += regex.length + flags.length;
return true;
};
Lexer.prototype.balancedToken = function() {
var delimited;
delimited = __slice.call(arguments, 0);
return this.balancedString(this.chunk, delimited);
/**
 * Emits the token(s) for a "heregex" -- a `///`-delimited regular expression.
 *
 * Without `#{` in the body, a single REGEX token is emitted, with everything
 * matched by HEREGEX_OMIT stripped from the body. With `#{` in the body, the
 * tokens for a `RegExp(<interpolated string>[, "<flags>"])` call are emitted
 * instead, delegating the body to `interpolateString`.
 *
 * @param {Array} match Result of `HEREGEX.exec(chunk)`:
 *   `[wholeMatch, body, flags]`.
 * @returns {boolean} Always `true`; the caller has already established that
 *   the chunk starts with a heregex.
 */
Lexer.prototype.heregexToken = function(match) {
  var body, flags, heregex, re;
  heregex = match[0];
  body = match[1];
  flags = match[2];
  this.i += heregex.length;
  if (body.indexOf('#{') < 0) {
    re = body.replace(HEREGEX_OMIT, '');
    // A body made only of whitespace/comments would otherwise yield `//`,
    // which JavaScript reads as a line comment; emit an empty group instead.
    this.token('REGEX', '/' + (re || '(?:)') + '/' + flags);
  } else {
    this.token('IDENTIFIER', 'RegExp');
    this.tokens.push(['CALL_START', '(']);
    this.interpolateString('"' + body + '"', {
      regex: true,
      heregex: true
    });
    if (flags) {
      this.tokens.push([',', ','], ['STRING', '"' + flags + '"']);
    }
    this.tokens.push(['CALL_END', ')']);
  }
  // Account for the newlines consumed by the heregex (same as heredocToken)
  // only after its tokens are emitted, so they are emitted while `this.line`
  // still holds the starting line and later tokens see the advanced count.
  this.line += count(heregex, '\n');
  return true;
};
Lexer.prototype.lineToken = function() {
var diff, indent, match, nextCharacter, noNewlines, prev, size;
@@ -450,7 +473,8 @@
i += 1;
}
break;
} else if (starts(str, open, i)) {
}
if (starts(str, open, i)) {
levels.push(pair);
i += open.length - 1;
break;
@@ -471,74 +495,78 @@
return !i ? false : str.slice(0, i);
};
Lexer.prototype.interpolateString = function(str, options) {
var _len, _ref2, end, escapeQuotes, escaped, expr, heredoc, i, idx, inner, interpolated, lexer, nested, pi, push, quote, s, tag, tok, token, tokens, value;
_ref2 = options || {}, heredoc = _ref2.heredoc, escapeQuotes = _ref2.escapeQuotes;
quote = str.charAt(0);
if (quote !== '"' || str.length < 3) {
return this.token('STRING', str);
var _i, _len, _ref2, char, expr, heredoc, i, inner, interpolated, lexer, nested, pi, push, regex, s, tag, tok, tokens, value;
if (str.length < 5) {
return this.token('STRING', this.escapeLines(str, heredoc));
}
_ref2 = options || (options = {}), heredoc = _ref2.heredoc, regex = _ref2.regex;
lexer = new Lexer;
tokens = [];
i = (pi = 1);
end = str.length - 1;
while (i < end) {
if (str.charAt(i) === '\\') {
pi = 1;
i = 0;
while (char = str.charAt(i += 1)) {
if (char === '\\') {
i += 1;
} else if (expr = this.balancedString(str.slice(i), [['#{', '}']])) {
if (pi < i) {
s = quote + this.escapeLines(str.slice(pi, i), heredoc) + quote;
tokens.push(['STRING', s]);
}
inner = expr.slice(2, -1).replace(/^[ \t]*\n/, '');
if (inner.length) {
if (heredoc) {
inner = inner.replace(RegExp('\\\\' + quote, 'g'), quote);
}
nested = lexer.tokenize("(" + (inner) + ")", {
line: this.line
});
for (idx = 0, _len = nested.length; idx < _len; idx++) {
tok = nested[idx];
if (tok[0] === 'CALL_END') {
(tok[0] = ')');
}
}
nested.pop();
tokens.push(['TOKENS', nested]);
} else {
tokens.push(['STRING', quote + quote]);
}
i += expr.length - 1;
pi = i + 1;
continue;
}
i += 1;
if (!(char === '#' && str.charAt(i + 1) === '{' && (expr = this.balancedString(str.slice(i + 1), [['{', '}']])))) {
continue;
}
if (pi < i) {
tokens.push(['STRING', '"' + this.escapeLines(str.slice(pi, i), heredoc) + '"']);
}
inner = expr.slice(1, -1).replace(LEADING_SPACES, '').replace(TRAILING_SPACES, '');
if (inner.length) {
if (heredoc) {
inner = inner.replace(/\\\"/g, '"');
}
nested = lexer.tokenize("(" + (inner) + ")", {
line: this.line
});
for (_i = 0, _len = nested.length; _i < _len; _i++) {
tok = nested[_i];
if (tok[0] === 'CALL_END') {
(tok[0] = ')');
}
}
nested.pop();
tokens.push(['TOKENS', nested]);
} else {
tokens.push(['STRING', '""']);
}
i += expr.length;
pi = i + 1;
}
if ((i > pi) && (pi < str.length - 1)) {
s = str.slice(pi, i).replace(MULTILINER, heredoc ? '\\n' : '');
tokens.push(['STRING', quote + s + quote]);
s = this.escapeLines(str.slice(pi, -1), heredoc);
tokens.push(['STRING', '"' + s + '"']);
}
if (tokens[0][0] !== 'STRING') {
tokens.unshift(['STRING', '""']);
}
interpolated = tokens.length > 1;
interpolated = !regex && tokens.length > 1;
if (interpolated) {
this.token('(', '(');
}
push = tokens.push;
for (i = 0, _len = tokens.length; i < _len; i++) {
token = tokens[i];
_ref2 = token, tag = _ref2[0], value = _ref2[1];
if (tag === 'TOKENS') {
push.apply(this.tokens, value);
} else if (tag === 'STRING' && escapeQuotes) {
escaped = value.slice(1, -1).replace(/"/g, '\\"');
this.token(tag, "\"" + (escaped) + "\"");
} else {
this.token(tag, value);
}
if (i < tokens.length - 1) {
_ref2 = tokens[i], tag = _ref2[0], value = _ref2[1];
if (i) {
this.token('+', '+');
}
if (tag === 'TOKENS') {
push.apply(this.tokens, value);
continue;
}
if (regex) {
value = value.slice(1, -1);
value = value.replace(/[\\\"]/g, '\\$&');
if (options.heregex) {
value = value.replace(HEREGEX_OMIT, '');
}
value = '"' + value + '"';
}
this.token(tag, value);
}
if (interpolated) {
this.token(')', ')');
@@ -587,14 +615,17 @@
// A single-quoted string at the start of the input; backslash escapes
// (including an escaped quote) are allowed inside.
SIMPLESTR = /^'[^\\']*(?:\\.[^\\']*)*'/;
// A backtick-delimited span at the start of the input, with the same
// backslash-escape handling as SIMPLESTR.
JSTOKEN = /^`[^\\`]*(?:\\.[^\\`]*)*`/;
// A leading `/` followed by one captured character that is not another `/`.
REGEX_START = /^\/([^\/])/;
// A `#{ ... }` span in which neither the `#` nor the closing `}` is directly
// preceded by a backslash.
REGEX_INTERPOLATION = /[^\\]#\{.*[^\\]\}/;
// Zero to four regex flag letters (i, m, g, y) that are not followed by
// another ASCII letter.
REGEX_END = /^[imgy]{0,4}(?![a-zA-Z])/;
// Every backslash followed by a character other than `#`.
REGEX_ESCAPE = /\\[^#]/g;
// A `///`-delimited regex at the start of the input: group 1 is the
// (non-empty, lazily matched) body, group 2 the flags; the flags must not be
// followed by an ASCII letter.
HEREGEX = /^\/{3}([\s\S]+?)\/{3}([imgy]{0,4})(?![A-Za-z])/;
// A run of whitespace, optionally followed by a `#` comment extending to the
// end of the line; used to strip layout and comments from a heregex body.
HEREGEX_OMIT = /\s+(?:#.*)?/g;
// Every newline character.
MULTILINER = /\n/g;
// A whole string that is exactly one of the listed operators (optionally
// followed by `<`, `>`, `=`, `&` or `|` characters) or one of the listed
// keywords. NOTE(review): presumably the token values after which a newline
// is not treated as a terminator -- confirm against the usage site.
NO_NEWLINE = /^(?:[-+*&|\/%=<>!.\\][<>=&|]*|and|or|is(?:nt)?|n(?:ot|ew)|delete|typeof|instanceof)$/;
// One or more newlines followed by a captured run of spaces/tabs (group 1).
HEREDOC_INDENT = /\n+([ \t]*)/g;
// Optional whitespace, an optional `@`, an identifier, optional spaces/tabs,
// then `:` or `=` followed by a character that is not `:`, `=` or `>`.
ASSIGNED = /^\s*@?[$A-Za-z_][$\w]*[ \t]*?[:=][^:=>]/;
// Skips leading whitespace and captures the next non-whitespace character
// (group 1 is empty when there is none).
NEXT_CHARACTER = /^\s*(\S?)/;
// Whitespace at the very start of a string.
LEADING_SPACES = /^\s+/;
// Whitespace at the very end of a string.
TRAILING_SPACES = /\s+$/;
COMPOUND_ASSIGN = ['-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=', '<<=', '>>=', '>>>=', '&=', '^=', '|='];
UNARY = ['UMINUS', 'UPLUS', '!', '!!', '~', 'NEW', 'TYPEOF', 'DELETE'];
LOGIC = ['&', '|', '^', '&&', '||'];