Fix #3410, #3182: Allow regex to start with space or =

A regex may not follow a specific set of tokens. These were already known before
in the `NOT_REGEX` and `NOT_SPACED_REGEX` arrays. (However, I've refactored them
to be more correct and to add a few missing tokens). In all other cases (except
after a spaced callable) a slash is the start of a regex, and may now start with
a space or an equals sign. It’s really that simple!

A slash after a spaced callable is the only ambigous case. We cannot know if
that's division or function application with a regex as the argument. The
spacing determines which is which:

Space on both sides:
- `a / b/i`  -> `a / b / i`
- `a /= b/i` -> `a /= b / i`

No spaces:
- `a/b/i`    -> `a / b / i`
- `a/=b/i`   -> `a /= b / i`

Space on the right side:
- `a/ b/i`   -> `a / b / i`
- `a/= b/i`  -> `a /= b / i`

Space on the left side:
- `a /b/i`   -> `a(/b/i)`
- `a /=b/i`  -> `a(/=b/i)`

The last case used to compile to `a /= b / i`, but that has been changed to be
consistent with the `/` operator. The last case really looks like a regex, so it
should be parsed as one.

Moreover, you may now also space the `/` and `/=` operators with other
whitespace characters than a space (such as tabs and non-breaking spaces) for
consistency.

Lastly, unclosed regexes are now reported as such, instead of generating some
other confusing error message.

It should perhaps also be noted that apart from escaping (such as `a /\ b/`) you
may now also use parentheses to disambiguate division and regex: `a (/ b/)`. See
https://github.com/jashkenas/coffeescript/issues/3182#issuecomment-26688427.
This commit is contained in:
Simon Lydell
2015-01-10 01:48:00 +01:00
parent e769423d52
commit 8fd6258a46
4 changed files with 255 additions and 34 deletions

View File

@@ -1,6 +1,6 @@
// Generated by CoffeeScript 1.8.0
(function() {
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NOT_SPACED_REGEX, NUMBER, OCTAL_ESCAPE, OPERATOR, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, RELATION, RESERVED, Rewriter, SHIFT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, key, last, locationDataToString, repeat, starts, throwSyntaxError, _ref, _ref1,
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_BLANK_LINE, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NUMBER, OCTAL_ESCAPE, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, RELATION, RESERVED, Rewriter, SHIFT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_OMIT, STRING_SINGLE, STRING_START, TRAILING_BLANK_LINE, TRAILING_SPACES, UNARY, UNARY_MATH, VALID_FLAGS, WHITESPACE, compact, count, invertLiterate, key, last, locationDataToString, repeat, starts, throwSyntaxError, _ref, _ref1,
__indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; };
_ref = require('./rewriter'), Rewriter = _ref.Rewriter, INVERSES = _ref.INVERSES;
@@ -287,7 +287,7 @@
};
Lexer.prototype.regexToken = function() {
var end, flags, index, match, prev, re, regex, tokens, _ref2, _ref3;
var closed, end, flags, index, match, prev, re, regex, tokens, _ref2, _ref3, _ref4;
switch (false) {
case !(match = REGEX_ILLEGAL.exec(this.chunk)):
this.error("regular expressions cannot begin with " + match[2], match.index + match[1].length);
@@ -296,11 +296,20 @@
_ref2 = this.matchWithInterpolations(this.chunk.slice(3), HEREGEX, '///', 3), tokens = _ref2.tokens, index = _ref2.index;
break;
case !(match = REGEX.exec(this.chunk)):
regex = match[0];
regex = match[0], closed = match[1];
index = regex.length;
prev = last(this.tokens);
if (prev && (_ref3 = prev[0], __indexOf.call((prev.spaced ? NOT_REGEX : NOT_SPACED_REGEX), _ref3) >= 0)) {
return 0;
if (prev) {
if (prev.spaced && (_ref3 = prev[0], __indexOf.call(CALLABLE, _ref3) >= 0)) {
if (!closed || POSSIBLY_DIVISION.test(regex)) {
return 0;
}
} else if (_ref4 = prev[0], __indexOf.call(NOT_REGEX, _ref4) >= 0) {
return 0;
}
}
if (!closed) {
this.error('missing / (unclosed regex)');
}
break;
default:
@@ -845,7 +854,7 @@
HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g;
REGEX = /^\/(?![\s=])(?:[^[\/\n\\]|\\.|\[(?:\\.|[^\]\n\\])*])+\//;
REGEX = /^\/(?!\/)(?:[^[\/\n\\]|\\.|\[(?:\\.|[^\]\n\\])*])*(\/)?/;
REGEX_FLAGS = /^\w*/;
@@ -857,6 +866,8 @@
REGEX_ILLEGAL = /^(\/|\/{3}\s*)(\*)/;
POSSIBLY_DIVISION = /^\/=?\s/;
MULTILINER = /\n/g;
HERECOMMENT_ILLEGAL = /\*\//;
@@ -889,13 +900,11 @@
BOOL = ['TRUE', 'FALSE'];
NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '++', '--'];
CALLABLE = ['IDENTIFIER', ')', ']', '?', '@', 'THIS', 'SUPER'];
NOT_SPACED_REGEX = NOT_REGEX.concat(')', '}', 'THIS', 'IDENTIFIER', 'STRING', ']');
INDEXABLE = CALLABLE.concat(['NUMBER', 'STRING', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '}', '::']);
CALLABLE = ['IDENTIFIER', 'STRING', 'REGEX', ')', ']', '}', '?', '::', '@', 'THIS', 'SUPER'];
INDEXABLE = CALLABLE.concat('NUMBER', 'BOOL', 'NULL', 'UNDEFINED');
NOT_REGEX = INDEXABLE.concat(['++', '--']);
LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR'];