Refactor interpolation (and string and regex) handling in lexer

- Fix #3394: Unclosed single-quoted strings (both regular ones and heredocs)
  used to pass through the lexer, causing a parsing error later, while
  double-quoted strings caused an error already in the lexing phase. Now both
  single and double-quoted unclosed strings error out in the lexer (which is the
  more logical option) with consistent error messages. This also fixes the last
  comment by @satyr in #3301.

- Similar to the above, unclosed heregexes also used to pass through the lexer
  and did not error until the parsing phase, which resulted in confusing error
  messages. This has been fixed, too.

- Fix #3348, by adding passing tests.

- Fix #3529: If a string starts with an interpolation, an empty string is no
  longer emitted before the interpolation (unless it is needed to coerce the
  interpolation into a string).

- Block comments cannot contain `*/`. Now the error message also shows exactly
  where the offending `*/` is. This improvement might seem unrelated, but I had
  to touch that code anyway to refactor string and regex related code, and the
  change was very trivial. Moreover, it's consistent with the next two points.

- Regexes cannot start with `*`. Now the error message also shows exactly where
  the offending `*` is. (It might actually not be exactly at the start in
  heregexes.) It is a very minor improvement, but it was trivial to add.

- Octal escapes in strings are forbidden in CoffeeScript (just like in
  JavaScript strict mode). However, this used to be the case only for regular
  strings. Now they are also forbidden in heredocs. Moreover, the errors now
  point at the offending octal escape.

- Invalid regex flags are no longer allowed. This includes repeated modifiers
  and unknown ones. Moreover, invalid modifiers do not stop a heregex from
  being matched, which results in better error messages.

- Fix #3621: `///a#{1}///` compiles to `RegExp("a" + 1)`. So does
  `RegExp("a#{1}")`. Still, those two code snippets used to generate different
  tokens, which is a bit weird, but more importantly causes problems for
  coffeelint (see clutchski/coffeelint#340). This required lots of tests in
  test/location.coffee to be updated. Note that some updates to those tests are
  unrelated to this point; some have been updated to be more consistent (I
  discovered this because the refactored code happened to be seemingly more
  correct).

- Regular regex literals used to erroneously allow newlines to be escaped,
  causing invalid JavaScript output. This has been fixed.

- Heregexes may now be completely empty (`//////`), instead of erroring out with
  a confusing message.

- Fix #2388: Heredocs and heregexes used to be lexed simply, which meant that
  you couldn't nest a heredoc within a heredoc (double-quoted, that is) or a
  heregex inside a heregex.

- Fix #2321: If you used division inside interpolation and then a slash later in
  the string containing that interpolation, the division slash and the latter
  slash were erroneously matched as a regex. This has been fixed.

- Indentation inside interpolations in heredocs no longer affects how much
  indentation is removed from each line of the heredoc (which is more
  intuitive).

- Whitespace is now correctly trimmed from the start and end of strings in a few
  edge cases.

- Last but not least, the lexing of interpolated strings now seems to be more
  efficient. For a regular double-quoted string, we used to use a custom
  function to find the end of it (taking interpolations and interpolations
  within interpolations etc. into account). Then we used to re-find the
  interpolations and recursively lex their contents. In effect, the same string
  was processed twice, or even more in the case of deeper nesting of
  interpolations. Now the same string is processed just once.

- Code duplication between regular strings, heredocs, regular regexes and
  heregexes has been reduced.

- The above two points should result in more easily read code, too.
This commit is contained in:
Simon Lydell
2015-01-03 23:40:43 +01:00
parent 8e4fb1b937
commit 0dcff507fb
17 changed files with 904 additions and 652 deletions

View File

@@ -89,7 +89,7 @@ if require?
test "#1096: unexpected generated tokens", ->
# Unexpected interpolation
assertErrorFormat '{"#{key}": val}', '''
[stdin]:1:3: error: unexpected string interpolation
[stdin]:1:3: error: unexpected interpolation
{"#{key}": val}
^^
'''
@@ -139,3 +139,224 @@ test "explicit indentation errors", ->
c
^^
'''
# Unterminated string literals: every case passes a source snippet and the
# expected formatted error ("missing <delimiter>" plus line:column) to
# assertErrorFormat, which is defined outside this view.
test "unclosed strings", ->
# A lone opening delimiter of each string kind; the error is expected at 1:1.
assertErrorFormat '''
'
''', '''
[stdin]:1:1: error: missing '
'
^
'''
assertErrorFormat '''
"
''', '''
[stdin]:1:1: error: missing "
"
^
'''
assertErrorFormat """
'''
""", """
[stdin]:1:1: error: missing '''
'''
^
"""
assertErrorFormat '''
"""
''', '''
[stdin]:1:1: error: missing """
"""
^
'''
# An unclosed string opened inside an interpolation: the expected column is
# that of the inner opening delimiter, not the outer one.
assertErrorFormat '''
"#{"
''', '''
[stdin]:1:4: error: missing "
"#{"
^
'''
assertErrorFormat '''
"""#{"
''', '''
[stdin]:1:6: error: missing "
"""#{"
^
'''
assertErrorFormat '''
"#{"""
''', '''
[stdin]:1:4: error: missing """
"#{"""
^
'''
assertErrorFormat '''
"""#{"""
''', '''
[stdin]:1:6: error: missing """
"""#{"""
^
'''
# Same situation, but the outer literal is a heregex.
assertErrorFormat '''
///#{"""
''', '''
[stdin]:1:6: error: missing """
///#{"""
^
'''
# Several nesting levels spread over multiple lines: the expected error is
# the unclosed single quote on line 4.
assertErrorFormat '''
"a
#{foo """
bar
#{ +'12 }
baz
"""} b"
''', '''
[stdin]:4:11: error: missing '
#{ +'12 }
^
'''
# https://github.com/jashkenas/coffeescript/issues/3301#issuecomment-31735168
assertErrorFormat '''
# Note the double escaping; this would be `"""a\"""` real code.
"""a\\"""
''', '''
[stdin]:2:1: error: missing """
"""a\\"""
^
'''
# Unterminated heregexes are expected to be reported as "missing ///" at the
# opening delimiter (column 1 of its line).
test "unclosed heregexes", ->
assertErrorFormat '''
///
''', '''
[stdin]:1:1: error: missing ///
///
^
'''
# https://github.com/jashkenas/coffeescript/issues/3301#issuecomment-31735168
# Here a backslash directly precedes the would-be closing `///`; the expected
# result is still "missing ///" at the opening delimiter on line 2.
assertErrorFormat '''
# Note the double escaping; this would be `///a\///` real code.
///a\\///
''', '''
[stdin]:2:1: error: missing ///
///a\\///
^
'''
# Text glued directly onto a closed string. Without a further quote the
# expected error is "unexpected bar" at 1:6; with a trailing quote, that quote
# opens a new string that is never closed, so the expected error is a
# "missing <quote>" at 1:9.
test "unexpected token after string", ->
# Parsing error.
assertErrorFormat '''
'foo'bar
''', '''
[stdin]:1:6: error: unexpected bar
'foo'bar
^^^
'''
assertErrorFormat '''
"foo"bar
''', '''
[stdin]:1:6: error: unexpected bar
"foo"bar
^^^
'''
# Lexing error.
assertErrorFormat '''
'foo'bar'
''', '''
[stdin]:1:9: error: missing '
'foo'bar'
^
'''
assertErrorFormat '''
"foo"bar"
''', '''
[stdin]:1:9: error: missing "
"foo"bar"
^
'''
# An interpolated string used as an object key inside an outer interpolation
# that starts with a space. The expected error column (7) is where the inner
# `#{` sits in the source line.
test "#3348: Location data is wrong in interpolations with leading whitespace", ->
assertErrorFormat '''
"#{ {"#{key}": val} }"
''', '''
[stdin]:1:7: error: unexpected interpolation
"#{ {"#{key}": val} }"
^^
'''
# The snippet contains `\0`, `\t`, an escaped backslash and then `\07`. Only
# the `\07` escape is expected to be reported, at column 10 of the compiled
# source (backslashes are doubled here because the snippet is itself inside a
# string).
test "octal escapes", ->
assertErrorFormat '''
"a\\0\\tb\\\\\\07c"
''', '''
[stdin]:1:10: error: octal escape sequences are not allowed \\07
"a\\0\\tb\\\\\\07c"
\ \ \ \ ^
'''
# A `###` block comment whose text contains `*/`. The expected error is
# reported on the comment's second line (2:12) rather than at the start of the
# comment.
test "illegal herecomment", ->
assertErrorFormat '''
###
Regex: /a*/g
###
''', '''
[stdin]:2:12: error: block comments cannot contain */
Regex: /a*/g
^
'''
# Regexes whose pattern starts with `*`. For a regular regex the expected
# location is 1:2, directly after the opening slash. For the heregex the `*`
# is on the line after the opening `///` and the expected location is 2:3.
test "#1724: regular expressions beginning with *", ->
assertErrorFormat '''
/* foo/
''', '''
[stdin]:1:2: error: regular expressions cannot begin with *
/* foo/
^
'''
assertErrorFormat '''
///
* foo
///
''', '''
[stdin]:2:3: error: regular expressions cannot begin with *
* foo
^
'''
# Repeated flags (`ii`, `gimi`) and unknown flags (`G`, `g_`) are expected to
# be rejected. The expected message echoes the whole flag run, and the
# expected location is its first character (column 4 after `/a/`, column 8
# after `///a///`).
test "invalid regex flags", ->
assertErrorFormat '''
/a/ii
''', '''
[stdin]:1:4: error: invalid regular expression flags ii
/a/ii
^
'''
assertErrorFormat '''
/a/G
''', '''
[stdin]:1:4: error: invalid regular expression flags G
/a/G
^
'''
assertErrorFormat '''
/a/gimi
''', '''
[stdin]:1:4: error: invalid regular expression flags gimi
/a/gimi
^
'''
assertErrorFormat '''
/a/g_
''', '''
[stdin]:1:4: error: invalid regular expression flags g_
/a/g_
^
'''
assertErrorFormat '''
///a///ii
''', '''
[stdin]:1:8: error: invalid regular expression flags ii
///a///ii
^
'''
# Each of y, m, g and i used exactly once, in any order, must still compile.
doesNotThrow -> CoffeeScript.compile '/a/ymgi'

View File

@@ -34,6 +34,17 @@ eq "#{6/2}
eq "#{/// "'/'"/" ///}", '/"\'\\/\'"\\/"/' # heregex, stuffed with spicy characters
eq "#{/\\'/}", "/\\\\'/"
# Issue #2321: Regex/division conflict in interpolation
eq "#{4/2}/", '2/'
curWidth = 4
eq "<i style='left:#{ curWidth/2 }%;'></i>", "<i style='left:2%;'></i>"
throws -> CoffeeScript.compile '''
"<i style='left:#{ curWidth /2 }%;'></i>"'''
# valid regex--^^^^^^^^^^^ ^--unclosed string
eq "<i style='left:#{ curWidth/2 }%;'></i>", "<i style='left:2%;'></i>"
eq "<i style='left:#{ curWidth/ 2 }%;'></i>", "<i style='left:2%;'></i>"
eq "<i style='left:#{ curWidth / 2 }%;'></i>", "<i style='left:2%;'></i>"
hello = 'Hello'
world = 'World'
ok '#{hello} #{world}!' is '#{hello} #{world}!'
@@ -42,6 +53,10 @@ ok "[#{hello}#{world}]" is '[HelloWorld]'
ok "#{hello}##{world}" is 'Hello#World'
ok "Hello #{ 1 + 2 } World" is 'Hello 3 World'
ok "#{hello} #{ 1 + 2 } #{world}" is "Hello 3 World"
ok 1 + "#{2}px" is '12px'
ok isNaN "a#{2}" * 2
ok "#{2}" is '2'
ok "#{2}#{2}" is '22'
[s, t, r, i, n, g] = ['s', 't', 'r', 'i', 'n', 'g']
ok "#{s}#{t}#{r}#{i}#{n}#{g}" is 'string'

View File

@@ -83,8 +83,8 @@ test 'Verify locations in string interpolation (in "string")', ->
test 'Verify locations in string interpolation (in "string", multiple interpolation)', ->
tokens = CoffeeScript.tokens '"#{a}b#{c}"'
eq tokens.length, 10
[{}, {}, {}, a, {}, b, {}, c] = tokens
eq tokens.length, 8
[{}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 0
eq a[2].first_column, 3
@@ -104,8 +104,8 @@ test 'Verify locations in string interpolation (in "string", multiple interpolat
test 'Verify locations in string interpolation (in "string", multiple interpolation and line breaks)', ->
tokens = CoffeeScript.tokens '"#{a}\nb\n#{c}"'
eq tokens.length, 10
[{}, {}, {}, a, {}, b, {}, c] = tokens
eq tokens.length, 8
[{}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 0
eq a[2].first_column, 3
@@ -125,8 +125,8 @@ test 'Verify locations in string interpolation (in "string", multiple interpolat
test 'Verify locations in string interpolation (in "string", multiple interpolation and starting with line breaks)', ->
tokens = CoffeeScript.tokens '"\n#{a}\nb\n#{c}"'
eq tokens.length, 10
[{}, {}, {}, a, {}, b, {}, c] = tokens
eq tokens.length, 8
[{}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 1
eq a[2].first_column, 2
@@ -146,8 +146,8 @@ test 'Verify locations in string interpolation (in "string", multiple interpolat
test 'Verify locations in string interpolation (in "string", multiple interpolation and starting with line breaks)', ->
tokens = CoffeeScript.tokens '"\n\n#{a}\n\nb\n\n#{c}"'
eq tokens.length, 10
[{}, {}, {}, a, {}, b, {}, c] = tokens
eq tokens.length, 8
[{}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 2
eq a[2].first_column, 2
@@ -167,8 +167,8 @@ test 'Verify locations in string interpolation (in "string", multiple interpolat
test 'Verify locations in string interpolation (in "string", multiple interpolation and starting with line breaks)', ->
tokens = CoffeeScript.tokens '"\n\n\n#{a}\n\n\nb\n\n\n#{c}"'
eq tokens.length, 10
[{}, {}, {}, a, {}, b, {}, c] = tokens
eq tokens.length, 8
[{}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 3
eq a[2].first_column, 2
@@ -209,13 +209,8 @@ test 'Verify locations in string interpolation (in """string""", line breaks)',
test 'Verify locations in string interpolation (in """string""", starting with a line break)', ->
tokens = CoffeeScript.tokens '"""\n#{b}\nc"""'
eq tokens.length, 8
[{}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 0
eq a[2].first_column, 0
eq a[2].last_line, 0
eq a[2].last_column, 0
eq tokens.length, 6
[{}, b, {}, c] = tokens
eq b[2].first_line, 1
eq b[2].first_column, 2
@@ -233,8 +228,8 @@ test 'Verify locations in string interpolation (in """string""", starting with l
eq tokens.length, 8
[{}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 1
eq a[2].first_column, 0
eq a[2].first_line, 0
eq a[2].first_column, 3
eq a[2].last_line, 1
eq a[2].last_column, 0
@@ -251,8 +246,8 @@ test 'Verify locations in string interpolation (in """string""", starting with l
test 'Verify locations in string interpolation (in """string""", multiple interpolation)', ->
tokens = CoffeeScript.tokens '"""#{a}\nb\n#{c}"""'
eq tokens.length, 10
[{}, {}, {}, a, {}, b, {}, c] = tokens
eq tokens.length, 8
[{}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 0
eq a[2].first_column, 5
@@ -315,7 +310,7 @@ test 'Verify locations in heregex interpolation (in ///regex///, multiple interp
tokens = CoffeeScript.tokens '///#{a}b#{c}///'
eq tokens.length, 11
[{}, {}, {}, {}, a, {}, b, {}, c] = tokens
[{}, {}, {}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 0
eq a[2].first_column, 5
@@ -335,8 +330,8 @@ test 'Verify locations in heregex interpolation (in ///regex///, multiple interp
test 'Verify locations in heregex interpolation (in ///regex///, multiple interpolation)', ->
tokens = CoffeeScript.tokens '///a#{b}c///'
eq tokens.length, 9
[{}, {}, a, {}, b, {}, c] = tokens
eq tokens.length, 11
[{}, {}, {}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 0
eq a[2].first_column, 3
@@ -357,7 +352,7 @@ test 'Verify locations in heregex interpolation (in ///regex///, multiple interp
tokens = CoffeeScript.tokens '///#{a}\nb\n#{c}///'
eq tokens.length, 11
[{}, {}, {}, {}, a, {}, b, {}, c] = tokens
[{}, {}, {}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 0
eq a[2].first_column, 5
@@ -378,7 +373,7 @@ test 'Verify locations in heregex interpolation (in ///regex///, multiple interp
tokens = CoffeeScript.tokens '///#{a}\n\n\nb\n\n\n#{c}///'
eq tokens.length, 11
[{}, {}, {}, {}, a, {}, b, {}, c] = tokens
[{}, {}, {}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 0
eq a[2].first_column, 5
@@ -398,8 +393,8 @@ test 'Verify locations in heregex interpolation (in ///regex///, multiple interp
test 'Verify locations in heregex interpolation (in ///regex///, multiple interpolation and line breaks)', ->
tokens = CoffeeScript.tokens '///a\n\n\n#{b}\n\n\nc///'
eq tokens.length, 9
[{}, {}, a, {}, b, {}, c] = tokens
eq tokens.length, 11
[{}, {}, {}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 0
eq a[2].first_column, 3
@@ -416,11 +411,11 @@ test 'Verify locations in heregex interpolation (in ///regex///, multiple interp
eq c[2].last_line, 6
eq c[2].last_column, 0
test 'Verify locations in heregex interpolation (in ///regex///, multiple interpolation and line breaks and stating with linebreak)', ->
test 'Verify locations in heregex interpolation (in ///regex///, multiple interpolation and line breaks and starting with linebreak)', ->
tokens = CoffeeScript.tokens '///\n#{a}\nb\n#{c}///'
eq tokens.length, 11
[{}, {}, {}, {}, a, {}, b, {}, c] = tokens
[{}, {}, {}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 1
eq a[2].first_column, 2
@@ -437,11 +432,11 @@ test 'Verify locations in heregex interpolation (in ///regex///, multiple interp
eq c[2].last_line, 3
eq c[2].last_column, 2
test 'Verify locations in heregex interpolation (in ///regex///, multiple interpolation and line breaks and stating with linebreak)', ->
test 'Verify locations in heregex interpolation (in ///regex///, multiple interpolation and line breaks and starting with linebreak)', ->
tokens = CoffeeScript.tokens '///\n\n\n#{a}\n\n\nb\n\n\n#{c}///'
eq tokens.length, 11
[{}, {}, {}, {}, a, {}, b, {}, c] = tokens
[{}, {}, {}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 3
eq a[2].first_column, 2
@@ -458,11 +453,11 @@ test 'Verify locations in heregex interpolation (in ///regex///, multiple interp
eq c[2].last_line, 9
eq c[2].last_column, 2
test 'Verify locations in heregex interpolation (in ///regex///, multiple interpolation and line breaks and stating with linebreak)', ->
test 'Verify locations in heregex interpolation (in ///regex///, multiple interpolation and line breaks and starting with linebreak)', ->
tokens = CoffeeScript.tokens '///\n\n\na\n\n\n#{b}\n\n\nc///'
eq tokens.length, 9
[{}, {}, a, {}, b, {}, c] = tokens
eq tokens.length, 11
[{}, {}, {}, a, {}, b, {}, c] = tokens
eq a[2].first_line, 0
eq a[2].first_column, 3
@@ -479,6 +474,19 @@ test 'Verify locations in heregex interpolation (in ///regex///, multiple interp
eq c[2].last_line, 9
eq c[2].last_column, 0
# Regression test for #3621: a heregex containing an interpolation and the
# equivalent explicit `RegExp(...)` call with an interpolated string are
# expected to lex to the same token stream.
test "#3621: Multiline regex and manual `RegExp` call with interpolation should
  result in the same tokens", ->
  tokensA = CoffeeScript.tokens 'RegExp(".*#{a}[0-9]")'
  tokensB = CoffeeScript.tokens '///.*#{a}[0-9]///'
  # Check the lengths first so the pairwise loop below cannot silently skip
  # trailing tokens of the longer stream.
  eq tokensA.length, tokensB.length
  for tokenA, i in tokensA
    tokenB = tokensB[i]
    # Elements 0 and 1 of each token are compared; element 2 (location data)
    # is deliberately left out because the two sources differ in layout.
    eq tokenA[0], tokenB[0]
    eq tokenA[1], tokenB[1]
    # Extra properties attached to tokens must agree too (both may be absent).
    eq tokenA.origin?[1], tokenB.origin?[1]
    eq tokenA.stringEnd, tokenB.stringEnd
test "Verify all tokens get a location", ->
doesNotThrow ->
tokens = CoffeeScript.tokens testScript

View File

@@ -38,8 +38,8 @@ test "#764: regular expressions should be indexable", ->
test "#584: slashes are allowed unescaped in character classes", ->
ok /^a\/[/]b$/.test 'a//b'
test "#1724: regular expressions beginning with `*`", ->
throws -> CoffeeScript.compile '/*/'
test "does not allow to escape newlines", ->
throws -> CoffeeScript.compile '/a\\\nb/'
# Heregexe(n|s)
@@ -52,6 +52,14 @@ test "a heregex will ignore whitespace and comments", ->
test "an empty heregex will compile to an empty, non-capturing group", ->
eq /(?:)/ + '', /// /// + ''
eq /(?:)/ + '', ////// + ''
# The literal below reads as the opening `///`, the body `//a/\/`, and the
# closing `///`; it is matched against a string containing `//a//`.
test "heregex starting with slashes", ->
ok /////a/\////.test ' //a// '
# Heregexes nested inside the interpolations of another heregex.
test '#2388: `///` in heregex interpolations', ->
ok ///a#{///b///}c///.test ' /a/b/c/ '
ws = ' \t'
# `scan` returns the text the given regex matches in '\t foo'.
scan = (regex) -> regex.exec('\t foo')[0]
# The inner heregex is itself built with an interpolation (`ws`). The text it
# matches is expected to become the source of the outer heregex.
eq '/\t /', /// #{scan /// [#{ws}]* ///} /// + ''
# A heregex whose only non-whitespace content is `*` must fail to compile.
test "#1724: regular expressions beginning with `*`", ->
throws -> CoffeeScript.compile '/// * ///'

View File

@@ -55,6 +55,17 @@ test "octal escape sequences prohibited", ->
strictOk "`'\\1'`"
eq "\\" + "1", `"\\1"`
# Also test other string types.
strict "'\\\\\\1'"
eq "\x008", '\08'
eq "\\\\" + "1", '\\\\1'
strict "'''\\\\\\1'''"
eq "\x008", '''\08'''
eq "\\\\" + "1", '''\\\\1'''
strict '"""\\\\\\1"""'
eq "\x008", """\08"""
eq "\\\\" + "1", """\\\\1"""
test "duplicate formal parameters are prohibited", ->
nonce = {}
# a Param can be an Identifier, ThisProperty( @-param ), Array, or Object

View File

@@ -114,6 +114,22 @@ test "#3229, multiline strings", ->
eq 'first line\
\ backslash at BOL', 'first line\ backslash at BOL'
# Backslashes at end of strings.
eq 'first line \ ', 'first line '
eq 'first line
second line \
', 'first line second line '
eq 'first line
second line
\
', 'first line second line'
eq 'first line
second line
\
', 'first line second line'
# Edge case.
eq 'lone
@@ -164,12 +180,6 @@ test "#3249, escape newlines in heredocs with backslashes", ->
""", '\n1 2\n'
# TODO: uncomment when #2388 is fixed
# eq """a heredoc #{
# "inside \
# interpolation"
# }""", "a heredoc inside interpolation"
# Handle escaped backslashes correctly.
eq '''
escaped backslash at EOL\\
@@ -185,6 +195,25 @@ test "#3249, escape newlines in heredocs with backslashes", ->
eq """first line\
\ backslash at BOL""", 'first line\ backslash at BOL'
# Backslashes at end of strings.
eq '''first line \ ''', 'first line '
eq '''
first line
second line \
''', 'first line\nsecond line '
eq '''
first line
second line
\
''', 'first line\nsecond line'
eq '''
first line
second line
\
''', 'first line\nsecond line\n'
# Edge cases.
eq '''lone
@@ -196,6 +225,28 @@ test "#3249, escape newlines in heredocs with backslashes", ->
eq '''\
''', ''
test '#2388: `"""` in heredoc interpolations', ->
eq """a heredoc #{
"inside \
interpolation"
}""", "a heredoc inside interpolation"
eq """a#{"""b"""}c""", 'abc'
eq """#{""""""}""", ''
test "trailing whitespace", ->
testTrailing = (str, expected) ->
eq CoffeeScript.eval(str.replace /\|$/gm, ''), expected
testTrailing '''" |
|
a |
|
"''', 'a'
testTrailing """''' |
|
a |
|
'''""", ' \na \n '
#647
eq "''Hello, World\\''", '''
'\'Hello, World\\\''
@@ -259,6 +310,12 @@ ok a is 'more"than"one"quote'
a = '''here's an apostrophe'''
ok a is "here's an apostrophe"
a = """""surrounded by two quotes"\""""
ok a is '""surrounded by two quotes""'
a = '''''surrounded by two apostrophes'\''''
ok a is "''surrounded by two apostrophes''"
# The indentation detector ignores blank lines without trailing whitespace
a = """
one
@@ -272,6 +329,14 @@ eq ''' line 0
to the indent level
''', ' line 0\nshould not be relevant\n to the indent level'
eq """
interpolation #{
"contents"
}
should not be relevant
to the indent level
""", 'interpolation contents\nshould not be relevant\n to the indent level'
eq ''' '\\\' ''', " '\\' "
eq """ "\\\" """, ' "\\" '