From 146b5694c263271edc0d26a144e30abce8c2e1a4 Mon Sep 17 00:00:00 2001 From: Jeremy Ashkenas Date: Thu, 17 Dec 2009 09:29:49 -0500 Subject: [PATCH] cleaned up lexer in order to add line numbers --- TODO | 2 -- examples/syntax_errors.cs | 2 ++ lib/coffee_script/lexer.rb | 39 ++++++++++++++++++++++++++------------ 3 files changed, 29 insertions(+), 14 deletions(-) create mode 100644 examples/syntax_errors.cs diff --git a/TODO b/TODO index 9878620e..8fc6ae7c 100644 --- a/TODO +++ b/TODO @@ -1,7 +1,5 @@ TODO: -* Super in methods. (reserve it). - * Need *way* better syntax errors. * Is it possible to close blocks (functions, ifs, trys) without an explicit diff --git a/examples/syntax_errors.cs b/examples/syntax_errors.cs new file mode 100644 index 00000000..5df5956c --- /dev/null +++ b/examples/syntax_errors.cs @@ -0,0 +1,2 @@ +# Identifiers run together: +a b c \ No newline at end of file diff --git a/lib/coffee_script/lexer.rb b/lib/coffee_script/lexer.rb index 7269d54c..62de56b7 100644 --- a/lib/coffee_script/lexer.rb +++ b/lib/coffee_script/lexer.rb @@ -32,6 +32,7 @@ class Lexer def tokenize(code) @code = code.chomp # Cleanup code by remove extra line breaks @i = 0 # Current character position we're parsing + @line = 1 # The current line. @tokens = [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value] while @i < @code.length @chunk = @code[@i..-1] @@ -57,35 +58,37 @@ class Lexer # Keywords are special identifiers tagged with their own name, 'if' will result # in an [:IF, "if"] token tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER - if tag == :IDENTIFIER && @tokens[-1] && @tokens[-1][1] == '.' - @tokens[-1] = [:PROPERTY_ACCESS, '.'] - end - @tokens << [tag, identifier] + @tokens[-1][0] = :PROPERTY_ACCESS if tag == :IDENTIFIER && last_value == '.' + token(tag, identifier) @i += identifier.length end def number_token return false unless number = @chunk[NUMBER, 1] float = number.include?('.') - @tokens << [:NUMBER, float ? number.to_f : number.to_i] + token(:NUMBER, float ? number.to_f : number.to_i) @i += number.length end def string_token return false unless string = @chunk[STRING, 1] - @tokens << [:STRING, string.gsub(MULTILINER, "\\\n")] + escaped = string.gsub(MULTILINER) do |match| + @line += 1 + "\\\n" + end + token(:STRING, escaped) @i += string.length end def js_token return false unless script = @chunk[JS, 1] - @tokens << [:JS, script.gsub(JS_CLEANER, '')] + token(:JS, script.gsub(JS_CLEANER, '')) @i += script.length end def regex_token return false unless regex = @chunk[REGEX, 1] - @tokens << [:REGEX, regex] + token(:REGEX, regex) @i += regex.length end @@ -106,7 +109,8 @@ class Lexer def literal_token value = @chunk[NEWLINE, 1] if value - @tokens << ["\n", "\n"] unless @tokens.last && @tokens.last[0] == "\n" + @line += value.length + token("\n", "\n") unless last_value == "\n" return @i += value.length end value = @chunk[OPERATOR, 1] @@ -114,10 +118,18 @@ class Lexer value ||= @chunk[0,1] skip_following_newlines if EXP_START.include?(value) remove_leading_newlines if EXP_END.include?(value) - @tokens << [value, value] + token(value, value) @i += value.length end + def token(tag, value) + @tokens << [tag, value] + end + + def last_value + @tokens.last && @tokens.last[1] + end + # The main source of ambiguity in our grammar was Parameter lists (as opposed # to argument lists in method calls). Tag parameter identifiers to avoid this. def tag_parameters @@ -132,11 +144,14 @@ class Lexer def skip_following_newlines newlines = @code[(@i+1)..-1][NEWLINE, 1] - @i += newlines.length if newlines + if newlines + @line += newlines.length + @i += newlines.length + end end def remove_leading_newlines - @tokens.pop if @tokens.last[1] == "\n" + @tokens.pop if last_value == "\n" end end \ No newline at end of file