diff --git a/TODO b/TODO
index b8e9fc4a..2abba2f5 100644
--- a/TODO
+++ b/TODO
@@ -1,5 +1,7 @@
 TODO:
 
+* Write some tests.
+
 * Code Cleanup.
 
 * Is it possible to close blocks (functions, ifs, trys) without an explicit
diff --git a/examples/code.cs b/examples/code.cs
index b7735800..7e732c2d 100644
--- a/examples/code.cs
+++ b/examples/code.cs
@@ -16,19 +16,14 @@ run_loop: =>
 dense_object_literal: {one: 1, two: 2, three: 3}
 
 spaced_out_multiline_object: {
-  pi: 3.14159
-  list: [1, 2, 3, 4]
-  regex: /match[ing](every|thing|\/)/gi
-  three: new Idea()
 
   inner_obj: {
     freedom: => _.freedom().
   }
-
 }
 
 # Arrays:
@@ -38,6 +33,11 @@ exponents : [x => x., x => x * x., x => x * x * x.]
 
 empty: []
 
+multiline: [
+  'line one'
+  'line two'
+]
+
 # Conditionals and ternaries.
 if submarine.shields_up
   full_speed_ahead()
@@ -64,7 +64,7 @@ good ||= evil
 wine &&= cheese
 
 # Nested property access and calls.
-((moon.turn(360))).shapes[3].move({x: 45, y: 30}).position
+((moon.turn(360))).shapes[3].move({x: 45, y: 30}).position['top'].offset('x')
 
 a: b: c: 5
 
diff --git a/lib/coffee_script/grammar.y b/lib/coffee_script/grammar.y
index ef8bc8a6..adb408cf 100644
--- a/lib/coffee_script/grammar.y
+++ b/lib/coffee_script/grammar.y
@@ -53,7 +53,7 @@ rule
   | Terminator Expressions          { result = val[1] }
   ;
 
-  # All types of expressions in our language
+  # All types of expressions in our language.
   Expression:
     Literal
   | Value
@@ -70,19 +70,19 @@ rule
   | Switch
   ;
 
-  # All tokens that can terminate an expression
+  # All tokens that can terminate an expression.
   Terminator:
     "\n"
   | ";"
   ;
 
-  # All tokens that can serve to begin the second block
+  # All tokens that can serve to begin the second block of a multi-part expression.
   Then:
     THEN
   | Terminator
   ;
 
-  # All hard-coded values
+  # All hard-coded values.
   Literal:
     NUMBER                          { result = LiteralNode.new(val[0]) }
   | STRING                          { result = LiteralNode.new(val[0]) }
@@ -95,7 +95,7 @@ rule
   | CONTINUE                        { result = LiteralNode.new(val[0]) }
   ;
 
-  # Assign to a variable
+  # Assignment to a variable.
   Assign:
     Value ":" Expression            { result = AssignNode.new(val[0], val[2]) }
   ;
@@ -105,7 +105,7 @@ rule
     IDENTIFIER ":" Expression       { result = AssignNode.new(val[0], val[2], :object) }
   ;
 
-  # A Return statement.
+  # A return statement.
   Return:
     RETURN Expression               { result = ReturnNode.new(val[1]) }
   ;
@@ -150,24 +150,25 @@ rule
   | DELETE Expression               { result = OpNode.new(val[0], val[1]) }
   ;
 
-
-  # Method definition
+  # Function definition.
   Code:
     ParamList "=>" CodeBody "."     { result = CodeNode.new(val[0], val[2]) }
   | "=>" CodeBody "."               { result = CodeNode.new([], val[1]) }
   ;
 
+  # The body of a function.
   CodeBody:
     /* nothing */                   { result = Nodes.new([]) }
   | Expressions                     { result = val[0] }
   ;
 
-
+  # The parameters to a function definition.
   ParamList:
     PARAM                           { result = val }
   | ParamList "," PARAM             { result = val[0] << val[2] }
   ;
 
+  # Expressions that can be treated as values.
   Value:
     IDENTIFIER                      { result = ValueNode.new(val) }
   | Array                           { result = ValueNode.new(val) }
@@ -177,24 +178,29 @@ rule
   | Invocation Accessor             { result = ValueNode.new(val[0], [val[1]]) }
   ;
 
+  # Accessing into an object or array, through dot or index notation.
   Accessor:
     PROPERTY_ACCESS IDENTIFIER      { result = AccessorNode.new(val[1]) }
   | Index                           { result = val[0] }
   | Slice                           { result = val[0] }
   ;
 
+  # Indexing into an object or array.
   Index:
     "[" Expression "]"              { result = IndexNode.new(val[1]) }
   ;
 
+  # Array slice literal.
   Slice:
     "[" Expression "," Expression "]" { result = SliceNode.new(val[1], val[3]) }
   ;
 
+  # An object literal.
   Object:
     "{" AssignList "}"              { result = ObjectNode.new(val[1]) }
   ;
 
+  # Assignment within an object literal (comma or newline separated).
   AssignList:
     /* nothing */                   { result = []}
   | AssignObj                       { result = val }
@@ -202,27 +208,29 @@ rule
   | AssignList Terminator AssignObj { result = val[0] << val[2] }
   ;
 
-  # A method call.
+  # All flavors of function call (instantiation, super, and regular).
   Call:
     Invocation                      { result = val[0] }
   | NEW Invocation                  { result = val[1].new_instance }
   | Super                           { result = val[0] }
   ;
 
+  # A generic function invocation.
   Invocation:
     Value "(" ArgList ")"           { result = CallNode.new(val[0], val[2]) }
   ;
 
+  # Calling super.
   Super:
     SUPER "(" ArgList ")"           { result = CallNode.new(:super, val[2]) }
   ;
 
-  # An Array.
+  # The array literal.
   Array:
     "[" ArgList "]"                 { result = ArrayNode.new(val[1]) }
   ;
 
-  # A list of arguments to a method call.
+  # A list of arguments to a method call, or as the contents of an array.
   ArgList:
     /* nothing */                   { result = [] }
   | Expression                      { result = val }
@@ -296,6 +304,9 @@ rule
 end
 
+---- header
+module CoffeeScript
+
 ---- inner
 
   def parse(code)
     # @yydebug = true
@@ -308,5 +319,8 @@ end
   end
 
   def on_error(error_token_id, error_value, value_stack)
-    raise CoffeeScript::ParseError.new(token_to_str(error_token_id), error_value, value_stack)
-  end
\ No newline at end of file
+    raise ParseError.new(token_to_str(error_token_id), error_value, value_stack)
+  end
+
+---- footer
+end
\ No newline at end of file
diff --git a/lib/coffee_script/lexer.rb b/lib/coffee_script/lexer.rb
index dcf82bc1..dd0f50a6 100644
--- a/lib/coffee_script/lexer.rb
+++ b/lib/coffee_script/lexer.rb
@@ -1,157 +1,161 @@
-class Lexer
-
-  KEYWORDS = ["if", "else", "then", "unless",
-              "true", "false", "null",
-              "and", "or", "is", "aint", "not",
-              "new", "return",
-              "try", "catch", "finally", "throw",
-              "break", "continue",
-              "for", "in", "while",
-              "switch", "case",
-              "super",
-              "delete"]
-
-  IDENTIFIER = /\A([a-zA-Z$_]\w*)/
-  NUMBER     = /\A\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?))\b/i
-  STRING     = /\A("(.*?)[^\\]"|'(.*?)[^\\]')/m
-  JS         = /\A(`(.*?)`)/
-  OPERATOR   = /\A([+\*&|\/\-%=<>]+)/
-  WHITESPACE = /\A([ \t\r]+)/
-  NEWLINE    = /\A([\r\n]+)/
-  COMMENT    = /\A(#[^\r\n]*)/
-  CODE       = /\A(=>)/
-  REGEX      = /\A(\/(.*?)[^\\]\/[imgy]{0,4})/
-
-  JS_CLEANER = /(\A`|`\Z)/
-  MULTILINER = /[\r\n]/
-
-  EXP_START = ['{', '(', '[']
-  EXP_END   = ['}', ')', ']']
-
-  # This is how to implement a very simple scanner.
-  # Scan one caracter at the time until you find something to parse.
-  def tokenize(code)
-    @code = code.chomp    # Cleanup code by remove extra line breaks
-    @i = 0                # Current character position we're parsing
-    @line = 1             # The current line.
-    @tokens = []          # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
-    while @i < @code.length
-      @chunk = @code[@i..-1]
-      extract_next_token
-    end
-    @tokens
-  end
-
-  def extract_next_token
-    return if identifier_token
-    return if number_token
-    return if string_token
-    return if js_token
-    return if regex_token
-    return if remove_comment
-    return if whitespace_token
-    return literal_token
-  end
-
-  # Matching if, print, method names, etc.
-  def identifier_token
-    return false unless identifier = @chunk[IDENTIFIER, 1]
-    # Keywords are special identifiers tagged with their own name, 'if' will result
-    # in an [:IF, "if"] token
-    tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
-    @tokens[-1][0] = :PROPERTY_ACCESS if tag == :IDENTIFIER && last_value == '.'
-    token(tag, identifier)
-    @i += identifier.length
-  end
-
-  def number_token
-    return false unless number = @chunk[NUMBER, 1]
-    token(:NUMBER, number)
-    @i += number.length
-  end
-
-  def string_token
-    return false unless string = @chunk[STRING, 1]
-    escaped = string.gsub(MULTILINER) do |match|
-      @line += 1
-      "\\\n"
-    end
-    token(:STRING, escaped)
-    @i += string.length
-  end
-
-  def js_token
-    return false unless script = @chunk[JS, 1]
-    token(:JS, script.gsub(JS_CLEANER, ''))
-    @i += script.length
-  end
-
-  def regex_token
-    return false unless regex = @chunk[REGEX, 1]
-    token(:REGEX, regex)
-    @i += regex.length
-  end
-
-  def remove_comment
-    return false unless comment = @chunk[COMMENT, 1]
-    @i += comment.length
-  end
-
-  # Ignore whitespace
-  def whitespace_token
-    return false unless whitespace = @chunk[WHITESPACE, 1]
-    @i += whitespace.length
-  end
-
-  # We treat all other single characters as a token. Eg.: ( ) , . !
-  # Multi-character operators are also literal tokens, so that Racc can assign
-  # the proper order of operations. Multiple newlines get merged.
-  def literal_token
-    value = @chunk[NEWLINE, 1]
-    if value
-      @line += value.length
-      token("\n", "\n") unless last_value == "\n"
-      return @i += value.length
-    end
-    value = @chunk[OPERATOR, 1]
-    tag_parameters if value && value.match(CODE)
-    value ||= @chunk[0,1]
-    skip_following_newlines if EXP_START.include?(value)
-    remove_leading_newlines if EXP_END.include?(value)
-    token(value, value)
-    @i += value.length
-  end
-
-  def token(tag, value)
-    @tokens << [tag, Value.new(value, @line)]
-  end
-
-  def last_value
-    @tokens.last && @tokens.last[1]
-  end
-
-  # The main source of ambiguity in our grammar was Parameter lists (as opposed
-  # to argument lists in method calls). Tag parameter identifiers to avoid this.
-  def tag_parameters
-    index = 0
-    loop do
-      tok = @tokens[index -= 1]
-      next if tok[0] == ','
-      return if tok[0] != :IDENTIFIER
-      tok[0] = :PARAM
-    end
-  end
-
-  def skip_following_newlines
-    newlines = @code[(@i+1)..-1][NEWLINE, 1]
-    if newlines
-      @line += newlines.length
-      @i += newlines.length
-    end
-  end
-
-  def remove_leading_newlines
-    @tokens.pop if last_value == "\n"
-  end
-
+module CoffeeScript
+
+  class Lexer
+
+    KEYWORDS = ["if", "else", "then", "unless",
+                "true", "false", "null",
+                "and", "or", "is", "aint", "not",
+                "new", "return",
+                "try", "catch", "finally", "throw",
+                "break", "continue",
+                "for", "in", "while",
+                "switch", "case",
+                "super",
+                "delete"]
+
+    IDENTIFIER = /\A([a-zA-Z$_]\w*)/
+    NUMBER     = /\A\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?))\b/i
+    STRING     = /\A("(.*?)[^\\]"|'(.*?)[^\\]')/m
+    JS         = /\A(`(.*?)`)/
+    OPERATOR   = /\A([+\*&|\/\-%=<>]+)/
+    WHITESPACE = /\A([ \t\r]+)/
+    NEWLINE    = /\A([\r\n]+)/
+    COMMENT    = /\A(#[^\r\n]*)/
+    CODE       = /\A(=>)/
+    REGEX      = /\A(\/(.*?)[^\\]\/[imgy]{0,4})/
+
+    JS_CLEANER = /(\A`|`\Z)/
+    MULTILINER = /[\r\n]/
+
+    EXP_START = ['{', '(', '[']
+    EXP_END   = ['}', ')', ']']
+
+    # This is how to implement a very simple scanner.
+    # Scan one character at a time until you find something to parse.
+    def tokenize(code)
+      @code = code.chomp    # Clean up code by removing extra line breaks
+      @i = 0                # Current character position we're parsing
+      @line = 1             # The current line.
+      @tokens = []          # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
+      while @i < @code.length
+        @chunk = @code[@i..-1]
+        extract_next_token
+      end
+      @tokens
+    end
+
+    def extract_next_token
+      return if identifier_token
+      return if number_token
+      return if string_token
+      return if js_token
+      return if regex_token
+      return if remove_comment
+      return if whitespace_token
+      return literal_token
+    end
+
+    # Matching if, print, method names, etc.
+    def identifier_token
+      return false unless identifier = @chunk[IDENTIFIER, 1]
+      # Keywords are special identifiers tagged with their own name, 'if' will result
+      # in an [:IF, "if"] token
+      tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
+      @tokens[-1][0] = :PROPERTY_ACCESS if tag == :IDENTIFIER && last_value == '.'
+      token(tag, identifier)
+      @i += identifier.length
+    end
+
+    def number_token
+      return false unless number = @chunk[NUMBER, 1]
+      token(:NUMBER, number)
+      @i += number.length
+    end
+
+    def string_token
+      return false unless string = @chunk[STRING, 1]
+      escaped = string.gsub(MULTILINER) do |match|
+        @line += 1
+        "\\\n"
+      end
+      token(:STRING, escaped)
+      @i += string.length
+    end
+
+    def js_token
+      return false unless script = @chunk[JS, 1]
+      token(:JS, script.gsub(JS_CLEANER, ''))
+      @i += script.length
+    end
+
+    def regex_token
+      return false unless regex = @chunk[REGEX, 1]
+      token(:REGEX, regex)
+      @i += regex.length
+    end
+
+    def remove_comment
+      return false unless comment = @chunk[COMMENT, 1]
+      @i += comment.length
+    end
+
+    # Ignore whitespace
+    def whitespace_token
+      return false unless whitespace = @chunk[WHITESPACE, 1]
+      @i += whitespace.length
+    end
+
+    # We treat all other single characters as a token. Eg.: ( ) , . !
+    # Multi-character operators are also literal tokens, so that Racc can assign
+    # the proper order of operations. Multiple newlines get merged.
+    def literal_token
+      value = @chunk[NEWLINE, 1]
+      if value
+        @line += value.length
+        token("\n", "\n") unless last_value == "\n"
+        return @i += value.length
+      end
+      value = @chunk[OPERATOR, 1]
+      tag_parameters if value && value.match(CODE)
+      value ||= @chunk[0,1]
+      skip_following_newlines if EXP_START.include?(value)
+      remove_leading_newlines if EXP_END.include?(value)
+      token(value, value)
+      @i += value.length
+    end
+
+    def token(tag, value)
+      @tokens << [tag, Value.new(value, @line)]
+    end
+
+    def last_value
+      @tokens.last && @tokens.last[1]
+    end
+
+    # The main source of ambiguity in our grammar was Parameter lists (as opposed
+    # to argument lists in method calls). Tag parameter identifiers to avoid this.
+    def tag_parameters
+      index = 0
+      loop do
+        tok = @tokens[index -= 1]
+        next if tok[0] == ','
+        return if tok[0] != :IDENTIFIER
+        tok[0] = :PARAM
+      end
+    end
+
+    def skip_following_newlines
+      newlines = @code[(@i+1)..-1][NEWLINE, 1]
+      if newlines
+        @line += newlines.length
+        @i += newlines.length
+      end
+    end
+
+    def remove_leading_newlines
+      @tokens.pop if last_value == "\n"
+    end
+
+  end
+
 end
\ No newline at end of file
diff --git a/lib/coffee_script/nodes.rb b/lib/coffee_script/nodes.rb
index 6eb80900..05644abb 100644
--- a/lib/coffee_script/nodes.rb
+++ b/lib/coffee_script/nodes.rb
@@ -205,7 +205,7 @@ class AssignNode < Node
     name = @variable.compile(indent, scope) if @variable.respond_to?(:compile)
     last = @variable.respond_to?(:last) ? @variable.last : name
     opts = opts.merge({:assign => name, :last_assign => last})
-    value = @value.compile(indent, scope, opts)
+    value = @value.compile(indent + TAB, scope, opts)
     return "#{@variable}: #{value}" if @context == :object
     return "#{name} = #{value}" if @variable.properties?
     defined = scope.find(name)
diff --git a/test/lexer_test.rb b/test/lexer_test.rb
deleted file mode 100644
index 7821295d..00000000
--- a/test/lexer_test.rb
+++ /dev/null
@@ -1,2 +0,0 @@
-require "lexer"
-p Lexer.new.tokenize(File.read('code.cs'))
diff --git a/test/parser_test.rb b/test/parser_test.rb
deleted file mode 100644
index e9ede388..00000000
--- a/test/parser_test.rb
+++ /dev/null
@@ -1,19 +0,0 @@
-# Recompile the Parser.
-# With debugging and verbose: -v -g
-`racc -v -o parser.rb grammar.y`
-
-# Parse and print the compiled CoffeeScript source.
-require "parser.rb"
-js = Parser.new.parse(File.read('code.cs')).compile
-puts "\n\n"
-puts js
-
-# Pipe compiled JS through JSLint.
-puts "\n\n"
-require 'open3'
-stdin, stdout, stderr = Open3.popen3('/Users/jashkenas/Library/Application\ Support/TextMate/Bundles/JavaScript\ Tools.tmbundle/Support/bin/jsl -nologo -stdin')
-stdin.write(js)
-stdin.close
-puts stdout.read
-stdout.close
-stderr.close
\ No newline at end of file
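
Testing note (editor's sketch, not part of the patch above): this change deletes the two ad-hoc scripts under test/ and adds "Write some tests." to the TODO, so the commit itself ships with no test coverage. A minimal Test::Unit smoke test against the newly namespaced classes could look like the sketch below. It assumes the Racc parser has already been generated from grammar.y, for example with `racc -o lib/coffee_script/parser.rb lib/coffee_script/grammar.y` as the deleted parser_test.rb did; the require list is a guess at the library's load order, and the file name, paths, and assertions are hypothetical.

  # test/smoke_test.rb -- illustrative sketch only, not part of this commit.
  # Assumes the parser has been generated beforehand:
  #   racc -o lib/coffee_script/parser.rb lib/coffee_script/grammar.y
  require 'test/unit'

  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
  require 'coffee_script/nodes'   # assumed to define the node and Value classes
  require 'coffee_script/lexer'
  require 'coffee_script/parser'  # the generated CoffeeScript::Parser

  class SmokeTest < Test::Unit::TestCase

    CODE = File.read(File.join(File.dirname(__FILE__), '..', 'examples', 'code.cs'))

    # The lexer hands back [tag, value] pairs, with keywords tagged by their
    # own upcased name (e.g. [:IF, "if"]).
    def test_lexer_produces_tagged_tokens
      tokens = CoffeeScript::Lexer.new.tokenize(CODE)
      assert !tokens.empty?
      assert tokens.all? { |pair| pair.length == 2 }
    end

    # Parsing and compiling the example file should yield a non-empty string
    # of JavaScript, now that the parser lives under the CoffeeScript module.
    def test_parser_compiles_example_file
      js = CoffeeScript::Parser.new.parse(CODE).compile
      assert js.is_a?(String)
      assert !js.empty?
    end
  end

Something like the old JSLint pipe from parser_test.rb could later be layered on top of the compile test, but it is left out of this sketch because the jsl path in that script was machine-specific.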