first draft of parsing comments and printing them along with the compiled output -- unfortunately, this does not yet work for comments within objects and arrays

2026-02-19 03:44:23 -05:00 · 2009-12-22 11:27:19 -05:00
parent d45643c527
commit 65809d08f6
8 changed files with 61 additions and 23 deletions
--- a/lib/coffee_script/grammar.y
+++ b/lib/coffee_script/grammar.y
@@ -13,6 +13,7 @@ token SWITCH CASE
 token EXTENDS SUPER
 token DELETE
 token NEWLINE
 token COMMENT
 token JS
 # Declare order of operations.
@@ -81,6 +82,7 @@ rule
  | While
  | For
  | Switch
  | Comment
  ;
  # All tokens that can terminate an expression.
@@ -123,6 +125,11 @@ rule
    RETURN Expression                 { result = ReturnNode.new(val[1]) }
  ;
  # A comment.
  Comment:
    COMMENT                           { result = CommentNode.new(val[0]) }
  ;
  # Arithmetic and logical operators
  # For Ruby's Operator precedence, see:
  # https://www.cs.auckland.ac.nz/references/ruby/ProgrammingRuby/language.html
--- a/lib/coffee_script/lexer.rb
+++ b/lib/coffee_script/lexer.rb
@@ -24,15 +24,15 @@ module CoffeeScript
    JS         = /\A(`(.*?)`)/
    OPERATOR   = /\A([+\*&|\/\-%=<>]+)/
    WHITESPACE = /\A([ \t\r]+)/
-    NEWLINE    = /\A([\r\n]+)/
+    NEWLINE    = /\A(\n+)/
-    COMMENT    = /\A(#[^\r\n]*)/
+    COMMENT    = /\A((#[^\n]*\s*)+)/m
    CODE       = /\A(=>)/
    REGEX      = /\A(\/(.*?)[^\\]\/[imgy]{0,4})/
    # Token cleaning regexes.
    JS_CLEANER = /(\A`|`\Z)/
-    MULTILINER = /[\r\n]/
+    MULTILINER = /\n/
-    COMMENT_CLEANER = /^\s*#\s*/
+    COMMENT_CLEANER = /^\s*#/
    # Tokens that always constitute the start of an expression.
    EXP_START  = ['{', '(', '[']
@@ -61,7 +61,7 @@ module CoffeeScript
      return if string_token
      return if js_token
      return if regex_token
-      return if remove_comment
+      return if comment_token
      return if whitespace_token
      return    literal_token
    end
@@ -110,10 +110,10 @@ module CoffeeScript
    end
    # Matches and consumes comments.
-    def remove_comment
+    def comment_token
      return false unless comment = @chunk[COMMENT, 1]
-      cleaned = comment.gsub(COMMENT_CLEANER, '')
+      token(:COMMENT, comment.gsub(COMMENT_CLEANER, '').split(MULTILINER))
-      @prev_comment ? @prev_comment << cleaned : @prev_comment = [cleaned]
+      token("\n", "\n")
      @i += comment.length
    end
@@ -145,9 +145,7 @@ module CoffeeScript
    # Add a token to the results, taking note of the line number, and
    # immediately-preceding comment.
    def token(tag, value)
-      comment = @prev_comment
+      @tokens << [tag, Value.new(value, @line)]
      @prev_comment = nil
      @tokens << [tag, Value.new(value, @line, comment)]
    end
    # Peek at the previous token.
--- a/lib/coffee_script/nodes.rb
+++ b/lib/coffee_script/nodes.rb
@@ -46,6 +46,8 @@ module CoffeeScript
    statement
    attr_reader :expressions
    STRIP_TRAILING_WHITESPACE = /\s+$/
    # Wrap up a node as an Expressions, unless it already is.
    def self.wrap(node)
      node.is_a?(Expressions) ? node : Expressions.new([node])
@@ -66,10 +68,17 @@ module CoffeeScript
      @expressions.length == 1 ? @expressions.first : self
    end
    # Reports whether +node+ is the final meaningful entry of this block.
    # When the block's last expression is a CommentNode, the entry just
    # before it is treated as the last one. The chosen index is computed
    # once and cached in @last_index.
    def last?(node)
      unless @last_index
        trailing_comment = @expressions.last.is_a?(CommentNode)
        @last_index = trailing_comment ? -2 : -1
      end
      node == @expressions[@last_index]
    end
    # If this is the top-level Expressions, wrap everything in a safety closure.
    def root_compile
-      options = {:indent => TAB, :scope => Scope.new}
+      code = compile(:indent => TAB, :scope => Scope.new)
-      "(function(){\n#{compile(options)}\n})();"
+      code.gsub!(STRIP_TRAILING_WHITESPACE, '')
      "(function(){\n#{code}\n})();"
    end
    # The extra fancy is to handle pushing down returns and assignments
@@ -78,7 +87,7 @@ module CoffeeScript
      return root_compile unless options[:scope]
      code = @expressions.map { |node|
        o = super(options)
-        if node == @expressions.last && (o[:return] || o[:assign])
+        if last?(node) && (o[:return] || o[:assign])
          if o[:return]
            if node.statement? || node.custom_return?
              "#{o[:indent]}#{node.compile(o)}#{node.line_ending}"
@@ -149,6 +158,27 @@ module CoffeeScript
    end
  end
  # Carries a CoffeeScript comment through to the generated JavaScript,
  # emitting each comment line as a `//` line comment in the same position.
  class CommentNode < Node
    # NOTE(review): `statement` is a class-level macro defined outside this
    # view; presumably it marks the node as a statement -- confirm on Node.
    statement

    # lines - a wrapped token value whose #value is the list of comment
    #         lines (the lexer emits the COMMENT token as an array of lines).
    def initialize(lines)
      @lines = lines.value
    end

    # A comment is not followed by any line terminator.
    def line_ending
      ''
    end

    # Builds the JavaScript comment text: every line is preceded by a newline,
    # the current indent (o[:indent]) and `//`. The result is handed to
    # `write`, a helper defined outside this class.
    def compile(o={})
      prefix = "\n#{o[:indent]}//"
      # The prefix is emitted once up front and then between lines, so the
      # first line is prefixed as well.
      text = prefix + @lines.join(prefix)
      write(text)
    end
  end
  # Node for a function invocation. Takes care of converting super() calls into
  # calls against the prototype's function of the same name.
  class CallNode < Node
--- a/lib/coffee_script/value.rb
+++ b/lib/coffee_script/value.rb
@@ -3,10 +3,10 @@ module CoffeeScript
  # Instead of producing raw Ruby objects, the Lexer produces values of this
  # class, wrapping native objects tagged with line number information.
  class Value
-    attr_reader :line, :comment
+    attr_reader :value, :line
-    def initialize(value, line, comment=nil)
+    def initialize(value, line)
-      @value, @line, @comment = value, line, comment
+      @value, @line = value, line
    end
    def to_str
--- a/test/fixtures/each.js
+++ b/test/fixtures/each.js
@@ -1,4 +1,7 @@
 (function(){
  // The cornerstone, an each implementation.
  // Handles objects implementing forEach, arrays, and raw objects.
  _.each = function(obj, iterator, context) {
    var index = 0;
    try {
--- a/test/fixtures/each.tokens
+++ b/test/fixtures/each.tokens
@@ -1 +1 @@
-[["\n", "\n"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "each"], [":", ":"], [:PARAM, "obj"], [",", ","], [:PARAM, "iterator"], [",", ","], [:PARAM, "context"], ["=>", "=>"], ["\n", "\n"], [:IDENTIFIER, "index"], [":", ":"], [:NUMBER, "0"], ["\n", "\n"], [:TRY, "try"], ["\n", "\n"], [:IF, "if"], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], ["\n", "\n"], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], ["(", "("], [:IDENTIFIER, "iterator"], [",", ","], [:IDENTIFIER, "context"], [")", ")"], ["\n", "\n"], [:ELSE, "else"], [:IF, "if"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArray"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], [:OR, "or"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArguments"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], ["\n", "\n"], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], ["(", "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [",", ","], [:IDENTIFIER, "obj"], [")", ")"], [:FOR, "for"], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [:IN, "in"], [:IDENTIFIER, "obj"], [".", "."], ["\n", "\n"], [:ELSE, "else"], ["\n", "\n"], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], ["(", "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "obj"], ["[", "["], [:IDENTIFIER, "key"], ["]", "]"], [",", ","], [:IDENTIFIER, "key"], [",", ","], [:IDENTIFIER, "obj"], [")", ")"], [:FOR, "for"], [:IDENTIFIER, "key"], [:IN, "in"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "keys"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], [".", "."], [".", "."], ["\n", "\n"], [:CATCH, "catch"], [:IDENTIFIER, "e"], ["\n", "\n"], [:THROW, "throw"], [:IDENTIFIER, "e"], [:IF, "if"], [:IDENTIFIER, "e"], [:AINT, "aint"], [:IDENTIFIER, "breaker"], [".", "."], ["\n", "\n"], [:IDENTIFIER, "obj"], [".", "."]]
+[[:COMMENT, [" The cornerstone, an each implementation.", " Handles objects implementing forEach, arrays, and raw objects."]], ["\n", "\n"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "each"], [":", ":"], [:PARAM, "obj"], [",", ","], [:PARAM, "iterator"], [",", ","], [:PARAM, "context"], ["=>", "=>"], ["\n", "\n"], [:IDENTIFIER, "index"], [":", ":"], [:NUMBER, "0"], ["\n", "\n"], [:TRY, "try"], ["\n", "\n"], [:IF, "if"], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], ["\n", "\n"], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], ["(", "("], [:IDENTIFIER, "iterator"], [",", ","], [:IDENTIFIER, "context"], [")", ")"], ["\n", "\n"], [:ELSE, "else"], [:IF, "if"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArray"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], [:OR, "or"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArguments"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], ["\n", "\n"], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], ["(", "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [",", ","], [:IDENTIFIER, "obj"], [")", ")"], [:FOR, "for"], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [:IN, "in"], [:IDENTIFIER, "obj"], [".", "."], ["\n", "\n"], [:ELSE, "else"], ["\n", "\n"], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], ["(", "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "obj"], ["[", "["], [:IDENTIFIER, "key"], ["]", "]"], [",", ","], [:IDENTIFIER, "key"], [",", ","], [:IDENTIFIER, "obj"], [")", ")"], [:FOR, "for"], [:IDENTIFIER, "key"], [:IN, "in"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "keys"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], [".", "."], [".", "."], ["\n", "\n"], [:CATCH, "catch"], [:IDENTIFIER, "e"], ["\n", "\n"], [:THROW, "throw"], [:IDENTIFIER, "e"], [:IF, "if"], [:IDENTIFIER, "e"], [:AINT, 
"aint"], [:IDENTIFIER, "breaker"], [".", "."], ["\n", "\n"], [:IDENTIFIER, "obj"], [".", "."]]
--- a/test/unit/test_lexer.rb
+++ b/test/unit/test_lexer.rb
@@ -37,8 +37,9 @@ class LexerTest < Test::Unit::TestCase
  def test_lexing_comment
    code = "a: 1\n # comment\n # on two lines\nb: 2"
-    token = @lex.tokenize(code).detect {|t| t[1].comment }
+    assert @lex.tokenize(code) == [[:IDENTIFIER, "a"], [":", ":"], [:NUMBER, "1"],
-    assert token[1].comment == ['comment', 'on two lines']
+    ["\n", "\n"], [:COMMENT, [" comment", " on two lines"]], ["\n", "\n"],
    [:IDENTIFIER, "b"], [":", ":"], [:NUMBER, "2"]]
  end
  def test_lexing
--- a/test/unit/test_parser.rb
+++ b/test/unit/test_parser.rb
@@ -54,13 +54,12 @@ class ParserTest < Test::Unit::TestCase
  def test_parsing_comment
    nodes = @par.parse("a: 1\n # comment\nb: 2").expressions
-    # Comments are being passed through to the raw values,
+    assert nodes[1].is_a? CommentNode
    # but are not yet properly exposed within the nodes.
  end
  def test_parsing
    nodes = @par.parse(File.read('test/fixtures/each.cs'))
-    assign = nodes.expressions.first
+    assign = nodes.expressions[1]
    assert assign.is_a? AssignNode
    assert assign.variable.literal == '_'
    assert assign.value.is_a? CodeNode
		`@@ -1 +1 @@`
			[["\n", "\n"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "each"], [":", ":"], [:PARAM, "obj"], [",", ","], [:PARAM, "iterator"], [",", ","], [:PARAM, "context"], ["=>", "=>"], ["\n", "\n"], [:IDENTIFIER, "index"], [":", ":"], [:NUMBER, "0"], ["\n", "\n"], [:TRY, "try"], ["\n", "\n"], [:IF, "if"], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], ["\n", "\n"], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], ["(", "("], [:IDENTIFIER, "iterator"], [",", ","], [:IDENTIFIER, "context"], [")", ")"], ["\n", "\n"], [:ELSE, "else"], [:IF, "if"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArray"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], [:OR, "or"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArguments"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], ["\n", "\n"], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], ["(", "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [",", ","], [:IDENTIFIER, "obj"], [")", ")"], [:FOR, "for"], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [:IN, "in"], [:IDENTIFIER, "obj"], [".", "."], ["\n", "\n"], [:ELSE, "else"], ["\n", "\n"], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], ["(", "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "obj"], ["[", "["], [:IDENTIFIER, "key"], ["]", "]"], [",", ","], [:IDENTIFIER, "key"], [",", ","], [:IDENTIFIER, "obj"], [")", ")"], [:FOR, "for"], [:IDENTIFIER, "key"], [:IN, "in"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "keys"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], [".", "."], [".", "."], ["\n", "\n"], [:CATCH, "catch"], [:IDENTIFIER, "e"], ["\n", "\n"], [:THROW, "throw"], [:IDENTIFIER, "e"], [:IF, "if"], [:IDENTIFIER, "e"], [:AINT, "aint"], [:IDENTIFIER, "breaker"], [".", "."], ["\n", "\n"], [:IDENTIFIER, "obj"], [".", "."]]				[[:COMMENT, [" The 
cornerstone, an each implementation.", " Handles objects implementing forEach, arrays, and raw objects."]], ["\n", "\n"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "each"], [":", ":"], [:PARAM, "obj"], [",", ","], [:PARAM, "iterator"], [",", ","], [:PARAM, "context"], ["=>", "=>"], ["\n", "\n"], [:IDENTIFIER, "index"], [":", ":"], [:NUMBER, "0"], ["\n", "\n"], [:TRY, "try"], ["\n", "\n"], [:IF, "if"], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], ["\n", "\n"], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], ["(", "("], [:IDENTIFIER, "iterator"], [",", ","], [:IDENTIFIER, "context"], [")", ")"], ["\n", "\n"], [:ELSE, "else"], [:IF, "if"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArray"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], [:OR, "or"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArguments"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], ["\n", "\n"], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], ["(", "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [",", ","], [:IDENTIFIER, "obj"], [")", ")"], [:FOR, "for"], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [:IN, "in"], [:IDENTIFIER, "obj"], [".", "."], ["\n", "\n"], [:ELSE, "else"], ["\n", "\n"], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], ["(", "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "obj"], ["[", "["], [:IDENTIFIER, "key"], ["]", "]"], [",", ","], [:IDENTIFIER, "key"], [",", ","], [:IDENTIFIER, "obj"], [")", ")"], [:FOR, "for"], [:IDENTIFIER, "key"], [:IN, "in"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "keys"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], [".", "."], [".", "."], ["\n", "\n"], [:CATCH, "catch"], [:IDENTIFIER, "e"], ["\n", "\n"], [:THROW, "throw"], [:IDENTIFIER, "e"], [:IF, "if"], [:IDENTIFIER, "e"], [:AINT, "aint"], [:IDENTIFIER, 
"breaker"], [".", "."], ["\n", "\n"], [:IDENTIFIER, "obj"], [".", "."]]