Updated vendor copy of html-scanner lib, for bug fixes and optimizations

git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@1416 5ecf4fe2-1ee6-0310-87b1-e25e094e27de
2026-02-15 16:45:08 -05:00 · 2005-06-14 10:30:36 +00:00
parent bca13f727e
commit c23b2a4ad3
5 changed files with 28 additions and 19 deletions
--- a/actionpack/CHANGELOG
+++ b/actionpack/CHANGELOG
@@ -1,5 +1,7 @@
 *SVN*

+* Updated vendor copy of html-scanner lib to 0.5.1, for bug fixes and optimizations
+
 * Changed test requests to come from 0.0.0.0 instead of 127.0.0.1 such that they don't trigger debugging screens on exceptions, but instead call rescue_action_in_public

 * Modernize scaffolding to match the generator: use the new render method and change style from the warty @params["id"] to the sleek params[:id].  #1367
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb
+++ b/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb
@@ -1,7 +1,7 @@
 require 'html/tokenizer'
 require 'html/node'

-module HTML#:nodoc:
+module HTML #:nodoc:
  
  # A top-level HTML document. You give it a body of text, and it will parse that
  # text into a tree of nodes.
@@ -11,7 +11,7 @@ module HTML#:nodoc:
    attr_reader :root

    # Create a new Document from the given text.
-    def initialize(text)
+    def initialize(text, strict=false)
      tokenizer = Tokenizer.new(text)
      @root = Node.new(nil)
      node_stack = [ @root ]
@@ -28,7 +28,7 @@ module HTML#:nodoc:
              open_start = 0 if open_start < 0
              close_start = node.position - 20
              close_start = 0 if close_start < 0
-              warn <<EOF.strip
+              msg = <<EOF.strip
 ignoring attempt to close #{node_stack.last.name} with #{node.name}
  opened at byte #{node_stack.last.position}, line #{node_stack.last.line}
  closed at byte #{node.position}, line #{node.line}
@@ -36,6 +36,7 @@ ignoring attempt to close #{node_stack.last.name} with #{node.name}
  text around open: #{text[open_start,40].inspect}
  text around close: #{text[close_start,40].inspect}
 EOF
+              strict ? raise(msg) : warn(msg)
            end
          elsif node.closing != :close
            node_stack.push node
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb
+++ b/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb
@@ -1,8 +1,8 @@
 require 'strscan'

-module HTML#:nodoc:
+module HTML #:nodoc:
  
-  class Conditions < Hash#:nodoc:
+  class Conditions < Hash #:nodoc:
    def initialize(hash)
      super()
      hash = { :content => hash } unless Hash === hash
@@ -54,7 +54,7 @@ module HTML#:nodoc:
  end

  # The base class of all nodes, textual and otherwise, in an HTML document.
-  class Node#:nodoc:
+  class Node #:nodoc:
    # The array of children of this node. Not all nodes have children.
    attr_reader :children
    
@@ -91,6 +91,8 @@ module HTML#:nodoc:
    # Search the children of this node for the first node for which #find
    # returns non +nil+. Returns the result of the #find call that succeeded.
    def find(conditions)
+      conditions = validate_conditions(conditions)
+
      @children.each do |child|        
        node = child.find(conditions)
        return node if node
@@ -101,6 +103,8 @@ module HTML#:nodoc:
    # Search for all nodes that match the given conditions, and return them
    # as an array.
    def find_all(conditions)
+      conditions = validate_conditions(conditions)
+
      matches = []
      matches << self if match(conditions)
      @children.each do |child|
@@ -183,7 +187,7 @@ module HTML#:nodoc:
  end

  # A node that represents text, rather than markup.
-  class Text < Node#:nodoc:
+  class Text < Node #:nodoc:
    
    attr_reader :content
    
@@ -239,7 +243,7 @@ module HTML#:nodoc:
  # A Tag is any node that represents markup. It may be an opening tag, a
  # closing tag, or a self-closing tag. It has a name, and may have a hash of
  # attributes.
-  class Tag < Node#:nodoc:
+  class Tag < Node #:nodoc:
    
    # Either +nil+, <tt>:close</tt>, or <tt>:self</tt>
    attr_reader :closing
@@ -268,7 +272,9 @@ module HTML#:nodoc:

    # Returns non-+nil+ if this tag cannot contain child nodes.
    def childless?
-      @name =~ /^(img|br|hr|link|meta|area|base|basefont|col|frame|input|isindex|param)$/o
+      !@closing.nil? ||
+        @name =~ /^(img|br|hr|link|meta|area|base|basefont|
+                    col|frame|input|isindex|param)$/ox
    end

    # Returns a textual representation of the node
@@ -284,6 +290,7 @@ module HTML#:nodoc:
        s << " /" if @closing == :self
        s << ">"
        @children.each { |child| s << child.to_s }
+        s << "</#{@name}>" if @closing != :self && !@children.empty?
        s
      end
    end
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
+++ b/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
@@ -1,6 +1,6 @@
 require 'strscan'

-module HTML#:nodoc:
+module HTML #:nodoc:
  
  # A simple HTML tokenizer. It simply breaks a stream of text into tokens, where each
  # token is a string. Each string represents either "text", or an HTML element.
@@ -13,7 +13,7 @@ module HTML#:nodoc:
  #   while token = tokenizer.next
  #     p token
  #   end
-  class Tokenizer#:nodoc:
+  class Tokenizer #:nodoc:
    
    # The current (byte) position in the text
    attr_reader :position
@@ -51,7 +51,7 @@ module HTML#:nodoc:
        tag = @scanner.getch
        if @scanner.scan(/!--/) # comment
          tag << @scanner.matched
-          tag << @scanner.scan_until(/--\s*>/)
+          tag << (@scanner.scan_until(/--\s*>/) || @scanner.scan_until(/\Z/))
        elsif @scanner.scan(/!/) # doctype
          tag << @scanner.matched
          tag << consume_quoted_regions
@@ -63,14 +63,13 @@ module HTML#:nodoc:

      # Scan all text up to the next < character and return it.
      def scan_text
-        @scanner.getch + (@scanner.scan(/[^<]*/) || "")
+        "#{@scanner.getch}#{@scanner.scan(/[^<]*/)}"
      end
      
      # Counts the number of newlines in the text and updates the current line
      # accordingly.
      def update_current_line(text)
-        @current_line += text.scan(/\r\n|\r|\n/).length
-        text
+        text.scan(/\r?\n/) { @current_line += 1 }
      end
      
      # Skips over quoted strings, so that less-than and greater-than characters
@@ -89,7 +88,7 @@ module HTML#:nodoc:
          text << match
          break if delim == "<" || delim == ">"

-          # consume the conqued region
+          # consume the quoted region
          while match = @scanner.scan_until(/[\\#{delim}]/)
            text << match
            break if @scanner.matched == delim
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/version.rb
+++ b/actionpack/lib/action_controller/vendor/html-scanner/html/version.rb
@@ -1,9 +1,9 @@
-module HTML#:nodoc:
-  module Version#:nodoc:
+module HTML #:nodoc:
+  module Version #:nodoc:

    MAJOR = 0
    MINOR = 5
-    TINY  = 0
+    TINY  = 1

    STRING = [ MAJOR, MINOR, TINY ].join(".")