Added TextHelper#sanitize, which removes any Javascript handlers, script blocks, and forms from HTML input. This allows HTML to be used on public sites while still being free of XSS issues. #1277 [Jamis Buck]

git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@1298 5ecf4fe2-1ee6-0310-87b1-e25e094e27de
This commit is contained in:
David Heinemeier Hansson
2005-05-09 11:24:18 +00:00
parent b167248b21
commit 45780be2a7
3 changed files with 81 additions and 0 deletions

View File

@@ -1,5 +1,7 @@
*SVN*
* Added TextHelper#sanitize, which removes any Javascript handlers, script blocks, and forms from HTML input. This allows HTML to be used on public sites while still being free of XSS issues. #1277 [Jamis Buck]
* Fixed the HTML scanner used by assert_tag, where an infinite loop could be caused by a stray less-than sign in the input #1270 [Jamis Buck]
* Added functionality to assert_tag, so you can now do tests on the siblings of a node, to assert that some element comes before or after the element in question, or just to assert that some element exists as a sibling #1226 [Jamis Buck]

View File

@@ -128,6 +128,61 @@ module ActionView
# Removes every anchor element from +text+, keeping only the text that was
# between the opening and closing tag.
#
# The match is non-greedy so that each link is unwrapped on its own (a greedy
# match would swallow everything between the first "<a" and the last link),
# anchored on a word boundary so that tags such as <abbr> are left alone, and
# case-insensitive so that <A HREF="..."> is handled as well.
#
# Returns a new string; +text+ itself is not modified.
def strip_links(text)
  text.gsub(/<a\b.*?>(.*?)<\/a>/mi, '\1')
end
# Load the html-scanner library, preferring a copy that is already
# reachable through the load path.
begin
  %w(html/tokenizer html/node).each { |lib| require lib }
rescue LoadError
  # No installed copy could be loaded: put the version bundled with
  # action controller at the front of the load path and try again.
  vendored_scanner = File.join(File.dirname(__FILE__), "..", "..",
                               "action_controller", "vendor", "html-scanner")
  $LOAD_PATH.unshift(vendored_scanner)
  %w(html/tokenizer html/node).each { |lib| require lib }
end
# Names of the tags that sanitize turns into plain text. Only assigned when
# the constant has not been defined already.
unless defined?(VERBOTEN_TAGS)
  VERBOTEN_TAGS = %w(form script)
end
# Attribute names matching this pattern (anything beginning with "on",
# in any case) are deleted by sanitize.
unless defined?(VERBOTEN_ATTRS)
  VERBOTEN_ATTRS = /^on/i
end
# Sanitizes the given HTML by making form and script tags into regular
# text, and removing all "onxxx" attributes (so that arbitrary Javascript
# cannot be executed). Also removes href and src attributes whose value
# starts with "javascript:" (in any case, and ignoring leading whitespace).
#
# html - the markup to clean. nil, or text that contains no "<" at all,
#        is handed back untouched.
#
# Returns the sanitized text (a new string whenever any parsing was done).
#
# NOTE(review): this is a best-effort filter; it does not try to catch
# every obfuscated javascript: URL (e.g. ones with embedded control
# characters), so it should not be the only XSS defence.
def sanitize(html)
  # only do this if absolutely necessary
  return html if html.nil? || !html.index("<")

  tokenizer = HTML::Tokenizer.new(html)
  sanitized = ""
  while token = tokenizer.next
    node = HTML::Node.parse(nil, 0, 0, token, false)
    sanitized << case node
      when HTML::Tag
        if VERBOTEN_TAGS.include?(node.name)
          # forbidden tags stay in the output, but only as inert text
          node.to_s.gsub(/</, "&lt;")
        else
          # closing tags are skipped here, exactly as before -- presumably
          # they carry no attribute hash (confirm against html-scanner)
          if node.closing != :close
            node.attributes.delete_if { |attr, _value| attr =~ VERBOTEN_ATTRS }
            %w(href src).each do |attr|
              # to_s guards against attribute values that are not strings
              # (e.g. an attribute given without a value)
              if node.attributes[attr].to_s =~ /^\s*javascript:/i
                node.attributes.delete attr
              end
            end
          end
          node.to_s
        end
      else
        # anything that did not parse as a tag is treated as text
        node.to_s.gsub(/</, "&lt;")
    end
  end
  sanitized
end
private
# Returns a version of the text that's safe to use in a regular expression without triggering engine features.

View File

@@ -86,5 +86,29 @@ class TextHelperTest < Test::Unit::TestCase
assert_equal %(<p>Link #{link2_result}</p>), auto_link("<p>Link #{link2_raw}</p>")
assert_equal %(<p>#{link2_result} Link</p>), auto_link("<p>#{link2_raw} Link</p>")
end
def test_sanitize_form
  # The form tag and its closing tag are expected back with "<" escaped,
  # while the nested input tag is expected to pass through.
  markup = "<form action=\"/foo/bar\" method=\"post\"><input></form>"
  assert_equal "&lt;form action='/foo/bar' method='post'><input>&lt;/form>", sanitize(markup)
end
def test_sanitize_script
  # A script element is expected back as text: both of its tags get their
  # "<" escaped and the body is left as it was.
  markup = "<script language=\"Javascript\">blah blah blah</script>"
  assert_equal "&lt;script language='Javascript'>blah blah blah&lt;/script>", sanitize(markup)
end
def test_sanitize_js_handlers
  # "on..." attributes inside a tag are expected to be dropped, while the
  # same text outside of any tag is expected to survive.
  # NOTE(review): the expected string fixes the order of the surviving
  # attributes (name before href), which looks like it depends on how the
  # tag's attributes are iterated -- confirm this is stable.
  markup = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>}
  assert_equal %{onthis="do that" <a name='foo' href='#'>hello</a>}, sanitize(markup)
end
def test_sanitize_javascript_href
  # href="javascript:..." is expected to be removed from tags (whatever the
  # tag is), but to be left alone when it is just text outside a tag.
  markup = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>}
  assert_equal %{href="javascript:bang" <a name='hello'>foo</a>, <span>bar</span>}, sanitize(markup)
end
end