From 71eb248ffea488987ac23ce8bc5ae889cfeebbdb Mon Sep 17 00:00:00 2001 From: Jeffrey Hardy Date: Wed, 22 Oct 2008 16:03:21 -0400 Subject: [PATCH] Fix that HTML::Node.parse would blow up on unclosed CDATA sections. If an unclosed CDATA section is encountered and parsing is strict, an exception will be raised. Otherwise, we consider the remainder of the line to be the section contents. This is consistent with HTML::Tokenizer#scan_tag. --- .../vendor/html-scanner/html/node.rb | 9 +++++++- .../test/controller/html-scanner/node_test.rb | 21 ++++++++++++++++++++ .../test/controller/html-scanner/sanitizer_test.rb | 10 +++++++++ 3 files changed, 39 insertions(+), 1 deletions(-) diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb index a277ddb..6c03316 100644 --- a/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb +++ b/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb @@ -150,7 +150,14 @@ module HTML #:nodoc: end if scanner.skip(/!\[CDATA\[/) - scanner.scan_until(/\]\]>/) + unless scanner.skip_until(/\]\]>/) + if strict + raise "expected ]]> (got #{scanner.rest.inspect} for #{content})" + else + scanner.skip_until(/\Z/) + end + end + return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/contents]]>" + node = nil + assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) } + assert_kind_of HTML::CDATA, node + assert_equal 'contents', node.content + end + + def test_parse_strict_with_unterminated_cdata_section + s = "This is a test.\n\n\n\n

It no longer contains any HTML.

\n})) assert_equal "This has a here.", sanitizer.sanitize("This has a here.") + assert_equal "This has a here.", sanitizer.sanitize("This has a ]]> here.") + assert_equal "This has an unclosed ", sanitizer.sanitize("This has an unclosed ]] here...") [nil, '', ' '].each { |blank| assert_equal blank, sanitizer.sanitize(blank) } end @@ -243,6 +245,14 @@ class SanitizerTest < Test::Unit::TestCase assert_sanitized %(), '' end + def test_should_sanitize_cdata_section + assert_sanitized "section]]>", "<![CDATA[<span>section</span>]]>" + end + + def test_should_sanitize_unterminated_cdata_section + assert_sanitized "neverending...", "<![CDATA[<span>neverending...]]>" + end + protected def assert_sanitized(input, expected = nil) @sanitizer ||= HTML::WhiteListSanitizer.new -- 1.5.5.1