# Sanitizes an HTML string and returns the resulting markup as a String.
#
# Processing order:
# 1. The input goes through unescapeHTML and is parsed by Hpricot with
#    :fixup_tags enabled; the result of subtree(doc, :body) is what gets
#    processed from then on.
# 2. The unique names of all nodes that respond to #name and whose name
#    matches /^\w+$/ are collected. Names not listed in HTML_ELEMENTS are
#    handed to remove_tags! (presumably stripping them from the tree --
#    see that helper for the exact behaviour).
# 3. For elements whose name is listed in HTML_ELEMENTS, attributes are
#    dropped when they are not in HTML_ATTRS, or when they are in
#    HTML_URI_ATTRS and dodgy_uri? flags their value. Surviving values are
#    passed through add_entities.
# 4. Every text node is replaced by add_entities applied to its HTML.
# 5. The tree is serialized and "<!-- ... -->" spans are removed.
#
# str - the raw HTML String to sanitize.
def clean(str)
  markup = unescapeHTML(str)
  tree = subtree(Hpricot(markup, :fixup_tags => true), :body)

  # Gather each distinct tag name present in the tree.
  names = []
  (tree / "*").each do |node|
    names << node.name if node.respond_to?(:name) && node.name =~ /^\w+$/
  end
  names = names.uniq

  # Names outside the whitelist are passed to remove_tags! before the
  # whitelisted ones are looked at.
  remove_tags!(tree, names - HTML_ELEMENTS)
  allowed = names & HTML_ELEMENTS

  # An empty selector list would produce an empty search expression, so the
  # attribute pass is skipped entirely in that case.
  unless allowed.empty?
    (tree / allowed.join(",")).each do |element|
      attrs = element.raw_attributes
      next if attrs.nil? || attrs.empty?

      # Drop attributes that are not whitelisted, and URI-valued attributes
      # whose value dodgy_uri? rejects. The hash is modified in place.
      attrs.reject! do |key, value|
        next true unless HTML_ATTRS.include?(key)
        HTML_URI_ATTRS.include?(key) && dodgy_uri?(value)
      end

      # Re-encode the values of whatever attributes are left.
      element.raw_attributes = attrs.build_hash { |key, value| [key, add_entities(value)] }
    end
  end

  # Re-encode every text node.
  tree.traverse_text { |text| text.swap(add_entities(text.to_html)) }

  # Strip comment spans from the serialized output (multi-line aware).
  html = tree.to_s
  html.gsub(/<\!--.*?-->/mi, '')
end