# Decide whether +uri+ starts with one of the schemes listed in
# DODGY_URI_SCHEMES (e.g. "javascript:"), even when the scheme is disguised
# with HTML entities, URI percent-encoding, mixed case, or control/whitespace
# characters interleaved between its letters.
#
# uri - any object; it is converted with #to_s before inspection.
#
# Returns true when the URI looks dangerous, nil otherwise.
def dodgy_uri?(uri)
  uri = uri.to_s

  # Special case for poorly-formed numeric entities (missing ';'). If one
  # occurs *anywhere* within the string -- including at the very end -- throw
  # the URI out. The negative lookahead (rather than consuming a character)
  # is what lets an unterminated entity at end-of-string be caught.
  # NOTE(review): because of backtracking this also flags some well-formed
  # hex entities such as "&#x6a;". That is kept on purpose: it errs on the
  # side of rejecting.
  return true if uri =~ /&\#(?:\d+|x[0-9a-f]+)(?![;\d])/i

  # Characters that may be slipped in before/between the scheme's letters:
  # control characters 0x00-0x1F, space (0x20) and DEL (0x7F).
  ignorable = '[\x00-\x20\x7f]*'

  # Build one anchored, case-insensitive pattern per scheme, once per call.
  # Each character of "scheme:" is escaped so that schemes containing regexp
  # metacharacters (e.g. "svn+ssh") are matched literally.
  # The result looks something like
  #   /\A[\x00-\x20\x7f]*j[\x00-\x20\x7f]*a[\x00-\x20\x7f]*v ... t[\x00-\x20\x7f]*:/i
  scheme_patterns = DODGY_URI_SCHEMES.map do |scheme|
    body = "#{scheme}:".each_char.map { |char| ignorable + Regexp.escape(char) }.join
    Regexp.new("\\A#{body}", Regexp::IGNORECASE)
  end

  # Try decoding as both HTML and URI encodings, then test every scheme
  # pattern against each decoded variant.
  candidates = [unescapeHTML(uri), CGI.unescape(uri)]
  dodgy = candidates.any? do |candidate|
    scheme_patterns.any? { |pattern| candidate =~ pattern }
  end
  return true if dodgy

  nil
end