# Returns a copy of the receiver in which common symbols and punctuation are
# spelled out as English words ("&" -> "and", "$1.25" -> "1 dollars 25 cents",
# "foo@bar.com" -> "foo at bar dot com") and the remaining punctuation is
# replaced by spaces. The receiver itself is not modified.
#
# Surrounding whitespace is not normalised here: replacements are padded with
# spaces, so the result may carry leading, trailing or doubled spaces.
def convert_misc_characters
  # Runs of three or more dots are an ellipsis, not a sequence of "dot"s.
  dummy = dup.gsub(/\.{3,}/, " dot dot dot ")

  # Ordered list of [pattern, replacement] rules. Order matters: the
  # dollars-and-cents rules must run before the generic "dot" rule, otherwise
  # the decimal point in "$1.25" would already have been turned into " dot ".
  rules = [
    [/(\s|^)\$(\d+)\.(\d+)(\s|$)/, '\2 dollars \3 cents'],
    [/(\s|^)£(\d+)\.(\d+)(\s|$)/u, '\2 pounds \3 pence'],
    [/\s*&\s*/, "and"],
    [/\s*#/, "number"],
    [/\s*@\s*/, "at"],
    [/(\S|^)\.(\S)/, '\1 dot \2'],
    [/(\s|^)\$(\d*)(\s|$)/, '\2 dollars'],
    [/(\s|^)£(\d*)(\s|$)/u, '\2 pounds'],
    [/(\s|^)¥(\d*)(\s|$)/u, '\2 yen'],
    [/\s*\*\s*/, "star"],
    [/\s*%\s*/, "percent"],
    [/\s*(\\|\/)\s*/, "slash"],
  ]

  rules.each do |found, replaced|
    # Replacements that re-insert their leading context via \1 manage their
    # own spacing; every other replacement is padded so that it cannot fuse
    # with the neighbouring words.
    replaced = " #{replaced} " unless replaced.include?('\1')

    # Several patterns consume the character or whitespace around the symbol
    # as context. gsub! never revisits consumed text, so a second occurrence
    # sharing that context ("$1.50 $2.50", "a.b.c") is skipped in one pass.
    # Re-apply the rule until it no longer matches (gsub! then returns nil).
    # This terminates because each replacement removes one symbol and adds none.
    loop do
      break unless dummy.gsub!(found, replaced)
    end
  end

  # Drop apostrophes inside or at the edge of words ("It's" -> "Its"), then
  # blank out whatever punctuation is left.
  dummy.gsub(/(^|\w)'(\w|$)/, '\1\2').gsub(/[\.,:;()\[\]\/\?!\^'"_]/, " ")
end