module Acunetix::Cleanup
Private Instance Methods
cleanup_decimals(source)
click to toggle source
Replace commas used as decimal separators with periods
# File lib/acunetix/concerns/cleanup.rb, line 41
# Replace every comma that sits between two ASCII digits with a period, so
# comma-style decimals such as "7,5" read as "7.5".
#
# Lookarounds keep the digits out of the match itself, so runs that share a
# digit ("1,2,3") have every comma converted rather than every other one.
#
# @param source [String] text to normalise; it is not modified
# @return [String] a new string with the substitutions applied
def cleanup_decimals(source)
  source.gsub(/(?<=[0-9]),(?=[0-9])/, '.')
end
cleanup_html(source)
click to toggle source
Convert HTML in the text to Textile format
# File lib/acunetix/concerns/cleanup.rb, line 6
# Convert the HTML found in +source+ to Textile-style markup.
#
# Works on a copy of +source+. Tables are rewritten first (via format_table),
# then the HTML entities &quot; &amp; &lt; &gt; are decoded, and finally the
# known tags are replaced one substitution at a time. The substitutions are
# order-dependent (for example the <code><pre> pair must be handled before
# the bare <code> and <pre> rules), so the sequence below must be preserved.
#
# @param source [String] text that may contain HTML; it is not modified
# @return [String] a new string with the conversions applied
def cleanup_html(source)
  result = source.dup

  format_table(result)

  # Decode entities before tag handling so that escaped markup
  # (e.g. "&lt;b&gt;") is seen by the tag rules below.
  result.gsub!(/&quot;/, '"')
  result.gsub!(/&amp;/, '&')
  result.gsub!(/&lt;/, '<')
  result.gsub!(/&gt;/, '>')

  # Inline formatting, line breaks and wrappers that are simply unwrapped.
  result.gsub!(/<b>(.*?)<\/b>/) { "*#{Regexp.last_match(1).strip}*" }
  result.gsub!(/<br\/>/, "\n")
  result.gsub!(/<div(.*?)>|<\/div>/, '')
  result.gsub!(/<a.*?>(.*?)<\/a>/m, '\1')
  result.gsub!(/<font.*?>(.*?)<\/font>/m, '\1')
  result.gsub!(/<h2>(.*?)<\/h2>/) { "*#{Regexp.last_match(1).strip}*" }
  result.gsub!(/<i>(.*?)<\/i>/, '\1')
  result.gsub!(/<p.*?>(.*?)<\/p>/) { "p. #{Regexp.last_match(1).strip}\n" }

  # Code blocks: "bc.." opens an extended block, closed by the following "p. ".
  result.gsub!(/<code><pre.*?>(.*?)<\/pre><\/code>/m) { "\n\nbc.. #{Regexp.last_match(1).strip}\n\np. \n" }
  result.gsub!(/<code>(.*?)<\/code>/) { "\n\nbc. #{Regexp.last_match(1).strip}\n\n" }
  result.gsub!(/<pre.*?>(.*?)<\/pre>/m) { "\n\nbc.. #{Regexp.last_match(1).strip}\n\np. \n" }

  # Lists: complete <li>/<ul> pairs first, then any unpaired leftovers.
  result.gsub!(/<li.*?>([\s\S]*?)<\/li>/m) { "\n* #{Regexp.last_match(1).strip}" }
  result.gsub!(/<ul>([\s\S]*?)<\/ul>/m) { "#{Regexp.last_match(1).strip}\n" }
  result.gsub!(/(<ul>)|(<\/ul>|(<ol>)|(<\/ol>))/, "\n")
  result.gsub!(/<li>/, "\n* ")
  result.gsub!(/<\/li>/, "\n")

  result.gsub!(/<strong>(.*?)<\/strong>/) { "*#{Regexp.last_match(1).strip}*" }
  result.gsub!(/<span.*?>(.*?)<\/span>/m) { "#{Regexp.last_match(1).strip}\n" }

  result
end
format_table(str)
click to toggle source
# File lib/acunetix/concerns/cleanup.rb, line 47
# Rewrite every HTML table in +str+ as pipe-delimited rows, in place.
#
# Each <tr> becomes one line of the form "|cell|cell|", and the lines are
# preceded by a newline so the table starts on its own line. Content inside
# a table but outside <tr>/<td> elements is discarded.
#
# Each table is matched lazily up to the first closing tag, so text between
# two separate tables is preserved; nested tables are not supported.
#
# NOTE(review): the previous version carried a "_. " header prefix behind a
# condition that could never be true, so cells were always emitted as plain
# cells. That output is kept here - confirm whether a header row is wanted.
#
# @param str [String] text to convert; mutated in place
# @return [String, nil] +str+ when a table was replaced, otherwise nil
def format_table(str)
  return unless str.include?('</table>')

  str.gsub!(/<table.*?>[\s\S]*?<\/table>/) do |table|
    # Accept <tr> with or without attributes, mirroring the <td> handling.
    lines = table.scan(/<tr\b[^>]*>[\s\S]*?<\/tr>/).map do |tr|
      cells = tr.scan(/<td.*?>[\s\S]*?<\/td>/).map { |td| td.gsub(/<td.*?>|<\/td>/, '') }
      "|#{cells.map { |cell| "#{cell}|" }.join}"
    end

    # The leading empty entry yields the newline in front of the first row.
    ['', *lines].join("\n")
  end
end