module RemoteTable::ProcessedByNokogiri
Mixed in to process XML and XHTML.
Constants
- SOFT_HYPHEN
Public Instance Methods
_each() { |values| ... }
click to toggle source
Yield each row using Nokogiri.
# File lib/remote_table/processed_by_nokogiri.rb, line 12
#
# Parses the unescaped local copy with Nokogiri and yields one entry per row
# matched by +row_css+ (preferred) or +row_xpath+.
#
# Each cell's text is duplicated, passed through +assume_utf8+ and
# +RemoteTable.normalize_whitespace+. When +headers+ is falsy the block
# receives an Array of cell strings; otherwise it receives the result of
# +zip(headers, values)+. When +headers+ is +:first_row+, the first matched
# row supplies the header names (only its present cells) and is not yielded.
# Rows with no present cell are skipped unless +keep_blank_rows+ is set.
#
# @yield [values] an Array of strings, or a keyed hash when headers are in use
# @raise [::ArgumentError] if neither +row_css+ nor +row_xpath+ is set
#
# +local_copy.cleanup+ always runs on the way out, including on error.
def _each
  # Loaded lazily so the dependencies are only needed when XML/HTML is processed.
  require 'nokogiri'
  require 'cgi'

  # Kept in a local because a :first_row setting is swapped for the real
  # header names inside the loop.
  active_headers = headers

  if !row_css && !row_xpath
    raise ::ArgumentError, "[remote_table] Need :row_css or :row_xpath in order to process XML or HTML"
  end

  document = nokogiri_class.parse(unescaped_xml_without_soft_hyphens, nil, RemoteTable::EXTERNAL_ENCODING)
  row_nodes = row_css ? document.css(row_css) : document.xpath(row_xpath)

  row_nodes.each do |row|
    found_value = false

    # Without a column selector the row node itself is the only cell.
    cells =
      if column_css
        row.css(column_css)
      elsif column_xpath
        row.xpath(column_xpath)
      else
        [row]
      end

    values = cells.map do |cell|
      text = RemoteTable.normalize_whitespace(assume_utf8(cell.content.dup))
      # Presence only needs tracking when blank rows are going to be dropped.
      found_value ||= !keep_blank_rows && text.present?
      text
    end

    # The first row is consumed as the header list rather than yielded.
    if active_headers == :first_row
      active_headers = values.select(&:present?)
      next
    end

    next unless keep_blank_rows || found_value

    if headers
      yield zip(active_headers, values)
    else
      yield values
    end
  end
ensure
  local_copy.cleanup
end
preprocess!()
click to toggle source
# File lib/remote_table/processed_by_nokogiri.rb, line 6
#
# Runs the two preparation steps in order: +delete_harmful!+ first, then
# +transliterate_whole_file_to_utf8!+. Returns whatever the second step returns.
# NOTE(review): both steps are defined outside this module's visible source;
# presumably they act on the local copy of the file -- confirm against the
# including class.
def preprocess!
  delete_harmful!
  transliterate_whole_file_to_utf8!
end
Private Instance Methods
unescaped_xml_without_soft_hyphens()
click to toggle source
Should we be doing this in Ruby?
# File lib/remote_table/processed_by_nokogiri.rb, line 67
#
# Reads the entire encoded local copy, unescapes its HTML entities with
# +CGI.unescapeHTML+, and strips every match of +SOFT_HYPHEN+.
# The IO is rewound after the read.
#
# @return [String] the unescaped document text without soft hyphens
def unescaped_xml_without_soft_hyphens
  unescaped = ::CGI.unescapeHTML(local_copy.encoded_io.read)
  local_copy.encoded_io.rewind

  # Soft hyphens are leftovers typically seen in MS Office output.
  unescaped.gsub!(SOFT_HYPHEN, '')
  unescaped
end
zip(keys, values)
click to toggle source
snippets.dzone.com/posts/show/406
# File lib/remote_table/processed_by_nokogiri.rb, line 60
#
# Pairs each key with the value at the same index and collects the pairs into
# an +ActiveSupport::OrderedHash+, in key order. Keys without a matching value
# map to +nil+; values beyond the last key are dropped (standard +Array#zip+
# behaviour).
#
# @param keys [Array] header names
# @param values [Array] cell values for one row
# @return [::ActiveSupport::OrderedHash] keys mapped to values
def zip(keys, values)
  keys.zip(values).each_with_object(::ActiveSupport::OrderedHash.new) do |(key, value), ordered|
    ordered[key] = value
  end
end