module RemoteTable::ProcessedByNokogiri

Mixed in to process XML and XHTML.

Constants

SOFT_HYPHEN

Public Instance Methods

_each() { |values| ... } click to toggle source

Yield each row using Nokogiri.

# File lib/remote_table/processed_by_nokogiri.rb, line 12
# Parses the local copy with Nokogiri and yields one entry per matched row node.
#
# Rows are selected with row_css when it is set, otherwise with row_xpath.
# Within a row, cells are selected with column_css, else column_xpath, else the
# row node itself is treated as the single cell.
#
# Yields an Array of cell strings when headers is falsy; otherwise yields the
# result of zip(current_headers, values). When headers is :first_row, the first
# matched row supplies the headers and is not yielded.
#
# Raises ::ArgumentError when neither row_css nor row_xpath is set.
# local_copy.cleanup always runs on the way out, including after an error.
def _each
  require 'nokogiri'
  require 'cgi'

  # Work on a local because a :first_row setting is replaced by the real header list below.
  current_headers = headers

  if !row_css && !row_xpath
    raise ::ArgumentError, "[remote_table] Need :row_css or :row_xpath in order to process XML or HTML"
  end

  document = nokogiri_class.parse(unescaped_xml_without_soft_hyphens, nil, RemoteTable::EXTERNAL_ENCODING)
  row_nodes = row_css ? document.css(row_css) : document.xpath(row_xpath)

  row_nodes.each do |row_node|
    cell_nodes =
      if column_css
        row_node.css(column_css)
      elsif column_xpath
        row_node.xpath(column_xpath)
      else
        [row_node]
      end

    # Only tracked when blank rows are being dropped; stays false otherwise.
    found_content = false
    values = cell_nodes.map do |cell_node|
      text = RemoteTable.normalize_whitespace(assume_utf8(cell_node.content.dup))
      found_content = true if !found_content && !keep_blank_rows && text.present?
      text
    end

    if current_headers == :first_row
      # NOTE(review): blank header cells are dropped here, so a blank cell in the middle
      # of the header row shifts later headers onto earlier columns - confirm intended.
      current_headers = values.select(&:present?)
      next
    end

    next unless keep_blank_rows || found_content

    yield(headers ? zip(current_headers, values) : values)
  end
ensure
  local_copy.cleanup
end
preprocess!() click to toggle source
# File lib/remote_table/processed_by_nokogiri.rb, line 6
# Runs the two preparation steps in order: delete_harmful! first, then
# transliterate_whole_file_to_utf8!. Both helpers are defined outside this
# section; going by their names they presumably strip problematic content and
# convert the file to UTF-8 - confirm against their definitions.
#
# Returns whatever transliterate_whole_file_to_utf8! returns.
def preprocess!
  delete_harmful!
  transliterate_whole_file_to_utf8!
end

Private Instance Methods

unescaped_xml_without_soft_hyphens() click to toggle source

Open question: should this unescaping be done in Ruby at all?

# File lib/remote_table/processed_by_nokogiri.rb, line 67
# Reads the local copy's encoded IO in full, HTML-unescapes the text with
# CGI.unescapeHTML, rewinds the IO, and removes every SOFT_HYPHEN occurrence.
#
# Relies on 'cgi' having been required already (as _each does before calling this).
#
# Returns the unescaped String with SOFT_HYPHEN occurrences removed.
def unescaped_xml_without_soft_hyphens
  raw = local_copy.encoded_io.read
  unescaped = ::CGI.unescapeHTML(raw)
  # Put the IO back at the start after consuming it.
  local_copy.encoded_io.rewind
  # get rid of MS Office baddies; gsub! returns nil when nothing matched, hence the tap
  unescaped.tap { |text| text.gsub!(SOFT_HYPHEN, '') }
end
zip(keys, values) click to toggle source

See snippets.dzone.com/posts/show/406.

# File lib/remote_table/processed_by_nokogiri.rb, line 60
# Pairs keys with values by position and collects the pairs into a new
# ::ActiveSupport::OrderedHash, inserted in key order.
#
# Pairing follows Array#zip on keys: a key with no matching value maps to nil,
# and values beyond the last key are ignored. A repeated key keeps its last value.
#
# keys   - Array of hash keys.
# values - Array of values, matched to keys by index.
#
# Returns the populated ::ActiveSupport::OrderedHash.
def zip(keys, values)
  keys.zip(values).each_with_object(::ActiveSupport::OrderedHash.new) do |(key, value), ordered|
    ordered[key] = value
  end
end