class Tsumamigui::Parser

Attributes

xpath[R]

The XPath expressions used to get the data, keyed by name

Public Class Methods

new(xpath) click to toggle source

@param [Hash] xpath Mapping of each field name to the XPath expression that locates it

# File lib/tsumamigui/parser.rb, line 19
# Builds a parser around the given name => XPath mapping.
#
# @param xpath [Hash] field names paired with the XPath expression for each;
#   stored as-is (no copy is made) in @xpath
def initialize(xpath)
  @xpath = xpath
end
parse(responses, xpath) click to toggle source

@param [Array<Tsumamigui::Response>] responses @param [Hash] xpath @return [Array<Hash>] parsed responses

# File lib/tsumamigui/parser.rb, line 13
# Class-level convenience entry point: builds a parser for +xpath+ and
# delegates to its instance-level +parse+.
#
# @param responses [Array<Tsumamigui::Response>] responses to parse
# @param xpath [Hash] field names paired with XPath expressions
# @return [Array<Hash>] parsed responses
def parse(responses, xpath)
  parser = new(xpath)
  # The instance-level #parse is private, so it is invoked through #send.
  parser.send(:parse, responses)
end

Private Instance Methods

extract(document) click to toggle source

Extract data from the parsed HTML using the configured XPath expressions @param [Nokogiri::HTML::Document] document parsed HTML document @return [Hash] extracted values keyed by field name @raise [Tsumamigui::ParserError]

# File lib/tsumamigui/parser.rb, line 44
# Runs every configured XPath expression against +document+ and collects the
# stringified results under their field names.
#
# @param document [Object] object responding to #xpath
#   (a Nokogiri::HTML::Document in normal use)
# @return [Hash] field name => string form of the XPath result
# @raise [ParserError] when any StandardError occurs during extraction; the
#   original error's message is reused
def extract(document)
  extracted = {}
  @xpath.each do |name, expression|
    extracted[name] = document.xpath(expression).to_s
  end
  extracted
rescue StandardError => e
  raise ParserError, e.message
end
parse(responses) click to toggle source

Parse response data into hash object @param [Array<Tsumamigui::Response>] responses @return [Array<Hash>] parsed responses @raise [Tsumamigui::ParserError]

# File lib/tsumamigui/parser.rb, line 29
# Converts each response into a hash of extracted values and records the
# URL it came from under :scraped_from.
#
# @param responses [Array<Tsumamigui::Response>] each must respond to
#   #to_array, yielding [url, html, charset]
# @return [Array<Hash>] one hash per response
# @raise [ParserError] when any StandardError occurs while parsing; the
#   original error's message is reused
def parse(responses)
  responses.map do |response|
    source_url, body, encoding = response.to_array
    document = Nokogiri::HTML.parse(body, nil, encoding)
    extract(document).tap { |record| record[:scraped_from] = source_url }
  end
rescue StandardError => e
  raise ParserError, e.message
end