class ScrapKit::Recipe

Attributes

browser[RW]
user_agent[RW]

Public Class Methods

load(source) click to toggle source
# File lib/scrap_kit/recipe.rb, line 10
# Builds a recipe from already-parsed data, a readable stream, or a JSON file path.
#
# source - one of:
#          * Hash: used as-is.
#          * IO, or any non-path object responding to #read (e.g. StringIO):
#            its content is read and parsed as JSON.
#          * anything else (e.g. a String path, or a path-like object that
#            answers #to_path): handed to File.read, and the content is parsed
#            as JSON.
#
# Returns whatever +new+ returns when given the input with deeply symbolized
# keys. Relies on Hash#deep_symbolize_keys being available (it is not defined
# in this file).
# Raises JSON::ParserError when the JSON text cannot be parsed.
def load(source)
  input =
    if source.is_a?(Hash)
      source
    elsif source.is_a?(IO) || (source.respond_to?(:read) && !source.respond_to?(:to_path))
      # In-memory streams such as StringIO are not IO subclasses, so duck-type
      # on #read. Objects that expose #to_path keep going through File.read
      # exactly as before.
      JSON.parse(source.read)
    else
      JSON.parse(File.read(source))
    end

  new(input.deep_symbolize_keys)
end
new(url: nil, steps: [], attributes: {}) click to toggle source
# File lib/scrap_kit/recipe.rb, line 23
# Stores the recipe definition on the instance.
#
# url        - kept in @url (default nil).
# steps      - kept in @steps (default: a new empty Array).
# attributes - kept in @attributes (default: a new empty Hash).
def initialize(url: nil, steps: [], attributes: {})
  @url, @steps, @attributes = url, steps, attributes
end

Public Instance Methods

elements_from_selector(browser_or_element, selector) click to toggle source
# File lib/scrap_kit/recipe.rb, line 67
# Resolves +selector+ against +browser_or_element+ and returns the matches.
#
# Supported selector shapes:
# * String - used as a CSS selector.
# * Hash   - passed unchanged to +elements+.
# * Array  - every item except the last is applied as a CSS selector in turn;
#            the last item is a Hash whose first pair is
#            { child CSS selector => expected value }. Candidates are kept when
#            the value extracted from that child equals the expected value or
#            matches it as a regular expression. When the Array holds only the
#            condition, the condition's key is also the CSS selector used to
#            find the candidates.
#
# Returns nil for any other selector type.
def elements_from_selector(browser_or_element, selector)
  case selector
  when String
    browser_or_element.elements(css: selector)
  when Hash
    browser_or_element.elements(selector)
  when Array
    *path, condition = selector
    condition_key, condition_value = condition.first
    child_css = condition_key.to_s

    candidates =
      if path.empty?
        browser_or_element.elements(css: child_css)
      else
        path.reduce(browser_or_element) { |scope, css| scope.elements(css: css) }
      end

    candidates.filter do |element|
      value = extract_value_from_element(element.element(css: child_css))

      # A candidate without the child element yields nil; treat it as a
      # non-match instead of letting nil.match abort the whole lookup.
      next false if value.nil?
      # Check equality first so literal text containing regexp metacharacters
      # still matches itself.
      next true if value == condition_value

      begin
        value.match?(condition_value)
      rescue RegexpError, TypeError
        # The expected value is not usable as a pattern: simply not a match.
        false
      end
    end
  end
end
extract_attribute(browser_or_element, selector_or_object) click to toggle source
# File lib/scrap_kit/recipe.rb, line 107
# Extracts the value(s) described by +selector_or_object+.
#
# * String - value of the single element found with that CSS selector.
# * Hash with :selector and :children_attributes - one Hash per element
#   matched by :selector, mapping each child attribute name to the value
#   extracted (recursively) from that element.
# * Hash with :javascript - result of running the script on @browser.
# * Array, or any other Hash - values of the elements returned by
#   elements_from_selector: a single value when exactly one element matched,
#   otherwise an Array of values.
#
# Returns nil for any other input type and whenever a StandardError is raised
# during extraction.
def extract_attribute(browser_or_element, selector_or_object)
  case selector_or_object
  when String
    return extract_value_from_element(browser_or_element.element(css: selector_or_object))
  when Hash
    parent_selector = selector_or_object[:selector]
    child_selectors = selector_or_object[:children_attributes]

    if parent_selector && child_selectors
      rows = elements_from_selector(browser_or_element, parent_selector).map do |element|
        child_selectors.each_with_object({}) do |(child_name, child_selector), row|
          row[child_name] = extract_attribute(element, child_selector)
        end
      end
      return rows
    end

    script = selector_or_object[:javascript]
    return @browser.execute_script(script) if script
  when Array
    # Handled by the shared lookup below.
  else
    return nil
  end

  matches = elements_from_selector(browser_or_element, selector_or_object)

  if matches.size == 1
    extract_value_from_element(matches.first)
  else
    matches.map { |element| extract_value_from_element(element) }
  end
rescue
  nil
end
extract_value_from_element(element) click to toggle source
# File lib/scrap_kit/recipe.rb, line 93
# Reads the value an element carries.
#
# element - an object responding to #exists? and #text_content (and optionally
#           #tag_name / #attribute_value), or nil.
#
# Returns nil when +element+ is nil or does not exist, the "value" attribute
# for <input> tags, the "src" attribute for <img> tags, and the element's
# text content otherwise.
def extract_value_from_element(element)
  # Guard nil up front: the existence check itself would otherwise raise.
  return nil if element.nil? || !element.exists?

  if element.respond_to?(:tag_name)
    case element.tag_name.downcase
    when "input" then return element.attribute_value(:value)
    when "img" then return element.attribute_value(:src)
    end
  end

  element.text_content
end
find_element_by_name_or_selector(browser_or_element, name_or_selector) click to toggle source
# File lib/scrap_kit/recipe.rb, line 57
# Looks up a single element, first by its name attribute and then by CSS
# selector, using the string form of +name_or_selector+ for both attempts.
#
# Returns the first lookup result that exists, or nil when neither does.
def find_element_by_name_or_selector(browser_or_element, name_or_selector)
  %i[name css].each do |strategy|
    candidate = browser_or_element.element(strategy => name_or_selector.to_s)
    return candidate if candidate.exists?
  end

  nil
end
run() click to toggle source
# File lib/scrap_kit/recipe.rb, line 29
# Executes the recipe: opens a browser, visits @url, runs every step in
# @steps, then extracts each entry of @attributes.
#
# Returns a Hash mapping each attribute name to its extracted value.
# Any error raised while navigating or running steps propagates to the caller,
# but the browser is always closed and @browser reset to nil first.
def run
  @browser = create_browser

  begin
    @browser.goto @url

    @steps.each { |step| run_step(step) }

    @attributes.each_with_object({}) do |(attribute_name, selector), output|
      output[attribute_name] = extract_attribute(@browser, selector)
    end
  ensure
    # Always release the browser, even when a step blew up, so no browser
    # process is left behind.
    @browser&.close
    @browser = nil
  end
end
run_step(step) click to toggle source
# File lib/scrap_kit/recipe.rb, line 49
# Dispatches one step to its handler. Only the first present key is honoured,
# checked in the order :goto, :click, :fill_form.
#
# Returns the handler's result, or nil when the step has none of those keys.
def run_step(step)
  if (destination = step[:goto])
    goto(destination)
  elsif (target = step[:click])
    click(target)
  elsif (form_data = step[:fill_form])
    fill_form(form_data)
  end
end

Private Instance Methods

click(selector) click to toggle source
# File lib/scrap_kit/recipe.rb, line 171
# Clicks the first element matching +selector+, then waits for the page to
# report a "complete" ready state.
#
# selector - a String (CSS selector), Array or Hash as understood by
#            elements_from_selector. Other values click nothing but still wait.
#
# Best-effort: returns the result of the wait, or nil when any StandardError
# is raised along the way.
def click(selector)
  case selector
  when String, Array, Hash
    # Strings are CSS selectors here; previously they were silently ignored
    # even though elements_from_selector handles them.
    elements_from_selector(@browser, selector).first&.click
  end

  sleep 1
  @browser.wait_until { @browser.ready_state == "complete" }
rescue StandardError
  nil
end
create_browser() click to toggle source
# File lib/scrap_kit/recipe.rb, line 206
# Builds the headless Chrome browser used to run a recipe.
#
# Adds a custom user agent when @user_agent is set. When the
# GOOGLE_CHROME_SHIM environment variable is present, its value is used as the
# Chrome binary and the sandbox and /dev/shm usage are disabled.
#
# Returns a Watir::Browser with a 120 second page-load timeout.
def create_browser
  options = Selenium::WebDriver::Chrome::Options.new

  options.add_argument "--headless"
  # Chrome expects "width,height"; the previous "1080x720" form is not a valid
  # value for this switch.
  options.add_argument "--window-size=1080,720"
  options.add_argument "--hide-scrollbars"
  options.add_argument "--user-agent=#{@user_agent}" if @user_agent

  if (chrome_bin = ENV["GOOGLE_CHROME_SHIM"])
    options.add_argument "--no-sandbox"
    options.add_argument "--disable-dev-shm-usage"
    options.binary = chrome_bin
  end

  new_browser = Watir::Browser.new(:chrome, options: options)
  new_browser.driver.manage.timeouts.page_load = 120

  new_browser
end
fill_form(form_data) click to toggle source
# File lib/scrap_kit/recipe.rb, line 187
# Fills form fields found inside the browser's body.
#
# form_data - pairs of field name (or CSS selector) and the value to enter.
#
# Each field is located with find_element_by_name_or_selector and converted
# with #to_subtype; fields responding to #set receive the value through it,
# otherwise #select is used when available. Fields that cannot be found are
# skipped. Afterwards waits for the page to report a "complete" ready state
# and returns the result of that wait.
def fill_form(form_data)
  form_data.each do |name, value|
    located = find_element_by_name_or_selector(@browser.body, name.to_s)
    next unless located

    field = located.to_subtype

    if field.respond_to?(:set)
      field.set(value)
    elsif field.respond_to?(:select)
      field.select(value)
    end
  end

  sleep 1
  @browser.wait_until { @browser.ready_state == "complete" }
end
goto(link_or_selector) click to toggle source
# File lib/scrap_kit/recipe.rb, line 154
# Navigates the browser and waits for the page to report a "complete" ready
# state.
#
# link_or_selector - a String is treated as a URL to visit; an Array or Hash
#                    is resolved with elements_from_selector and the first
#                    match (if any) is clicked. Other values only wait.
#
# Best-effort: returns the result of the wait, or nil when any StandardError
# is raised along the way.
def goto(link_or_selector)
  case link_or_selector
  when String
    @browser.goto(link_or_selector)
  when Array, Hash
    target = elements_from_selector(@browser, link_or_selector).first
    target.click if target
  end

  sleep 0.5
  @browser.wait_until { @browser.ready_state == "complete" }
rescue
  nil
end