class CricosScrape::InstitutionImporter

Constants

INSTITUTION_URL

Attributes

agent[R]
page[R]
provider_id[R]

Public Class Methods

new(agent, **params) click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 10
# Remembers the agent and provider id, then fetches the institution page.
#
# agent  - object responding to #get(url); whatever it returns is kept as #page.
# params - keyword arguments; :provider_id is required (KeyError when absent).
def initialize(agent, **params)
  @provider_id = params.fetch(:provider_id)
  @agent       = agent
  @page        = @agent.get(url)
end

Public Instance Methods

run() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 16
# Builds an Institution from the fetched page.
#
# Returns nil when the page carries the "invalid Provider ID" notice,
# otherwise the populated Institution. Locations are only collected when the
# page does not state that none were found.
def run
  return if institution_not_found?

  Institution.new.tap do |record|
    record.provider_id    = provider_id
    record.provider_code  = find_provider_code
    record.trading_name   = find_trading_name
    record.name           = find_name
    record.type           = find_type
    record.total_capacity = find_total_capacity
    record.website        = find_website
    record.postal_address = find_postal_address
    # find_location may replace @page while walking the result pages, so the
    # contact officers are deliberately read afterwards, as before.
    record.locations        = find_location if location_found?
    record.contact_officers = find_contact_officers
  end
end

Private Instance Methods

contains_contact_details_grid?() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 273
# True when the current contact-officer panel holds, directly under one of its
# child divs, a table whose id starts with "contactDetails_grid".
def contains_contact_details_grid?
  panel_id   = @contact_officer_area.attributes['id'].text
  grid_xpath = "//*[@id='#{panel_id}']/div/table[starts-with(@id, 'contactDetails_grid')]"

  @page.search(grid_xpath).any?
end
current_pagination_page() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 129
# Reads the current page number out of the pager text ("Page X of Y").
# Yields 0 when the text does not have that shape.
def current_pagination_page
  pager_text = pagination.children[1].text.strip
  pager_text[/^Page ([0-9]+) of [0-9]+.*/, 1].to_i
end
fetch_locations_from_current_page() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 162
# Collects a Location for each data row of the locations grid on the current
# page. Returns an empty array when the grid is not present.
def fetch_locations_from_current_page
  grid = @page.at('#locationList_gridSearchResults')
  return [] unless grid

  # The grid's children hold every location listed on this page.
  rows = grid.children

  # Data rows begin at child index 3; a paginated grid has one more trailing
  # non-data row than an unpaginated one (presumably the pager row).
  trailing_rows = location_results_paginated? ? 3 : 2
  last_data_row = rows.count - trailing_rows

  (3..last_data_row).map do |row_index|
    cells = rows[row_index].children

    Location.new.tap do |location|
      location.location_id       = get_location_id(row_index)
      location.name              = find_value_of_field(cells[1])
      location.state             = find_value_of_field(cells[2])
      location.number_of_courses = find_value_of_field(cells[3])
    end
  end
end
find_contact_officer() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 231
# Builds a single ContactOfficer from the current contact-officer panel,
# using the per-field finder methods.
def find_contact_officer
  ContactOfficer.new.tap do |officer|
    officer.role  = find_contact_officer_role
    officer.name  = find_contact_officer_name
    officer.title = find_contact_officer_title
    officer.phone = find_contact_officer_phone
    officer.fax   = find_contact_officer_fax
    officer.email = find_contact_officer_email
  end
end
find_contact_officer_email() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 268
# Email sits in child 3 of table row 9. Returns nil when the table has no
# such row.
def find_contact_officer_email
  email_row = @contact_officer_table[9]
  return if email_row.nil?

  find_value_of_field(email_row.children[3])
end
find_contact_officer_fax() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 263
# Fax sits in child 3 of table row 7.
def find_contact_officer_fax
  find_value_of_field(@contact_officer_table[7].children[3])
end
find_contact_officer_grid() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 208
# Builds one ContactOfficer per data row when the panel lists its officers in
# a grid. Every officer gets the panel's role; title is left unset.
def find_contact_officer_grid
  # Data rows start at child index 3; the final child of the table is never
  # treated as data.
  last_data_row = @contact_officer_table.count - 2

  (3..last_data_row).map do |row_index|
    cells = @contact_officer_table[row_index].children

    ContactOfficer.new.tap do |officer|
      officer.role  = find_contact_officer_role
      officer.name  = find_value_of_field(cells[1])
      officer.phone = find_value_of_field(cells[2])
      officer.fax   = find_value_of_field(cells[3])
      officer.email = find_value_of_field(cells[4])
    end
  end
end
find_contact_officer_name() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 248
# Name sits in child 3 of table row 1.
def find_contact_officer_name
  find_value_of_field(@contact_officer_table[1].children[3])
end
find_contact_officer_phone() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 258
# Phone sits in child 3 of table row 5.
def find_contact_officer_phone
  find_value_of_field(@contact_officer_table[5].children[3])
end
find_contact_officer_role() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 243
# The role is the text of the panel's child 1 with its first colon removed.
def find_contact_officer_role
  heading = find_value_of_field(@contact_officer_area.children[1])
  heading.sub(':', '')
end
find_contact_officer_title() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 253
# Title sits in child 3 of table row 3.
def find_contact_officer_title
  find_value_of_field(@contact_officer_table[3].children[3])
end
find_contact_officers() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 189
# Gathers the contact officers from every div whose id starts with
# "contactDetails_pnl". A panel contributes either all rows of its grid or a
# single officer.
def find_contact_officers
  panels = @page.search('//div[starts-with(@id, "contactDetails_pnl")]')

  panels.flat_map do |panel|
    # The per-field finders read these two instance variables.
    @contact_officer_area  = panel
    @contact_officer_table = @contact_officer_area.at('table').children

    contains_contact_details_grid? ? find_contact_officer_grid : [find_contact_officer]
  end
end
find_location() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 102
# Collects the locations from the grid, visiting every result page in order
# when the grid is paginated.
def find_location
  return fetch_locations_from_current_page unless location_results_paginated?

  (1..total_pages).flat_map do |page_number|
    jump_to_page(page_number)
    fetch_locations_from_current_page
  end
end
find_name() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 56
# Stripped text of the institution-name label, or nil when it is absent.
def find_name
  find_value_of_field(@page.at('#institutionDetails_lblInstitutionName'))
end
find_postal_address() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 83
# Joins the stripped text nodes directly under the postal-address label with
# newlines; children that are not text nodes are skipped. Returns nil when
# the label is absent.
def find_postal_address
  address_node = @page.at('#institutionDetails_lblInstitutionPostalAddress')
  return unless address_node

  address_node.children
              .grep(Nokogiri::XML::Text)
              .map { |text_node| find_value_of_field(text_node) }
              .join("\n")
end
find_provider_code() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 46
# Stripped text of the provider-code label, or nil when it is absent.
def find_provider_code
  find_value_of_field(@page.at('#institutionDetails_lblProviderCode'))
end
find_total_capacity() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 66
# Capacity label converted with #to_i when is_number? accepts its text;
# nil otherwise (including when the label is absent).
def find_total_capacity
  raw_capacity = find_value_of_field(@page.at('#institutionDetails_lblLocationCapacity'))
  raw_capacity.to_i if is_number?(raw_capacity)
end
find_trading_name() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 51
# Stripped text of the trading-name label, or nil when it is absent.
def find_trading_name
  find_value_of_field(@page.at('#institutionDetails_lblInstitutionTradingName'))
end
find_type() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 61
# Stripped text of the institution-type label, or nil when it is absent.
def find_type
  find_value_of_field(@page.at('#institutionDetails_lblInstitutionType'))
end
find_value_of_field(field) click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 42
# Returns the whitespace-stripped text of +field+, or nil when +field+ is nil.
def find_value_of_field(field)
  field.text.strip unless field.nil?
end
find_website() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 78
# Stripped text of the web-address element, or nil when it is absent.
def find_website
  find_value_of_field(@page.at('#institutionDetails_hplInstitutionWebAddress'))
end
get_location_id(row_index) click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 147
# Resolves the LocationID for the grid row at +row_index+ by submitting the
# page's form as a click on that row and reading the id from the URI of the
# page that comes back.
#
# row_index - child index of the row in the locations grid; the click
#             argument is zero-based, hence the offset of 3.
#
# Returns the digits following "LocationID=" in the resulting URI as a
# String, or nil when the URI carries no such parameter.
#
# A Mechanize::ResponseCodeError triggers a 5 second pause and another
# attempt; there is no limit on the number of attempts.
def get_location_id(row_index)
  hidden_form = @page.form_with id: 'Form1'
  hidden_form['__EVENTTARGET'] = 'locationList$gridSearchResults'
  hidden_form['__EVENTARGUMENT'] = "click-#{row_index - 3}"

  begin
    course_page = hidden_form.submit(nil, { 'action' => 'get-location-id' })
  rescue Mechanize::ResponseCodeError
    sleep 5
    # Hand back the retry's result. Previously it was discarded and execution
    # fell through to course_page.uri with course_page still nil.
    return get_location_id(row_index)
  end

  course_page.uri.to_s[/LocationID=([0-9]+)/, 1]
end
institution_not_found?() click to toggle source

There is no dedicated "record not found" page; instead, a search page is returned.

# File lib/cricos_scrape/importer/institution_importer.rb, line 94
# True when the page body carries the "invalid Provider ID" notice.
def institution_not_found?
  invalid_id_notice = 'The Provider ID entered is invalid - please try another.'
  @page.body.include?(invalid_id_notice)
end
is_number?(text) click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 74
# Reports whether +text+ contains at least one digit.
#
# Note that this is not a strict numeric check: any text with a digit in it
# is accepted. nil is never a number.
#
# Returns true or false (previously the match index or nil leaked out).
def is_number?(text)
  !!(text =~ /\d/)
end
jump_to_page(page_number) click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 133
# Switches the locations grid to +page_number+ and stores the resulting page
# in @page. Does nothing (and returns @page) when that page is already shown.
#
# A Mechanize::ResponseCodeError triggers a 5 second pause followed by
# another attempt.
def jump_to_page(page_number)
  if page_number == current_pagination_page
    @page
  else
    form = @page.form_with id: 'Form1'
    form['__EVENTTARGET']   = 'locationList$gridSearchResults'
    form['__EVENTARGUMENT'] = "Page$#{page_number}"

    begin
      @page = form.submit(nil, { 'action' => 'change-location-page' })
    rescue Mechanize::ResponseCodeError
      sleep 5
      jump_to_page(page_number)
    end
  end
end
location_found?() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 98
# False when the page body states that the institution has no locations.
def location_found?
  empty_notice = 'No locations were found for the selected institution.'
  !@page.body.include?(empty_notice)
end
location_results_paginated?() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 121
# True when the locations grid has a pager element.
def location_results_paginated?
  pagination ? true : false
end
pagination() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 117
# First .gridPager element inside the locations grid, or whatever @page.at
# yields when there is none.
def pagination
  pager_selector = '#locationList_gridSearchResults .gridPager'
  @page.at(pager_selector)
end
total_pages() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 125
# Reads the page count out of the pager text ("Page X of Y").
# Yields 0 when the text does not have that shape.
def total_pages
  pager_text = pagination.children[1].text.strip
  pager_text[/^Page [0-9]+ of ([0-9]+).*/, 1].to_i
end
url() click to toggle source
# File lib/cricos_scrape/importer/institution_importer.rb, line 38
# Address of the institution page: INSTITUTION_URL with the provider id
# passed as the ProviderID query parameter.
def url
  format('%s?ProviderID=%s', INSTITUTION_URL, provider_id)
end