class JobParser::SpecialCases

Public Class Methods

call_special_case(key, method, doc)
# File lib/jobparser/specialcases.rb, line 68
# Runs one site-specific extractor against a document.
#
# key    - dictionary key identifying the site's rule set.
# method - field name within that rule set (e.g. :title).
# doc    - the document handed to the extractor unchanged.
#
# Returns whatever the selected extractor returns. Raises NoMethodError
# when the key or the field is not present in the dictionary, because the
# lookup then yields nil.
def self.call_special_case(key, method, doc)
  site_rules = self.dictionary[key]
  extractor = site_rules[method]
  extractor.call(doc)
end
case_for_url(url)
# File lib/jobparser/specialcases.rb, line 72
# Finds the rule set for the first dictionary key contained in the URL.
#
# url - String to search; a key matches when it occurs anywhere in it
#       (plain substring test, not a host comparison).
#
# Returns the Hash of extractors for the first matching key, in dictionary
# order, or false when no key matches.
def self.case_for_url(url)
  # dictionary builds a fresh Hash (and fresh Procs) on every call, so it is
  # invoked once here and walked as key/value pairs instead of being
  # re-invoked inside the loop.
  self.dictionary.each do |key, site_rules|
    return site_rules if url.include?(key)
  end
  false
end
dictionary()
# File lib/jobparser/specialcases.rb, line 4
# Builds the table of site-specific extraction rules.
#
# Each key is a fragment matched against a job URL; each value maps a field
# name (Symbol) to a Proc that takes the parsed document and returns that
# field. The document only needs to respond to css / at_css and yield nodes
# with content / next_element (presumably a Nokogiri document - confirm
# against the caller).
#
# A new Hash with new Procs is built on every call.
#
# String-valued extractors return "" when the expected element is missing;
# :salary returns nil when no range can be read.
def self.dictionary
  # Text of the element following the <dt> whose content is exactly +label+
  # in the job summary list, or nil when there is no such <dt>.
  summary_value = lambda do |doc, label|
    term = doc.css(".jobViewSummary dl dt").find { |dt| dt.content == label }
    term && term.next_element.content
  end

  # Kept in a local so :salary can reuse it directly instead of rebuilding
  # the whole dictionary to look it up again.
  direct_gov_salary_string = Proc.new { |doc|
    raw = summary_value.call(doc, "Salary")
    raw.nil? ? "" : Cleaner.remove_nbsp(raw)
  }

  {
    "jobsearch.direct.gov.uk" => {
      :title => Proc.new { |doc|
        headings = doc.css(".jobViewContent h2")
        # Prefer the second heading when present, otherwise the first.
        heading = headings[1] || headings[0]
        heading ? heading.content : ""
      },
      :location => Proc.new { |doc|
        summary_value.call(doc, "Location") || ""
      },
      :salary_string => direct_gov_salary_string,
      :salary => Proc.new { |doc|
        # Tokenise whole figures first. Matching two captures with a single
        # regexp let one figure be split by backtracking ("25,000" became
        # 2500 and 0) and let bare punctuation count as a number.
        figures = direct_gov_salary_string.call(doc).to_s.scan(/\d[\d,]*(?:\.\d+)?/)
        if figures.length >= 2
          # [low, high] as Integers; thousands separators dropped, any
          # fractional part truncated by to_i.
          figures.first(2).map { |figure| figure.delete(",").to_i }
        end
      }
    },
    "bfi.org.uk" => {
      :deadline => Proc.new { |doc|
        deadline = ""
        # Every paragraph is checked; if several match, the last one wins.
        doc.css(".block-container>p").each do |paragraph|
          found = /The closing date for this position is (.+)\./.match(paragraph.content)
          deadline = found[1] if found
        end
        deadline
      }
    },
    "nationalmuseums.org" => {
      :title => Proc.new { |doc|
        heading = doc.at_css("h3")
        heading ? heading.content.gsub("[pdf]", "").strip : ""
      }
    }
  }
end