class JobParser::ParseSchema
Constants
- EXTRA_SCHEMA_TEXT_FIELDS
Public Class Methods
new(html, from_url)
click to toggle source
Calls superclass method
JobParser::Parser::new
# File lib/jobparser/parseschema.rb, line 26
# Builds a schema parser for +html+ that was fetched from +from_url+.
#
# "span" is added to Parser::ACCEPTED_ELEMENTS before delegating to the
# superclass constructor. That list is a constant, so it outlives any single
# instance; "span" is therefore appended only when it is not already present,
# instead of growing the list by one duplicate per constructed parser.
#
# html     - passed through unchanged to the superclass constructor.
# from_url - passed through unchanged to the superclass constructor.
def initialize(html, from_url)
  accepted = Parser::ACCEPTED_ELEMENTS
  accepted.push("span") unless accepted.include?("span")
  super(html, from_url)
end
Public Instance Methods
job()
click to toggle source
Calls superclass method
JobParser::Parser#job
# File lib/jobparser/parseschema.rb, line 31
# Returns the superclass's job hash with +:schema+ set to true.
#
# When the hash is not flagged +:from_cache+, every name in
# EXTRA_SCHEMA_TEXT_FIELDS is passed through +underscore+, the matching
# +job_<name>+ method is invoked via +send+, and each value that is neither
# nil nor empty is stored under the underscored symbol. The hash is finally
# handed to +cache+, whose return value is the result of this method.
def job
  details = super
  details[:schema] = true
  # Cached results are passed straight to +cache+ without the extra lookups.
  return cache(details) if details[:from_cache]

  EXTRA_SCHEMA_TEXT_FIELDS.each do |schema_field|
    key = underscore(schema_field).to_sym
    value = send("job_#{key}")
    next if value.nil? || value.empty?

    details[key] = value
  end
  cache(details)
end
Private Instance Methods
apply_link()
click to toggle source
# File lib/jobparser/parseschema.rb, line 57
# Builds a Facets::Apply from this parser's @doc, @url and @plain_text and
# returns whatever its +parse+ call produces.
def apply_link
  apply_facet = Facets::Apply.new(@doc, @url, @plain_text)
  apply_facet.parse
end
deadline()
click to toggle source
# File lib/jobparser/parseschema.rb, line 88
# Performs no lookup; always returns nil.
def deadline
  nil
end
does_use_schema?()
click to toggle source
# File lib/jobparser/parseschema.rb, line 91
# Returns true when any element selected by @doc.css("*") has an +itemtype+
# attribute equal to the schema.org JobPosting URL, false otherwise.
def does_use_schema?
  job_posting_type = "http://schema.org/JobPosting"
  @doc.css("*").any? do |node|
    node['itemtype'] == job_posting_type
  end
end
find_with_itemprop(prop)
click to toggle source
# File lib/jobparser/parseschema.rb, line 107
# Returns the first element selected by @doc.css("*") whose +itemprop+
# attribute equals +prop+, or nil when no element matches.
#
# +find+ stops at the first match, so the remaining elements are not visited
# and no intermediate array of all matches is built.
#
# prop - the itemprop value to look for.
def find_with_itemprop(prop)
  @doc.css("*").find { |elem| elem['itemprop'] == prop }
end
get_content_at_prop(prop)
click to toggle source
# File lib/jobparser/parseschema.rb, line 97
# Looks up the element for +prop+ with find_with_itemprop and returns its
# content after Cleaner.strip_all_white_space has been applied. Returns ""
# when no such element exists.
#
# prop - the itemprop value to look for.
def get_content_at_prop(prop)
  node = find_with_itemprop(prop)
  return "" unless node

  Cleaner.strip_all_white_space(node.content)
end
is_content_at_prop?(prop)
click to toggle source
# File lib/jobparser/parseschema.rb, line 102
# Returns true when get_content_at_prop yields a non-empty string for +prop+,
# false otherwise (including when no element carries that itemprop).
#
# The decision is made on the same cleaned text that get_content_at_prop
# returns, so the two helpers cannot disagree: an element whose raw content
# is non-empty but whose cleaned content is empty no longer counts as having
# content.
# NOTE(review): presumably that is the whitespace-only case, going by the
# name Cleaner.strip_all_white_space - confirm against Cleaner.
#
# prop - the itemprop value to look for.
def is_content_at_prop?(prop)
  !get_content_at_prop(prop).empty?
end
job_location()
click to toggle source
# File lib/jobparser/parseschema.rb, line 65
# Returns the job's location as a string.
#
# When any element has the schema.org PostalAddress itemtype, the non-empty
# values of streetAddress, addressLocality, addressRegion, addressCountry and
# postalCode are joined with ", " in that order. Otherwise the
# addressLocality content is returned, falling back to the jobLocation
# content when addressLocality is missing or empty.
def job_location
  uses_postal_address = @doc.css("*").any? do |elem|
    elem['itemtype'] == "http://schema.org/PostalAddress"
  end

  if uses_postal_address
    fields = %w{ streetAddress addressLocality addressRegion addressCountry postalCode }
    fields.map { |field| get_content_at_prop(field) }.reject(&:empty?).join(", ")
  else
    # some sites don't use the address stuff properly
    # The fallback is decided on the cleaned text, so an addressLocality that
    # cleans down to "" still falls through to jobLocation.
    locality = get_content_at_prop("addressLocality")
    locality.empty? ? get_content_at_prop("jobLocation") : locality
  end
end
job_postcode()
click to toggle source
# File lib/jobparser/parseschema.rb, line 84
# Returns the text in job_location matched by POSTCODE_REGEX, or nil when the
# location contains no match.
def job_postcode
  found = POSTCODE_REGEX.match(job_location)
  found ? found[0] : nil
end
job_salary()
click to toggle source
# File lib/jobparser/parseschema.rb, line 46
# Removes everything matching CLEAN_SALARY_REGEX from job_salary_string and
# matches the remainder against SALARY_GROUP_REGEX. Returns a two-element
# array holding capture groups 1 and 2 converted with +to_i+, or nil when the
# cleaned string does not match.
def job_salary
  cleaned = job_salary_string.gsub(CLEAN_SALARY_REGEX, "")
  groups = SALARY_GROUP_REGEX.match(cleaned)
  groups && [groups[1].to_i, groups[2].to_i]
end
job_salary_string()
click to toggle source
# File lib/jobparser/parseschema.rb, line 61
# Returns what get_content_at_prop yields for the "baseSalary" itemprop.
def job_salary_string
  salary_prop = "baseSalary"
  get_content_at_prop(salary_prop)
end
job_title()
click to toggle source
# File lib/jobparser/parseschema.rb, line 53
# Returns what get_content_at_prop yields for the "title" itemprop.
def job_title
  title_prop = "title"
  get_content_at_prop(title_prop)
end