module JobParser

Constants

CLEAN_SALARY_REGEX
JOB_TITLE_ID_REGEX
JOB_TITLE_WORDS

Words commonly used in job listings. Not sure if this is a good way to go, but I think it's worth a go; could scope this regex just to headers.

LOCATION_REGEX
NBSP
POSTCODE_REGEX
SALARY_GROUP_REGEX
SALARY_REGEX
SALARY_STRING_REGEX
SALARY_TITLE_REGEX
SALARY_UP_TO_REGEX
VACANCY_TITLE_REGEX
VERSION

Public Class Methods

cache() click to toggle source
# File lib/jobparser.rb, line 53
# Reader for the cache object held in the @cache instance variable of the
# receiver.
#
# @return [Object, nil] the current value of @cache (nil when never assigned)
class << self
  attr_reader :cache
end
config() click to toggle source
# File lib/jobparser.rb, line 49
# Reader for the configuration object held in the @config instance variable
# of the receiver.
#
# @return [Object, nil] the current value of @config (nil when never assigned)
class << self
  attr_reader :config
end
configure(opts = {}) click to toggle source
# File lib/jobparser.rb, line 57
# Overwrites existing configuration entries with the values given in +opts+.
#
# Each key is converted with +to_sym+ before lookup. Only keys that are
# already present in @config are updated; any other key is silently ignored,
# so this method never adds new settings.
#
# @param opts [Hash] option names (anything responding to +to_sym+) mapped to
#   their new values
# @return [Hash] the +opts+ argument itself
def self.configure(opts = {})
  opts.each do |key, val|
    name = key.to_sym
    # Hash#key? answers membership directly instead of allocating a keys
    # array for every option.
    @config[name] = val if @config.key?(name)
  end
end
parse(url) click to toggle source
# File lib/jobparser.rb, line 31
# Builds a parser object for the job listing at +url+.
#
# When JobParser.cache reports a valid entry for the URL, the cached result is
# fetched and its :schema flag picks the parser class; the parser is then
# built with +nil+ in place of the HTML. Otherwise the page is downloaded and
# ParseSchema is used if the body mentions the schema.org JobPosting type,
# ParseHtml if it does not.
#
# @param url [String] address of the job listing
# @return [ParseSchema, ParseHtml] parser instance for the listing
# @raise [JobParser::Error::InvalidUrl] if URI::InvalidURIError is raised
#   while fetching or constructing the parser on the uncached path
def self.parse(url)
  page_cache = JobParser.cache

  if page_cache.valid_for_url?(url)
    cached = page_cache.fetch_result_for_url(url)
    parser_class = cached[:schema] ? ParseSchema : ParseHtml
    return parser_class.new(nil, url)
  end

  begin
    # NOTE(review): :allow_redirections is presumably provided by an
    # open-uri extension loaded elsewhere -- confirm.
    body = open(URI.encode(url), :allow_redirections => :safe).read
    parser_class = body.include?("http://schema.org/JobPosting") ? ParseSchema : ParseHtml
    parser_class.new(body, url)
  rescue URI::InvalidURIError
    raise JobParser::Error::InvalidUrl, "The URI given (\"#{url}\") was not valid"
  end
end
parser(url) click to toggle source
# File lib/jobparser.rb, line 26
# Deprecated alias for JobParser.parse.
#
# Emits a deprecation notice and then delegates to JobParser.parse. The notice
# goes through Kernel#warn, so it is written to standard error rather than
# mixed into the program's regular output.
#
# @deprecated Use JobParser.parse instead.
# @param url [String] address of the job listing
# @return [Object] whatever JobParser.parse returns for +url+
def self.parser(url)
  warn "Warning: JobParser.parser is old. Use JobParser.parse"
  JobParser.parse(url)
end