module JobParser
Constants
- APPLY_LINK_REGEX
- CLEAN_SALARY_REGEX
- JOB_TITLE_ID_REGEX
- JOB_TITLE_WORDS
Words commonly used in job listings. Not sure if this is a good way to go, but I think it's worth a go. Could scope this regex just to headers.
- LOCATION_REGEX
- NBSP
- POSTCODE_REGEX
- SALARY_GROUP_REGEX
- SALARY_REGEX
- SALARY_STRING_REGEX
- SALARY_TITLE_REGEX
- SALARY_UP_TO_REGEX
- VACANCY_TITLE_REGEX
- VERSION
Public Class Methods
cache()
click to toggle source
# File lib/jobparser.rb, line 53
#
# Returns whatever is currently stored in the module-level @cache
# instance variable (nil if nothing has been assigned).
#
# NOTE(review): the assignment of @cache is not shown here; JobParser.parse
# calls valid_for_url? and fetch_result_for_url on the returned object, so
# it is presumably the result cache set up elsewhere in lib/jobparser.rb.
def self.cache
  @cache
end
config()
click to toggle source
# File lib/jobparser.rb, line 49
#
# Returns whatever is currently stored in the module-level @config
# instance variable (nil if nothing has been assigned).
#
# NOTE(review): the assignment of @config is not shown here; JobParser.configure
# indexes it by symbol key, so it is presumably a Hash of settings.
def self.config
  @config
end
configure(opts = {})
click to toggle source
# File lib/jobparser.rb, line 57
#
# Overrides existing configuration settings.
#
# Each key in +opts+ is converted with +to_sym+; its value is written to
# @config only when that symbol is already a key of @config. Keys that are
# not already present are ignored without any error.
#
# opts - Hash mapping setting names (anything responding to +to_sym+) to
#        their new values. Defaults to an empty Hash, which changes nothing.
#
# Returns +opts+ (the return value of +each+).
def self.configure(opts = {})
  opts.each do |key, val|
    setting = key.to_sym
    # Hash#key? is a direct lookup; keys.include? built a new Array per pair.
    @config[setting] = val if @config.key?(setting)
  end
end
parse(url)
click to toggle source
# File lib/jobparser.rb, line 31
#
# Builds a parser object for the job listing at +url+.
#
# When JobParser.cache reports the URL as valid, nothing is downloaded: the
# cached result's :schema entry selects ParseSchema or ParseHtml, and the
# parser is constructed with nil in place of the HTML.
#
# Otherwise the URL is escaped and fetched, and the parser is chosen by
# whether the body contains the string "http://schema.org/JobPosting".
#
# url - String URL of the job listing.
#
# Returns a ParseSchema or ParseHtml instance.
# Raises JobParser::Error::InvalidUrl when fetching raises URI::InvalidURIError.
def self.parse(url)
  if JobParser.cache.valid_for_url?(url)
    cached = JobParser.cache.fetch_result_for_url(url)
    return cached[:schema] ? ParseSchema.new(nil, url) : ParseHtml.new(nil, url)
  end

  begin
    # URI.encode was removed in Ruby 3.0; this is the call it delegated to.
    escaped_url = URI::DEFAULT_PARSER.escape(url)
    # Kernel#open no longer opens URLs on Ruby 3.0+, so prefer URI.open when
    # open-uri provides it and fall back to Kernel.open on older Rubies.
    opener = URI.respond_to?(:open) ? URI : Kernel
    # Block form so the opened IO is closed as soon as the body has been read.
    # The :allow_redirections option is passed through exactly as before.
    html = opener.open(escaped_url, :allow_redirections => :safe, &:read)

    if html.include?("http://schema.org/JobPosting")
      ParseSchema.new(html, url)
    else
      ParseHtml.new(html, url)
    end
  rescue URI::InvalidURIError
    raise JobParser::Error::InvalidUrl, "The URI given (\"#{url}\") was not valid"
  end
end
parser(url)
click to toggle source
# File lib/jobparser.rb, line 26
#
# Deprecated alias for JobParser.parse.
#
# Emits a deprecation notice and then delegates to JobParser.parse with the
# same argument.
#
# url - String URL, forwarded unchanged to JobParser.parse.
#
# Returns whatever JobParser.parse returns.
def self.parser(url)
  # Diagnostics go to stderr so they do not mix with the program's own output.
  warn "Warning: JobParser.parser is old. Use JobParser.parse"
  JobParser.parse(url)
end