class MiniHarvest::MiniHarvest
Attributes
from[RW]
initial_res[RW]
initial_token[RW]
logger[RW]
marc_collection[RW]
oai_base_uri[RW]
set[RW]
Public Class Methods
new(oai_base_uri, set, from)
click to toggle source
# File lib/miniharvest.rb, line 13
# Stores the harvest settings, creates an empty MARC collection document and
# a STDOUT logger, then issues the first ListRecords request.
#
# The parsed first response is kept in +initial_res+ and the result of
# get_resumption_token for that response in +initial_token+. Records in the
# first response are not appended to +marc_collection+ here.
#
# @param oai_base_uri base URL of the endpoint; handed to oai_request
# @param set value sent as the +set+ request argument
# @param from value sent as the +from+ request argument
def initialize(oai_base_uri, set, from)
  @oai_base_uri = oai_base_uri
  @set = set
  @from = from
  # Self-closed root so the seed document is well-formed XML rather than
  # depending on the parser recovering from a missing end tag.
  @marc_collection = Nokogiri::XML('<collection xmlns="http://www.loc.gov/MARC21/slim"/>')
  # The logger has to exist before the first request: oai_request logs via @logger.
  @logger = Logger.new(STDOUT)
  params = {
    :verb => 'ListRecords',
    :set => self.set,
    :metadataPrefix => 'marc21',
    :from => self.from
  }
  @initial_res = oai_request(self.oai_base_uri, params)
  @initial_token = get_resumption_token(@initial_res)
end
Public Instance Methods
append_to_collection(records)
click to toggle source
# File lib/miniharvest.rb, line 25
# Attaches +records+ beneath the root element of +marc_collection+.
#
# @param records node(s) to add; passed unchanged to +add_child+
# @return whatever +add_child+ returns
def append_to_collection(records)
  collection_root = marc_collection.root
  collection_root.add_child(records)
end
dedupe_records(node)
click to toggle source
# File lib/miniharvest.rb, line 62
# Removes repeated child elements from +node+, keeping the first occurrence
# of each. Two children count as duplicates when their +to_xml+ output is
# identical.
#
# Only the direct child elements of +node+ are compared. Comparing every
# descendant would also unlink identical fields or text that legitimately
# appear inside different records.
#
# @param node the container whose child elements are de-duplicated in place
# @return the same +node+ that was passed in
def dedupe_records(node)
  seen = {}
  # to_a takes a snapshot so unlinking cannot disturb the iteration.
  node.element_children.to_a.each do |record|
    serialized = record.to_xml
    if seen.key?(serialized)
      record.unlink
    else
      seen[serialized] = true
    end
  end
  node
end
get_records(token)
click to toggle source
# File lib/miniharvest.rb, line 69
# Follows resumption tokens page by page, starting from +token+, and returns
# the de-duplicated collection root once no further page is available.
#
# Each page is fetched with oai_request and handed to process_request, whose
# return value becomes the next token. The walk is a loop rather than
# recursion so that long harvests do not grow the call stack per page.
#
# @param token a token set as returned by get_resumption_token; its first
#   element must respond to +text+
# @return the result of dedupe_records on +marc_collection.root+
def get_records(token)
  current = token
  loop do
    token_value = current[0].text
    # OAI-PMH closes a list with an empty resumptionToken; there is nothing
    # left to request at that point.
    break if token_value.strip.empty?

    params = {
      :verb => 'ListRecords',
      :resumptionToken => token_value
    }
    doc = oai_request(oai_base_uri, params)
    current = process_request(doc)
    break unless current
  end
  dedupe_records(marc_collection.root)
end
get_resumption_token(doc)
click to toggle source
# File lib/miniharvest.rb, line 42
# Looks up +resumptionToken+ elements anywhere in +doc+.
#
# @param doc a document that responds to +xpath+
# @return the xpath result when exactly one element matched, otherwise +false+
def get_resumption_token(doc)
  matches = doc.xpath('//resumptionToken')
  return false unless matches.length == 1

  matches
end
oai_request(uri,params)
click to toggle source
# File lib/miniharvest.rb, line 29
# Performs an HTTP GET against +uri+ with +params+ encoded as the query
# string, logs that query string at info level, and parses the response body
# as XML.
#
# @param uri the endpoint address; converted with +URI()+
# @param params request arguments; encoded with +URI.encode_www_form+
# @return the parsed document with its namespaces removed
def oai_request(uri, params)
  endpoint = URI(uri)
  endpoint.query = URI.encode_www_form(params)
  @logger.info(endpoint.query)
  response = Net::HTTP.get_response(endpoint)
  # remove_namespaces! lets later xpath queries use bare element names.
  Nokogiri::XML(response.body).tap(&:remove_namespaces!)
end
process_request(doc)
click to toggle source
# File lib/miniharvest.rb, line 51
# Appends every +record+ element found in +doc+ to the collection and
# reports the resumption token for the next page.
#
# Records are appended whether or not a token is present, so a page that
# ends the list without a resumptionToken still contributes its records.
#
# @param doc a parsed response that responds to +xpath+
# @return the value of get_resumption_token for +doc+ (a token set, or
#   +false+ when there is none)
def process_request(doc)
  token = get_resumption_token(doc)
  records = doc.xpath('//record')
  # Nothing to attach for pages without records (e.g. error responses).
  append_to_collection(records) unless records.empty?
  token
end