class MiniHarvest::MiniHarvest

Attributes

from[RW]
initial_res[RW]
initial_token[RW]
logger[RW]
marc_collection[RW]
oai_base_uri[RW]
set[RW]

Public Class Methods

new(oai_base_uri, set, from) click to toggle source
# File lib/miniharvest.rb, line 13
# Sets up a harvest against an OAI-PMH endpoint and issues the first
# ListRecords request.
#
# @param oai_base_uri base URI of the OAI-PMH endpoint (passed to oai_request)
# @param set value sent as the `set` request parameter
# @param from value sent as the `from` request parameter
#
# After construction, `initial_res` holds the parsed first response and
# `initial_token` holds what get_resumption_token returned for it.
def initialize(oai_base_uri, set, from)
  @oai_base_uri = oai_base_uri
  @set = set
  @from = from
  # Self-closed root element: the seed document is well-formed instead of
  # relying on the parser's error recovery to close an open tag.
  @marc_collection = Nokogiri::XML('<collection xmlns="http://www.loc.gov/MARC21/slim"/>')
  # The logger must exist before the first oai_request call, which logs the query.
  @logger = Logger.new(STDOUT)

  params = { :verb => 'ListRecords', :set => set, :metadataPrefix => 'marc21', :from => from }
  @initial_res = oai_request(oai_base_uri, params)
  @initial_token = get_resumption_token(@initial_res)
end

Public Instance Methods

append_to_collection(records) click to toggle source
# File lib/miniharvest.rb, line 25
# Adds the given records as children of the root element of the
# accumulated MARC collection document.
#
# @param records node(s) handed to `add_child` on the collection root
# @return whatever `add_child` returns
def append_to_collection(records)
  collection_root = marc_collection.root
  collection_root.add_child(records)
end
dedupe_records(node) click to toggle source
# File lib/miniharvest.rb, line 62
# Removes duplicate records from +node+, keeping the first occurrence.
#
# Only the direct child elements of +node+ are compared (by their serialized
# XML). Descendants are deliberately not inspected: comparing every nested
# node would unlink fields or text that merely repeat a value already seen
# in a different record, corrupting otherwise distinct records.
#
# @param node the element whose child elements are de-duplicated
# @return the same +node+, with duplicate children unlinked
def dedupe_records(node)
  return node unless node

  seen = {}
  node.element_children.each do |child|
    serialized = child.to_xml
    if seen.key?(serialized)
      child.unlink
    else
      seen[serialized] = true
    end
  end
  node
end
get_records(token) click to toggle source
# File lib/miniharvest.rb, line 69
# Follows resumption tokens page by page, letting process_request collect
# each page's records, then returns the de-duplicated collection root.
#
# Harvesting stops when there is no token (+false+, as returned by
# get_resumption_token / process_request) or when the token text is blank,
# so no request is sent with an empty resumptionToken.
#
# Implemented as a loop rather than recursion so that the number of pages
# does not grow the call stack.
#
# @param token a resumptionToken node set (first element responds to `text`),
#   or +false+ when there is nothing more to fetch
# @return the result of dedupe_records on the collection root
def get_records(token)
  loop do
    token_value = token ? token[0].text.to_s : ''
    break if token_value.strip.empty?

    params = { :verb => 'ListRecords', :resumptionToken => token_value }
    doc = oai_request(oai_base_uri, params)
    token = process_request(doc)
  end

  dedupe_records(marc_collection.root)
end
get_resumption_token(doc) click to toggle source
# File lib/miniharvest.rb, line 42
# Looks up the resumptionToken element(s) in a parsed response.
#
# @param doc a parsed response that responds to `xpath`
# @return the matched node set when exactly one resumptionToken element is
#   found, otherwise +false+
def get_resumption_token(doc)
  matches = doc.xpath('//resumptionToken')
  matches.length == 1 ? matches : false
end
oai_request(uri,params) click to toggle source
# File lib/miniharvest.rb, line 29
# Performs an HTTP GET against +uri+ with +params+ as the query string and
# returns the response body parsed as XML with namespaces removed.
#
# Parameters whose value is nil are left out of the query: encode_www_form
# would otherwise emit them as bare keys (e.g. "set&from"), which is not a
# usable value for the server.
#
# @param uri the endpoint URI (anything accepted by Kernel#URI)
# @param params request parameters to encode into the query string
# @return the parsed document, after `remove_namespaces!`
def oai_request(uri, params)
  oai_uri = URI(uri)
  query_params = params.reject { |_key, value| value.nil? }
  oai_uri.query = URI.encode_www_form(query_params)
  @logger.info(oai_uri.query)

  res = Net::HTTP.get_response(oai_uri)
  # The body is still parsed on failure (best effort), but the failure is no
  # longer silent.
  unless res.is_a?(Net::HTTPSuccess)
    @logger.warn("OAI request returned HTTP #{res.code} #{res.message}")
  end

  doc = Nokogiri::XML(res.body)
  # Namespaces are stripped so that unprefixed XPath queries match.
  doc.remove_namespaces!
  doc
end
process_request(doc) click to toggle source
# File lib/miniharvest.rb, line 51
# Collects the records found in a parsed response and reports whether
# there is a further page to fetch.
#
# Records are appended regardless of whether the response carries a
# resumptionToken, so a response without one (for example a result that
# fits in a single page) still contributes its records.
#
# @param doc a parsed response that responds to `xpath`
# @return the value of get_resumption_token for +doc+: the token node set,
#   or +false+ when there is none
def process_request(doc)
  records = doc.xpath('//record')
  # Nothing to add for responses that contain no records.
  append_to_collection(records) unless records.empty?
  get_resumption_token(doc)
end