class RelatonOgc::DataFetcher
Public Class Methods
fetch(output: "data", format: "yaml")
click to toggle source
# File lib/relaton_ogc/data_fetcher.rb, line 56
#
# Build a fetcher for the given directory and format, run it, and print the
# start time, stop time and elapsed seconds to stdout.
#
# @param output [String] directory handed to the new instance; it is created
#   (with any missing parents) before the run starts
# @param format [String] output format handed to the new instance
# @return [nil] the result of the final +puts+
def self.fetch(output: "data", format: "yaml")
  started_at = Time.now
  # Wall-clock time may jump (NTP sync, manual change), so the duration is
  # measured on the monotonic clock; Time.now is kept only for the stamps.
  tick = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  puts "Started at: #{started_at}"
  # mkdir_p does nothing for an existing directory, so no Dir.exist? guard.
  FileUtils.mkdir_p output
  new(output, format).fetch
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - tick
  puts "Stopped at: #{Time.now}"
  puts "Done in: #{elapsed.round} sec."
end
new(output, format)
click to toggle source
Create a DataFetcher instance.
@param [String] output directory to save the documents
@param [String] format output format, "yaml" or "xml"
# File lib/relaton_ogc/data_fetcher.rb, line 48
#
# Store the target directory and format, derive the etag file path, and start
# with empty lists of seen and duplicated document ids.
#
# @param output [String] directory to save the documents; "etag.txt" is
#   located inside it
# @param format [String] output format
def initialize(output, format)
  @format = format
  @output = output
  @etagfile = File.join(output, "etag.txt")
  @docids = []
  @dupids = []
end
Public Instance Methods
fetch()
click to toggle source
# File lib/relaton_ogc/data_fetcher.rb, line 66
#
# Walk every entry yielded by +get_data+, parse it with Scrapper and hand the
# result to +write_document+. Entries whose "type" is "CC" are skipped.
#
# A failure on one entry is reported with +warn+ and does not stop the loop,
# but it prevents the etag from being assigned afterwards. Ids collected in
# +@dupids+ are reported once after the loop.
def fetch # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  get_data do |etag, json|
    clean_run = true
    json.each do |_key, entry|
      next if entry["type"] == "CC"

      write_document Scrapper.parse_page(entry)
    rescue StandardError => e
      # Remember the failure so the etag is not assigned for this run.
      clean_run = false
      warn "Fetching document: #{entry['identifier']}"
      warn "#{e.class} #{e.message}"
      warn e.backtrace
    end
    if @dupids.any?
      warn "[relaton-ogc] WARNING Duplicated documents: #{@dupids.uniq.join(', ')}"
    end
    self.etag = etag if clean_run
  end
end
write_document(bib)
click to toggle source
# File lib/relaton_ogc/data_fetcher.rb, line 85
#
# Serialize one document into the output directory, once per document id.
#
# The id of the first docidentifier is the key: an id seen before is recorded
# in +@dupids+ and nothing is written. The file name is the upcased id with
# whitespace, ":" and "." replaced by "_", plus the format as extension.
#
# @param bib [#docidentifier, #to_xml, #to_hash] document to write
# @return [Integer, nil] result of File.write, or nil for a duplicate
def write_document(bib) # rubocop:disable Metrics/AbcSize
  docid = bib.docidentifier[0].id
  if @docids.include?(docid)
    @dupids << docid
    return
  end

  @docids << docid
  basename = docid.upcase.gsub(/[\s:.]/, "_")
  path = "#{@output}/#{basename}.#{@format}"
  payload =
    if @format == "xml"
      bib.to_xml(bibdata: true)
    else
      bib.to_hash.to_yaml
    end
  File.write path, payload, encoding: "UTF-8"
end