class RelatonIana::DataFetcher

Constants

SOURCE

Public Class Methods

fetch(output: "data", format: "yaml") click to toggle source

Initialize fetcher and run fetch

@param [String] output directory to save files, default: “data” @param [String] format format of output files (xml, yaml, bibxml), default: yaml

# File lib/relaton_iana/data_fetcher.rb, line 24
#
# Build a fetcher and run it, printing start/stop timestamps and the
# elapsed time (rounded to whole seconds) to standard output.
#
# @param [String] output directory to save files; created when missing
# @param [String] format format of output files (xml, yaml, bibxml)
#
def self.fetch(output: "data", format: "yaml")
  started_at = Time.now
  puts "Started at: #{started_at}"
  FileUtils.mkdir_p(output) unless Dir.exist?(output)
  new(output, format).fetch
  stopped_at = Time.now
  elapsed = (stopped_at - started_at).round
  puts "Stopped at: #{stopped_at}"
  puts "Done in: #{elapsed} sec."
end
new(output, format) click to toggle source

Data fetcher initializer

@param [String] output directory to save files @param [String] format format of output files (xml, yaml, bibxml)

# File lib/relaton_iana/data_fetcher.rb, line 11
#
# Data fetcher initializer
#
# @param [String] output directory to save files
# @param [String] format format of output files (xml, yaml, bibxml)
#
def initialize(output, format)
  @output, @format = output, format
  # File extension is the format name without a leading "bib" ("bibxml" -> "xml").
  @ext = format.sub(/^bib/, "")
  # Paths written so far; consulted to detect file name collisions.
  @files = []
end

Public Instance Methods

fetch(page = 1) click to toggle source

Parse documents

@param [Integer] page page number

# File lib/relaton_iana/data_fetcher.rb, line 39
#
# Parse documents
#
# Queries the GitHub code-search API for XML files in the
# ietf-ribose/iana-registries repository (100 results per page), passes the
# URL of every hit to #fetch_doc, and recurses to the next page while
# "total_count" says more results remain.
#
# A request is tried up to four times, sleeping 30 seconds between tries,
# for as long as the response lacks an "items" key.
#
# @param [Integer] page page number
#
# @raise [StandardError] if the last response carries a "message", or has
#   no "items" at all
#
def fetch(page = 1) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  params = {
    q: "repo:ietf-ribose/iana-registries extension:xml",
    page: page, per_page: 100
  }
  token = ENV["GITHUB_TOKEN"]
  # Stays nil (no extra headers) when no token is configured.
  headers = { "Authorization" => "token #{token}" } if token
  attempt = 0
  json = {}
  until attempt > 3 || json["items"]
    if attempt.positive?
      warn "Rate limit is reached. Retrying in 30 sec."
      sleep 30
    end
    attempt += 1
    resp = Faraday.get "https://api.github.com/search/code", params, headers
    json = JSON.parse resp.body
  end
  raise StandardError, json["message"] if json["message"]

  # Without this guard a response with neither "items" nor "message" would
  # surface as an opaque NoMethodError on nil.
  items = json["items"]
  raise StandardError, "GitHub search response for page #{page} has no items" unless items

  items.each do |item|
    fetch_doc URI.join(SOURCE, item["path"]).to_s
  end
  # A missing "total_count" is treated as 0, i.e. no further pages.
  fetch(page + 1) if (json["total_count"].to_i - (page * 100)).positive?
end
fetch_doc(url) click to toggle source

Fetch document

@param [String] url url of document

# File lib/relaton_iana/data_fetcher.rb, line 71
#
# Fetch document
#
# Downloads +url+; when the response code is "200" and the body contains
# "<registry", parses the root registry element, saves it, then parses and
# saves each directly nested registry with the root document passed as the
# second argument to Parser.parse. Any StandardError is reported with
# +warn+ instead of being propagated.
#
# @param [String] url url of document
#
def fetch_doc(url) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  response = Net::HTTP.get_response URI(url)
  return unless response.code == "200"
  return unless response.body.include? "<registry"

  root = Nokogiri::XML(response.body).at("/xmlns:registry")
  parent_doc = Parser.parse root
  save_doc parent_doc
  root.xpath("./xmlns:registry").each do |nested|
    save_doc Parser.parse(nested, parent_doc)
  end
rescue StandardError => e
  warn "Error: #{e.message}. URL: #{url}"
end
file_name(bib) click to toggle source

Generate file name

@param [RelatonW3c::W3cBibliographicItem] bib bibliographic item

@return [String] file name

# File lib/relaton_iana/data_fetcher.rb, line 117
#
# Generate file name
#
# Whitespace, commas, colons and slashes in the document number become
# underscores, runs of underscores are collapsed to one, and the result is
# placed in the output directory with the configured extension.
#
# @param [#docnumber] bib bibliographic item
#
# @return [String] file name
#
def file_name(bib)
  docnumber = bib.docnumber
  safe_name = docnumber.gsub(/[\s,:\/]/, "_").squeeze("_")
  basename = "#{safe_name}.#{@ext}"
  File.join(@output, basename)
end
save_doc(bib) click to toggle source

Save document to file

@param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item

# File lib/relaton_iana/data_fetcher.rb, line 93
#
# Save document to file
#
# Serialises +bib+ according to the configured format and writes it to the
# path given by #file_name. If that path was already written by this
# fetcher a warning is printed, and the file is overwritten anyway.
#
# @param [#docnumber, nil] bib bibliographic item; nothing happens when nil
#
def save_doc(bib) # rubocop:disable Metrics/MethodLength
  return unless bib

  content =
    case @format
    when "xml"
      bib.to_xml(bibdata: true)
    when "yaml"
      bib.to_hash.to_yaml
    else
      # Any other format maps to a serialiser named after it, e.g. to_bibxml.
      bib.send("to_#{@format}")
    end
  path = file_name(bib)
  if @files.include?(path)
    warn "File #{path} already exists. Document: #{bib.docnumber}"
  else
    @files.push(path)
  end
  File.write(path, content, encoding: "UTF-8")
end