class Oddb2xml::Chapter70xtractor

Constants

LIMITATIONS

Public Class Methods

items() click to toggle source
# File lib/oddb2xml/chapter_70_hack.rb, line 31
# Returns the Hash filled by the most recent call to +parse+; its keys are
# the generated EAN13 strings and its values are the per-item attribute
# hashes built there.
#
# When +parse+ has not been called yet the class variable does not exist;
# instead of raising NameError this lazily initialises it to an empty Hash.
#
# @return [Hash] the item table (empty if nothing has been parsed yet)
def self.items
  @@items ||= {}
end
parse(html_file = "http://www.spezialitaetenliste.ch/varia_De.htm") click to toggle source
# File lib/oddb2xml/chapter_70_hack.rb, line 35
# Loads the HTML document at +html_file+ through Oddb2xml.uri_open, parses it
# with Ox in hash mode and rebuilds the class-level item table from it.
#
# Every sub-element found under the last value of the document's "body" is
# run through +parse_td+. Only rows that come back as an Array whose first
# cell is a String with an integer value above 100 are kept; that first cell
# is used as the pharmacode. For each kept row an entry is stored in
# @@items under Oddb2xml::FAKE_GTIN_START + pharmacode.
#
# Side effects: replaces Ox.default_options and resets @@items.
#
# @param html_file [String] location handed to Oddb2xml.uri_open
# @return [Array<Array>] the rows that were kept
def self.parse(html_file = "http://www.spezialitaetenliste.ch/varia_De.htm")
  # NOTE: this assigns Ox's default options, it is not scoped to this call.
  Ox.default_options = {
    mode: :generic,
    effort: :tolerant,
    smart: true
  }
  body = Ox.load(Oddb2xml.uri_open(html_file).read, mode: :hash_no_attrs).values.first["body"]
  rows = []
  idx = 0
  @@items = {}
  body.values.last.each do |item|
    item.values.first.each do |sub_elem|
      cells = parse_td(sub_elem)
      idx += 1
      puts "#{idx}: xx #{cells}" if $VERBOSE
      rows << cells
    end
  end
  # parse_td yields nil for elements it could not handle; those, and rows
  # whose first cell is not a number above 100, are dropped here.
  entries = rows.select { |row| row.is_a?(Array) && row.first.is_a?(String) && row.first.to_i > 100 }
  entries.each do |entry|
    pharma_code = entry.first
    ean13 = Oddb2xml::FAKE_GTIN_START + pharma_code.to_s
    german = entry[2]
    # Binary-tagged strings are re-tagged in place as ISO-8859-1 (so the
    # joined :line below sees the same string) and then CGI-unescaped.
    # A missing or non-String third cell is passed through untouched instead
    # of aborting the whole parse with a NoMethodError.
    if german.is_a?(String) && german.encoding == Encoding::BINARY
      german = CGI.unescape(german.force_encoding("ISO-8859-1"))
    end
    @@items[ean13] = {
      data_origin: "Chapter70",
      line: entry.join(","),
      ean13: ean13,
      description: german,
      quantity: entry[3],
      pharmacode: pharma_code,
      pub_price: entry[4],
      limitation: entry[5],
      type: :pharma
    }
  end
  entries
end
parse_td(elem) click to toggle source
# File lib/oddb2xml/chapter_70_hack.rb, line 7
# Flattens one parsed element into a flat list of cell values.
#
# +elem+ is used directly when it is an Array; otherwise its +values+ are
# taken. After flattening, each cell is handled as follows:
# * nil stays nil,
# * a Hash is replaced by its values,
# * anything else has every "\r\n" sequence removed and is stripped.
# The outcome is flattened once more before it is returned.
#
# Any StandardError raised while collecting the cells is reported on stdout
# and turned into a nil return value.
#
# @param elem [Array, Hash] element to convert
# @return [Array, nil] flat list of cells, or nil when +elem+ could not be handled
def self.parse_td(elem)
  cells = elem.is_a?(Array) ? elem : elem.values
  cleaned = cells.flatten.map do |cell|
    case cell
    when nil then nil
    when Hash then cell.values
    else cell.gsub(/\r\n/, "").strip
    end
  end
  puts "parse_td returns: #{cleaned}" if $VERBOSE
rescue => exc
  puts "Unable to pars #{elem} #{exc}"
  nil
else
  # Runs only when nothing was raised above; not covered by the rescue.
  cleaned.flatten
end