class Oddb2xml::Chapter70xtractor
Constants
- LIMITATIONS
Public Class Methods
items()
click to toggle source
# File lib/oddb2xml/chapter_70_hack.rb, line 31
# Returns the class-level @@items collection exactly as it is currently stored.
#
# The only assignment visible in this file is in .parse, which resets it to a
# Hash and fills it with one entry per product row, keyed by ean13.
#
# NOTE(review): if nothing has assigned @@items before this is called, Ruby
# raises NameError for an uninitialized class variable -- confirm whether the
# class body initializes it.
def self.items
  @@items
end
parse(html_file = "http://www.spezialitaetenliste.ch/varia_De.htm")
click to toggle source
# File lib/oddb2xml/chapter_70_hack.rb, line 35
# Loads the HTML document at +html_file+, extracts its table rows and records
# one item per product row in @@items (keyed by ean13).
#
# html_file - location handed to Oddb2xml.uri_open; defaults to the
#             "varia_De.htm" page of spezialitaetenliste.ch.
#
# Side effects:
# * replaces Ox.default_options (a setting on the Ox module itself, so it
#   outlives this call),
# * resets @@items to an empty Hash before filling it,
# * prints every parsed row to stdout when $VERBOSE is set.
#
# Returns the Array of rows that were turned into items. A row qualifies when
# parse_td returned an Array whose first cell is a String with an integer
# value above 100 (presumably this skips header/filler rows -- TODO confirm).
def self.parse(html_file = "http://www.spezialitaetenliste.ch/varia_De.htm")
  Ox.default_options = {mode: :generic, effort: :tolerant, smart: true}

  # Read the whole document, then release the handle; the original left it open.
  io = Oddb2xml.uri_open(html_file)
  html = begin
    io.read
  ensure
    io.close if io.respond_to?(:close)
  end
  body = Ox.load(html, mode: :hash_no_attrs).values.first["body"]

  @@items = {}
  rows = []
  body.values.last.each do |item|
    item.values.first.each do |sub_elem|
      row = Chapter70xtractor.parse_td(sub_elem)
      rows << row
      puts "#{rows.size}: xx #{row}" if $VERBOSE
    end
  end

  # parse_td returns nil for rows it could not handle; those are dropped here too.
  products = rows.select do |row|
    row.is_a?(Array) && row.first.is_a?(String) && row.first.to_i > 100
  end

  products.each do |entry|
    pharma_code = entry.first
    ean13 = Oddb2xml::FAKE_GTIN_START + pharma_code.to_s

    # Short rows have no third cell; keep the description as-is (nil) in that
    # case instead of raising NoMethodError and aborting the whole parse.
    raw = entry[2]
    german = if raw.is_a?(String) && raw.encoding == Encoding::ASCII_8BIT
      # force_encoding relabels the cell in place, so the entry itself (used
      # for :line below and returned to the caller) sees the new encoding too.
      CGI.unescape(raw.force_encoding("ISO-8859-1"))
    else
      raw
    end

    @@items[ean13] = {
      data_origin: "Chapter70",
      line: entry.join(","),
      ean13: ean13,
      description: german,
      quantity: entry[3],
      pharmacode: pharma_code,
      pub_price: entry[4],
      limitation: entry[5],
      type: :pharma
    }
  end

  products
end
parse_td(elem)
click to toggle source
# File lib/oddb2xml/chapter_70_hack.rb, line 7
# Turns one parsed element into a flat Array of cell values.
#
# elem - an Array of values, or any object responding to #values (e.g. a Hash).
#
# The values are flattened; each String has every "\r\n" removed and is
# stripped, nil is kept as nil, and a Hash is replaced by its values.
#
# NOTE: values pulled out of a nested Hash are spliced into the result as they
# are -- they are not cleaned themselves.
#
# Returns the flat Array, or nil when the element could not be processed (for
# example when +elem+ is neither an Array nor responds to #values). In that
# case a diagnostic line is printed to stdout instead of raising.
def self.parse_td(elem)
  values = elem.is_a?(Array) ? elem : elem.values
  cells = values.flatten.map do |value|
    case value
    when nil then nil
    when Hash then value.values
    else value.gsub(/\r\n/, "").strip
    end
  end
  puts "parse_td returns: #{cells}" if $VERBOSE
  cells.flatten
rescue => exc
  # Best effort by design: report the unusable element and let the caller skip it.
  puts "Unable to parse #{elem} #{exc}"
  nil
end