# Reads +html_file+ through Oddb2xml.uri_open, parses it with Ox and records
# every extracted row that starts with a pharmacode in the class-level
# @@items registry.
#
# Side effects:
# * overwrites Ox.default_options (generic mode, tolerant effort, smart);
# * resets @@items to an empty Hash and refills it, keyed by the generated
#   EAN13 (Oddb2xml::FAKE_GTIN_START followed by the pharmacode).
#
# @param html_file [String] location handed to Oddb2xml.uri_open; defaults to
#   the varia_De.htm page on www.spezialitaetenliste.ch
# @return [Array<Array>] the rows returned by Chapter70xtractor.parse_td whose
#   first element is a String converting to an integer greater than 100
def self.parse(html_file = "http://www.spezialitaetenliste.ch/varia_De.htm")
  Ox.default_options = {
    mode: :generic,
    effort: :tolerant,
    smart: true
  }
  body = Ox.load(Oddb2xml.uri_open(html_file).read, mode: :hash_no_attrs).values.first["body"]
  @@items = {}

  # NOTE(review): presumably the last child of <body> holds the table rows and
  # each row's first value holds its cells - confirm against the page layout.
  result = []
  body.values.last.each do |item|
    item.values.first.each do |sub_elem|
      what = Chapter70xtractor.parse_td(sub_elem)
      result << what
      # result.size is the running 1-based count of parsed elements.
      puts "#{result.size}: xx #{what}" if $VERBOSE
    end
  end

  # Keep only rows whose first cell is a String with a numeric value above 100.
  rows = result.select { |x| x.is_a?(Array) && x.first.is_a?(String) && x.first.to_i > 100 }

  rows.each do |entry|
    pharma_code = entry.first
    ean13 = Oddb2xml::FAKE_GTIN_START + pharma_code
    # Binary-tagged descriptions are relabelled as ISO-8859-1 and unescaped.
    # force_encoding changes entry[2] in place, so this must happen before
    # entry.join below, exactly as it always did.
    german = if entry[2].encoding == Encoding::ASCII_8BIT
      CGI.unescape(entry[2].force_encoding("ISO-8859-1"))
    else
      entry[2]
    end
    @@items[ean13] = {
      data_origin: "Chapter70",
      line: entry.join(","),
      ean13: ean13,
      description: german,
      quantity: entry[3],
      pharmacode: pharma_code,
      pub_price: entry[4],
      limitation: entry[5],
      type: :pharma
    }
  end
  rows
end