class LucaRecord::Dict

Public Class Methods

load(file = @filename) click to toggle source

load dictionary data

# File lib/luca_record/dict.rb, line 69
# Load dictionary data, picking the loader from the file extension.
#
# '.tsv' and '.csv' files go through load_tsv_dict; '.yaml' and '.yml'
# files go through YAML.load_file. In both cases the path handed to the
# loader is dict_path(file).
#
# @param file [String, Pathname] dictionary file name (defaults to @filename)
# @return [Object] whatever the selected loader returns
# @raise [RuntimeError] when the extension is none of the four above
def self.load(file = @filename)
  extension = File.extname(file)
  if ['.tsv', '.csv'].include?(extension)
    load_tsv_dict(dict_path(file))
  elsif ['.yaml', '.yml'].include?(extension)
    YAML.load_file(dict_path(file), **{})
  else
    raise 'cannot load this filetype'
  end
end
load_tsv_dict(path) click to toggle source

Generate a dictionary from a TSV file. The minimum assumptions are as follows: the 1st row (header) is converted to symbols.

  • The first column is 'code'. Its values become the hash keys

  • A 'label' column should hold human-readable labels

  • Any further columns can hold app-specific data

# File lib/luca_record/dict.rb, line 87
# Build a dictionary Hash from a tab-separated file with a header row.
#
# For every data row the first field becomes the dictionary key. Each
# remaining non-nil field is stored in that key's entry under its header
# converted to a Symbol.
#
# @param path [String, Pathname] TSV file to read (read as UTF-8)
# @return [Hash] first-column value => Hash of Symbol header => field
def self.load_tsv_dict(path)
  table = CSV.read(path, headers: true, col_sep: "\t", encoding: 'UTF-8')
  table.each_with_object({}) do |row, dict|
    entry = {}
    row.each do |header, field|
      # row.index(header) is 0 for the first column, which serves as the key.
      next if row.index(header).zero? || field.nil?

      entry[header.to_sym] = field
    end
    dict[row[0]] = entry
  end
end
new(file = @filename) click to toggle source
# File lib/luca_record/dict.rb, line 15
# Resolve the dictionary file path and load its contents.
#
# @param file [String] dictionary file name handed to the class-level
#   dict_path (defaults to @filename)
def initialize(file = @filename)
  # Keep the path returned by the class-level dict_path.
  @path = self.class.dict_path(file)
  # set_driver reads @path and fills in the remaining instance state.
  set_driver
end
validate(filename, target_key = :label) click to toggle source
# File lib/luca_record/dict.rb, line 102
# Report dictionary entries whose target_key value is nil.
#
# Loads the dictionary via load(filename), collects the keys of the
# offending entries and prints a summary line to standard output.
#
# @param filename [String] dictionary file name passed to load
# @param target_key [Symbol] entry key that must not be nil (default :label)
# @return [Integer, nil] number of offending keys, or nil when there are none
def self.validate(filename, target_key = :label)
  missing = load(filename).each_with_object([]) do |(code, entry), found|
    found << code if entry[target_key].nil? && !code.nil?
  end

  if missing.empty?
    puts 'No error detected.'
    return nil
  end

  puts "Key #{missing.join(', ')} has nil #{target_key}."
  missing.count
end

Private Class Methods

dict_path(filename) click to toggle source
# File lib/luca_record/dict.rb, line 127
# Build the path of a dictionary file: <LucaSupport::PJDIR>/dict/<filename>.
#
# @param filename [String, Pathname] file name to append to the dict directory
# @return [Pathname]
def self.dict_path(filename)
  dict_dir = Pathname(LucaSupport::PJDIR) / 'dict'
  dict_dir / filename
end
reverse(dict) click to toggle source
# File lib/luca_record/dict.rb, line 131
# Invert a dictionary so that each entry's :label points back to its key.
#
# When several entries share a label, the last one wins.
#
# @param dict [Hash] key => entry responding to [:label]
# @return [Hash] label => key
def self.reverse(dict)
  dict.each_with_object({}) do |(code, entry), by_label|
    by_label[entry[:label]] = code
  end
end

Public Instance Methods

dig(*args) click to toggle source

Search with unique code.

# File lib/luca_record/dict.rb, line 40
# Search with unique code.
#
# Forwards all arguments to @data.dig and returns its result.
#
# @param args [Array] keys forwarded to @data.dig
# @return [Object] the result of @data.dig
def dig(*args)
  @data.dig(*args)
end
hash2multiassign(obj, main_key = 'label', options: nil) click to toggle source

Separate the main item from the other options. If options is specified as an Array of Strings, it works as a safelist filter.

# File lib/luca_record/dict.rb, line 47
# Split a hash into its main value and a hash of the remaining items.
#
# Every key other than main_key is converted to a Symbol. When options is
# given, only keys included in it are kept. Items whose value is nil are
# dropped from the result.
#
# @param obj [Hash] source hash
# @param main_key [Object] key of the main item (default 'label')
# @param options [#include?, nil] safelist of keys to keep; nil keeps all
# @return [Array] two elements: obj[main_key] and the Hash of remaining items
def hash2multiassign(obj, main_key = 'label', options: nil)
  extras = obj.each_with_object({}) do |(key, value), kept|
    next if key == main_key
    next unless options.nil? || options.include?(key)

    kept[key.to_sym] = value
  end
  [obj[main_key], extras.compact]
end
load_csv(path) { |row| ... } click to toggle source

Load CSV with config options

# File lib/luca_record/dict.rb, line 61
# Load a CSV file that has a header row and yield each data row.
#
# The source encoding comes from the 'encoding' entry of @config and falls
# back to 'utf-8' when that entry, or @config itself, is missing. Rows are
# transcoded to UTF-8.
#
# @param path [String] CSV file to read
# @yieldparam row [CSV::Row] one data row
# @return [CSV::Table] the table read from path
def load_csv(path)
  # @config may be nil (set_driver assigns it from @data['config']), so use
  # safe navigation rather than calling dig on nil.
  source_encoding = @config&.dig('encoding') || 'utf-8'
  CSV.read(path, headers: true, encoding: "#{source_encoding}:utf-8").each do |row|
    yield row
  end
end

Private Instance Methods

definitions_lazyload() click to toggle source

Build Reverse dictionary for TSV data

# File lib/luca_record/dict.rb, line 123
# Build (once) the reverse dictionary from @data: label => key.
#
# Returns the existing @definitions when it is already set. Otherwise
# walks @data, keeps the entries that have a truthy :label, stores the
# resulting Hash in @definitions and returns it.
#
# @return [Object] the memoized @definitions
def definitions_lazyload
  return @definitions if @definitions

  @definitions = @data.each_with_object({}) do |(code, entry), by_label|
    label = entry[:label]
    by_label[label] = code if label
  end
end
max_score_code(str) click to toggle source
# File lib/luca_record/dict.rb, line 135
# Find the @definitions entry that scores highest against str.
#
# Each key of @definitions is scored with match_score(str, key, 2) and
# paired with the corresponding value.
#
# @param str [String] text passed to match_score as its first argument
# @return [Array, nil] the [value, score] pair with the greatest score,
#   or nil when @definitions is empty
def max_score_code(str)
  scored = @definitions.map { |label, code| [code, match_score(str, label, 2)] }
  scored.max { |left, right| left[1] <=> right[1] }
end
set_driver() click to toggle source
# File lib/luca_record/dict.rb, line 115
# Load the dictionary at @path and populate the instance state.
#
# Sets @data to the loaded content, and @config / @definitions to its
# 'config' and 'definitions' entries.
def set_driver
  loaded = self.class.load(@path)
  @data = loaded
  @config = loaded['config']
  @definitions = loaded['definitions']
end