class LucaRecord::Dict
Public Class Methods
load(file = @filename)
click to toggle source
load dictionary data
# File lib/luca_record/dict.rb, line 69
# Load dictionary data from +file+, choosing the parser by file extension.
#
# '.tsv' and '.csv' files go through load_tsv_dict; '.yaml' and '.yml'
# files are read with YAML.load_file. In both cases the path is resolved
# with dict_path first. Any other extension raises a RuntimeError.
def self.load(file = @filename)
  extension = File.extname(file)
  return load_tsv_dict(dict_path(file)) if %w[.tsv .csv].include?(extension)
  return YAML.load_file(dict_path(file), **{}) if %w[.yaml .yml].include?(extension)

  raise 'cannot load this filetype'
end
load_tsv_dict(path)
click to toggle source
Generate a dictionary from a TSV file. The minimum assumptions are as follows: the 1st row (the header row) is converted to symbols.
- row[0] is 'code'. It is used as the hash key.
- row[1] is 'label'. It should be a human-readable label.
- row[2] and later can hold app-specific data.
# File lib/luca_record/dict.rb, line 87
# Build a dictionary Hash from the tab-separated file at +path+.
#
# The first line is treated as the header row. For every following row the
# first column becomes the dictionary key, and the remaining non-nil fields
# are stored in a Hash keyed by their header converted to a Symbol.
#
# Returns a Hash of { first_column_value => { header_sym => field, ... } }.
def self.load_tsv_dict(path)
  table = CSV.read(path, headers: true, col_sep: "\t", encoding: 'UTF-8')
  table.each_with_object({}) do |row, dict|
    entry = {}
    # Skip the key column by position rather than by header name, so a later
    # column that repeats the first header is not dropped by mistake.
    row.drop(1).each do |header, field|
      # A row longer than the header line yields fields with a nil header;
      # they have no usable key, so they are ignored instead of raising.
      next if header.nil? || field.nil?

      entry[header.to_sym] = field
    end
    dict[row[0]] = entry
  end
end
new(file = @filename)
click to toggle source
# File lib/luca_record/dict.rb, line 15
# Resolve +file+ to a dictionary path through the class-level dict_path
# helper, remember it in @path, then load the data via set_driver.
def initialize(file = @filename)
  resolved = self.class.dict_path(file)
  @path = resolved
  set_driver
end
validate(filename, target_key = :label)
click to toggle source
# File lib/luca_record/dict.rb, line 102
# Report dictionary entries whose +target_key+ value is nil.
#
# Loads +filename+ with load, prints a summary to stdout, and returns nil
# when every entry has a value, or the number of offending keys otherwise.
def self.validate(filename, target_key = :label)
  missing = []
  load(filename).each do |code, entry|
    # nil codes are left out of the report, as they would add nothing to it.
    missing << code if entry[target_key].nil? && !code.nil?
  end

  unless missing.empty?
    puts "Key #{missing.join(', ')} has nil #{target_key}."
    return missing.count
  end

  puts 'No error detected.'
  nil
end
Private Class Methods
dict_path(filename)
click to toggle source
# File lib/luca_record/dict.rb, line 127
# Build the Pathname of +filename+ under the 'dict' directory of
# LucaSupport::PJDIR.
def self.dict_path(filename)
  base_dir = Pathname(LucaSupport::PJDIR)
  base_dir + 'dict' + filename
end
reverse(dict)
click to toggle source
# File lib/luca_record/dict.rb, line 131
# Build a reverse lookup Hash of { label => code } from +dict+, where each
# value of +dict+ is expected to respond to [] with :label.
#
# Entries without a label are skipped, matching definitions_lazyload;
# otherwise they would all collapse under a single nil key. When several
# codes share a label, the last one wins.
def self.reverse(dict)
  dict.each_with_object({}) do |(code, entry), by_label|
    label = entry[:label]
    by_label[label] = code if label
  end
end
Public Instance Methods
dig(*args)
click to toggle source
Search with unique code.
# File lib/luca_record/dict.rb, line 40
# Look up a value in the loaded dictionary data by following +args+ as a
# chain of keys; delegates directly to @data.dig.
def dig(*args)
  @data.dig(*args)
end
hash2multiassign(obj, main_key = 'label', options: nil)
click to toggle source
Separate the main item from the other options. If options is given as an Array of Strings, it works as a safe-list filter.
# File lib/luca_record/dict.rb, line 47
# Split +obj+ into its main value and a Hash of the remaining attributes.
#
# obj      - Hash to split.
# main_key - key whose value is returned as the first element.
# options  - optional safe list; when given, only keys it includes are kept.
#
# Returns [obj[main_key], extras], where extras has symbolized keys and no
# nil values.
def hash2multiassign(obj, main_key = 'label', options: nil)
  extras = obj.each_with_object({}) do |(key, value), collected|
    next if key == main_key
    next unless options.nil? || options.include?(key)

    collected[key.to_sym] = value
  end
  # nil values are dropped after collection, not during it, so a later nil
  # for the same symbolized key still removes that key.
  [obj[main_key], extras.compact]
end
load_csv(path) { |row| ... }
click to toggle source
Load CSV with config options
# File lib/luca_record/dict.rb, line 61
# Read the CSV file at +path+ with a header row and yield each row to the
# given block.
#
# The file's encoding is taken from @config['encoding'], defaulting to
# 'utf-8', and the content is transcoded to UTF-8.
def load_csv(path)
  # @config is nil when the loaded dictionary has no 'config' section, so it
  # is read with safe navigation instead of raising NoMethodError.
  source_encoding = @config&.dig('encoding') || 'utf-8'
  CSV.read(path, headers: true, encoding: "#{source_encoding}:utf-8").each do |row|
    yield row
  end
end
search(word, default_word = nil, main_key: 'label', options: nil)
click to toggle source
Search code with n-gram word. If dictionary has Hash or Array, it returns [label, options].
# File lib/luca_record/dict.rb, line 23
# Find the best-matching definition for +word+.
#
# All whitespace is stripped from +word+ before it is scored by
# max_score_code. When the best score is below 0.4, or there is nothing to
# score against, +default_word+ is returned.
#
# A Hash result is split by hash2multiassign into [main value, options];
# an Array result gets the same treatment per element; anything else is
# returned unchanged. +main_key+ and +options+ are passed on to
# hash2multiassign.
def search(word, default_word = nil, main_key: 'label', options: nil)
  definitions_lazyload
  res, score = max_score_code(word.gsub(/[[:space:]]/, ''))
  # max_score_code returns nil when there are no definitions, which leaves
  # score nil; treat that as "no match" instead of comparing nil with 0.4.
  return default_word if score.nil? || score < 0.4

  case res
  when Hash
    hash2multiassign(res, main_key, options: options)
  when Array
    res.map { |item| hash2multiassign(item, main_key, options: options) }
  else
    res
  end
end
Private Instance Methods
definitions_lazyload()
click to toggle source
Build Reverse dictionary for TSV data
# File lib/luca_record/dict.rb, line 123
# Build the reverse dictionary ({ label => code }) from @data on first use
# and keep it in @definitions. Does nothing when @definitions is already
# set. Entries without a :label are left out.
def definitions_lazyload
  return @definitions if @definitions

  by_label = {}
  @data.each do |code, entry|
    label = entry[:label]
    by_label[label] = code if label
  end
  @definitions = by_label
end
max_score_code(str)
click to toggle source
# File lib/luca_record/dict.rb, line 135
# Score every key of @definitions against +str+ with match_score and return
# the best [value, score] pair, or nil when @definitions is empty.
# NOTE(review): the trailing 2 passed to match_score is presumably the
# n-gram size -- confirm against match_score.
def max_score_code(str)
  scored = @definitions.map { |label, code| [code, match_score(str, label, 2)] }
  scored.max { |left, right| left[1] <=> right[1] }
end
set_driver()
click to toggle source
# File lib/luca_record/dict.rb, line 115
# Load the dictionary at @path through the class-level load method and
# cache the result in @data, along with its 'config' and 'definitions'
# entries in @config and @definitions.
def set_driver
  loaded = self.class.load(@path)
  @data = loaded
  @config = loaded['config']
  @definitions = loaded['definitions']
end