class DataModeler::Framework

Base class, core of the DataModeler framework.

Attributes

config[R]
data[R]
inputs[R]
model[R]
nruns[R]
out_dir[R]
targets[R]
test_size[R]
train_size[R]
tset_gen[R]

Public Class Methods

new(config) click to toggle source

@param config [Hash] configuration hash for the whole experiment setup

# File lib/data_modeler/framework.rb, line 14
# Sets up the whole experiment from a configuration hash: reads the dataset
# settings, loads the data, prepares the output directory, then builds the
# training-set generator and the model.
#
# @param config [Hash] configuration hash for the whole experiment setup
# @note `config` is modified in place: the series names are converted to
#   symbols, `:nruns` gets a default when missing, and `:save_models` and
#   `:input_file` are removed from `config[:data]`.
def initialize config
  @config = config
  tset = config[:tset]
  @inputs = tset[:input_series].map!(&:to_sym)
  @targets = tset[:target_series].map!(&:to_sym)
  @train_size = tset[:train_size]
  @test_size = tset[:test_size]
  @nruns = tset[:nruns] ||= Float::INFINITY # terminates with data
  # Removed from the hash: what is left of `config[:data]` is later handed
  # to `prepare_output` as keyword arguments.
  @save_models = config[:data].delete(:save_models)

  # `load_data` reads `inputs` and `targets`, so it must run after they are set.
  @data = load_data(config[:data].delete(:input_file))
  # Explicit `**`: Ruby 3 no longer converts a positional Hash into keyword
  # arguments, so passing the bare hash raises ArgumentError there.
  @out_dir = prepare_output(**config[:data])

  @tset_gen = DataModeler::Dataset::Generator.new(data, **opts_for(:dataset_gen))
  @model = DataModeler::Model.selector(**opts_for(:learner))
end

Public Instance Methods

run(report_interval: 1000) click to toggle source

Main control: up to `nruns` (or until end of data) loop train-test-save @param report_interval [Integer] interval at which to print to stdout

(in number of generations) -- will be passed to the `Model`

@return [void] @note saves model, preds and obs to the file system at the end of each run

# File lib/data_modeler/framework.rb, line 35
# Main control loop: for up to `nruns` runs (or until the generator reports
# that no data is left) resets the model, trains it, tests it and saves the
# results of the run.
#
# @param report_interval [Integer] progress is printed to stdout only when
#   this is a positive number; the value is also forwarded to `model.train`
# @return [void]
def run report_interval: 1000
  verbose = report_interval && report_interval > 0
  run_total = nruns == Float::INFINITY ? "" : "/#{nruns}"
  puts "\nStarting @ #{Time.now}\n#{self}" if verbose

  1.upto(nruns) do |nrun|
    train_set =
      begin
        tset_gen.train(nrun)
      rescue DataModeler::Dataset::Generator::NoDataLeft
        break # there's not enough data left for a train+test set pair
      end
    label = "Run #{nrun}#{run_total}"

    puts "\n#{label} -- starting @ #{Time.now}" if verbose
    model.reset

    puts "-Training" if verbose
    model.train(train_set, report_interval: report_interval)

    puts "-Testing" if verbose
    times, test_input, observations = tset_gen.test(nrun).values
    predictions = model.test(test_input)

    puts "-Saving" if verbose
    save_run(nrun, model, [times, predictions, observations])
    puts "#{label} -- ending @ #{Time.now}" if verbose
  end

  puts "\nDone! @ #{Time.now}" if verbose
end
save_models?() click to toggle source

Attribute reader for instance variable `@save_models`, ending in '?' since it's a boolean value. @return [true|false] value of instance variable @save_models

(false if nil/uninitialized)
# File lib/data_modeler/framework.rb, line 63
# Reader for `@save_models`, named with a trailing '?' as it is used as a flag.
#
# @return [Object, false] the value of `@save_models`, or `false` when it is
#   nil (or otherwise falsy)
def save_models?
  return false unless @save_models

  @save_models
end
to_s() click to toggle source

@return [String]

# File lib/data_modeler/framework.rb, line 68
# String form of the framework: the string form of its configuration.
#
# @return [String]
def to_s
  "#{config}"
end

Private Instance Methods

load_data(filename) click to toggle source

Loads the data in a Hash ready for `DatasetGen` (and `Dataset`) @param filename [String/fname] path to the file containing the data (from `config`) @return [Hash] the data ready for access

# File lib/data_modeler/framework.rb, line 77
# Reads a CSV file into a Hash mapping each series name to the list of its
# values, one per row. Only the `:time`, input and target series are kept.
#
# @param filename [String, Pathname] path to the CSV file holding the data
# @return [Hash{Symbol => Array}] column values by series name
# @note aborts the program if the file extension is not `.csv`
def load_data filename
  path = Pathname.new(filename)
  abort "Only CSV data for now, sorry" unless path.extname == '.csv'

  # avoid loading data we won't use
  wanted = [:time].concat(inputs, targets)
  columns = Hash.new { |hash, key| hash[key] = [] }
  CSV.foreach(path, headers: true, header_converters: :symbol, converters: :float) do |row|
    wanted.each { |name| columns[name] << row[name] }
  end
  columns
end
opts_for(whom) click to toggle source

Compatibility helper, preparing configuration hashes for different classes @param whom [Symbol] which class are you preparing the config for @return [Hash] configuration for the class as required

# File lib/data_modeler/framework.rb, line 102
# Compatibility helper: builds the options hash expected by each of the
# collaborating classes out of the experiment configuration.
#
# @param whom [Symbol] one of `:dataset_gen`, `:dataset` or `:learner`
# @return [Hash] the options for the requested class
# @note aborts the program when `whom` is not recognized
def opts_for whom
  tset = config[:tset]
  case whom
  when :dataset
    {
      inputs: inputs,
      targets: targets,
      ninput_points: tset[:ninput_points],
      tspread: tset[:tspread],
      look_ahead: tset[:look_ahead]
    }
  when :dataset_gen
    # the generator receives the dataset options nested under `:ds_args`
    {
      ds_args: opts_for(:dataset),
      train_size: tset[:train_size],
      test_size: tset[:test_size]
    }
  when :learner
    # the learner config is extended (not modified) with the network sizes
    config[:learner].merge(
      ninputs: tset[:ninput_points] * inputs.size,
      noutputs: targets.size
    )
  else
    abort "Unrecognized `whom`: '#{whom}'"
  end
end
prepare_output(results_dir:, exp_id:) click to toggle source

Prepares a directory to hold the output of each run @param results_dir [String/path] directory where to save the results (from `config`) @param exp_id [String/fname] id of current config/experiment (from `config`) @return [Pathname] path of the output directory @note side effect: creates directories on file system to hold output

# File lib/data_modeler/framework.rb, line 95
# Creates (if needed) the directory that holds the output of each run.
#
# @param results_dir [String, Pathname] directory where results are saved
# @param exp_id [String] id of the experiment, used as sub-directory name
# @return [Pathname] path of the experiment's output directory
# @note side effect: creates directories on the file system
def prepare_output results_dir:, exp_id:
  out_path = Pathname.new(results_dir).join(exp_id)
  FileUtils.mkdir_p(out_path)
  out_path
end
save_run(nrun, model, tpredobs) click to toggle source

Save a run's results on the file system @param nrun [Integer] the current run number (used as id for naming) @param model [Model] the model trained in the current run @param tpredobs [Array<Array>] the lists of times, predictions and observations, in this order @return [void] @note side effect: saves model and tpredobs to file system

# File lib/data_modeler/framework.rb, line 131
# Saves the results of a run in the output directory: the model (only when
# `save_models?` is set) and a CSV file with one row per time step.
#
# @param nrun [Integer] the current run number, used to name the files
# @param model [#save] the model trained in the current run
# @param tpredobs [Array<Array>] three parallel lists: times, predictions
#   and observations
# @note side effect: writes `model_NN.sav` and `tpredobs_NN.csv` in `out_dir`
def save_run nrun, model, tpredobs
  run_id = '%02d' % nrun
  model.save(out_dir.join("model_#{run_id}.sav")) if save_models?
  CSV.open(out_dir.join("tpredobs_#{run_id}.csv"), 'wb') do |csv|
    # header: time, then all prediction columns, then all observation columns
    predicted = targets.map { |name| "p_#{name}" }
    observed = targets.map { |name| "o_#{name}" }
    csv << ['time', *predicted, *observed]
    # regroup the three parallel lists into one flat row per time step
    tpredobs.transpose.each { |row| csv << row.flatten }
  end
end