class JobIteration::CsvEnumerator

CsvEnumerator makes it possible to write an Iteration job that uses a CSV file as the collection to iterate over. @example

# Example job hook: opens the CSV file and returns the row enumerator,
# passing the job's cursor through to CsvEnumerator#rows.
def build_enumerator(cursor:)
  # Options are passed as keyword arguments; on Ruby 3+ a positional Hash
  # would bind to CSV.open's +mode+ parameter instead of being read as options.
  csv = CSV.open('tmp/files', converters: :integer, headers: true)
  JobIteration::CsvEnumerator.new(csv).rows(cursor: cursor)
end

# Example per-iteration hook; the body is elided in this example.
def each_iteration(row)
  ...
end

Public Class Methods

new(csv) click to toggle source

Constructs a CsvEnumerator instance based on a CSV file. @param [CSV] csv A CSV object @return [JobIteration::CsvEnumerator] @example

# CSV options are passed as keyword arguments (a positional Hash is not read as options on Ruby 3+).
csv = CSV.open('tmp/files', converters: :integer, headers: true)
JobIteration::CsvEnumerator.new(csv).rows(cursor: cursor)
# File lib/job-iteration/csv_enumerator.rb, line 22
def initialize(csv)
  unless csv.instance_of?(CSV)
    raise ArgumentError, "CsvEnumerator.new takes CSV object"
  end

  @csv = csv
end

Public Instance Methods

batches(batch_size:, cursor:) click to toggle source

Constructs an enumerator on batches of CSV rows @return [Enumerator] Enumerator instance

# File lib/job-iteration/csv_enumerator.rb, line 41
# Builds an enumerator over the CSV rows grouped into slices of +batch_size+.
# Each element is a +[batch, batch_index]+ pair produced by each_slice followed
# by each_with_index.
#
# @param batch_size [Integer] number of rows per batch
# @param cursor [Integer, nil] index of the last batch already processed; nil starts at the first batch
# @return [Enumerator] enumerator whose +size+ is the number of batches in the whole
#   file, or nil when the row count cannot be determined
def batches(batch_size:, cursor:)
  @csv.lazy
    .each_slice(batch_size)
    .each_with_index
    .drop(count_of_processed_rows(cursor))
    .to_enum do
      # count_of_rows_in_file is nil when there is no file to count; report an
      # unknown size (nil) rather than 0, matching what #rows reports.
      total_rows = count_of_rows_in_file
      (total_rows.to_f / batch_size).ceil if total_rows
    end
end
rows(cursor:) click to toggle source

Constructs an enumerator on CSV rows @return [Enumerator] Enumerator instance

# File lib/job-iteration/csv_enumerator.rb, line 32
# Builds an enumerator over +[row, index]+ pairs of the CSV, skipping the
# leading pairs that count_of_processed_rows reports for +cursor+.
#
# @param cursor [Integer, nil] index of the last row already processed; nil starts at the first row
# @return [Enumerator] enumerator whose +size+ is whatever count_of_rows_in_file returns
def rows(cursor:)
  indexed_rows = @csv.lazy.each_with_index
  already_done = count_of_processed_rows(cursor)

  remaining = indexed_rows.drop(already_done)
  remaining.to_enum { count_of_rows_in_file }
end

Private Instance Methods

count_of_processed_rows(cursor) click to toggle source
# File lib/job-iteration/csv_enumerator.rb, line 67
# Translates a cursor into the number of leading items to skip.
#
# @param cursor [Integer, nil] nil when nothing has been processed yet
# @return [Integer] 0 for a nil cursor, otherwise +cursor + 1+
def count_of_processed_rows(cursor)
  return 0 if cursor.nil?

  cursor + 1
end
count_of_rows_in_file() click to toggle source
# File lib/job-iteration/csv_enumerator.rb, line 51
# Counts the rows of the file behind +@csv+ by running +wc -l+ on it, and
# subtracts one when the CSV reports headers.
#
# @return [Integer, nil] the row count, or nil when +@csv+ has no path or the
#   file at that path is not readable
def count_of_rows_in_file
  # TODO: Remove rescue for NoMethodError when Ruby 2.6 is no longer supported.
  begin
    filepath = @csv.path
  rescue NoMethodError
    return
  end

  # Behaviour of CSV#path changed in Ruby 2.6.3 (returns nil instead of raising NoMethodError)
  return unless filepath
  # Without a readable file there is nothing to count; report the size as unknown.
  return unless File.readable?(filepath)

  # Run wc directly with the file as its stdin instead of going through a shell:
  # an interpolated path containing spaces or shell metacharacters would
  # otherwise be split or interpreted by the shell.
  count = IO.popen(["wc", "-l"], in: filepath, &:read).to_i
  count -= 1 if @csv.headers
  count
end