class Rover::Vector

Constants

TYPE_CAST_MAPPING

If a user never specifies types, the defaults are bool, float, int, and object. Keep these simple.

We could create aliases for float64, int64, and uint64. If so, `type` should still return the simple type.

Public Class Methods

new(data, type: nil) click to toggle source
# File lib/rover/vector.rb, line 24
# Builds a vector from +data+, optionally cast to +type+ (see cast_data).
# Raises ArgumentError when the cast data is not one-dimensional.
def initialize(data, type: nil)
  @data = cast_data(data, type: type)
  return if @data.ndim == 1

  raise ArgumentError, "Bad size: #{@data.shape}"
end

Public Instance Methods

!() click to toggle source
# File lib/rover/vector.rb, line 142
# Logical negation. Only implemented when the underlying data is a
# Numo::Bit; any other data raises a RuntimeError.
def !
  raise "Not implemented yet" unless @data.is_a?(Numo::Bit)

  # a bit equal to 0 becomes true, everything else false
  Vector.new(@data.eq(0))
end
-@() click to toggle source
# File lib/rover/vector.rb, line 150
# Unary minus: returns the result of multiplying this vector by -1.
def -@
  self * -1
end
[](v) click to toggle source
# File lib/rover/vector.rb, line 81
# Element access.
# When +v+ is a Vector it is used as a mask over the underlying data and a
# new Vector is returned; anything else is passed straight to the
# underlying data's [].
def [](v)
  return @data[v] unless v.is_a?(Vector)

  Vector.new(v.to_numo.mask(@data))
end
[]=(k, v) click to toggle source
# File lib/rover/vector.rb, line 89
# Element assignment. A Vector key is unwrapped to its underlying data
# before being used to index the underlying data.
def []=(k, v)
  index = k.is_a?(Vector) ? k.to_numo : k
  @data[index] = v
end
abs() click to toggle source
# File lib/rover/vector.rb, line 195
# Returns a new Vector wrapping the result of `abs` on the underlying data.
def abs
  Vector.new(@data.abs)
end
all?(&block) click to toggle source
# File lib/rover/vector.rb, line 245
# Forwards the optional block to `all?` on the result of to_a.
def all?(&block)
  to_a.all?(&block)
end
any?(&block) click to toggle source
# File lib/rover/vector.rb, line 249
# Forwards the optional block to `any?` on the result of to_a.
def any?(&block)
  to_a.any?(&block)
end
clamp(min, max) click to toggle source
# File lib/rover/vector.rb, line 159
# Returns the result of calling clamp! with +min+ and +max+ on a `dup`
# of this vector.
def clamp(min, max)
  dup.clamp!(min, max)
end
clamp!(min, max) click to toggle source
# File lib/rover/vector.rb, line 154
# Replaces the underlying data with `@data.clip(min, max)` and returns self.
def clamp!(min, max)
  @data = @data.clip(min, max)
  self
end
count()
Alias for: size
crosstab(other) click to toggle source
# File lib/rover/vector.rb, line 278
# Cross-tabulates this vector against +other+.
# Returns a DataFrame whose "_" column holds this vector's sorted unique
# values and which has one count column per sorted unique value of +other+.
def crosstab(other)
  row_labels = uniq.sort
  # value => row position in the "_" column
  row_for = row_labels.to_a.each_with_index.to_h
  table = DataFrame.new({"_" => row_labels})
  # start every count column at zero
  other.uniq.sort.each { |column| table[column] = 0 }
  to_a.zip(other.to_a) do |row_value, column_value|
    table[column_value][row_for[row_value]] += 1
  end
  table
end
diff() click to toggle source

Keeps the same number of rows as the original to make it easy to add the result to the original data frame.

# File lib/rover/vector.rb, line 76
# Differences of the data after casting to Numo::DFloat, with a NaN
# inserted at position 0.
def diff
  floats = @data.cast_to(Numo::DFloat)
  deltas = floats.diff
  Vector.new(deltas.insert(0, Float::NAN))
end
each(&block) click to toggle source
# File lib/rover/vector.rb, line 199
# Forwards the block to `each` on the underlying data.
def each(&block)
  @data.each(&block)
end
each_with_index(&block) click to toggle source
# File lib/rover/vector.rb, line 203
# Forwards the block to `each_with_index` on the underlying data.
def each_with_index(&block)
  @data.each_with_index(&block)
end
first(n = 1) click to toggle source
# File lib/rover/vector.rb, line 257
# Returns a new Vector with the first +n+ elements (default 1).
# When +n+ is at least the size, the whole data is wrapped instead.
def first(n = 1)
  leading = n >= size ? @data : @data[0...n]
  Vector.new(leading)
end
head(n = 5) click to toggle source
# File lib/rover/vector.rb, line 291
# Returns the first +n+ elements (default 5) via first.
# A negative +n+ has the size added to it first, i.e. "all but the last -n".
def head(n = 5)
  count = n < 0 ? n + size : n
  first(count)
end
in?(values) click to toggle source
# File lib/rover/vector.rb, line 128
# Returns a Vector built from a Numo::Bit mask that marks the elements
# equal to any of +values+.
def in?(values)
  mask = Numo::Bit.new(size).fill(false)
  values.each do |candidate|
    matches =
      case candidate
      when Numeric, Numo::NArray
        @data.eq(candidate)
      else
        # fall back to Ruby's == for non-numeric candidates
        Numo::Bit.cast(@data.map { |element| element == candidate })
      end
    mask |= matches
  end
  Vector.new(mask)
end
inspect() click to toggle source

TODO add type and size?

# File lib/rover/vector.rb, line 316
# Short textual form showing at most the first five elements, followed by
# "..." when the vector is longer than that.
def inspect
  preview = first(5).to_a.map { |element| element.inspect }
  preview.push("...") if size > 5
  "#<Rover::Vector [#{preview.join(", ")}]>"
end
Also aliased as: to_s
last(n = 1) click to toggle source
# File lib/rover/vector.rb, line 265
# Returns a new Vector with the last +n+ elements (default 1).
# When +n+ is at least the size, the whole data is wrapped instead.
def last(n = 1)
  if n >= size
    Vector.new(@data)
  elsif n.zero?
    # -0..-1 is the same range as 0..-1 and would select every element,
    # so take the same empty slice that first(0) uses
    Vector.new(@data[0...0])
  else
    Vector.new(@data[-n..-1])
  end
end
length()
Alias for: size
map(&block) click to toggle source
# File lib/rover/vector.rb, line 163
# Returns a new Vector built from the block's results.
def map(&block)
  # convert to Ruby first to cast properly
  # https://github.com/ruby-numo/numo-narray/issues/181
  mapped = @data.to_a.map(&block)
  Vector.new(mapped)
end
map!(&block) click to toggle source
# File lib/rover/vector.rb, line 169
# In-place map: replaces the underlying data with the cast results of the
# block and returns self.
def map!(&block)
  mapped = @data.to_a.map(&block)
  @data = cast_data(mapped)
  self
end
max() click to toggle source
# File lib/rover/vector.rb, line 207
# Returns `max` of the underlying data.
def max
  @data.max
end
mean() click to toggle source
# File lib/rover/vector.rb, line 215
# Returns the mean of the data after casting it to Numo::DFloat.
def mean
  # currently only floats have mean in Numo
  # https://github.com/ruby-numo/numo-narray/issues/79
  @data.cast_to(Numo::DFloat).mean
end
median() click to toggle source
# File lib/rover/vector.rb, line 221
# Returns the median of the data after casting it to Numo::DFloat.
def median
  # need to cast to get correct result
  # https://github.com/ruby-numo/numo-narray/issues/165
  @data.cast_to(Numo::DFloat).median
end
min() click to toggle source
# File lib/rover/vector.rb, line 211
# Returns `min` of the underlying data.
def min
  @data.min
end
missing() click to toggle source
# File lib/rover/vector.rb, line 61
# Returns a Vector built from a Numo::Bit mask marking missing elements:
# nil for object data, NaN for data that responds to isnan, and nothing
# for any other data.
def missing
  mask =
    case @data
    when Numo::RObject
      Numo::Bit.cast(@data.map(&:nil?))
    else
      @data.respond_to?(:isnan) ? @data.isnan : Numo::Bit.new(size).fill(0)
    end

  Vector.new(mask)
end
numeric?() click to toggle source
# File lib/rover/vector.rb, line 47
# True unless the vector's type is :object or :bool.
def numeric?
  case type
  when :object, :bool
    false
  else
    true
  end
end
one_hot(drop: false, prefix: nil) click to toggle source
# File lib/rover/vector.rb, line 301
# One-hot encodes a vector of strings.
#
# drop   - when true, the first unique value gets no column
# prefix - prepended to each value to form the column name
#
# Returns a DataFrame with one Numo::Int64 column per remaining unique value.
# Raises ArgumentError unless every element is a String.
def one_hot(drop: false, prefix: nil)
  unless all? { |element| element.is_a?(String) }
    raise ArgumentError, "All elements must be strings"
  end

  # maybe sort values first
  categories = uniq.to_a
  categories.shift if drop
  # TODO use types
  columns = categories.each_with_object({}) do |category, acc|
    acc["#{prefix}#{category}"] = (self == category).to_numo.cast_to(Numo::Int64)
  end
  DataFrame.new(columns)
end
percentile(q) click to toggle source
# File lib/rover/vector.rb, line 227
# Delegates to `percentile(q)` on the underlying data.
def percentile(q)
  @data.percentile(q)
end
reject(&block) click to toggle source
# File lib/rover/vector.rb, line 178
# Returns a new Vector built from `reject` applied, with the given block,
# to the underlying data converted via to_a.
def reject(&block)
  Vector.new(@data.to_a.reject(&block))
end
select(&block) click to toggle source
# File lib/rover/vector.rb, line 174
# Returns a new Vector built from `select` applied, with the given block,
# to the underlying data converted via to_a.
def select(&block)
  Vector.new(@data.to_a.select(&block))
end
size() click to toggle source
# File lib/rover/vector.rb, line 51
# Returns `size` of the underlying data.
def size
  @data.size
end
Also aliased as: length, count
sort() click to toggle source
# File lib/rover/vector.rb, line 191
# Returns a new sorted Vector. Uses the underlying data's own sort when it
# has one, otherwise sorts the to_a conversion.
def sort
  sorted =
    if @data.respond_to?(:sort)
      @data.sort
    else
      @data.to_a.sort
    end
  Vector.new(sorted)
end
std() click to toggle source

uses Bessel's correction for now since that's all Numo supports

# File lib/rover/vector.rb, line 236
# Returns `stddev` of the data after casting it to Numo::DFloat.
def std
  @data.cast_to(Numo::DFloat).stddev
end
sum() click to toggle source
# File lib/rover/vector.rb, line 231
# Returns `sum` of the underlying data.
def sum
  @data.sum
end
tail(n = 5) click to toggle source
# File lib/rover/vector.rb, line 296
# Returns the last +n+ elements (default 5) via last.
# A negative +n+ has the size added to it first, i.e. "all but the first -n".
def tail(n = 5)
  count = n < 0 ? n + size : n
  last(count)
end
take(n) click to toggle source
# File lib/rover/vector.rb, line 273
# Same as first(n), except that a negative +n+ raises ArgumentError.
def take(n)
  return first(n) unless n < 0

  raise ArgumentError, "attempt to take negative size"
end
tally() click to toggle source
# File lib/rover/vector.rb, line 182
# Counts occurrences of each element.
# Returns a Hash of element => count whose default value is nil.
def tally
  counts = {}
  @data.each do |value|
    counts[value] = counts.fetch(value, 0) + 1
  end
  counts
end
to(type) click to toggle source
# File lib/rover/vector.rb, line 33
# Returns a new Vector built from this one with the given +type+.
def to(type)
  Vector.new(self, type: type)
end
to_a() click to toggle source
# File lib/rover/vector.rb, line 41
# Converts the underlying data with to_a. For Numo::Bit data each element
# is additionally turned into true/false (non-zero => true).
def to_a
  values = @data.to_a
  return values unless @data.is_a?(Numo::Bit)

  values.map! { |bit| !bit.zero? }
end
to_html() click to toggle source

for IRuby

# File lib/rover/vector.rb, line 324
# Renders the vector as an HTML table through IRuby::HTML.table.
# Vectors longer than 7 elements are shown as their first and last 4.
def to_html
  # loaded lazily so iruby is only needed when this method is called
  require "iruby"
  return IRuby::HTML.table(to_a) unless size > 7

  # pass 8 rows so maxrows is applied
  rows = first(4).to_a + last(4).to_a
  IRuby::HTML.table(rows, maxrows: 7)
end
to_numo() click to toggle source
# File lib/rover/vector.rb, line 37
# Returns the underlying data object itself (no copy is made here).
def to_numo
  @data
end
to_s()
Alias for: inspect
type() click to toggle source
# File lib/rover/vector.rb, line 29
# Returns the key of the first TYPE_CAST_MAPPING entry whose value the
# underlying data is an instance of.
def type
  match = TYPE_CAST_MAPPING.find { |_name, klass| @data.is_a?(klass) }
  match.first
end
uniq() click to toggle source
# File lib/rover/vector.rb, line 57
# Returns a new Vector built from `uniq` applied to the result of to_a.
def uniq
  Vector.new(to_a.uniq)
end
var() click to toggle source

uses Bessel's correction for now since that's all Numo supports

# File lib/rover/vector.rb, line 241
# Returns `var` of the data after casting it to Numo::DFloat.
def var
  @data.cast_to(Numo::DFloat).var
end
zip(other, &block) click to toggle source
# File lib/rover/vector.rb, line 253
# Zips the to_a conversion of this vector with `other.to_a`, forwarding the
# optional block to `zip`.
def zip(other, &block)
  to_a.zip(other.to_a, &block)
end

Private Instance Methods

cast_data(data, type: nil) click to toggle source
# File lib/rover/vector.rb, line 336
# Normalizes +data+ for storage, optionally casting it to +type+.
#
# data - a Vector, a Numo::NArray, or anything that responds to to_a
# type - optional type name, resolved to a class through numo_type
#
# A Numo::NArray is returned unchanged when no type is given; all other
# input is cast to a Numo class (the requested one, or one inferred from
# the element values).
#
# Raises ArgumentError for complex NArrays, and RangeError when NaN/Inf data
# is cast to a type whose name matches /int/.
def cast_data(data, type: nil)
  # the local shadows the method name; the parenthesized call still
  # dispatches to the numo_type method
  numo_type = numo_type(type) if type

  # unwrap a Vector to its underlying data
  data = data.to_numo if data.is_a?(Vector)

  if data.is_a?(Numo::NArray)
    raise ArgumentError, "Complex types not supported yet" if data.is_a?(Numo::DComplex) || data.is_a?(Numo::SComplex)

    if type
      case type
      when /int/
        # Numo does not check these when casting
        raise RangeError, "float NaN out of range of integer" if data.respond_to?(:isnan) && data.isnan.any?
        raise RangeError, "float Inf out of range of integer" if data.respond_to?(:isinf) && data.isinf.any?

        # object arrays are converted element by element; nil is kept as nil
        data = data.to_a.map { |v| v.nil? ? nil : v.to_i } if data.is_a?(Numo::RObject)
      when /float/
        # object arrays are converted element by element; nil becomes NaN
        data = data.to_a.map { |v| v.nil? ? Float::NAN : v.to_f } if data.is_a?(Numo::RObject)
      end

      data = numo_type.cast(data)
    end
  else
    data = data.to_a

    if type
      data = numo_type.cast(data)
    else
      # no type given: infer one from the elements, checked in this order
      data =
        if data.all? { |v| v.is_a?(Integer) }
          Numo::Int64.cast(data)
        elsif data.all? { |v| v.is_a?(Numeric) || v.nil? }
          # nil is stored as NaN in a float array
          Numo::DFloat.cast(data.map { |v| v || Float::NAN })
        elsif data.all? { |v| v == true || v == false }
          Numo::Bit.cast(data)
        else
          Numo::RObject.cast(data)
        end
    end
  end

  data
end
numo_type(type) click to toggle source
# File lib/rover/vector.rb, line 380
# Looks up +type+ in TYPE_CAST_MAPPING and returns the mapped value.
# Raises ArgumentError when the lookup yields nothing.
def numo_type(type)
  TYPE_CAST_MAPPING[type] || raise(ArgumentError, "Invalid type: #{type}")
end