class ComputeUnit::Gpu

Constants

DEVICE_CLASS
DEVICE_CLASS_NAME

Attributes

bios[R]
name[R]
pci_loc[R]
power_limit[RW]
use_opencl[RW]

Public Class Methods

attached_processes(field = :pctcpu, filter = %r{/dev/dri|nvidia\d+}) click to toggle source

@summary Finds all GPU attached processes and sorts them by the given field @param filter [Regexp] - pattern matched against each process's open file descriptors, defaults to dri/nvidia device paths @param field [Symbol] - the field to sort by, defaults to :pctcpu @return [Array] - an array of attached processes

# File lib/compute_unit/gpu.rb, line 20
# Lists the processes that hold an open file descriptor whose target matches
# the given pattern (by default dri / nvidia device nodes).
#
# @param field [Symbol] process attribute used as the sort key
# @param filter [Regexp, nil] pattern tested against every fd target; nil
#   falls back to the default pattern
# @return [Array] the matching process entries, sorted by +field+
def self.attached_processes(field = :pctcpu, filter = %r{/dev/dri|nvidia\d+})
  filter ||= %r{/dev/dri|nvidia\d+}
  # keep a process as soon as one of its fd targets matches the pattern
  gpu_bound = Sys::ProcTable.ps(smaps: false).select do |process|
    process.fd.values.any? { |target| target =~ filter }
  end
  gpu_bound.sort_by(&field)
end
devices() click to toggle source

@return [Array] - returns a list of device paths of all devices considered for display @note the devices are sorted by the device path @note this can mean AMD, NVIDIA, Intel or other crappy embedded devices

# File lib/compute_unit/gpu.rb, line 55
# Device paths whose device class equals DEVICE_CLASS, sorted and memoized.
#
# @return [Array] sorted list of matching device paths
def self.devices
  @devices ||= begin
    matching_paths = ComputeUnit::ComputeBase.devices.select do |path|
      ComputeUnit::Device.device_class(path) == DEVICE_CLASS
    end
    matching_paths.sort
  end
end
find_all(use_opencl = false) click to toggle source

@return [Array] - returns an array of gpu objects, sorted by index

# File lib/compute_unit/gpu.rb, line 278
# Collects the gpu objects reported by every class in +compute_classes+.
#
# @param use_opencl [Boolean] passed through to each class's own find_all
# @return [Array] all gpu objects, flattened and sorted by index
def self.find_all(use_opencl = false)
  # the vendor implementations are only loaded when this method is called
  require 'compute_unit/gpus/amd_gpu'
  require 'compute_unit/gpus/nvidia_gpu'
  per_class = compute_classes.map { |compute_class| compute_class.find_all(use_opencl) }
  per_class.flatten.sort_by(&:index)
end
found_devices() click to toggle source

@return [Array] - array of device paths from both amd and nvidia

# File lib/compute_unit/gpu.rb, line 335
# AMD device paths followed by NVIDIA device paths, memoized after the first call.
#
# @return [Array] concatenation of both vendor device lists
def self.found_devices
  @found_devices ||= begin
    amd_paths = ComputeUnit::AmdGpu.devices
    nvidia_paths = ComputeUnit::NvidiaGpu.devices
    amd_paths + nvidia_paths
  end
end
new(device_path, opts = {}) click to toggle source

@param device_path [String] - the pci bus path to the device @param opts [Hash] @option bios [String] the bios id @option model [String] the model name @option serial [String] the serial id of the device @option busid [String] the pci bus path of the device @option meta [Hash] metadata about the device @option index [Integer] the index of the device found in the device tree @option uuid [String] the uuid of the device @option use_opencl [Boolean] set to true if you want to get info about the device from opencl, defaults to false

Calls superclass method ComputeUnit::ComputeBase::new
# File lib/compute_unit/gpu.rb, line 71
# Builds a GPU compute unit from a device path plus an options hash.
#
# @param device_path [String] path of the device; forwarded to the superclass
# @param opts [Hash] optional attributes; the keys read here are :bios,
#   :model, :serial, :busid, :meta, :index, :uuid and :use_opencl
def initialize(device_path, opts = {})
  super(device_path, opts)
  @type = :GPU
  # @bios is only assigned (upcased) when a bios id was supplied
  @bios = opts[:bios].upcase if opts[:bios]
  @model = opts[:model]
  @serial = opts[:serial]
  @pci_loc = opts[:busid]
  @meta = opts[:meta]
  # to_i turns a missing (nil) index into 0
  @index = opts[:index].to_i
  # the serial doubles as the uuid when no uuid is given
  @uuid = opts[:uuid] || opts[:serial]
  # NOTE(review): `model` is a reader defined outside this chunk; presumably
  # it returns @model set above - confirm before reordering these lines
  @name = model
  @power_offset = 0
  @use_opencl = opts[:use_opencl] || false
end
opencl_cache() click to toggle source

@return [CacheStore] - returns an instance of the cachestore for storing opencl cache

# File lib/compute_unit/gpu.rb, line 286
# Memoized cache store named 'opencl_cache'.
#
# @return [ComputeUnit::CacheStore] the same store instance on every call
def self.opencl_cache
  return @opencl_cache if @opencl_cache

  @opencl_cache = ComputeUnit::CacheStore.new('opencl_cache')
end
opencl_devices() click to toggle source

@return [Array] - returns an array of opencl devices; overwrites the cache if new devices are found. OpenCL should only be used when necessary as it can freeze sometimes. OpenCL indexes items differently.

# File lib/compute_unit/gpu.rb, line 343
# Returns the OpenCL device list, preferring (in order) the in-memory value,
# the cached entry, and finally a fresh platform scan whose result is written
# to the cache under the stringified system checksum.
#
# @return [Array] the opencl device entries
def self.opencl_devices
  return @opencl_devices if @opencl_devices

  cached = opencl_devices_from_cache
  return @opencl_devices = cached if cached

  discovered = opencl_devices_from_platform
  opencl_cache.write_cache('opencl_compute_units', ComputeUnit::Device.system_checksum.to_s => discovered)
  @opencl_devices = discovered
end
opencl_devices_from_cache() click to toggle source

@return [Array] - array of openstruct or nil

# File lib/compute_unit/gpu.rb, line 291
# Looks up the cached OpenCL device list for the current system checksum.
#
# The cache entry is written under the stringified checksum, so the lookup
# stringifies it too; otherwise a non-String checksum could never match.
#
# @return [Array, nil] the cached entries, or nil when nothing is cached
#   for the current checksum
def self.opencl_devices_from_cache
  data = opencl_cache.read_cache('opencl_compute_units', {})
  data[ComputeUnit::Device.system_checksum.to_s]
end
opencl_devices_from_platform() click to toggle source

@return [Array] - an array of openstruct objects

# File lib/compute_unit/gpu.rb, line 297
# Enumerates every device of every OpenCL platform and describes each one with
# an OpenStruct (name, type, board_name, max_compute_units).
#
# @return [Array] one OpenStruct per device, or an empty array when one of
#   the rescued OpenCL / runtime errors is raised
def self.opencl_devices_from_platform
  require 'ostruct'
  # OpenCL takes a moment to load, so results are cached later in the process;
  # plain OpenStruct objects are returned here for that reason.
  # OpenCL can also freeze the system when it tries to enumerate a dead GPU,
  # so it should be used sparingly: only when absolutely necessary and when
  # no dead GPUs are present.
  # TODO: warn when dead gpus detected
  begin
    require 'opencl_ruby_ffi'
    ComputeUnit::Logger.logger.debug('Searching for openCL devices')
    all_devices = OpenCL.platforms.map(&:devices).flatten
    all_devices.map do |device|
      # any platform whose name lacks 'AMD' is labelled 'Nvidia'
      if device.platform.name.include?('AMD')
        vendor = 'AMD'
        board = device.board_name_amd
      else
        vendor = 'Nvidia'
        board = ''
      end
      compute_units = device.respond_to?(:max_compute_units) ? device.max_compute_units : 0
      OpenStruct.new(
        name: device.name,
        type: vendor,
        board_name: board,
        max_compute_units: compute_units
      )
    end
  rescue OpenCL::Error::DEVICE_NOT_FOUND => e
    ComputeUnit::Logger.logger.debug("OpenCL error: #{e.message}, are you root?")
    []
  rescue RuntimeError => e # OpenCL::Error::PLATFORM_NOT_FOUND_KHR,
    ComputeUnit::Logger.logger.debug("OpenCL error: #{e.message}")
    ComputeUnit::Logger.logger.debug("OpenCL error: #{e.backtrace}")
    []
  end
end

Public Instance Methods

asic_temp() click to toggle source

@return [Integer] - the temperature of the asic chip

# File lib/compute_unit/gpu.rb, line 218
# Temperature of the asic chip; this implementation always returns 0.
# NOTE(review): presumably a default that vendor-specific classes override - confirm.
#
# @return [Integer] always 0 here
def asic_temp
  0
end
compute_type() click to toggle source
# File lib/compute_unit/gpu.rb, line 12
# Returns whatever the +type+ reader returns.
# NOTE(review): +type+ is defined outside this chunk; initialize sets @type to :GPU - confirm the reader exposes it.
def compute_type
  type
end
configured_core_voltage() click to toggle source

@return [Numeric] - returns voltage of core in mV

# File lib/compute_unit/gpu.rb, line 165
# Configured core voltage; this implementation always returns 0.
# NOTE(review): presumably a default that vendor-specific classes override - confirm.
#
# @return [Numeric] always 0 here
def configured_core_voltage
  0
end
core_clock() click to toggle source

@return [Integer] - the core clock speed

# File lib/compute_unit/gpu.rb, line 155
# Core clock speed; this implementation always returns 0.
# NOTE(review): presumably a default that vendor-specific classes override - confirm.
#
# @return [Integer] always 0 here
def core_clock
  0
end
core_voltage() click to toggle source

@return [Numeric] - returns voltage of core in mV

# File lib/compute_unit/gpu.rb, line 160
# Core voltage; this implementation always returns 0.
# NOTE(review): presumably a default that vendor-specific classes override - confirm.
#
# @return [Numeric] always 0 here
def core_voltage
  0
end
fan() click to toggle source
# File lib/compute_unit/gpu.rb, line 86
# Fan reading. Not implemented here: the call always raises, and the message
# names the receiving class and method so the missing override is easy to spot.
#
# @raise [NotImplementedError] always
def fan
  raise NotImplementedError, "#{self.class}##{__method__} is not implemented"
end
fan_limit() click to toggle source

@return [Integer] - a percentage value of the current fan limit

# File lib/compute_unit/gpu.rb, line 106
# Current fan limit; this implementation simply returns the value of +fan+.
#
# @return [Object] whatever +fan+ returns (documented as a percentage value)
def fan_limit
  fan
end
fan_max_limit() click to toggle source

@return [Integer] - a percentage value of the max fan limit

# File lib/compute_unit/gpu.rb, line 116
# Maximum fan limit; this implementation has no value and returns nil.
# NOTE(review): presumably overridden where a real limit is available - confirm.
#
# @return [nil] always nil here
def fan_max_limit
  nil
end
fan_min_limit() click to toggle source

@return [Integer] - a percentage value of the min fan limit

# File lib/compute_unit/gpu.rb, line 111
# Minimum fan limit; this implementation has no value and returns nil.
# NOTE(review): presumably overridden where a real limit is available - confirm.
#
# @return [nil] always nil here
def fan_min_limit
  nil
end
hardware_info() click to toggle source

@return [Hash] - hash of information about the gpu data

# File lib/compute_unit/gpu.rb, line 202
# Static identification details of the gpu.
#
# @return [Hash] identification fields keyed by symbol, in the order
#   uuid, gpuId, syspath, pciLoc, name, bios, subType, make, model, vendor
def hardware_info
  info = {}
  info[:uuid] = uuid
  info[:gpuId] = "GPU#{index}"
  info[:syspath] = device_path
  info[:pciLoc] = pci_loc
  info[:name] = name
  info[:bios] = bios
  info[:subType] = subtype
  info[:make] = make
  info[:model] = model
  info[:vendor] = vendor
  info
end
mem_info() click to toggle source
# File lib/compute_unit/gpu.rb, line 169
# Memory related readings of the device.
#
# @return [Hash] memory fields keyed by symbol; memory_name and memory_type
#   are always nil here
def mem_info
  [
    [:index, "#{device_class_name}#{index}"],
    [:name, name],
    [:volt, memory_volt],
    [:clock, memory_clock],
    [:memory_name, nil],
    [:memory_type, nil],
    [:memory_used, memory_used],
    [:memory_free, memory_free],
    [:memory_total, memory_total],
    [:mem_temp, mem_temp]
  ].to_h
end
mem_temp() click to toggle source

@return [Integer] - temperature of the memory

# File lib/compute_unit/gpu.rb, line 223
# Temperature of the memory; this implementation always returns 0.
# NOTE(review): presumably a default that vendor-specific classes override - confirm.
#
# @return [Integer] always 0 here
def mem_temp
  0
end
memory_clock() click to toggle source

@return [Integer] - the memory speed

# File lib/compute_unit/gpu.rb, line 145
# Memory clock speed; this implementation always returns 0.
# NOTE(review): presumably a default that vendor-specific classes override - confirm.
#
# @return [Integer] always 0 here
def memory_clock
  0
end
memory_free() click to toggle source
# File lib/compute_unit/gpu.rb, line 136
# Free memory reading. Not implemented here: the call always raises, and the
# message names the receiving class and method.
#
# @raise [NotImplementedError] always
def memory_free
  raise NotImplementedError, "#{self.class}##{__method__} is not implemented"
end
memory_total() click to toggle source
# File lib/compute_unit/gpu.rb, line 128
# Total memory reading. Not implemented here: the call always raises, and the
# message names the receiving class and method.
#
# @raise [NotImplementedError] always
def memory_total
  raise NotImplementedError, "#{self.class}##{__method__} is not implemented"
end
memory_used() click to toggle source
# File lib/compute_unit/gpu.rb, line 132
# Used memory reading. Not implemented here: the call always raises, and the
# message names the receiving class and method.
#
# @raise [NotImplementedError] always
def memory_used
  raise NotImplementedError, "#{self.class}##{__method__} is not implemented"
end
memory_volt() click to toggle source

@return [Integer] - the memory voltage

# File lib/compute_unit/gpu.rb, line 150
# Memory voltage; this implementation always returns 0.
# NOTE(review): presumably a default that vendor-specific classes override - confirm.
#
# @return [Integer] always 0 here
def memory_volt
  0
end
opencl_board_name() click to toggle source

@return [String] - returns the raw data of the board name from opencl, return nil if no device

# File lib/compute_unit/gpu.rb, line 35
# Board name reported by the matching OpenCL device, memoized once found.
#
# @return [String, nil] the board name; nil when use_opencl is off or no
#   OpenCL device matched
def opencl_board_name
  return unless use_opencl

  @opencl_board_name ||= opencl_device&.board_name
end
opencl_device() click to toggle source

@return [OpenCL_Device]

# File lib/compute_unit/gpu.rb, line 30
# Picks this gpu's entry from the class-level OpenCL device list: entries whose
# :type equals +make+ are kept, then the one at position +index+ is taken.
#
# @return [Object, nil] the matching entry; nil when use_opencl is off or
#   there is no entry at that position
def opencl_device
  return unless use_opencl

  @opencl_device ||= begin
    same_make = self.class.opencl_devices.select { |entry| entry[:type] == make }
    same_make[index]
  end
end
opencl_name() click to toggle source

@return [String] - the device name ie. GeForce GTX 1070 or RX 580 @note not really needed for Nvidia types since nvidia-smi returns really complete information

# File lib/compute_unit/gpu.rb, line 48
# Device name reported by the matching OpenCL device, memoized once found.
#
# Uses safe navigation so a missing OpenCL device yields nil instead of a
# NoMethodError, matching how opencl_board_name treats the same situation.
#
# @return [String, nil] the device name; nil when use_opencl is off or no
#   OpenCL device matched
def opencl_name
  return unless use_opencl

  @opencl_name ||= opencl_device&.name
end
opencl_units() click to toggle source

@return [Integer] - returns the number of compute units detected by opencl

not to be confused with stream processors.  Can be helpful when determining which product vega56 or vega64
# File lib/compute_unit/gpu.rb, line 41
# Number of compute units reported by the matching OpenCL device, memoized
# once found.
#
# A missing OpenCL device yields nil instead of raising NoMethodError; when a
# device exists its max_compute_units value is converted with to_i as before.
#
# @return [Integer, nil] the compute unit count; nil when use_opencl is off
#   or no OpenCL device matched
def opencl_units
  return unless use_opencl

  @opencl_units ||= begin
    device = opencl_device
    device.max_compute_units.to_i if device
  end
end
power() click to toggle source
# File lib/compute_unit/gpu.rb, line 97
# Power reading. Not implemented here: the call always raises, and the
# message names the receiving class and method.
#
# @raise [NotImplementedError] always
def power
  raise NotImplementedError, "#{self.class}##{__method__} is not implemented"
end
power_max_limit() click to toggle source
# File lib/compute_unit/gpu.rb, line 124
# Maximum power limit. Not implemented here: the call always raises, and the
# message names the receiving class and method.
#
# @raise [NotImplementedError] always
def power_max_limit
  raise NotImplementedError, "#{self.class}##{__method__} is not implemented"
end
pstate() click to toggle source
# File lib/compute_unit/gpu.rb, line 101
# Performance state reading. Not implemented here: the call always raises,
# and the message names the receiving class and method.
#
# @raise [NotImplementedError] always
def pstate
  raise NotImplementedError, "#{self.class}##{__method__} is not implemented"
end
status() click to toggle source
# File lib/compute_unit/gpu.rb, line 90
# Derives a numeric status code from utilization and power.
#
# @return [Integer] 0 when utilization is above 20 and power is at least 50,
#   2 when power is below 20, otherwise 1
def status
  if utilization > 20 && power >= 50
    0
  elsif power < 20
    2
  else
    1
  end
end
status_info() click to toggle source

@return [Hash] - hash of hardware status about the gpu

# File lib/compute_unit/gpu.rb, line 185
# Snapshot of the current readings of the gpu.
#
# @return [Hash] identification fields (index, name, bios) followed by the
#   clock, power, fan, voltage, temperature and status readings
def status_info
  identity = {
    index: "#{device_class_name}#{index}",
    name: name,
    bios: bios
  }
  readings = {
    core_clock: core_clock,
    memory_clock: memory_clock,
    power: power,
    fan: fan,
    core_volt: core_voltage,
    temp: temp,
    mem_temp: mem_temp,
    status: status
  }
  identity.merge(readings)
end
temp() click to toggle source
# File lib/compute_unit/gpu.rb, line 232
# Temperature reading; this implementation always returns 0.
# NOTE(review): presumably a default that vendor-specific classes override - confirm.
#
# @return [Integer] always 0 here
def temp
  0
end
to_h() click to toggle source
# File lib/compute_unit/gpu.rb, line 236
# Full hash representation of the gpu: identification fields, live readings
# and a set of keys that are always nil in this implementation.
#
# @return [Hash] symbol-keyed hash; keys are inserted in the order listed below
def to_h
  data = {}

  # identification
  data[:uuid] = uuid
  data[:gpuId] = "GPU#{index}"
  data[:syspath] = device_path
  data[:pciLoc] = pci_loc
  data[:name] = name
  data[:bios] = bios
  data[:subType] = subtype
  data[:make] = make
  data[:model] = model
  data[:vendor] = vendor

  # live readings
  # not exported: memory_name, memory_type, gpu_platform, power_limit,
  # power_max_limit, memory_used, memory_free, memory_total
  data[:power] = power
  data[:utilization] = utilization
  data[:temperature] = temp
  data[:status] = status
  data[:pstate] = pstate
  data[:fanSpeed] = fan
  data[:type] = compute_type
  data[:maxTemp] = nil
  data[:mem] = memory_clock
  data[:cor] = core_clock
  data[:vlt] = core_voltage
  data[:mem_temp] = mem_temp

  # keys that carry no value here
  data[:maxFan] = nil
  data[:dpm] = nil
  data[:vddci] = nil
  data[:maxPower] = nil
  data[:ocProfile] = nil

  data[:opencl_enabled] = use_opencl
  data
end
utilization() click to toggle source
# File lib/compute_unit/gpu.rb, line 140
# Utilization reading. Not implemented here: the call always raises, and the
# message names the receiving class and method.
#
# @raise [NotImplementedError] always
def utilization
  raise NotImplementedError, "#{self.class}##{__method__} is not implemented"
end
vddgfx() click to toggle source

@return [Integer] - the voltage reading of the card, maybe just amd cards (mV)

# File lib/compute_unit/gpu.rb, line 228
# Voltage reading of the card (documented as mV); this implementation always returns 0.
# NOTE(review): presumably a default that vendor-specific classes override - confirm.
#
# @return [Integer] always 0 here
def vddgfx
  0
end
voltage_table() click to toggle source

@return [Hash] - a hash of voltages per the voltage table; note that this base implementation returns an empty array, not nil, when no table is available

# File lib/compute_unit/gpu.rb, line 330
# Voltage table of the card; this implementation returns a new empty array.
# NOTE(review): the published doc promises a Hash or nil - confirm which
# shape callers and overriding classes actually rely on.
#
# @return [Array] always an empty array here
def voltage_table
  []
end