class ComputeUnit::Gpu
Constants
- DEVICE_CLASS
- DEVICE_CLASS_NAME
Attributes
Public Class Methods
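@summary Finds all processes attached to a GPU device and sorts them by the given field (pctcpu by default) @param filter [Regexp] - pattern matched against each process's open file descriptors; only processes with a matching descriptor are kept @param field [Symbol] - the field to sort by @return [Array] - an array of attached processes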
# File lib/compute_unit/gpu.rb, line 20
# Finds every process that holds an open file descriptor matching +filter+
# and returns them ordered by +field+.
#
# @param field [Symbol] process attribute to sort by (defaults to :pctcpu)
# @param filter [Regexp, nil] pattern matched against each fd target; nil
#   falls back to the default dri/nvidia pattern
# @return [Array] the matching process entries, sorted ascending by +field+
def self.attached_processes(field = :pctcpu, filter = %r{/dev/dri|nvidia\d+})
  # callers may pass nil explicitly, so re-apply the default pattern
  filter ||= %r{/dev/dri|nvidia\d+}
  # looks for any fd device with dri or nvidia in the name
  attached = Sys::ProcTable.ps(smaps: false).select do |process|
    process.fd.values.any? { |target| filter.match?(target) }
  end
  attached.sort_by(&field)
end
@return [Array] - returns a list of device paths of all devices considered for display @note the devices are sorted by the device path @note this can mean AMD, NVIDIA, Intel or other crappy embedded devices
# File lib/compute_unit/gpu.rb, line 55
# Device paths whose device class equals DEVICE_CLASS, in sorted order.
# The result is computed once and memoized in @devices.
#
# @return [Array] sorted list of device paths
def self.devices
  @devices ||= begin
    matching = ComputeUnit::ComputeBase.devices.select do |path|
      ComputeUnit::Device.device_class(path) == DEVICE_CLASS
    end
    matching.sort
  end
end
@return [Array] - returns an array of gpu objects, sorted by index
# File lib/compute_unit/gpu.rb, line 278
# Collects the GPU objects reported by every class in compute_classes.
#
# @param use_opencl [Boolean] forwarded unchanged to each class's find_all
# @return [Array] gpu objects, sorted by index
def self.find_all(use_opencl = false)
  # the vendor implementations are required here, at lookup time
  require 'compute_unit/gpus/amd_gpu'
  require 'compute_unit/gpus/nvidia_gpu'
  per_class = compute_classes.map { |klass| klass.find_all(use_opencl) }
  per_class.flatten.sort_by(&:index)
end
@return [Array] - array of devices paths either from amd or nvidia
# File lib/compute_unit/gpu.rb, line 335
# AMD device paths followed by Nvidia device paths, memoized in
# @found_devices.
#
# @return [Array] device paths from ComputeUnit::AmdGpu and ComputeUnit::NvidiaGpu
def self.found_devices
  return @found_devices if @found_devices

  amd = ComputeUnit::AmdGpu.devices
  nvidia = ComputeUnit::NvidiaGpu.devices
  @found_devices = amd + nvidia
end
@param device_path [String] - the pci bus path to the device @param opts [Hash] @option bios [String] the bios id @option model [String] the model name @option serial [String] the serial id of the device @option busid [String] the pci bus path of the device @option meta [Hash] metadata about the device @option index [Integer] the index of the device found in the device tree @option uuid [String] the uuid of the device @option use_opencl [Boolean] set to true if you want to get info about the device from opencl, defaults to false
ComputeUnit::ComputeBase::new
# File lib/compute_unit/gpu.rb, line 71
# Builds a GPU object from a device path and an options hash.
#
# @param device_path [String] passed through to the superclass initializer
# @param opts [Hash] optional attributes
# @option opts [String] :bios stored upcased when present
# @option opts [String] :model
# @option opts [String] :serial also used as the uuid when :uuid is absent
# @option opts [String] :busid stored as the pci location
# @option opts [Hash] :meta
# @option opts [Integer] :index coerced with to_i
# @option opts [String] :uuid
# @option opts [Boolean] :use_opencl defaults to false
def initialize(device_path, opts = {})
  super(device_path, opts)
  @type = :GPU

  bios_id = opts[:bios]
  @bios = bios_id.upcase if bios_id

  @model = opts[:model]
  @serial = opts[:serial]
  @pci_loc = opts[:busid]
  @meta = opts[:meta]
  @index = opts[:index].to_i
  @uuid = opts[:uuid] || opts[:serial]
  # NOTE: assigned from the model reader, and deliberately before
  # @use_opencl is set -- the statement order is kept as in the original
  @name = model
  @power_offset = 0
  @use_opencl = opts[:use_opencl] || false
end
@return [CacheStore] - returns an instance of the cachestore for storing the opencl cache
# File lib/compute_unit/gpu.rb, line 286
# Memoized cache store named 'opencl_cache'.
#
# @return [ComputeUnit::CacheStore] the same instance on every call
def self.opencl_cache
  return @opencl_cache if @opencl_cache

  @opencl_cache = ComputeUnit::CacheStore.new('opencl_cache')
end
@return [Array] - returns an array of opencl devices; overwrites the cache if new devices are found. OpenCL should only be used when necessary as it can freeze sometimes. OpenCL indexes items differently.
# File lib/compute_unit/gpu.rb, line 343
# OpenCL devices, taken from the cache when it has an entry and otherwise
# enumerated from the platform and written to the cache. Memoized in
# @opencl_devices.
#
# @return [Array] opencl device entries
def self.opencl_devices
  return @opencl_devices if @opencl_devices

  cached = opencl_devices_from_cache
  return @opencl_devices = cached if cached

  discovered = opencl_devices_from_platform
  # entries are keyed by the string form of the system checksum
  opencl_cache.write_cache('opencl_compute_units', ComputeUnit::Device.system_checksum.to_s => discovered)
  @opencl_devices = discovered
end
@return [Array] - array of openstruct or nil
# File lib/compute_unit/gpu.rb, line 291
# Reads the cached OpenCL device list for the current system checksum.
#
# @return [Array, nil] the cached entries, or nil when the cache has no
#   entry for this checksum
def self.opencl_devices_from_cache
  data = opencl_cache.read_cache('opencl_compute_units', {})
  # opencl_devices stores entries under system_checksum.to_s, so the lookup
  # must use the string form too or a non-String checksum never hits
  data[ComputeUnit::Device.system_checksum.to_s]
end
@return [Array] - an array of openstruct objects
# File lib/compute_unit/gpu.rb, line 297
# Enumerates devices directly from the OpenCL platforms (no cache involved).
#
# @return [Array<OpenStruct>] one entry per device with name, type,
#   board_name and max_compute_units; an empty array when the OpenCL library
#   cannot be loaded or enumeration raises
def self.opencl_devices_from_platform
  require 'ostruct'
  # opencl takes a second to load so we cache later in the process
  # which is why we need the openstruct object here
  # opencl can also freeze the system if it tries to enumerate a dead GPU
  # opencl should be used sparingly as a result and only read when absolutely
  # necessary and no dead GPUs.
  # TODO: warn when dead gpus detected
  begin
    require 'opencl_ruby_ffi'
    ComputeUnit::Logger.logger.debug('Searching for openCL devices')
    OpenCL.platforms.map(&:devices).flatten.map do |d|
      type = d.platform.name.include?('AMD') ? 'AMD' : 'Nvidia'
      board_name = type == 'AMD' ? d.board_name_amd : ''
      max_computes = d.respond_to?(:max_compute_units) ? d.max_compute_units : 0
      OpenStruct.new(
        name: d.name,
        type: type,
        board_name: board_name,
        max_compute_units: max_computes
      )
    end
  rescue LoadError => e
    # Must stay the first clause: the clauses below name OpenCL constants,
    # which do not exist when the library failed to load, so evaluating them
    # would raise NameError and hide the real cause.
    ComputeUnit::Logger.logger.debug("OpenCL error: #{e.message}")
    []
  rescue OpenCL::Error::DEVICE_NOT_FOUND => e
    ComputeUnit::Logger.logger.debug("OpenCL error: #{e.message}, are you root?")
    []
  rescue RuntimeError => e
    # OpenCL::Error::PLATFORM_NOT_FOUND_KHR,
    ComputeUnit::Logger.logger.debug("OpenCL error: #{e.message}")
    ComputeUnit::Logger.logger.debug("OpenCL error: #{e.backtrace}")
    []
  end
end
Public Instance Methods
@return [Integer] - the temperature of the asic chip
# File lib/compute_unit/gpu.rb, line 218
# ASIC temperature; this implementation always reports 0.
#
# @return [Integer] the temperature of the asic chip
def asic_temp
  0
end
# File lib/compute_unit/gpu.rb, line 12
# Alias-style reader: returns whatever +type+ returns.
#
# @return [Object] the value of type
def compute_type
  type
end
@return [Numeric] - returns voltage of core in mV
# File lib/compute_unit/gpu.rb, line 165
# Configured core voltage; this implementation always reports 0.
#
# @return [Numeric] voltage of the core in mV
def configured_core_voltage
  0
end
@return [Integer] - the core clock speed
# File lib/compute_unit/gpu.rb, line 155
# Core clock speed; this implementation always reports 0.
#
# @return [Integer] the core clock speed
def core_clock
  0
end
@return [Numeric] - returns voltage of core in mV
# File lib/compute_unit/gpu.rb, line 160
# Core voltage; this implementation always reports 0.
#
# @return [Numeric] voltage of the core in mV
def core_voltage
  0
end
# File lib/compute_unit/gpu.rb, line 86
# Fan reading. Not implemented in this class.
#
# @raise [NotImplementedError] always
def fan
  raise NotImplementedError
end
@return [Integer] - a percentage value of the current fan limit
# File lib/compute_unit/gpu.rb, line 106
# Current fan limit; delegates to +fan+.
#
# @return [Integer] a percentage value of the current fan limit
def fan_limit
  fan
end
@return [Integer] - a percentage value of the max fan limit
# File lib/compute_unit/gpu.rb, line 116
# Maximum fan limit; this implementation has none and returns nil.
#
# @return [Integer, nil] a percentage value of the max fan limit
def fan_max_limit
  nil
end
@return [Integer] - a percentage value of the min fan limit
# File lib/compute_unit/gpu.rb, line 111
# Minimum fan limit; this implementation has none and returns nil.
#
# @return [Integer, nil] a percentage value of the min fan limit
def fan_min_limit
  nil
end
@return [Hash] - hash of information about the gpu data
# File lib/compute_unit/gpu.rb, line 202
# Identification details of the gpu, assembled from the object's readers.
#
# @return [Hash] hash of information about the gpu data
def hardware_info
  {
    uuid: uuid,
    gpuId: "GPU#{index}",
    syspath: device_path,
    pciLoc: pci_loc,
    name: name,
    bios: bios,
    subType: subtype,
    make: make,
    model: model,
    vendor: vendor
  }
end
# File lib/compute_unit/gpu.rb, line 169
# Memory details of the gpu, assembled from the object's readers.
# memory_name and memory_type are always nil here.
#
# @return [Hash] hash of memory information
def mem_info
  {
    index: "#{device_class_name}#{index}",
    name: name,
    volt: memory_volt,
    clock: memory_clock,
    memory_name: nil,
    memory_type: nil,
    memory_used: memory_used,
    memory_free: memory_free,
    memory_total: memory_total,
    mem_temp: mem_temp
  }
end
@return [Integer] - temperature of the memory
# File lib/compute_unit/gpu.rb, line 223
# Memory temperature; this implementation always reports 0.
#
# @return [Integer] temperature of the memory
def mem_temp
  0
end
@return [Integer] - the memory speed
# File lib/compute_unit/gpu.rb, line 145
# Memory clock speed; this implementation always reports 0.
#
# @return [Integer] the memory speed
def memory_clock
  0
end
# File lib/compute_unit/gpu.rb, line 136
# Free memory. Not implemented in this class.
#
# @raise [NotImplementedError] always
def memory_free
  raise NotImplementedError
end
# File lib/compute_unit/gpu.rb, line 128
# Total memory. Not implemented in this class.
#
# @raise [NotImplementedError] always
def memory_total
  raise NotImplementedError
end
# File lib/compute_unit/gpu.rb, line 132
# Used memory. Not implemented in this class.
#
# @raise [NotImplementedError] always
def memory_used
  raise NotImplementedError
end
@return [Integer] - the memory voltage
# File lib/compute_unit/gpu.rb, line 150
# Memory voltage; this implementation always reports 0.
#
# @return [Integer] 0
def memory_volt
  0
end
@return [String] - returns the raw data of the board name from opencl, return nil if no device
# File lib/compute_unit/gpu.rb, line 35
# Board name reported by the matching OpenCL device, memoized once found.
#
# @return [String, nil] the raw board name from opencl; nil when opencl is
#   disabled or there is no device
def opencl_board_name
  return unless use_opencl

  @opencl_board_name ||= opencl_device&.board_name
end
@return [OpenCL_Device]
# File lib/compute_unit/gpu.rb, line 30
# Picks this gpu's entry from the class-level OpenCL device list: the
# entries whose :type equals +make+ are selected and the one at position
# +index+ within that selection is returned. Memoized once found.
#
# @return [Object, nil] the opencl device entry; nil when opencl is disabled
#   or no entry exists at that position
def opencl_device
  return unless use_opencl

  @opencl_device ||= begin
    same_make = self.class.opencl_devices.select { |cu| cu[:type] == make }
    same_make[index]
  end
end
@return [String] - the device name ie. GeForce GTX 1070 or RX 580 @note not really needed for Nvidia types since nvidia-smi returns really complete information
# File lib/compute_unit/gpu.rb, line 48
# Device name reported by the matching OpenCL device, memoized once found.
#
# @return [String, nil] the device name, e.g. GeForce GTX 1070 or RX 580;
#   nil when opencl is disabled or no opencl device matches this gpu
def opencl_name
  return unless use_opencl

  # opencl_device is nil when nothing matches; mirror opencl_board_name and
  # return nil instead of raising NoMethodError
  @opencl_name ||= opencl_device&.name
end
@return [Integer] - returns the number of compute units detected by opencl,
not to be confused with stream processors. Can be helpful when determining which product it is, e.g. vega56 or vega64
# File lib/compute_unit/gpu.rb, line 41
# Number of compute units reported by the matching OpenCL device, memoized
# once found.
#
# @return [Integer, nil] max_compute_units coerced with to_i; nil when
#   opencl is disabled or no opencl device matches this gpu
def opencl_units
  return unless use_opencl

  @opencl_units ||= begin
    device = opencl_device
    # a missing device used to raise NoMethodError; report nil instead
    device.max_compute_units.to_i if device
  end
end
# File lib/compute_unit/gpu.rb, line 97
# Power reading. Not implemented in this class.
#
# @raise [NotImplementedError] always
def power
  raise NotImplementedError
end
# File lib/compute_unit/gpu.rb, line 124
# Maximum power limit. Not implemented in this class.
#
# @raise [NotImplementedError] always
def power_max_limit
  raise NotImplementedError
end
# File lib/compute_unit/gpu.rb, line 101
# Performance state. Not implemented in this class.
#
# @raise [NotImplementedError] always
def pstate
  raise NotImplementedError
end
# File lib/compute_unit/gpu.rb, line 90
# Derives a status code from utilization and power:
#   0 - utilization above 20 and power at least 50
#   2 - otherwise, power below 20
#   1 - everything else
# NOTE(review): the meaning attached to each code is not visible here.
#
# @return [Integer] 0, 1 or 2
def status
  if utilization > 20 && power >= 50
    0
  elsif power < 20
    2
  else
    1
  end
end
@return [Hash] - hash of hardware status about the gpu
# File lib/compute_unit/gpu.rb, line 185
# Current readings of the gpu, assembled from the object's readers.
#
# @return [Hash] hash of hardware status about the gpu
def status_info
  {
    index: "#{device_class_name}#{index}",
    name: name,
    bios: bios,
    core_clock: core_clock,
    memory_clock: memory_clock,
    power: power,
    fan: fan,
    core_volt: core_voltage,
    temp: temp,
    mem_temp: mem_temp,
    status: status
  }
end
# File lib/compute_unit/gpu.rb, line 232
# Temperature reading; this implementation always reports 0.
#
# @return [Integer] 0
def temp
  0
end
# File lib/compute_unit/gpu.rb, line 236
# Full hash representation of the gpu: identification fields followed by
# current readings. Several keys are fixed to nil in this implementation.
#
# @return [Hash] identification and status values keyed by symbol
def to_h
  {
    uuid: uuid,
    gpuId: "GPU#{index}",
    syspath: device_path,
    pciLoc: pci_loc,
    name: name,
    bios: bios,
    subType: subtype,
    make: make,
    model: model,
    vendor: vendor,
    # memory_name: nil,
    # memory_type: nil,
    # gpu_platform: nil,
    power: power,
    # power_limit: power_limit,
    # power_max_limit: power_max_limit,
    utilization: utilization,
    # memory_used: memory_used ,
    # memory_free: memory_free,
    # memory_total: memory_total,
    temperature: temp,
    status: status,
    pstate: pstate,
    fanSpeed: fan,
    type: compute_type,
    maxTemp: nil,
    mem: memory_clock,
    cor: core_clock,
    vlt: core_voltage,
    mem_temp: mem_temp,
    maxFan: nil,
    dpm: nil,
    vddci: nil,
    maxPower: nil,
    ocProfile: nil,
    opencl_enabled: use_opencl
  }
end
# File lib/compute_unit/gpu.rb, line 140
# Utilization reading. Not implemented in this class.
#
# @raise [NotImplementedError] always
def utilization
  raise NotImplementedError
end
@return [Integer] - the voltage reading of the card, maybe just amd cards (mV)
# File lib/compute_unit/gpu.rb, line 228
# Card voltage reading; this implementation always reports 0.
#
# @return [Integer] the voltage reading of the card in mV
def vddgfx
  0
end
@return [Hash] - a hash of voltages per the voltage table, nil if no table available
# File lib/compute_unit/gpu.rb, line 330
# Voltage table; this implementation has none and returns an empty Array.
#
# @return [Array] always empty here
def voltage_table
  []
end