class ComputeUnit::AmdGpu

Constants

MAKE
SUBTYPE
SYS_DEBUG_PATH
VENDOR_ID

Public Class Methods

create_from_path(device_path, index, use_opencl = false) click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 311
# Builds an instance for the device located at +device_path+.
#
# The PCI identifiers are resolved through the class-level helpers
# (device_class, device, device_vendor, subsystem_vendor, subsystem_device)
# and passed to the constructor together with the index and the OpenCL flag.
#
# @param device_path [String] path of the device to inspect
# @param index [Integer, nil] index stored in the options (find_all passes the
#   result of Array#index, which may be nil)
# @param use_opencl [Boolean] forwarded to the constructor as :use_opencl
# @return [Object] whatever +new+ returns for the given path and options
def self.create_from_path(device_path, index, use_opencl = false)
  attributes = {}
  attributes[:device_class_id] = device_class(device_path)
  attributes[:device_id] = device(device_path)
  attributes[:device_vendor_id] = device_vendor(device_path)
  attributes[:subsystem_vendor_id] = subsystem_vendor(device_path)
  attributes[:subsystem_device_id] = subsystem_device(device_path)
  attributes[:use_opencl] = use_opencl
  attributes[:index] = index
  new(device_path, attributes)
end
devices() click to toggle source

@return [Array] - returns a list of device paths of all devices specific to the vendor id

# File lib/compute_unit/gpus/amd_gpu.rb, line 74
# Lists the GPU device paths whose vendor id equals VENDOR_ID.
#
# @return [Array] the entries of ComputeUnit::Gpu.devices for which
#   device_vendor(path) == VENDOR_ID
def self.devices
  ComputeUnit::Gpu.devices.find_all do |path|
    device_vendor(path) == VENDOR_ID
  end
end
find_all(use_opencl = false) click to toggle source

@return [Array] - returns an array of GPU instances of AMD type only

# File lib/compute_unit/gpus/amd_gpu.rb, line 325
# Creates one instance per device path returned by +devices+.
#
# The index handed to create_from_path is the position of the path inside
# ComputeUnit::Gpu.found_devices, not its position in +devices+; it is nil
# when the path is not present in that list.
#
# @param use_opencl [Boolean] forwarded to create_from_path
# @return [Array] the objects built by create_from_path, in +devices+ order
def self.find_all(use_opencl = false)
  # The enumeration index was never used, so a plain map is sufficient.
  devices.map do |device_path|
    found_index = ComputeUnit::Gpu.found_devices.index(device_path)
    create_from_path(device_path, found_index, use_opencl)
  end
end
new(device_path, opts = {}) click to toggle source
Calls superclass method ComputeUnit::Gpu::new
# File lib/compute_unit/gpus/amd_gpu.rb, line 14
# Sets up the instance after delegating to the superclass constructor.
#
# @param device_path [String] device path; its basename is stored as @pci_loc
# @param opts [Hash] options forwarded to the superclass; :model is copied to
#   @model only when :use_opencl is truthy
def initialize(device_path, opts = {})
  super(device_path, opts)
  @pci_loc = File.basename(device_path)
  if opts[:use_opencl]
    @model = opts[:model]
  end

  # index is not set here; presumably provided by the superclass
  @uuid = "GPU#{index}"
end

Public Instance Methods

amdgpu_pm_info() click to toggle source

@return [Hash] - pm info keyed by reading name, e.g. {:mclk=>{:value=>"1950", :unit=>"MHz"}, :sclk=>{:value=>"1125", :unit=>"MHz"},

:vddgfx=>{:value=>"950", :unit=>"mV"},
:vddc=>{:value=>"61.49", :unit=>"W"},
:vddci=>{:value=>"1.0", :unit=>"W"},
:max_gpu=>{:value=>"81.243", :unit=>"W"},
:average_gpu=>{:value=>"82.117", :unit=>"W"},
:temperature=>{:value=>"41", :unit=>"C"},
:load=>{:value=>"100", :unit=>"%"}}
# File lib/compute_unit/gpus/amd_gpu.rb, line 365
# Parses the 'amdgpu_pm_info' debug file into a hash of readings.
#
# Two line shapes are recognised: "<number> <unit> (<name>)" and
# "<name>: <number> <unit>". Names are downcased, whitespace becomes "_",
# and the result is used as a symbol key.
#
# The parsed hash is memoized in @amdgpu_pm_info, so the file is only read
# the first time this is called on an instance.
#
# @return [Hash] e.g. {:mclk=>{:value=>"1950", :unit=>"MHz"}}
def amdgpu_pm_info
  @amdgpu_pm_info ||= begin
    raw = read_dri_debug_file('amdgpu_pm_info')
    bracketed = raw.scan(/(\d+\.?\d*)\s+(\w*)\s\(([\w\s]*)\)?/)
    # the second pattern captures (name, value, unit); rotate moves name last
    labelled = raw.scan(/(\w*):\s(\d+)\s(.*)/).map(&:rotate)

    readings = {}
    (bracketed + labelled).each do |value, unit, name|
      key = name.gsub(/\s/, '_').downcase.to_sym
      readings[key] = { value: value, unit: unit }
    end
    readings
  end
end
asic_temp() click to toggle source

@return [Integer] - the temperature of the asic chip

# File lib/compute_unit/gpus/amd_gpu.rb, line 94
# Reads the 'temp2_input' hwmon value (default 0) and integer-divides it by 1000.
#
# NOTE(review): the division presumably converts millidegrees to degrees
# Celsius -- confirm against the hwmon documentation.
#
# @return [Integer] the scaled reading
def asic_temp
  raw_reading = read_hwmon_data('temp2_input', 0)
  raw_reading.to_i / 1000
end
bios() click to toggle source

@return [String] - the name of the bios which is unique for every card

# File lib/compute_unit/gpus/amd_gpu.rb, line 203
# Returns the bios name, preferring the name found in the rom.
#
# The kernel's 'vbios_version' setting (upcased, 'unreadable' as default) is
# used unless rom_bios contains three digits followed by a dash.
# The result is memoized in @bios.
#
# @return [String] the bios name
def bios
  @bios ||= begin
    kernel_name = read_kernel_setting('vbios_version', 'unreadable').upcase
    rom_name = rom_bios
    if /\d{3}-/.match?(rom_name)
      rom_name
    else
      kernel_name
    end
  end
end
board_name() click to toggle source

@return [String] - returns the name of the compute board; for Vega boards the number of compute units is appended to the name

# File lib/compute_unit/gpus/amd_gpu.rb, line 64
# Returns a cleaned-up board name derived from opencl_board_name.
#
# The first "Series" or "(TM)", the first "Graphics" and the first run of two
# whitespace characters are removed/collapsed, then the name is stripped.
# Names matching /vega/i get opencl_units appended. The result is memoized in
# @board_name; nil (not memoized) is returned when opencl_board_name is falsy.
#
# @return [String, nil] the board name
def board_name
  return @board_name if @board_name
  return nil unless opencl_board_name

  cleaned = opencl_board_name.sub(/Series|\(TM\)/, '')
  cleaned = cleaned.sub('Graphics', '')
  cleaned = cleaned.sub(/\s{2}/, ' ').strip
  @board_name = if /vega/i.match?(cleaned)
                  "#{cleaned} #{opencl_units}"
                else
                  cleaned
                end
end
clock_limits() click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 156
# Returns the 'pp_od_clk_limits' kernel setting, using '' as the default
# passed to read_kernel_setting.
def clock_limits
  read_kernel_setting('pp_od_clk_limits', '')
end
clock_max_defaults() click to toggle source

@return [Array] reading from file “Sclk Limit: 2000 Mhz”, “Mclk Limit: 2250 Mhz” @example [2000, 2250]

# File lib/compute_unit/gpus/amd_gpu.rb, line 400
# Extracts every run of digits from the 'pp_od_clk_limits' kernel setting
# (default '0 0') and converts each to an Integer.
#
# @return [Array<Integer>] e.g. [2000, 2250]
def clock_max_defaults
  limits_text = read_kernel_setting('pp_od_clk_limits', '0 0')
  digit_runs = limits_text.scan(/\d+/)
  digit_runs.map(&:to_i)
end
configured_core_voltage() click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 118
# Alias-style accessor: returns the value of vddc.
def configured_core_voltage
  vddc
end
core_clock() click to toggle source

@return [Integer] - the core clock speed

# File lib/compute_unit/gpus/amd_gpu.rb, line 130
# Returns the number found on the line of 'pp_dpm_sclk' that contains '*'.
#
# The first run of 2 to 6 digits on that line is used, so a single-digit
# prefix on the line is skipped.
#
# @return [Integer, nil] the value, 0 when the marked line has no such digit
#   run, nil when no line contains '*'
def core_clock
  levels = read_kernel_setting('pp_dpm_sclk', '').split("\n")
  active_level = levels.find { |level| level.include?('*') }
  return nil if active_level.nil?

  digits = active_level.match(/\d{2,6}/).to_a.first
  digits.to_i
end
core_voltage() click to toggle source

@return [Numeric] - returns voltage of core in mV

# File lib/compute_unit/gpus/amd_gpu.rb, line 114
# Returns the core voltage, preferring the dpm reading.
#
# dpm_core_vddc is read exactly once so that the value tested for zero is
# the same value that is returned (the previous form read it twice).
# When it is zero, vddgfx converted with to_i is returned instead.
#
# @return [Numeric] the voltage reading
def core_voltage
  dpm_voltage = dpm_core_vddc
  dpm_voltage.zero? ? vddgfx.to_i : dpm_voltage
end
debug_dri_dir() click to toggle source

@return [String] - returns the path to the debug dri directory, e.g. "/sys/kernel/debug/dri/0"

# File lib/compute_unit/gpus/amd_gpu.rb, line 345
# Locates the directory under SYS_DEBUG_PATH whose 'name' file mentions pci_loc.
#
# The result is memoized in @debug_dri_dir.
#
# @return [String] the directory containing the matching 'name' file
# @raise [Errno::EACCES] when no 'name' file mentions pci_loc
def debug_dri_dir
  @debug_dri_dir ||= begin
    # if the user does not have permission the glob finds nothing and the path will be nil
    name_files = Dir.glob(File.join(SYS_DEBUG_PATH, '*', 'name'))
    name_file = name_files.find { |file| File.read(file).include?(pci_loc) }
    # Errno classes prepend their own description, so only the path is
    # supplied; this avoids "Permission denied - Permission denied ...".
    raise Errno::EACCES, SYS_DEBUG_PATH unless name_file

    File.dirname(name_file)
  end
end
debug_rom_path() click to toggle source

@return [String] - the path to the readonly rom file

# File lib/compute_unit/gpus/amd_gpu.rb, line 50
# Builds the path of the 'amdgpu_vbios' file under SYS_DEBUG_PATH/<index>.
#
# The value is memoized in @debug_rom_path. It was previously stored in
# @rom_path, which does not match the method name.
# NOTE(review): @rom_path presumably belongs to the separate rom_path
# method used by read_rom_data -- confirm nothing relied on the old name.
#
# @return [String] the path to the rom file in the debug directory
def debug_rom_path
  @debug_rom_path ||= File.join(SYS_DEBUG_PATH, index.to_s, 'amdgpu_vbios')
end
dpm_core_vddc() click to toggle source

currently running gpu core voltage

# File lib/compute_unit/gpus/amd_gpu.rb, line 148
# Reads the 'pp_core_vddc' kernel setting (default 0) as an Integer.
#
# @return [Integer] the reading converted with to_i
def dpm_core_vddc
  raw_voltage = read_kernel_setting('pp_core_vddc', 0)
  raw_voltage.to_i
end
dpm_force_performance() click to toggle source

@return [String] - reads and returns the current value of the power_dpm_force_performance_level setting (nothing is written)

# File lib/compute_unit/gpus/amd_gpu.rb, line 378
# Reads the 'power_dpm_force_performance_level' kernel setting, passing nil
# as the default to read_kernel_setting. Nothing is written here.
def dpm_force_performance
  read_kernel_setting('power_dpm_force_performance_level', nil)
end
dpm_force_performance_setting(setting = 'manual') click to toggle source

@param setting [String] - the dpm performance setting to adjust the dpm (manual or auto) @return [String] - reads the setting after writing the setting and returns current value

# File lib/compute_unit/gpus/amd_gpu.rb, line 384
# Writes the dpm performance level to 'power_dpm_force_performance_level'.
#
# The value is validated against the whole string; the previous unanchored
# match also accepted values such as "automatic" and wrote them out.
#
# @param setting [String, Symbol] either 'manual' or 'auto'
# @return [Object] whatever write_kernel_setting returns
# @raise [ArgumentError] when setting is not exactly manual or auto
def dpm_force_performance_setting(setting = 'manual')
  unless /\A(?:manual|auto)\z/.match?(setting.to_s)
    raise ArgumentError, 'setting must be one of manual or auto'
  end

  write_kernel_setting('power_dpm_force_performance_level', "#{setting}\n")
end
fan() click to toggle source

@return [Integer] - returns fan rpm speed, 0 if cannot be found

# File lib/compute_unit/gpus/amd_gpu.rb, line 109
# Reads the 'fan1_input' hwmon value (default 0) as an Integer.
#
# @return [Integer] the reading converted with to_i
def fan
  raw_speed = read_hwmon_data('fan1_input', 0)
  raw_speed.to_i
end
fan_limit() click to toggle source

@return [Numeric] - current fan limit as a percentage @note the OS values is between 0 - 255

# File lib/compute_unit/gpus/amd_gpu.rb, line 258
# Converts the 'pwm1' hwmon value (default 0) from the 0-255 scale to a
# rounded percentage. Readings that are not greater than zero are returned
# unchanged.
#
# @return [Integer] the percentage, or the raw reading when it is <= 0
def fan_limit
  raw_pwm = read_hwmon_data('pwm1', 0).to_i
  if raw_pwm > 0
    ((raw_pwm / 255.0) * 100).round(0)
  else
    raw_pwm
  end
end
fan_max_limit() click to toggle source

@return [Numeric] - maximum fan limit (pwm1_max) as a percentage @note the OS value is between 0 - 255

# File lib/compute_unit/gpus/amd_gpu.rb, line 267
# Converts the 'pwm1_max' hwmon value (default 0) from the 0-255 scale to a
# rounded percentage. Readings that are not greater than zero are returned
# unchanged.
#
# @return [Integer] the percentage, or the raw reading when it is <= 0
def fan_max_limit
  raw_pwm = read_hwmon_data('pwm1_max', 0).to_i
  if raw_pwm > 0
    ((raw_pwm / 255.0) * 100).round(0)
  else
    raw_pwm
  end
end
fan_min_limit() click to toggle source

@return [Numeric] - minimum fan limit (pwm1_min) as a percentage @note the OS value is between 0 - 255

# File lib/compute_unit/gpus/amd_gpu.rb, line 276
# Converts the 'pwm1_min' hwmon value (default 0) from the 0-255 scale to a
# rounded percentage. Readings that are not greater than zero are returned
# unchanged.
#
# @return [Integer] the percentage, or the raw reading when it is <= 0
def fan_min_limit
  raw_pwm = read_hwmon_data('pwm1_min', 0).to_i
  if raw_pwm > 0
    ((raw_pwm / 255.0) * 100).round(0)
  else
    raw_pwm
  end
end
gpu_defaults() click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 160
# Returns the 'gpu_defaults' kernel setting, using '' as the default passed
# to read_kernel_setting.
def gpu_defaults
  read_kernel_setting('gpu_defaults', '')
end
load() click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 89
# Returns the value of utilization.
def load
  utilization
end
max_core_clock() click to toggle source

@return [Integer]

# File lib/compute_unit/gpus/amd_gpu.rb, line 405
# Returns the first entry of clock_max_defaults.
#
# @return [Integer, nil] the first limit, nil when the list is empty
def max_core_clock
  limits = clock_max_defaults
  limits.first
end
max_mem_clock() click to toggle source

@return [Integer]

# File lib/compute_unit/gpus/amd_gpu.rb, line 415
# Returns the last entry of clock_max_defaults.
#
# @return [Integer, nil] the last limit, nil when the list is empty
def max_mem_clock
  limits = clock_max_defaults
  # or vddci_voltage_table.last[:clk]
  limits.last
end
max_mem_volt() click to toggle source

@return [Integer]

# File lib/compute_unit/gpus/amd_gpu.rb, line 425
# Returns the :volt value of the last row of vddci_voltage_table.
#
# @return [Integer] the :volt entry of the last row
def max_mem_volt
  highest_row = vddci_voltage_table.last
  highest_row[:volt]
end
mem_temp() click to toggle source

@return [Integer] - temperature of the memory

# File lib/compute_unit/gpus/amd_gpu.rb, line 99
# Reads the 'temp3_input' hwmon value (default 0) and integer-divides it by 1000.
#
# NOTE(review): the division presumably converts millidegrees to degrees
# Celsius -- confirm against the hwmon documentation.
#
# @return [Integer] the scaled reading
def mem_temp
  raw_reading = read_hwmon_data('temp3_input', 0)
  raw_reading.to_i / 1000
end
memory_clock() click to toggle source

@return [Integer] - the memory speed

# File lib/compute_unit/gpus/amd_gpu.rb, line 123
# Returns the number found on the line of 'pp_dpm_mclk' that contains '*'.
#
# The first run of 2 to 6 digits on that line is used, so a single-digit
# prefix on the line is skipped.
#
# @return [Integer, nil] the value, 0 when the marked line has no such digit
#   run, nil when no line contains '*'
def memory_clock
  levels = read_kernel_setting('pp_dpm_mclk', '').split("\n")
  active_level = levels.find { |level| level.include?('*') }
  return nil if active_level.nil?

  digits = active_level.match(/\d{2,6}/).to_a.first
  digits.to_i
end
memory_free() click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 301
# Always returns 0; no memory reading is performed here.
def memory_free
  0
end
memory_total() click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 293
# Always returns 0; no memory reading is performed here.
def memory_total
  0
end
memory_used() click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 297
# Always returns 0; no memory reading is performed here.
def memory_used
  0
end
meta() click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 22
# Always returns a new empty Hash.
def meta
  {}
end
min_core_clock() click to toggle source

@return [Integer]

# File lib/compute_unit/gpus/amd_gpu.rb, line 410
# Returns the :clk value of the first row of voltage_table.
#
# @return [Integer] the :clk entry of the first row
def min_core_clock
  lowest_row = voltage_table[0]
  lowest_row[:clk]
end
min_mem_clock() click to toggle source

@return [Integer]

# File lib/compute_unit/gpus/amd_gpu.rb, line 420
# Returns the :clk value of the first row of vddci_voltage_table.
#
# @return [Integer] the :clk entry of the first row
def min_mem_clock
  lowest_row = vddci_voltage_table.first
  lowest_row[:clk]
end
min_mem_volt() click to toggle source

@return [Integer]

# File lib/compute_unit/gpus/amd_gpu.rb, line 430
# Returns the :volt value of the first row of vddci_voltage_table.
#
# @return [Integer] the :volt entry of the first row
def min_mem_volt
  lowest_row = vddci_voltage_table.first
  lowest_row[:volt]
end
model() click to toggle source

@return [String] - the name of the device model (specific name)

# File lib/compute_unit/gpus/amd_gpu.rb, line 83
# Returns the model name, memoized in @model.
#
# board_name is preferred; sysfs_model_name is used when board_name is falsy.
#
# @return [String] the model name
def model
  return @model if @model

  @model = board_name || sysfs_model_name
end
name() click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 78
# Returns the value of model.
def name
  model
end
power() click to toggle source

@return [Float] the power being used by the gpu

# File lib/compute_unit/gpus/amd_gpu.rb, line 196
# Returns the power reading plus power_offset.
#
# The 'pp_power_usage' kernel setting (default 0) is used when it is greater
# than zero; otherwise power_average is used.
#
# @return [Numeric] the selected reading plus power_offset
def power
  reported = read_kernel_setting('pp_power_usage', 0).to_i
  base = if reported > 0
           reported
         else
           power_average
         end
  base + power_offset
end
power_average() click to toggle source

@return [Float] the average power being used by the gpu

# File lib/compute_unit/gpus/amd_gpu.rb, line 190
# Reads the 'power1_average' hwmon value (default 0) and integer-divides it
# by one million.
#
# @return [Integer] the scaled reading
def power_average
  # TODO: if a gpu crashes the average power can sometimes take 3000 ms to read!
  raw_power = read_hwmon_data('power1_average', 0)
  raw_power.to_i / 1_000_000
end
power_limit() click to toggle source

@return [Numeric] - current power limit

# File lib/compute_unit/gpus/amd_gpu.rb, line 284
# Reads the 'power1_cap' hwmon value (default 0) and integer-divides it by
# one million.
#
# @return [Integer] the scaled reading
def power_limit
  raw_cap = read_hwmon_data('power1_cap', 0)
  raw_cap.to_i / 1_000_000
end
power_limit=(value) click to toggle source

@param value [Numeric] - the power limit that should be applied to the gpu @return [Numeric] - original passed in value after being set

# File lib/compute_unit/gpus/amd_gpu.rb, line 222
# Writes a new power limit to the 'power1_cap' hwmon file.
#
# @param value [Numeric] the limit; must be between 10 and power_max_limit
# @return [Object] whatever write_hwmon_data returns
# @raise [ArgumentError] when value exceeds power_max_limit or is below 10
def power_limit=(value)
  ceiling = power_max_limit
  if value > ceiling
    raise ArgumentError, "Power Value #{value} cannot exceed #{ceiling}"
  end
  if value < 10
    raise ArgumentError, "Value must be between 10 and #{ceiling}"
  end

  # hwmon expects the value to have 6 zeros
  scaled = value * 1_000_000
  write_hwmon_data('power1_cap', scaled)
  # logger.info("GPU#{index} power set to #{value} Watts")
end
power_max_limit() click to toggle source

@return [Numeric] - the maximum power that can be set

# File lib/compute_unit/gpus/amd_gpu.rb, line 289
# Reads the 'power1_cap_max' hwmon value and integer-divides it by one
# million. No explicit default is passed to read_hwmon_data here.
#
# @return [Integer] the scaled reading
def power_max_limit
  raw_cap = read_hwmon_data('power1_cap_max')
  raw_cap.to_i / 1_000_000
end
pstate() click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 211
# Always returns -1; no pstate is read here.
def pstate
  -1
end
read_dri_debug_file(file_name, default = '') click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 332
# Reads a file from the debug dri directory.
#
# Errno::EINVAL and Errno::ENOENT yield the default silently; Errno::EACCES
# yields the default after a debug log message.
#
# @param file_name [String] file name relative to debug_dri_dir
# @param default [Object] value returned when the file cannot be read
# @return [String, Object] the file contents, or default on those errors
def read_dri_debug_file(file_name, default = '')
  target = File.join(debug_dri_dir, file_name)
  File.read(target)
rescue Errno::EINVAL, Errno::ENOENT
  default
rescue Errno::EACCES
  logger.debug('run this command as root or with sudo, using default values')
  default
end
read_rom_data() click to toggle source

@return [String] - the contents of the rom file, or an empty string when no rom file exists

# File lib/compute_unit/gpus/amd_gpu.rb, line 39
# Returns the rom contents from the first available source.
#
# The file at debug_rom_path is read in binary mode when it exists;
# otherwise rom_data is returned when rom_path exists; otherwise ''.
#
# @return [String] the rom contents, or '' when neither path exists
def read_rom_data
  return IO.read(debug_rom_path, mode: 'rb') if File.exist?(debug_rom_path)
  return rom_data if File.exist?(rom_path)

  ''
end
reset_to_defaults() click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 390
# Puts the dpm level back to 'auto', writes 'r' then 'c' to
# 'pp_od_clk_voltage', and writes '2' to the 'pwm1_enable' hwmon file.
#
# NOTE(review): 'r'/'c' presumably reset and commit the overdrive table and
# pwm1_enable '2' presumably restores automatic fan control -- confirm
# against the amdgpu and hwmon documentation.
#
# @return [Object] whatever the final write_hwmon_data call returns
def reset_to_defaults
  dpm_force_performance_setting('auto')
  %w[r c].each do |command|
    write_kernel_setting('pp_od_clk_voltage', command)
  end
  write_hwmon_data('pwm1_enable', '2')
end
rom_bios() click to toggle source

@return [String] - the bios name according to the vbios rom. Sometimes the rom name extracted by the kernel / driver can be incorrect; this is the bios name gathered from the vbios itself.

# File lib/compute_unit/gpus/amd_gpu.rb, line 29
# Returns the bios name taken from the rom metadata.
#
# Entry 2 of rom_metadata is used when it contains three digits followed by
# a dash; otherwise a warning is logged and entry 3 is returned instead.
# rom_metadata is evaluated once per call rather than once per use.
#
# @return [String, nil] the bios name, nil when the chosen entry is absent
def rom_bios
  metadata = rom_metadata
  primary = metadata[2]
  return primary if /\d{3}-/.match?(primary)

  fallback = metadata[3]
  logger.warn("Invalid rom bios name for GPU#{index} using alternate name for #{fallback}")
  fallback
end
rom_metadata() click to toggle source

@return [Array] - an array of readable strings from the rom file

# File lib/compute_unit/gpus/amd_gpu.rb, line 55
# Returns up to ten printable strings (10+ characters each) from the rom data.
#
# The result is memoized in @rom_metadata. The previous `||` form never
# assigned the variable, so the rom was re-read and re-scanned on every call.
#
# @return [Array<String>] the first ten matches, fewer when the rom has fewer
def rom_metadata
  @rom_metadata ||= begin
    # NOTE(review): ")-_" inside the class forms a character range -- confirm
    # that is intended before touching this pattern.
    printable_chars = %r{[A-Za-z0-9`~!@#%^&*()-_=+|'";:/?.>,< \t\$\{\}\[\]\\]{10,}}
    read_rom_data.scan(printable_chars)[0..9]
  end
end
serial() click to toggle source

@return [String] - the serial number of the card

# File lib/compute_unit/gpus/amd_gpu.rb, line 216
# Always returns the literal 'unknown'; no serial number is read here.
def serial
  'unknown'
end
set_fan_limit(value, type = 'current') click to toggle source

@param value [Numeric] - the fan limit that should be applied to the gpu as a percentage @return [Numeric] - original passed in value after being set

# File lib/compute_unit/gpus/amd_gpu.rb, line 234
# Sets one of the fan pwm limits from a percentage.
#
# All arguments are validated before anything is written; previously
# 'fan1_enable' was written even when the call went on to raise.
#
# @param value [Numeric] the fan limit as a percentage, between 20 and 100
# @param type [String] which limit to set: 'current' (pwm1), 'min' (pwm1_min)
#   or 'max' (pwm1_max)
# @return [Numeric] the value that was passed in
# @raise [ArgumentError] when type is unknown or value is outside 20..100
def set_fan_limit(value, type = 'current')
  hwmon_file = case type
               when 'min' then 'pwm1_min'
               when 'max' then 'pwm1_max'
               when 'current' then 'pwm1'
               else
                 raise ArgumentError, "Invalid fan setting type, must be one of 'current, min or max'"
               end
  raise ArgumentError, 'Fan limit cannot exceed 100' if value > 100
  raise ArgumentError, 'Fan limit value must be between 20 and 100' if value < 20

  write_hwmon_data('fan1_enable', '1')
  # Value must be between 0-255
  amount = (255 * (value / 100.0)).round
  logger.debug("Setting #{type} Fan on GPU#{index} to #{amount}")
  write_hwmon_data(hwmon_file, amount)
  logger.info("GPU#{index} #{type} fan set to #{value} percent")
  value
end
set_mem_clock_and_vddc(mem_clock, mem_volt) click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 434
# Applies a memory clock / voltage pair through 'pp_od_clk_voltage'.
#
# Nothing happens unless experimental_on? is truthy. Both values are
# validated before the dpm level is switched to manual, so a rejected call
# no longer leaves the card in manual mode. Each limit is read once.
#
# @param mem_clock [#to_i] memory clock, within min_mem_clock..max_mem_clock
# @param mem_volt [#to_i] memory voltage, within min_mem_volt..max_mem_volt
# @return [Object, nil] the result of the final logger.info call, or nil when
#   experimental mode is off
# @raise [ArgumentError] when either value is outside its range
def set_mem_clock_and_vddc(mem_clock, mem_volt)
  return unless experimental_on?

  mem_clock = mem_clock.to_i
  mem_volt = mem_volt.to_i

  clock_floor = min_mem_clock
  clock_ceiling = max_mem_clock
  unless mem_clock.between?(clock_floor, clock_ceiling)
    raise ArgumentError, "MemClock value #{mem_clock} must be between #{clock_floor}-#{clock_ceiling}"
  end

  volt_floor = min_mem_volt
  volt_ceiling = max_mem_volt
  unless mem_volt.between?(volt_floor, volt_ceiling)
    raise ArgumentError, "MemVolt value #{mem_volt} must be between #{volt_floor}-#{volt_ceiling}"
  end

  dpm_force_performance_setting('manual')
  write_kernel_setting('pp_od_clk_voltage', "r\n") # unlocks in order to write
  # set row in table (m = manual), 3 = row,
  write_kernel_setting('pp_od_clk_voltage', "m 3 #{mem_clock} #{mem_volt}\n")
  write_kernel_setting('pp_od_clk_voltage', "c\n") # locks file
  write_kernel_setting('pp_mclk_od', "3\n")
  logger.info("Successfully applied overclock #{mem_clock} #{mem_volt} to #{name} at #{pci_loc}")
end
subtype() click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 152
# Returns the SUBTYPE constant.
def subtype
  SUBTYPE
end
temp() click to toggle source

@return [Integer] - returns the temperature of the gpu in Celsius

# File lib/compute_unit/gpus/amd_gpu.rb, line 104
# Reads the 'temp1_input' hwmon value (default 0) and integer-divides it by 1000.
#
# NOTE(review): the division presumably converts millidegrees to degrees
# Celsius -- confirm against the hwmon documentation.
#
# @return [Integer] the scaled reading
def temp
  raw_reading = read_hwmon_data('temp1_input', 0)
  raw_reading.to_i / 1000
end
utilization() click to toggle source
# File lib/compute_unit/gpus/amd_gpu.rb, line 305
# Returns the :load reading from amdgpu_pm_info as an Integer.
#
# The former trailing `|| 0` could never apply because to_i always returns
# an Integer, so it has been dropped; the hash is looked up only once.
#
# @return [Integer] the load value, 0 when no :load reading is present
def utilization
  load_reading = amdgpu_pm_info[:load]
  return 0 unless load_reading

  load_reading[:value].to_i
end
vddc() click to toggle source

currently configured gpu core voltage @return [Numeric] - returns voltage of core in mV

# File lib/compute_unit/gpus/amd_gpu.rb, line 143
# Reads the 'pp_voltage' kernel setting (default 0) as an Integer.
#
# @return [Integer] the reading converted with to_i
def vddc
  raw_voltage = read_kernel_setting('pp_voltage', 0)
  raw_voltage.to_i
end
vddci_voltage_table() click to toggle source

@return [Array] - array of hashes of voltages, e.g. {:pstate=>0, :clk=>300, :volt=>750, :type=>:mclk}

# File lib/compute_unit/gpus/amd_gpu.rb, line 177
# Parses the text after the OD_MCLK header of 'pp_od_clk_voltage' into rows.
#
# Every line after that header becomes a row, including any lines of later
# sections. An empty array is returned when the setting cannot be read or
# has no such section, matching voltage_table (previously nil was returned
# or a NoMethodError was raised).
#
# @return [Array<Hash>] rows such as
#   {:pstate=>0, :clk=>300, :volt=>750, :type=>:mclk}
def vddci_voltage_table
  # not sure if this is what mclk is but left it here anyways
  data = read_kernel_setting('pp_od_clk_voltage', nil)
  return [] if data.nil?

  # the third chunk is whatever follows the second OD_SCLK/OD_MCLK header
  _, _, mclk = data.split(/OD_[SM]CLK:\s?\n/)
  return [] if mclk.nil?

  mclk.split("\n").map do |line|
    pstate, clk, volt, = line.gsub(/:|Mhz|mV/, '').split(/\s{2,}/).map(&:to_i)
    { pstate: pstate, clk: clk, volt: volt, type: :mclk }
  end
end
vddgfx() click to toggle source

@return [Integer] - the core voltage reading of the GPU via HWMON

# File lib/compute_unit/gpus/amd_gpu.rb, line 137
# Reads the 'in0_input' hwmon value (default 0) as an Integer.
#
# @return [Integer] the reading converted with to_i
def vddgfx
  raw_voltage = read_hwmon_data('in0_input', 0)
  raw_voltage.to_i
end
voltage_table() click to toggle source

@return [Array] - array of hashes of voltages, e.g. {:pstate=>0, :clk=>300, :volt=>750, :type=>:sclk}

# File lib/compute_unit/gpus/amd_gpu.rb, line 165
# Parses the text between the first and second OD_SCLK/OD_MCLK headers of
# 'pp_od_clk_voltage' into rows.
#
# An empty array is returned when the setting cannot be read or contains no
# such section (previously a missing section raised NoMethodError).
#
# @return [Array<Hash>] rows such as
#   {:pstate=>0, :clk=>300, :volt=>750, :type=>:sclk}
def voltage_table
  data = read_kernel_setting('pp_od_clk_voltage', nil)
  return [] if data.nil?

  # the second chunk is whatever follows the first OD_SCLK/OD_MCLK header
  _, sclk, = data.split(/OD_[SM]CLK:\s?\n/)
  return [] if sclk.nil?

  sclk.split("\n").map do |line|
    pstate, clk, volt, = line.gsub(/:|Mhz|mV/, '').split(/\s{2,}/).map(&:to_i)
    { pstate: pstate, clk: clk, volt: volt, type: :sclk }
  end
end