class ComputeUnit::AmdGpu
Constants
- MAKE
- SUBTYPE
- SYS_DEBUG_PATH
- VENDOR_ID
Public Class Methods
# File lib/compute_unit/gpus/amd_gpu.rb, line 311
# Builds a new instance for the device at +device_path+, collecting the
# class, device, vendor and subsystem ids via the lookup helpers.
# @param device_path [String] - path of the device
# @param index [Integer] - index to assign to the new instance
# @param use_opencl [Boolean] - forwarded unchanged in the options hash
# @return [ComputeUnit::AmdGpu] - whatever +new+ returns for these options
def self.create_from_path(device_path, index, use_opencl = false)
  new(
    device_path,
    {
      device_class_id: device_class(device_path),
      device_id: device(device_path),
      device_vendor_id: device_vendor(device_path),
      subsystem_vendor_id: subsystem_vendor(device_path),
      subsystem_device_id: subsystem_device(device_path),
      use_opencl: use_opencl,
      index: index
    }
  )
end
@return [Array] - returns a list of device paths of all devices specific to the vendor id
# File lib/compute_unit/gpus/amd_gpu.rb, line 74
# Filters the generic GPU device list down to entries whose vendor id
# equals VENDOR_ID.
# @return [Array] - device paths belonging to this vendor
def self.devices
  all_gpu_paths = ComputeUnit::Gpu.devices
  all_gpu_paths.find_all { |path| device_vendor(path) == VENDOR_ID }
end
@return [Array] - returns an array of GPU instances of AMD type only
# File lib/compute_unit/gpus/amd_gpu.rb, line 325
# Creates one instance per vendor device path. The index handed to each
# instance is the position of the path in ComputeUnit::Gpu.found_devices,
# not the position in the vendor-filtered list.
# @param use_opencl [Boolean] - forwarded to create_from_path
# @return [Array] - one instance per device path
def self.find_all(use_opencl = false)
  devices.map do |path|
    position = ComputeUnit::Gpu.found_devices.index(path)
    create_from_path(path, position, use_opencl)
  end
end
ComputeUnit::Gpu::new
# File lib/compute_unit/gpus/amd_gpu.rb, line 14
# Delegates to the superclass, then records the pci location (basename of
# the device path), an optional model name and a uuid derived from the index.
# @param device_path [String] - path of the device
# @param opts [Hash] - options; :model is only used when :use_opencl is truthy
def initialize(device_path, opts = {})
  super
  @pci_loc = File.basename(device_path)
  if opts[:use_opencl]
    @model = opts[:model]
  end
  @uuid = "GPU#{index}"
end
Public Instance Methods
@return [Hash] - pm info keyed by reading name, e.g. {:mclk=>{:value=>"1950", :unit=>"MHz"}, :sclk=>{:value=>"1125", :unit=>"MHz"},
:vddgfx=>{:value=>"950", :unit=>"mV"}, :vddc=>{:value=>"61.49", :unit=>"W"}, :vddci=>{:value=>"1.0", :unit=>"W"}, :max_gpu=>{:value=>"81.243", :unit=>"W"}, :average_gpu=>{:value=>"82.117", :unit=>"W"}, :temperature=>{:value=>"41", :unit=>"C"}, :load=>{:value=>"100", :unit=>"%"}}
# File lib/compute_unit/gpus/amd_gpu.rb, line 365
# Parses the 'amdgpu_pm_info' debug file into a hash and memoizes it.
# Two line shapes are collected: "<number> <unit> (<name>)" and
# "<name>: <number> <rest>"; the latter is rotated so both yield
# value, unit, name in that order.
# @return [Hash] - symbolized, underscored, downcased name => { value:, unit: }
def amdgpu_pm_info
  @amdgpu_pm_info ||= begin
    raw = read_dri_debug_file('amdgpu_pm_info')
    measured = raw.scan(/(\d+\.?\d*)\s+(\w*)\s\(([\w\s]*)\)?/)
    labelled = raw.scan(/(\w*):\s(\d+)\s(.*)/).map(&:rotate)
    (measured + labelled).each_with_object({}) do |(value, unit, name), info|
      info[name.gsub(/\s/, '_').downcase.to_sym] = { value: value, unit: unit }
    end
  end
end
@return [Integer] - the temperature of the asic chip
# File lib/compute_unit/gpus/amd_gpu.rb, line 94
# Reads hwmon 'temp2_input' (default 0) and integer-divides it by 1000.
# @return [Integer] - the temperature of the asic chip
def asic_temp
  raw_reading = read_hwmon_data('temp2_input', 0)
  raw_reading.to_i / 1000
end
@return [String] - the name of the bios which is unique for every card
# File lib/compute_unit/gpus/amd_gpu.rb, line 203
# Picks the bios name and memoizes it. The name extracted from the rom wins
# when it contains three digits followed by a dash; otherwise the upcased
# kernel 'vbios_version' setting is used.
# @return [String] - the name of the bios
def bios
  @bios ||= begin
    kernel_name = read_kernel_setting('vbios_version', 'unreadable').upcase
    rom_name = rom_bios
    if /\d{3}-/.match?(rom_name)
      rom_name
    else
      kernel_name
    end
  end
end
@return [String] - returns the name of the compute board; for Vega boards the compute unit count is appended to the name
# File lib/compute_unit/gpus/amd_gpu.rb, line 64
# Derives a cleaned-up board name from the OpenCL board name and memoizes it.
# Strips the first "Series" or "(TM)", the first "Graphics", collapses one
# double space, and appends the OpenCL unit count for names matching "vega".
# @return [String, nil] - nil (not memoized) when no OpenCL board name exists
def board_name
  return @board_name if @board_name
  return nil unless opencl_board_name

  cleaned = opencl_board_name.sub(/Series|\(TM\)/, '')
  cleaned = cleaned.sub('Graphics', '')
  cleaned = cleaned.sub(/\s{2}/, ' ').strip
  @board_name = /vega/i.match?(cleaned) ? "#{cleaned} #{opencl_units}" : cleaned
end
# File lib/compute_unit/gpus/amd_gpu.rb, line 156
# Returns the raw 'pp_od_clk_limits' kernel setting, with '' as the default.
# @return [String] - unparsed setting contents
def clock_limits
  setting_name = 'pp_od_clk_limits'
  read_kernel_setting(setting_name, '')
end
@return [Array] reading from file “Sclk Limit: 2000 Mhz”, “Mclk Limit: 2250 Mhz” @example [2000, 2250]
# File lib/compute_unit/gpus/amd_gpu.rb, line 400
# Extracts every run of digits from the 'pp_od_clk_limits' setting
# (default '0 0') and converts each to an Integer.
# @return [Array<Integer>] - e.g. [2000, 2250]
def clock_max_defaults
  limits_text = read_kernel_setting('pp_od_clk_limits', '0 0')
  limits_text.scan(/\d+/).map { |digits| digits.to_i }
end
# File lib/compute_unit/gpus/amd_gpu.rb, line 118
# Alias-style accessor that simply returns #vddc.
# @return [Numeric] - same value as #vddc
def configured_core_voltage
  vddc
end
@return [Integer] - the core clock speed
# File lib/compute_unit/gpus/amd_gpu.rb, line 130
# Finds the line marked with '*' in the 'pp_dpm_sclk' setting and returns
# the clock value on it.
#
# The number directly followed by "Mhz" (case-insensitive) is preferred, so
# a state index of 10 or more ("10: 1200Mhz *") is not mistaken for the
# clock. When no such suffix is present, the first run of 2-6 digits is
# used, as before.
# @return [Integer, nil] - the core clock speed, nil when no line is marked
def core_clock
  active = read_kernel_setting('pp_dpm_sclk', '').each_line.find { |line| line.include?('*') }
  return nil if active.nil?

  (active[/(\d+)\s*mhz/i, 1] || active[/\d{2,6}/]).to_i
end
@return [Numeric] - returns voltage of core in mV
# File lib/compute_unit/gpus/amd_gpu.rb, line 114
# Prefers the #dpm_core_vddc reading; when that is zero the hwmon based
# #vddgfx reading is used instead.
# @return [Numeric] - returns voltage of core in mV
def core_voltage
  return vddgfx.to_i if dpm_core_vddc.zero?

  dpm_core_vddc
end
@return [String] - returns the path the debug dri directory ie. “/sys/kernel/debug/dri/0”
# File lib/compute_unit/gpus/amd_gpu.rb, line 345
# Locates (and memoizes) the directory under SYS_DEBUG_PATH whose 'name'
# file mentions this device's pci location.
# @return [String] - the debug dri directory, e.g. "/sys/kernel/debug/dri/0"
# @raise [Errno::EACCES] when no matching 'name' file is found
def debug_dri_dir
  @debug_dri_dir ||= begin
    name_files = Dir.glob(File.join(SYS_DEBUG_PATH, '*', 'name'))
    # if the user does not have permission no file is found
    matching = name_files.find { |name_file| File.read(name_file).include?(pci_loc) }
    raise Errno::EACCES.new("Permission denied #{SYS_DEBUG_PATH}") unless matching

    File.dirname(matching)
  end
end
@return [String] - the path to the readonly rom file
# File lib/compute_unit/gpus/amd_gpu.rb, line 50
# Builds (and memoizes) the path of the 'amdgpu_vbios' file under
# SYS_DEBUG_PATH/<index>.
#
# The value is cached in @debug_rom_path so it cannot be confused with the
# separate #rom_path that #read_rom_data falls back to.
# NOTE(review): assumes nothing else reads @rom_path expecting this value —
# confirm against the superclass.
# @return [String] - the path to the readonly rom file
def debug_rom_path
  @debug_rom_path ||= File.join(SYS_DEBUG_PATH, index.to_s, 'amdgpu_vbios')
end
currently running gpu core voltage
# File lib/compute_unit/gpus/amd_gpu.rb, line 148
# Reads the 'pp_core_vddc' kernel setting (default 0) as an Integer.
# @return [Integer] - currently running gpu core voltage
def dpm_core_vddc
  raw_reading = read_kernel_setting('pp_core_vddc', 0)
  raw_reading.to_i
end
@return [String] - reads and returns the current value of the power_dpm_force_performance_level setting
# File lib/compute_unit/gpus/amd_gpu.rb, line 378
# Reads the 'power_dpm_force_performance_level' kernel setting, passing nil
# as the default.
# @return [String, nil] - the value returned by read_kernel_setting
def dpm_force_performance
  setting_name = 'power_dpm_force_performance_level'
  read_kernel_setting(setting_name, nil)
end
@param setting [String] - the dpm performance setting to adjust the dpm (manual or auto) @return [String] - reads the setting after writing the setting and returns current value
# File lib/compute_unit/gpus/amd_gpu.rb, line 384
# Writes the dpm performance level to 'power_dpm_force_performance_level'.
#
# The value is matched against the whole string, so inputs that merely
# contain "manual" or "auto" (e.g. "automatic") are rejected instead of
# being written to the kernel.
# @param setting [String] - the dpm performance setting (manual or auto)
# @return [Object] - whatever write_kernel_setting returns
# @raise [ArgumentError] when setting is not exactly 'manual' or 'auto'
def dpm_force_performance_setting(setting = 'manual')
  unless /\A(?:manual|auto)\z/.match?(setting.to_s)
    raise ArgumentError.new('setting must be one of manual or auto')
  end

  write_kernel_setting('power_dpm_force_performance_level', "#{setting}\n")
end
@return [Integer] - returns fan rpm speed, 0 if cannot be found
# File lib/compute_unit/gpus/amd_gpu.rb, line 109
# Reads hwmon 'fan1_input' (default 0) as an Integer.
# @return [Integer] - returns fan rpm speed, 0 if cannot be found
def fan
  raw_reading = read_hwmon_data('fan1_input', 0)
  raw_reading.to_i
end
@return [Numeric] - current fan limit as a percentage @note the OS value is between 0 - 255
# File lib/compute_unit/gpus/amd_gpu.rb, line 258
# Converts the hwmon 'pwm1' value (scaled against 255) into a rounded
# percentage. Values that are not positive are returned unchanged.
# @return [Numeric] - current fan limit as a percentage
def fan_limit
  raw = read_hwmon_data('pwm1', 0).to_i
  if raw > 0
    ((raw / 255.0) * 100).round(0)
  else
    raw
  end
end
@return [Numeric] - maximum fan limit as a percentage @note the OS value is between 0 - 255
# File lib/compute_unit/gpus/amd_gpu.rb, line 267
# Converts the hwmon 'pwm1_max' value (scaled against 255) into a rounded
# percentage. Values that are not positive are returned unchanged.
# @return [Numeric] - maximum fan limit as a percentage
def fan_max_limit
  raw = read_hwmon_data('pwm1_max', 0).to_i
  if raw > 0
    ((raw / 255.0) * 100).round(0)
  else
    raw
  end
end
@return [Numeric] - minimum fan limit as a percentage @note the OS value is between 0 - 255
# File lib/compute_unit/gpus/amd_gpu.rb, line 276
# Converts the hwmon 'pwm1_min' value (scaled against 255) into a rounded
# percentage. Values that are not positive are returned unchanged.
# @return [Numeric] - minimum fan limit as a percentage
def fan_min_limit
  raw = read_hwmon_data('pwm1_min', 0).to_i
  if raw > 0
    ((raw / 255.0) * 100).round(0)
  else
    raw
  end
end
# File lib/compute_unit/gpus/amd_gpu.rb, line 160
# Returns the raw 'gpu_defaults' kernel setting, with '' as the default.
# @return [String] - unparsed setting contents
def gpu_defaults
  setting_name = 'gpu_defaults'
  read_kernel_setting(setting_name, '')
end
# File lib/compute_unit/gpus/amd_gpu.rb, line 89
# Returns the same value as #utilization.
# @return [Integer] - the gpu load
def load
  utilization
end
@return [Integer]
# File lib/compute_unit/gpus/amd_gpu.rb, line 405
# First entry of #clock_max_defaults.
# @return [Integer]
def max_core_clock
  limits = clock_max_defaults
  limits.first
end
@return [Integer]
# File lib/compute_unit/gpus/amd_gpu.rb, line 415
# Last entry of #clock_max_defaults.
# @return [Integer]
def max_mem_clock
  limits = clock_max_defaults
  # or vddci_voltage_table.last[:clk]
  limits.last
end
@return [Integer]
# File lib/compute_unit/gpus/amd_gpu.rb, line 425
# :volt of the last row of #vddci_voltage_table.
# @return [Integer]
def max_mem_volt
  last_row = vddci_voltage_table.last
  last_row[:volt]
end
@return [Integer] - temperature of the memory
# File lib/compute_unit/gpus/amd_gpu.rb, line 99
# Reads hwmon 'temp3_input' (default 0) and integer-divides it by 1000.
# @return [Integer] - temperature of the memory
def mem_temp
  raw_reading = read_hwmon_data('temp3_input', 0)
  raw_reading.to_i / 1000
end
@return [Integer] - the memory speed
# File lib/compute_unit/gpus/amd_gpu.rb, line 123
# Finds the line marked with '*' in the 'pp_dpm_mclk' setting and returns
# the clock value on it.
#
# The number directly followed by "Mhz" (case-insensitive) is preferred, so
# a state index of 10 or more ("10: 1200Mhz *") is not mistaken for the
# clock. When no such suffix is present, the first run of 2-6 digits is
# used, as before.
# @return [Integer, nil] - the memory speed, nil when no line is marked
def memory_clock
  active = read_kernel_setting('pp_dpm_mclk', '').each_line.find { |line| line.include?('*') }
  return nil if active.nil?

  (active[/(\d+)\s*mhz/i, 1] || active[/\d{2,6}/]).to_i
end
# File lib/compute_unit/gpus/amd_gpu.rb, line 301
# Free memory is not measured here; this is a fixed placeholder.
# @return [Integer] - always 0
def memory_free
  0
end
# File lib/compute_unit/gpus/amd_gpu.rb, line 293
# Total memory is not measured here; this is a fixed placeholder.
# @return [Integer] - always 0
def memory_total
  0
end
# File lib/compute_unit/gpus/amd_gpu.rb, line 297
# Used memory is not measured here; this is a fixed placeholder.
# @return [Integer] - always 0
def memory_used
  0
end
# File lib/compute_unit/gpus/amd_gpu.rb, line 22
# No extra metadata is provided for this device type.
# @return [Hash] - always an empty hash
def meta
  {}
end
@return [Integer]
# File lib/compute_unit/gpus/amd_gpu.rb, line 410
# :clk of the first row of #voltage_table.
# @return [Integer]
def min_core_clock
  lowest_state = voltage_table[0]
  lowest_state[:clk]
end
@return [Integer]
# File lib/compute_unit/gpus/amd_gpu.rb, line 420
# :clk of the first row of #vddci_voltage_table.
# @return [Integer]
def min_mem_clock
  first_row = vddci_voltage_table.first
  first_row[:clk]
end
@return [Integer]
# File lib/compute_unit/gpus/amd_gpu.rb, line 430
# :volt of the first row of #vddci_voltage_table.
# @return [Integer]
def min_mem_volt
  first_row = vddci_voltage_table.first
  first_row[:volt]
end
@return [String] - the name of the device model (specific name)
# File lib/compute_unit/gpus/amd_gpu.rb, line 83
# Memoized model name: the board name when available, otherwise the sysfs
# model name.
# @return [String] - the name of the device model (specific name)
def model
  return @model if @model

  @model = board_name || sysfs_model_name
end
# File lib/compute_unit/gpus/amd_gpu.rb, line 78
# Returns the same value as #model.
# @return [String] - the device model name
def name
  model
end
@return [Float] the power being used by the gpu
# File lib/compute_unit/gpus/amd_gpu.rb, line 196
# Uses the 'pp_power_usage' kernel setting when it is positive, otherwise
# the hwmon average from #power_average, and adds power_offset to it.
# @return [Numeric] the power being used by the gpu
def power
  reported = read_kernel_setting('pp_power_usage', 0).to_i
  base = if reported > 0
           reported
         else
           power_average
         end
  base + power_offset
end
@return [Float] the average power being used by the gpu
# File lib/compute_unit/gpus/amd_gpu.rb, line 190
# Reads hwmon 'power1_average' (default 0) and integer-divides it by
# 1,000,000, so the result is a whole number.
# @return [Integer] the average power being used by the gpu
def power_average
  # TODO: if a gpu crashes the average power can sometimes take 3000 ms to read!
  raw_reading = read_hwmon_data('power1_average', 0)
  raw_reading.to_i / 1_000_000
end
@return [Numeric] - current power limit
# File lib/compute_unit/gpus/amd_gpu.rb, line 284
# Reads hwmon 'power1_cap' (default 0) and integer-divides it by 1,000,000.
# @return [Numeric] - current power limit
def power_limit
  raw_reading = read_hwmon_data('power1_cap', 0)
  raw_reading.to_i / 1_000_000
end
@param value [Numeric] - the power limit that should be applied to the gpu @return [Numeric] - original passed in value after being set
# File lib/compute_unit/gpus/amd_gpu.rb, line 222
# Validates the requested limit against 10..power_max_limit and writes it,
# multiplied by 1,000,000, to hwmon 'power1_cap'.
# @param value [Numeric] - the power limit that should be applied to the gpu
# @raise [ArgumentError] when value exceeds the maximum or is below 10
def power_limit=(value)
  ceiling = power_max_limit
  if value > ceiling
    raise ArgumentError, "Power Value #{value} cannot exceed #{ceiling}"
  elsif value < 10
    raise ArgumentError, "Value must be between 10 and #{ceiling}"
  end

  # hwmon expects the value to have 6 zeros
  write_hwmon_data('power1_cap', value * 1_000_000)
  # logger.info("GPU#{index} power set to #{value} Watts")
end
@return [Numeric] - the maximum power that can be set
# File lib/compute_unit/gpus/amd_gpu.rb, line 289
# Reads hwmon 'power1_cap_max' and integer-divides it by 1,000,000.
# NOTE(review): unlike the sibling readers no explicit default is passed to
# read_hwmon_data here — confirm that is intended.
# @return [Numeric] - the maximum power that can be set
def power_max_limit
  raw_reading = read_hwmon_data('power1_cap_max')
  raw_reading.to_i / 1_000_000
end
# File lib/compute_unit/gpus/amd_gpu.rb, line 211
# No pstate is reported for this device type; this is a fixed placeholder.
# @return [Integer] - always -1
def pstate
  -1
end
# File lib/compute_unit/gpus/amd_gpu.rb, line 332
# Reads a file from the debug dri directory, returning +default+ when the
# read fails with EINVAL, ENOENT or EACCES. The EACCES case additionally
# logs a debug hint.
# @param file_name [String] - file name inside the debug dri directory
# @param default [String] - value returned when the file cannot be read
# @return [String] - file contents or the default
def read_dri_debug_file(file_name, default = '')
  target = File.join(debug_dri_dir, file_name)
  File.read(target)
rescue Errno::EINVAL, Errno::ENOENT
  default
rescue Errno::EACCES
  logger.debug('run this command as root or with sudo, using default values')
  default
end
@return [String] - the contents of the rom file, or an empty string when neither rom file exists
# File lib/compute_unit/gpus/amd_gpu.rb, line 39
# Returns rom contents from the first source that exists: the debug rom
# file (read in binary mode), then rom_data when rom_path exists, and
# finally an empty string.
# @return [String] - the contents of the rom file
def read_rom_data
  return IO.read(debug_rom_path, mode: 'rb') if File.exist?(debug_rom_path)
  return rom_data if File.exist?(rom_path)

  ''
end
# File lib/compute_unit/gpus/amd_gpu.rb, line 390
# Switches dpm back to 'auto', writes 'r' and then 'c' to
# 'pp_od_clk_voltage', and finally writes '2' to hwmon 'pwm1_enable'.
# @return [Object] - whatever the final write_hwmon_data call returns
def reset_to_defaults
  dpm_force_performance_setting('auto')
  %w[r c].each { |command| write_kernel_setting('pp_od_clk_voltage', command) }
  write_hwmon_data('pwm1_enable', '2')
end
@return [String] - the bios according to the vbios rom. Sometimes the kernel / driver extracted rom can be incorrect; this is the bios gathered from the vbios itself.
# File lib/compute_unit/gpus/amd_gpu.rb, line 29
# Picks the bios name out of the rom metadata strings.
#
# Entry 2 is used when it contains three digits followed by a dash;
# otherwise a warning is logged and entry 3 is returned. The metadata is
# fetched once per call rather than once per access.
# @return [String, nil] - the bios name, nil when the chosen entry is missing
def rom_bios
  primary, fallback = rom_metadata.values_at(2, 3)
  return primary if /\d{3}-/.match?(primary)

  logger.warn("Invalid rom bios name for GPU#{index} using alternate name for #{fallback}")
  fallback
end
@return [Array] - an array of readable strings from the rom file
# File lib/compute_unit/gpus/amd_gpu.rb, line 55
# Extracts up to ten runs of at least ten printable characters from the rom
# data.
#
# The result is stored with ||= so the rom is read and scanned once per
# instance. The dash in the character class is escaped so it is a literal
# character rather than a range; the set of accepted characters is the same.
# @return [Array<String>] - readable strings from the rom file
def rom_metadata
  @rom_metadata ||= begin
    printable_chars = %r{[A-Za-z0-9`~!@#%^&*()\-_=+|'";:/?.>,< \t\$\{\}\[\]\\]{10,}}
    read_rom_data.scan(printable_chars).first(10)
  end
end
@return [String] - the serial number of the card
# File lib/compute_unit/gpus/amd_gpu.rb, line 216
# No serial number is read for this device type; this is a fixed placeholder.
# @return [String] - always 'unknown'
def serial
  'unknown'
end
@param value [Numeric] - the fan limit that should be applied to the gpu as a percentage @return [Numeric] - original passed in value after being set
# File lib/compute_unit/gpus/amd_gpu.rb, line 234
# Sets the current, minimum or maximum fan limit from a percentage.
#
# All arguments are validated before anything is written, so a rejected
# call leaves 'fan1_enable' and the pwm files untouched. On success
# 'fan1_enable' is written first, then the percentage scaled to 0-255.
# @param value [Numeric] - the fan limit as a percentage (20-100)
# @param type [String] - one of 'current', 'min' or 'max'
# @return [Numeric] - original passed in value after being set
# @raise [ArgumentError] for an unknown type or a value outside 20-100
def set_fan_limit(value, type = 'current')
  hwmon_file = case type
               when 'min' then 'pwm1_min'
               when 'max' then 'pwm1_max'
               when 'current' then 'pwm1'
               else
                 raise ArgumentError.new("Invalid fan setting type, must be one of 'current, min or max'")
               end
  raise ArgumentError.new('Fan limit cannot exceed 100') if value > 100
  raise ArgumentError.new('Fan limit value must be between 20 and 100') if value < 20

  write_hwmon_data('fan1_enable', '1')
  # Value must be between 0-255
  amount = (255 * (value / 100.0)).round
  logger.debug("Setting #{type} Fan on GPU#{index} to #{amount}")
  write_hwmon_data(hwmon_file, amount)
  logger.info("GPU#{index} #{type} fan set to #{value} percent")
  value
end
# File lib/compute_unit/gpus/amd_gpu.rb, line 434
# Applies a memory clock / voltage pair to row 3 of 'pp_od_clk_voltage'.
# Does nothing unless experimental_on? is truthy. The dpm level is switched
# to 'manual' before the range checks run.
# @param mem_clock [#to_i] - memory clock, must lie within min_mem_clock..max_mem_clock
# @param mem_volt [#to_i] - memory voltage, must lie within min_mem_volt..max_mem_volt
# @raise [ArgumentError] when either value is out of range
def set_mem_clock_and_vddc(mem_clock, mem_volt)
  return unless experimental_on?

  clock = mem_clock.to_i
  volt = mem_volt.to_i
  # TODO: find max and min values and limit input
  dpm_force_performance_setting('manual')
  unless clock.between?(min_mem_clock, max_mem_clock)
    raise ArgumentError, "MemClock value #{clock} must be between #{min_mem_clock}-#{max_mem_clock}"
  end
  unless volt.between?(min_mem_volt, max_mem_volt)
    raise ArgumentError, "MemVolt value #{volt} must be between #{min_mem_volt}-#{max_mem_volt}"
  end

  commands = [
    "r\n", # unlocks in order to write
    "m 3 #{clock} #{volt}\n", # set row in table (m = manual), 3 = row
    "c\n" # locks file
  ]
  commands.each { |command| write_kernel_setting('pp_od_clk_voltage', command) }
  write_kernel_setting('pp_mclk_od', "3\n")
  logger.info("Successfully applied overclock #{clock} #{volt} to #{name} at #{pci_loc}")
end
# File lib/compute_unit/gpus/amd_gpu.rb, line 152
# Exposes the SUBTYPE constant.
# @return [Object] - the value of SUBTYPE
def subtype
  SUBTYPE
end
@return [Integer] - returns temp of gpu in Celsius
# File lib/compute_unit/gpus/amd_gpu.rb, line 104
# Reads hwmon 'temp1_input' (default 0) and integer-divides it by 1000.
# @return [Integer] - the gpu temperature
def temp
  raw_reading = read_hwmon_data('temp1_input', 0)
  raw_reading.to_i / 1000
end
# File lib/compute_unit/gpus/amd_gpu.rb, line 305
# Integer value of the :load entry in #amdgpu_pm_info.
# A missing :load entry or missing :value yields 0.
# @return [Integer] - the gpu load
def utilization
  amdgpu_pm_info.dig(:load, :value).to_i
end
currently configured gpu core voltage @return [Numeric] - returns voltage of core in mV
# File lib/compute_unit/gpus/amd_gpu.rb, line 143
# Reads the 'pp_voltage' kernel setting (default 0) as an Integer.
# @return [Numeric] - currently configured gpu core voltage in mV
def vddc
  raw_reading = read_kernel_setting('pp_voltage', 0)
  raw_reading.to_i
end
@return [Array] - array of hashes of voltages {:pstate=>0, :mclk=>300, :volt=>750}
# File lib/compute_unit/gpus/amd_gpu.rb, line 177
# Parses the text following the second OD_SCLK/OD_MCLK header of
# 'pp_od_clk_voltage' into one hash per line.
#
# An empty array is returned when the setting is unreadable or has no
# second section, matching #voltage_table and the documented Array return
# type.
# @return [Array<Hash>] - rows like {:pstate=>0, :clk=>300, :volt=>750, :type=>:mclk}
def vddci_voltage_table
  # not sure if this is what mclk is but left it here anyways
  data = read_kernel_setting('pp_od_clk_voltage', nil)
  return [] if data.nil?

  _, _, mclk = data.split(/OD_[S,M]CLK:\s?\n/)
  return [] if mclk.nil?

  mclk.split("\n").map do |line|
    pstate, clk, volt, = line.gsub(/:|Mhz|mV/, '').split(/\s{2,}/).map(&:to_i)
    { pstate: pstate, clk: clk, volt: volt, type: :mclk }
  end
end
@return [Integer] - the core voltage reading of the GPU via HWMON
# File lib/compute_unit/gpus/amd_gpu.rb, line 137
# Reads hwmon 'in0_input' (default 0) as an Integer.
# @return [Integer] - the core voltage reading of the GPU via HWMON
def vddgfx
  raw_reading = read_hwmon_data('in0_input', 0)
  raw_reading.to_i
end
@return [Array] - array of hashes of voltages {:pstate=>0, :sclk=>300, :volt=>750}
# File lib/compute_unit/gpus/amd_gpu.rb, line 165
# Parses the text following the first OD_SCLK/OD_MCLK header of
# 'pp_od_clk_voltage' into one hash per line.
#
# An empty array is returned when the setting is unreadable or contains no
# such section (e.g. an empty string), instead of failing on nil.
# @return [Array<Hash>] - rows like {:pstate=>0, :clk=>300, :volt=>750, :type=>:sclk}
def voltage_table
  data = read_kernel_setting('pp_od_clk_voltage', nil)
  return [] if data.nil?

  _, sclk, = data.split(/OD_[S,M]CLK:\s?\n/)
  return [] if sclk.nil?

  sclk.split("\n").map do |line|
    pstate, clk, volt, = line.gsub(/:|Mhz|mV/, '').split(/\s{2,}/).map(&:to_i)
    { pstate: pstate, clk: clk, volt: volt, type: :sclk }
  end
end