From 7081014c73d8d4720757aa55f8cc7beaef1ab85e Mon Sep 17 00:00:00 2001 From: nimlgen <138685161+nimlgen@users.noreply.github.com> Date: Wed, 17 Dec 2025 17:56:01 +0300 Subject: [PATCH] am_smi: mi300 (#13737) * am_smi: mi300 * smi * remo --- extra/amdpci/am_smi.py | 126 +++++++--- tinygrad/runtime/autogen/am/__init__.py | 4 +- tinygrad/runtime/autogen/am/smu_v13_0_6.py | 255 ++++++++++++++++++++- tinygrad/runtime/support/am/ip.py | 6 +- 4 files changed, 357 insertions(+), 34 deletions(-) diff --git a/extra/amdpci/am_smi.py b/extra/amdpci/am_smi.py index a6f5bb89c2..3dfcc71472 100755 --- a/extra/amdpci/am_smi.py +++ b/extra/amdpci/am_smi.py @@ -26,11 +26,13 @@ def color_temp(temp): def color_voltage(voltage): return colored(f"{voltage/1000:>5.3f}V", "cyan") def draw_bar(percentage, width=40, fill='|', empty=' ', opt_text='', color='cyan'): + percentage = 0.0 if percentage != percentage else percentage # NaN guard + percentage = max(0.0, min(1.0, float(percentage))) filled_width = int(width * percentage) if not opt_text: opt_text = f'{percentage*100:.1f}%' bar = fill * filled_width + empty * (width - filled_width) - bar = (bar[:-len(opt_text)] + opt_text) if opt_text else bar + if opt_text and len(opt_text) <= len(bar): bar = (bar[:-len(opt_text)] + opt_text) bar = colored(bar[:filled_width], color) + bar[filled_width:] return f'[{bar}]' @@ -88,6 +90,7 @@ class SMICtx: self.opened_pci_resources = {} self.prev_lines_cnt = 0 self.prev_terminal_width = 0 + self.prev_terminal_height = 0 remove_parts = ["Advanced Micro Devices, Inc. [AMD/ATI]", "VGA compatible controller:"] lspci = subprocess.check_output(["lspci"]).decode("utf-8").splitlines() @@ -95,6 +98,20 @@ class SMICtx: for k,v in self.lspci.items(): for part in remove_parts: self.lspci[k] = self.lspci[k].replace(part, "").strip().rstrip() + def _smuq10_round(self, v:int) -> int: + v = int(v) + return (v + 512) >> 10 # SMUQ10_ROUND + + def _fmt_kb(self, kb:int) -> str: + kb = int(kb) + if kb < 1024: return f"{kb}KB" + mb = kb / 1024.0 + if mb < 1024: return f"{mb:.1f}MB" + gb = mb / 1024.0 + if gb < 1024: return f"{gb:.2f}GB" + tb = gb / 1024.0 + return f"{tb:.2f}TB" + def _open_am_device(self, pcibus): if pcibus not in self.opened_pci_resources: bar_fds = {bar: os.open(f"/sys/bus/pci/devices/{pcibus}/resource{bar}", os.O_RDWR | os.O_SYNC) for bar in [0, 2, 5]} @@ -116,6 +133,7 @@ class SMICtx: def rescan_devs(self): pattern = os.path.join('/tmp', 'am_*.lock') for d in [f[8:-5] for f in glob.glob(pattern)]: + if d.startswith("usb"): continue if d not in self.opened_pcidevs: self._open_am_device(d) @@ -131,21 +149,53 @@ class SMICtx: os.system('clear') if DEBUG >= 2: print(f"Removed AM device {d.pcibus}") - def collect(self): return {d: d.smu.read_metrics() if d.pci_state == "D0" else None for d in self.devs} + def collect(self): + tables = {} + for dev in self.devs: + match dev.ip_ver[am.MP1_HWIP]: + case (13,0,6): table_t = dev.smu.smu_mod.MetricsTableX_t + case (13,0,12): table_t = dev.smu.smu_mod.MetricsTableV2_t + case _: table_t = dev.smu.smu_mod.SmuMetricsExternal_t + tables[dev] = dev.smu.read_table(table_t, dev.smu.smu_mod.SMU_TABLE_SMU_METRICS) if dev.pci_state == "D0" else None + return tables - def get_gfx_activity(self, dev, metrics): return metrics.SmuMetrics.AverageGfxActivity - def get_mem_activity(self, dev, metrics): return metrics.SmuMetrics.AverageUclkActivity + def _pick_nonzero_avg(self, vals) -> int: + xs = [x for x in vals if x > 0] + return int(sum(xs) / len(xs)) if xs else 0 + + def get_gfx_activity(self, dev, metrics): + match dev.ip_ver[am.MP1_HWIP]: + case (13,0,6): return max(0, min(100, self._smuq10_round(metrics.SocketGfxBusy))) + case _: return metrics.SmuMetrics.AverageGfxActivity + + def get_mem_activity(self, dev, metrics): + match dev.ip_ver[am.MP1_HWIP]: + case (13,0,6): return max(0, min(100, self._smuq10_round(metrics.DramBandwidthUtilization))) + case _: return metrics.SmuMetrics.AverageUclkActivity def get_temps(self, dev, metrics, compact=False): - temps_keys = [(k, name) for k, name in dev.smu.smu_mod.c__EA_TEMP_e__enumvalues.items() - if k < dev.smu.smu_mod.TEMP_COUNT and metrics.SmuMetrics.AvgTemperature[k] != 0] - if compact: temps_keys = [(k, name) for k, name in temps_keys if k in (dev.smu.smu_mod.TEMP_HOTSPOT, dev.smu.smu_mod.TEMP_MEM)] - return {name: metrics.SmuMetrics.AvgTemperature[k] for k, name in temps_keys} + match dev.ip_ver[am.MP1_HWIP]: + case (13,0,6): + temps = { + "Hotspot": self._smuq10_round(metrics.MaxSocketTemperature), + "HBM": self._smuq10_round(metrics.MaxHbmTemperature), + "VR": self._smuq10_round(metrics.MaxVrTemperature), + } + if compact: return {k: temps[k] for k in ("Hotspot", "HBM") if temps.get(k, 0) != 0} + return {k: v for k, v in temps.items() if v != 0} + case _: + temps_keys = [(k, name) for k, name in dev.smu.smu_mod.c__EA_TEMP_e__enumvalues.items() + if k < dev.smu.smu_mod.TEMP_COUNT and metrics.SmuMetrics.AvgTemperature[k] != 0] + if compact: temps_keys = [(k, name) for k, name in temps_keys if k in (dev.smu.smu_mod.TEMP_HOTSPOT, dev.smu.smu_mod.TEMP_MEM)] + return {name: metrics.SmuMetrics.AvgTemperature[k] for k, name in temps_keys} def get_voltage(self, dev, metrics, compact=False): - voltage_keys = [(k, name) for k, name in dev.smu.smu_mod.c__EA_SVI_PLANE_e__enumvalues.items() + match dev.ip_ver[am.MP1_HWIP]: + case (13,0,6): return {} + case _: + voltage_keys = [(k, name) for k, name in dev.smu.smu_mod.c__EA_SVI_PLANE_e__enumvalues.items() if k < dev.smu.smu_mod.SVI_PLANE_COUNT and metrics.SmuMetrics.AvgVoltage[k] != 0] - return {name: metrics.SmuMetrics.AvgVoltage[k] for k, name in voltage_keys} + return {name: metrics.SmuMetrics.AvgVoltage[k] for k, name in voltage_keys} def get_busy_threshold(self, dev): match dev.ip_ver[am.MP1_HWIP]: @@ -153,22 +203,40 @@ class SMICtx: case _: return 15 def get_gfx_freq(self, dev, metrics): - return metrics.SmuMetrics.AverageGfxclkFrequencyPostDs if self.get_gfx_activity(dev, metrics) <= self.get_busy_threshold(dev) else \ - metrics.SmuMetrics.AverageGfxclkFrequencyPreDs + if metrics is None: return 0 + match dev.ip_ver[am.MP1_HWIP]: + case (13,0,6): return self._smuq10_round(metrics.GfxclkFrequency[0]) + case _: + return metrics.SmuMetrics.AverageGfxclkFrequencyPostDs if self.get_gfx_activity(dev, metrics) <= self.get_busy_threshold(dev) else \ + metrics.SmuMetrics.AverageGfxclkFrequencyPreDs def get_mem_freq(self, dev, metrics): - return metrics.SmuMetrics.AverageMemclkFrequencyPostDs if self.get_mem_activity(dev, metrics) <= self.get_busy_threshold(dev) else \ - metrics.SmuMetrics.AverageMemclkFrequencyPreDs + match dev.ip_ver[am.MP1_HWIP]: + case (13,0,6): return self._smuq10_round(metrics.UclkFrequency) + case _: + return metrics.SmuMetrics.AverageMemclkFrequencyPostDs if self.get_mem_activity(dev, metrics) <= self.get_busy_threshold(dev) else \ + metrics.SmuMetrics.AverageMemclkFrequencyPreDs def get_fckl_freq(self, dev, metrics): - return metrics.SmuMetrics.AverageFclkFrequencyPostDs if self.get_mem_activity(dev, metrics) <= self.get_busy_threshold(dev) else \ - metrics.SmuMetrics.AverageFclkFrequencyPreDs + match dev.ip_ver[am.MP1_HWIP]: + case (13,0,6): return self._smuq10_round(metrics.FclkFrequency) + case _: + return metrics.SmuMetrics.AverageFclkFrequencyPostDs if self.get_mem_activity(dev, metrics) <= self.get_busy_threshold(dev) else \ + metrics.SmuMetrics.AverageFclkFrequencyPreDs - def get_fan_rpm_pwm(self, dev, metrics): return metrics.SmuMetrics.AvgFanRpm, metrics.SmuMetrics.AvgFanPwm + def get_fan_rpm_pwm(self, dev, metrics): + match dev.ip_ver[am.MP1_HWIP]: + case (13,0,6): return None, None + case _: return metrics.SmuMetrics.AvgFanRpm, metrics.SmuMetrics.AvgFanPwm - def get_power(self, dev, metrics): return metrics.SmuMetrics.AverageSocketPower, metrics.SmuMetrics.dGPU_W_MAX + def get_power(self, dev, metrics): + match dev.ip_ver[am.MP1_HWIP]: + case (13,0,6): return self._smuq10_round(metrics.SocketPower), self._smuq10_round(metrics.MaxSocketPowerLimit) + case _: return metrics.SmuMetrics.AverageSocketPower, metrics.SmuMetrics.dGPU_W_MAX def get_mem_usage(self, dev): + return 0 + usage = 0 pt_stack = [dev.mm.root_page_table] while len(pt_stack) > 0: @@ -177,7 +245,7 @@ class SMICtx: entry = pt.entries[i] if (entry & am.AMDGPU_PTE_VALID) == 0: continue - if pt.lv!=am.AMDGPU_VM_PTB and not dev.gmc.is_pte_huge_page(entry): + if pt.lv!=am.AMDGPU_VM_PTB and not dev.gmc.is_pte_huge_page(pt.lv, entry): pt_stack.append(AMPageTableEntry(dev, entry & 0x0000FFFFFFFFF000, lv=pt.lv+1)) continue if (entry & am.AMDGPU_PTE_SYSTEM) != 0: continue @@ -219,23 +287,28 @@ class SMICtx: temps_table_compact = ["Temps (°C):" + '/'.join([f"{color_temp(val)} {name}" for name, val in temps_data_compact.items()])] fan_rpm, fan_pwm = self.get_fan_rpm_pwm(dev, metrics) - power_table = ["=== Power ==="] + [f"Fan Speed: {fan_rpm} RPM"] + [f"Fan Power: {fan_pwm}%"] + power_table = ["=== Power ==="] + power_table += ["Fan: N/A"] if fan_rpm is None or fan_pwm is None else [f"Fan Speed: {fan_rpm} RPM", f"Fan Power: {fan_pwm}%"] total_power, max_power = self.get_power(dev, metrics) - power_line = [f"Power: " + draw_bar(total_power / max_power, 16, opt_text=f"{total_power}/{max_power}W")] - power_line_compact = [f"Power: " + draw_bar(total_power / max_power, activity_line_width, opt_text=f"{total_power}/{max_power}W")] + if max_power > 0: + power_line = [f"Power: " + draw_bar(total_power / max_power, 16, opt_text=f"{total_power}/{max_power}W")] + power_line_compact = [f"Power: " + draw_bar(total_power / max_power, activity_line_width, opt_text=f"{total_power}/{max_power}W")] + else: + power_line = ["Power: N/A"] + power_line_compact = ["Power: N/A"] voltage_data = self.get_voltage(dev, metrics) - voltage_table = ["=== Voltages ==="] + [f"{name:<20}: {color_voltage(voltage)}" for name, voltage in voltage_data.items()] + voltage_table = None if not voltage_data else (["=== Voltages ==="] + [f"{name:<20}: {color_voltage(voltage)}" for name, voltage in voltage_data.items()]) gfx_freq = self.get_gfx_freq(dev, metrics) mclk_freq = self.get_mem_freq(dev, metrics) fclk_freq = self.get_fckl_freq(dev, metrics) - frequency_table = ["=== Frequencies ===", f"GFXCLK: {gfx_freq:>4} MHz", f"FCLK : {fclk_freq:>4} MHz", f"MCLK : {mclk_freq:>4} MHz"] if self.prev_terminal_width >= 231: - power_table += power_line + [""] + voltage_table + power_table += power_line + if voltage_table is not None: power_table += [""] + voltage_table activity_line += [""] elif self.prev_terminal_width >= 171: power_table += power_line + [""] + frequency_table @@ -307,4 +380,5 @@ if __name__ == "__main__": smi_ctx.draw(args.list) if args.list: break time.sleep(1) - except KeyboardInterrupt: print("Exiting...") + except KeyboardInterrupt: + print("Exiting...") diff --git a/tinygrad/runtime/autogen/am/__init__.py b/tinygrad/runtime/autogen/am/__init__.py index eedb9c1cde..0b281a35e9 100644 --- a/tinygrad/runtime/autogen/am/__init__.py +++ b/tinygrad/runtime/autogen/am/__init__.py @@ -19,8 +19,8 @@ def __getattr__(nm): args=["-I/opt/rocm/include", "-x", "c++"], tarball=am_src) case "smu_v13_0_0": return load("am/smu_v13_0_0",[],[f"{AMD}/pm/swsmu/inc/pmfw_if/{s}.h" for s in ["smu_v13_0_0_ppsmc","smu13_driver_if_v13_0_0"]] +[root/"extra/amdpci/headers/amdgpu_smu.h"], args=inc, tarball=am_src) - case "smu_v13_0_6": return load("am/smu_v13_0_6",[],[f"{AMD}/pm/swsmu/inc/pmfw_if/{s}.h" for s in ["smu_v13_0_6_ppsmc","smu13_driver_if_v13_0_6"]] - +[root/"extra/amdpci/headers/amdgpu_smu.h"], args=inc, tarball=am_src) + case "smu_v13_0_6": return load("am/smu_v13_0_6",[],[f"{AMD}/pm/swsmu/inc/pmfw_if/{s}.h" for s in ["smu_v13_0_6_ppsmc","smu_v13_0_6_pmfw", \ + "smu13_driver_if_v13_0_6"]] +[root/"extra/amdpci/headers/amdgpu_smu.h"], args=inc, tarball=am_src) case "smu_v14_0_2": return load("am/smu_v14_0_2", [], [f"{AMD}/pm/swsmu/inc/pmfw_if/{s}.h" for s in ["smu_v14_0_0_pmfw", "smu_v14_0_2_ppsmc", "smu14_driver_if_v14_0"]]+[root/"extra/amdpci/headers/amdgpu_smu.h"], args=inc, tarball=am_src) case _: raise AttributeError(f"no such autogen: {nm}") diff --git a/tinygrad/runtime/autogen/am/smu_v13_0_6.py b/tinygrad/runtime/autogen/am/smu_v13_0_6.py index 6b8f2dc169..f1f2520720 100644 --- a/tinygrad/runtime/autogen/am/smu_v13_0_6.py +++ b/tinygrad/runtime/autogen/am/smu_v13_0_6.py @@ -1,9 +1,246 @@ # mypy: ignore-errors import ctypes -from tinygrad.helpers import unwrap -from tinygrad.runtime.support.c import Struct, CEnum, _IO, _IOW, _IOR, _IOWR +from tinygrad.runtime.support.c import DLL, Struct, CEnum, _IO, _IOW, _IOR, _IOWR PPSMC_Result = ctypes.c_uint32 PPSMC_MSG = ctypes.c_uint32 +FEATURE_LIST_e = CEnum(ctypes.c_uint32) +FEATURE_DATA_CALCULATION = FEATURE_LIST_e.define('FEATURE_DATA_CALCULATION', 0) +FEATURE_DPM_CCLK = FEATURE_LIST_e.define('FEATURE_DPM_CCLK', 1) +FEATURE_DPM_FCLK = FEATURE_LIST_e.define('FEATURE_DPM_FCLK', 2) +FEATURE_DPM_GFXCLK = FEATURE_LIST_e.define('FEATURE_DPM_GFXCLK', 3) +FEATURE_DPM_LCLK = FEATURE_LIST_e.define('FEATURE_DPM_LCLK', 4) +FEATURE_DPM_SOCCLK = FEATURE_LIST_e.define('FEATURE_DPM_SOCCLK', 5) +FEATURE_DPM_UCLK = FEATURE_LIST_e.define('FEATURE_DPM_UCLK', 6) +FEATURE_DPM_VCN = FEATURE_LIST_e.define('FEATURE_DPM_VCN', 7) +FEATURE_DPM_XGMI = FEATURE_LIST_e.define('FEATURE_DPM_XGMI', 8) +FEATURE_DS_FCLK = FEATURE_LIST_e.define('FEATURE_DS_FCLK', 9) +FEATURE_DS_GFXCLK = FEATURE_LIST_e.define('FEATURE_DS_GFXCLK', 10) +FEATURE_DS_LCLK = FEATURE_LIST_e.define('FEATURE_DS_LCLK', 11) +FEATURE_DS_MP0CLK = FEATURE_LIST_e.define('FEATURE_DS_MP0CLK', 12) +FEATURE_DS_MP1CLK = FEATURE_LIST_e.define('FEATURE_DS_MP1CLK', 13) +FEATURE_DS_MPIOCLK = FEATURE_LIST_e.define('FEATURE_DS_MPIOCLK', 14) +FEATURE_DS_SOCCLK = FEATURE_LIST_e.define('FEATURE_DS_SOCCLK', 15) +FEATURE_DS_VCN = FEATURE_LIST_e.define('FEATURE_DS_VCN', 16) +FEATURE_APCC_DFLL = FEATURE_LIST_e.define('FEATURE_APCC_DFLL', 17) +FEATURE_APCC_PLUS = FEATURE_LIST_e.define('FEATURE_APCC_PLUS', 18) +FEATURE_DF_CSTATE = FEATURE_LIST_e.define('FEATURE_DF_CSTATE', 19) +FEATURE_CC6 = FEATURE_LIST_e.define('FEATURE_CC6', 20) +FEATURE_PC6 = FEATURE_LIST_e.define('FEATURE_PC6', 21) +FEATURE_CPPC = FEATURE_LIST_e.define('FEATURE_CPPC', 22) +FEATURE_PPT = FEATURE_LIST_e.define('FEATURE_PPT', 23) +FEATURE_TDC = FEATURE_LIST_e.define('FEATURE_TDC', 24) +FEATURE_THERMAL = FEATURE_LIST_e.define('FEATURE_THERMAL', 25) +FEATURE_SOC_PCC = FEATURE_LIST_e.define('FEATURE_SOC_PCC', 26) +FEATURE_CCD_PCC = FEATURE_LIST_e.define('FEATURE_CCD_PCC', 27) +FEATURE_CCD_EDC = FEATURE_LIST_e.define('FEATURE_CCD_EDC', 28) +FEATURE_PROCHOT = FEATURE_LIST_e.define('FEATURE_PROCHOT', 29) +FEATURE_DVO_CCLK = FEATURE_LIST_e.define('FEATURE_DVO_CCLK', 30) +FEATURE_FDD_AID_HBM = FEATURE_LIST_e.define('FEATURE_FDD_AID_HBM', 31) +FEATURE_FDD_AID_SOC = FEATURE_LIST_e.define('FEATURE_FDD_AID_SOC', 32) +FEATURE_FDD_XCD_EDC = FEATURE_LIST_e.define('FEATURE_FDD_XCD_EDC', 33) +FEATURE_FDD_XCD_XVMIN = FEATURE_LIST_e.define('FEATURE_FDD_XCD_XVMIN', 34) +FEATURE_FW_CTF = FEATURE_LIST_e.define('FEATURE_FW_CTF', 35) +FEATURE_GFXOFF = FEATURE_LIST_e.define('FEATURE_GFXOFF', 36) +FEATURE_SMU_CG = FEATURE_LIST_e.define('FEATURE_SMU_CG', 37) +FEATURE_PSI7 = FEATURE_LIST_e.define('FEATURE_PSI7', 38) +FEATURE_CSTATE_BOOST = FEATURE_LIST_e.define('FEATURE_CSTATE_BOOST', 39) +FEATURE_XGMI_PER_LINK_PWR_DOWN = FEATURE_LIST_e.define('FEATURE_XGMI_PER_LINK_PWR_DOWN', 40) +FEATURE_CXL_QOS = FEATURE_LIST_e.define('FEATURE_CXL_QOS', 41) +FEATURE_SOC_DC_RTC = FEATURE_LIST_e.define('FEATURE_SOC_DC_RTC', 42) +FEATURE_GFX_DC_RTC = FEATURE_LIST_e.define('FEATURE_GFX_DC_RTC', 43) +NUM_FEATURES = FEATURE_LIST_e.define('NUM_FEATURES', 44) + +PCIE_LINK_SPEED_INDEX_TABLE_e = CEnum(ctypes.c_uint32) +PCIE_LINK_SPEED_INDEX_TABLE_GEN1 = PCIE_LINK_SPEED_INDEX_TABLE_e.define('PCIE_LINK_SPEED_INDEX_TABLE_GEN1', 0) +PCIE_LINK_SPEED_INDEX_TABLE_GEN2 = PCIE_LINK_SPEED_INDEX_TABLE_e.define('PCIE_LINK_SPEED_INDEX_TABLE_GEN2', 1) +PCIE_LINK_SPEED_INDEX_TABLE_GEN3 = PCIE_LINK_SPEED_INDEX_TABLE_e.define('PCIE_LINK_SPEED_INDEX_TABLE_GEN3', 2) +PCIE_LINK_SPEED_INDEX_TABLE_GEN4 = PCIE_LINK_SPEED_INDEX_TABLE_e.define('PCIE_LINK_SPEED_INDEX_TABLE_GEN4', 3) +PCIE_LINK_SPEED_INDEX_TABLE_GEN4_ESM = PCIE_LINK_SPEED_INDEX_TABLE_e.define('PCIE_LINK_SPEED_INDEX_TABLE_GEN4_ESM', 4) +PCIE_LINK_SPEED_INDEX_TABLE_GEN5 = PCIE_LINK_SPEED_INDEX_TABLE_e.define('PCIE_LINK_SPEED_INDEX_TABLE_GEN5', 5) +PCIE_LINK_SPEED_INDEX_TABLE_COUNT = PCIE_LINK_SPEED_INDEX_TABLE_e.define('PCIE_LINK_SPEED_INDEX_TABLE_COUNT', 6) + +GFX_GUARDBAND_e = CEnum(ctypes.c_uint32) +VOLTAGE_COLD_0 = GFX_GUARDBAND_e.define('VOLTAGE_COLD_0', 0) +VOLTAGE_COLD_1 = GFX_GUARDBAND_e.define('VOLTAGE_COLD_1', 1) +VOLTAGE_COLD_2 = GFX_GUARDBAND_e.define('VOLTAGE_COLD_2', 2) +VOLTAGE_COLD_3 = GFX_GUARDBAND_e.define('VOLTAGE_COLD_3', 3) +VOLTAGE_COLD_4 = GFX_GUARDBAND_e.define('VOLTAGE_COLD_4', 4) +VOLTAGE_COLD_5 = GFX_GUARDBAND_e.define('VOLTAGE_COLD_5', 5) +VOLTAGE_COLD_6 = GFX_GUARDBAND_e.define('VOLTAGE_COLD_6', 6) +VOLTAGE_COLD_7 = GFX_GUARDBAND_e.define('VOLTAGE_COLD_7', 7) +VOLTAGE_MID_0 = GFX_GUARDBAND_e.define('VOLTAGE_MID_0', 8) +VOLTAGE_MID_1 = GFX_GUARDBAND_e.define('VOLTAGE_MID_1', 9) +VOLTAGE_MID_2 = GFX_GUARDBAND_e.define('VOLTAGE_MID_2', 10) +VOLTAGE_MID_3 = GFX_GUARDBAND_e.define('VOLTAGE_MID_3', 11) +VOLTAGE_MID_4 = GFX_GUARDBAND_e.define('VOLTAGE_MID_4', 12) +VOLTAGE_MID_5 = GFX_GUARDBAND_e.define('VOLTAGE_MID_5', 13) +VOLTAGE_MID_6 = GFX_GUARDBAND_e.define('VOLTAGE_MID_6', 14) +VOLTAGE_MID_7 = GFX_GUARDBAND_e.define('VOLTAGE_MID_7', 15) +VOLTAGE_HOT_0 = GFX_GUARDBAND_e.define('VOLTAGE_HOT_0', 16) +VOLTAGE_HOT_1 = GFX_GUARDBAND_e.define('VOLTAGE_HOT_1', 17) +VOLTAGE_HOT_2 = GFX_GUARDBAND_e.define('VOLTAGE_HOT_2', 18) +VOLTAGE_HOT_3 = GFX_GUARDBAND_e.define('VOLTAGE_HOT_3', 19) +VOLTAGE_HOT_4 = GFX_GUARDBAND_e.define('VOLTAGE_HOT_4', 20) +VOLTAGE_HOT_5 = GFX_GUARDBAND_e.define('VOLTAGE_HOT_5', 21) +VOLTAGE_HOT_6 = GFX_GUARDBAND_e.define('VOLTAGE_HOT_6', 22) +VOLTAGE_HOT_7 = GFX_GUARDBAND_e.define('VOLTAGE_HOT_7', 23) +VOLTAGE_GUARDBAND_COUNT = GFX_GUARDBAND_e.define('VOLTAGE_GUARDBAND_COUNT', 24) + +class MetricsTableX_t(Struct): pass +uint32_t = ctypes.c_uint32 +uint64_t = ctypes.c_uint64 +MetricsTableX_t._fields_ = [ + ('AccumulationCounter', uint32_t), + ('MaxSocketTemperature', uint32_t), + ('MaxVrTemperature', uint32_t), + ('MaxHbmTemperature', uint32_t), + ('MaxSocketTemperatureAcc', uint64_t), + ('MaxVrTemperatureAcc', uint64_t), + ('MaxHbmTemperatureAcc', uint64_t), + ('SocketPowerLimit', uint32_t), + ('MaxSocketPowerLimit', uint32_t), + ('SocketPower', uint32_t), + ('Timestamp', uint64_t), + ('SocketEnergyAcc', uint64_t), + ('CcdEnergyAcc', uint64_t), + ('XcdEnergyAcc', uint64_t), + ('AidEnergyAcc', uint64_t), + ('HbmEnergyAcc', uint64_t), + ('CclkFrequencyLimit', uint32_t), + ('GfxclkFrequencyLimit', uint32_t), + ('FclkFrequency', uint32_t), + ('UclkFrequency', uint32_t), + ('SocclkFrequency', (uint32_t * 4)), + ('VclkFrequency', (uint32_t * 4)), + ('DclkFrequency', (uint32_t * 4)), + ('LclkFrequency', (uint32_t * 4)), + ('GfxclkFrequencyAcc', (uint64_t * 8)), + ('CclkFrequencyAcc', (uint64_t * 96)), + ('MaxCclkFrequency', uint32_t), + ('MinCclkFrequency', uint32_t), + ('MaxGfxclkFrequency', uint32_t), + ('MinGfxclkFrequency', uint32_t), + ('FclkFrequencyTable', (uint32_t * 4)), + ('UclkFrequencyTable', (uint32_t * 4)), + ('SocclkFrequencyTable', (uint32_t * 4)), + ('VclkFrequencyTable', (uint32_t * 4)), + ('DclkFrequencyTable', (uint32_t * 4)), + ('LclkFrequencyTable', (uint32_t * 4)), + ('MaxLclkDpmRange', uint32_t), + ('MinLclkDpmRange', uint32_t), + ('XgmiWidth', uint32_t), + ('XgmiBitrate', uint32_t), + ('XgmiReadBandwidthAcc', (uint64_t * 8)), + ('XgmiWriteBandwidthAcc', (uint64_t * 8)), + ('SocketC0Residency', uint32_t), + ('SocketGfxBusy', uint32_t), + ('DramBandwidthUtilization', uint32_t), + ('SocketC0ResidencyAcc', uint64_t), + ('SocketGfxBusyAcc', uint64_t), + ('DramBandwidthAcc', uint64_t), + ('MaxDramBandwidth', uint32_t), + ('DramBandwidthUtilizationAcc', uint64_t), + ('PcieBandwidthAcc', (uint64_t * 4)), + ('ProchotResidencyAcc', uint32_t), + ('PptResidencyAcc', uint32_t), + ('SocketThmResidencyAcc', uint32_t), + ('VrThmResidencyAcc', uint32_t), + ('HbmThmResidencyAcc', uint32_t), + ('GfxLockXCDMak', uint32_t), + ('GfxclkFrequency', (uint32_t * 8)), + ('PublicSerialNumber_AID', (uint64_t * 4)), + ('PublicSerialNumber_XCD', (uint64_t * 8)), + ('PublicSerialNumber_CCD', (uint64_t * 12)), + ('XgmiReadDataSizeAcc', (uint64_t * 8)), + ('XgmiWriteDataSizeAcc', (uint64_t * 8)), + ('PcieBandwidth', (uint32_t * 4)), + ('PCIeL0ToRecoveryCountAcc', uint32_t), + ('PCIenReplayAAcc', uint32_t), + ('PCIenReplayARolloverCountAcc', uint32_t), + ('PCIeNAKSentCountAcc', uint32_t), + ('PCIeNAKReceivedCountAcc', uint32_t), + ('VcnBusy', (uint32_t * 4)), + ('JpegBusy', (uint32_t * 32)), + ('PCIeLinkSpeed', uint32_t), + ('PCIeLinkWidth', uint32_t), + ('GfxBusy', (uint32_t * 8)), + ('GfxBusyAcc', (uint64_t * 8)), +] +class MetricsTableA_t(Struct): pass +MetricsTableA_t._fields_ = [ + ('AccumulationCounter', uint32_t), + ('MaxSocketTemperature', uint32_t), + ('MaxVrTemperature', uint32_t), + ('MaxHbmTemperature', uint32_t), + ('MaxSocketTemperatureAcc', uint64_t), + ('MaxVrTemperatureAcc', uint64_t), + ('MaxHbmTemperatureAcc', uint64_t), + ('SocketPowerLimit', uint32_t), + ('MaxSocketPowerLimit', uint32_t), + ('SocketPower', uint32_t), + ('Timestamp', uint64_t), + ('SocketEnergyAcc', uint64_t), + ('CcdEnergyAcc', uint64_t), + ('XcdEnergyAcc', uint64_t), + ('AidEnergyAcc', uint64_t), + ('HbmEnergyAcc', uint64_t), + ('CclkFrequencyLimit', uint32_t), + ('GfxclkFrequencyLimit', uint32_t), + ('FclkFrequency', uint32_t), + ('UclkFrequency', uint32_t), + ('SocclkFrequency', (uint32_t * 4)), + ('VclkFrequency', (uint32_t * 4)), + ('DclkFrequency', (uint32_t * 4)), + ('LclkFrequency', (uint32_t * 4)), + ('GfxclkFrequencyAcc', (uint64_t * 8)), + ('CclkFrequencyAcc', (uint64_t * 96)), + ('MaxCclkFrequency', uint32_t), + ('MinCclkFrequency', uint32_t), + ('MaxGfxclkFrequency', uint32_t), + ('MinGfxclkFrequency', uint32_t), + ('FclkFrequencyTable', (uint32_t * 4)), + ('UclkFrequencyTable', (uint32_t * 4)), + ('SocclkFrequencyTable', (uint32_t * 4)), + ('VclkFrequencyTable', (uint32_t * 4)), + ('DclkFrequencyTable', (uint32_t * 4)), + ('LclkFrequencyTable', (uint32_t * 4)), + ('MaxLclkDpmRange', uint32_t), + ('MinLclkDpmRange', uint32_t), + ('XgmiWidth', uint32_t), + ('XgmiBitrate', uint32_t), + ('XgmiReadBandwidthAcc', (uint64_t * 8)), + ('XgmiWriteBandwidthAcc', (uint64_t * 8)), + ('SocketC0Residency', uint32_t), + ('SocketGfxBusy', uint32_t), + ('DramBandwidthUtilization', uint32_t), + ('SocketC0ResidencyAcc', uint64_t), + ('SocketGfxBusyAcc', uint64_t), + ('DramBandwidthAcc', uint64_t), + ('MaxDramBandwidth', uint32_t), + ('DramBandwidthUtilizationAcc', uint64_t), + ('PcieBandwidthAcc', (uint64_t * 4)), + ('ProchotResidencyAcc', uint32_t), + ('PptResidencyAcc', uint32_t), + ('SocketThmResidencyAcc', uint32_t), + ('VrThmResidencyAcc', uint32_t), + ('HbmThmResidencyAcc', uint32_t), + ('GfxLockXCDMak', uint32_t), + ('GfxclkFrequency', (uint32_t * 8)), + ('PublicSerialNumber_AID', (uint64_t * 4)), + ('PublicSerialNumber_XCD', (uint64_t * 8)), + ('PublicSerialNumber_CCD', (uint64_t * 12)), + ('XgmiReadDataSizeAcc', (uint64_t * 8)), + ('XgmiWriteDataSizeAcc', (uint64_t * 8)), + ('VcnBusy', (uint32_t * 4)), + ('JpegBusy', (uint32_t * 32)), +] +class VfMetricsTable_t(Struct): pass +VfMetricsTable_t._fields_ = [ + ('AccumulationCounter', uint32_t), + ('InstGfxclk_TargFreq', uint32_t), + ('AccGfxclk_TargFreq', uint64_t), + ('AccGfxRsmuDpm_Busy', uint64_t), +] I2cControllerPort_e = CEnum(ctypes.c_uint32) I2C_CONTROLLER_PORT_0 = I2cControllerPort_e.define('I2C_CONTROLLER_PORT_0', 0) I2C_CONTROLLER_PORT_1 = I2cControllerPort_e.define('I2C_CONTROLLER_PORT_1', 1) @@ -106,7 +343,6 @@ SwI2cRequest_t._fields_ = [ ('SwI2cCmds', (SwI2cCmd_t * 24)), ] class SwI2cRequestExternal_t(Struct): pass -uint32_t = ctypes.c_uint32 SwI2cRequestExternal_t._fields_ = [ ('SwI2cRequest', SwI2cRequest_t), ('Spare', (uint32_t * 8)), @@ -467,6 +703,19 @@ PPSMC_XCD_THM_TYPE = 0x3 PPSMC_HBM_THM_TYPE = 0x4 PPSMC_PLPD_MODE_DEFAULT = 0x1 PPSMC_PLPD_MODE_OPTIMIZED = 0x2 +NUM_VCLK_DPM_LEVELS = 4 +NUM_DCLK_DPM_LEVELS = 4 +NUM_SOCCLK_DPM_LEVELS = 4 +NUM_LCLK_DPM_LEVELS = 4 +NUM_UCLK_DPM_LEVELS = 4 +NUM_FCLK_DPM_LEVELS = 4 +NUM_XGMI_DPM_LEVELS = 2 +NUM_CXL_BITRATES = 4 +NUM_PCIE_BITRATES = 4 +NUM_XGMI_BITRATES = 4 +NUM_XGMI_WIDTHS = 3 +SMU_METRICS_TABLE_VERSION = 0xD +SMU_VF_METRICS_TABLE_VERSION = 0x3 SMU13_0_6_DRIVER_IF_VERSION = 0x08042024 NUM_I2C_CONTROLLERS = 8 I2C_CONTROLLER_ENABLED = 1 diff --git a/tinygrad/runtime/support/am/ip.py b/tinygrad/runtime/support/am/ip.py index aa7d4b4ac4..370acae4e8 100644 --- a/tinygrad/runtime/support/am/ip.py +++ b/tinygrad/runtime/support/am/ip.py @@ -182,10 +182,10 @@ class AM_SMU(AM_IP): else: self._send_msg(self.smu_mod.PPSMC_MSG_Mode1Reset, 0) time.sleep(0.5) # 500ms - def read_table(self, table_t, cmd): - self._send_msg(self.smu_mod.PPSMC_MSG_TransferTableSmu2Dram, cmd) + def read_table(self, table_t, arg): + if self.adev.ip_ver[am.MP0_HWIP] in {(13,0,6),(13,0,12)}: self._send_msg(self.smu_mod.PPSMC_MSG_GetMetricsTable, arg) + else: self._send_msg(self.smu_mod.PPSMC_MSG_TransferTableSmu2Dram, arg) return table_t.from_buffer(bytearray(self.adev.vram.view(self.driver_table_paddr, ctypes.sizeof(table_t))[:])) - def read_metrics(self): return self.read_table(self.smu_mod.SmuMetricsExternal_t, self.smu_mod.TABLE_SMU_METRICS) def set_clocks(self, level): if self.adev.ip_ver[am.MP0_HWIP] in {(13,0,6), (13,0,12)}: return # TODO