diff --git a/extra/amdpci/hive_reset.py b/extra/amdpci/hive_reset.py new file mode 100755 index 0000000000..2ac5e8cc6f --- /dev/null +++ b/extra/amdpci/hive_reset.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 + +from tinygrad.helpers import Context +from tinygrad.runtime.support.system import System, PCIDevice, PCIDevImplBase +from tinygrad.runtime.support.am.amdev import AMDev + +if __name__ == "__main__": + gpus = System.pci_scan_bus(0x1002, [(0xffff, [0x74a1])]) + pcidevs = [PCIDevice(f"reset:{gpu}", gpu, bars=[0, 2, 5]) for gpu in gpus] + amdevs = [] + with Context(DEBUG=2): + for pcidev in pcidevs: + amdevs.append(AMDev(pcidev, reset_mode=True)) + for amdev in amdevs: amdev.smu.mode1_reset() diff --git a/tinygrad/runtime/autogen/am/__init__.py b/tinygrad/runtime/autogen/am/__init__.py index 55968d60a0..eedb9c1cde 100644 --- a/tinygrad/runtime/autogen/am/__init__.py +++ b/tinygrad/runtime/autogen/am/__init__.py @@ -6,8 +6,9 @@ inc = ["-include", "stdint.h"] def __getattr__(nm): match nm: - case "am": return load("am/am", [], [root/f"extra/amdpci/headers/{s}.h" for s in ["v11_structs", "v12_structs", "amdgpu_vm", "discovery", - "amdgpu_ucode", "psp_gfx_if", "amdgpu_psp", "amdgpu_irq", "amdgpu_doorbell"]]+[f"{AMD}/include/soc15_ih_clientid.h"], args=inc, tarball=am_src) + case "am": return load("am/am", [], [root/f"extra/amdpci/headers/{s}.h" for s in ["v11_structs", "v12_structs", "amdgpu_vm", + "discovery", "amdgpu_ucode", "psp_gfx_if", "amdgpu_psp", "amdgpu_irq", "amdgpu_doorbell"]] + \ + [f"{AMD}/include/{s}.h" for s in ["v9_structs", "soc15_ih_clientid"]], args=inc, tarball=am_src) case "pm4_soc15": return load("am/pm4_soc15", [], [f"{AMD}/amdkfd/kfd_pm4_headers_ai.h", f"{AMD}/amdgpu/soc15d.h"], tarball=am_src) case "pm4_nv": return load("am/pm4_nv", [], [f"{AMD}/amdkfd/kfd_pm4_headers_ai.h", f"{AMD}/amdgpu/nvd.h"], tarball=am_src) case "sdma_4_0_0": return load("am/sdma_4_0_0", [], [root/"extra/hip_gpu_driver/sdma_registers.h", f"{AMD}/amdgpu/vega10_sdma_pkt_open.h"], @@ -18,6 +19,8 @@ def __getattr__(nm): args=["-I/opt/rocm/include", "-x", "c++"], tarball=am_src) case "smu_v13_0_0": return load("am/smu_v13_0_0",[],[f"{AMD}/pm/swsmu/inc/pmfw_if/{s}.h" for s in ["smu_v13_0_0_ppsmc","smu13_driver_if_v13_0_0"]] +[root/"extra/amdpci/headers/amdgpu_smu.h"], args=inc, tarball=am_src) + case "smu_v13_0_6": return load("am/smu_v13_0_6",[],[f"{AMD}/pm/swsmu/inc/pmfw_if/{s}.h" for s in ["smu_v13_0_6_ppsmc","smu13_driver_if_v13_0_6"]] + +[root/"extra/amdpci/headers/amdgpu_smu.h"], args=inc, tarball=am_src) case "smu_v14_0_2": return load("am/smu_v14_0_2", [], [f"{AMD}/pm/swsmu/inc/pmfw_if/{s}.h" for s in ["smu_v14_0_0_pmfw", "smu_v14_0_2_ppsmc", "smu14_driver_if_v14_0"]]+[root/"extra/amdpci/headers/amdgpu_smu.h"], args=inc, tarball=am_src) case _: raise AttributeError(f"no such autogen: {nm}") diff --git a/tinygrad/runtime/autogen/am/am.py b/tinygrad/runtime/autogen/am/am.py index 37e881205c..93cd8a8f0b 100644 --- a/tinygrad/runtime/autogen/am/am.py +++ b/tinygrad/runtime/autogen/am/am.py @@ -3878,6 +3878,745 @@ AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = enum_AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1.de AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = enum_AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1.define('AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT', 488) AMDGPU_DOORBELL_LAYOUT1_INVALID = enum_AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1.define('AMDGPU_DOORBELL_LAYOUT1_INVALID', 65535) +class struct_v9_sdma_mqd(Struct): pass +struct_v9_sdma_mqd._fields_ = [ + ('sdmax_rlcx_rb_cntl', uint32_t), + ('sdmax_rlcx_rb_base', uint32_t), + ('sdmax_rlcx_rb_base_hi', uint32_t), + ('sdmax_rlcx_rb_rptr', uint32_t), + ('sdmax_rlcx_rb_rptr_hi', uint32_t), + ('sdmax_rlcx_rb_wptr', uint32_t), + ('sdmax_rlcx_rb_wptr_hi', uint32_t), + ('sdmax_rlcx_rb_wptr_poll_cntl', uint32_t), + ('sdmax_rlcx_rb_rptr_addr_hi', uint32_t), + ('sdmax_rlcx_rb_rptr_addr_lo', uint32_t), + ('sdmax_rlcx_ib_cntl', uint32_t), + ('sdmax_rlcx_ib_rptr', uint32_t), + ('sdmax_rlcx_ib_offset', uint32_t), + ('sdmax_rlcx_ib_base_lo', uint32_t), + ('sdmax_rlcx_ib_base_hi', uint32_t), + ('sdmax_rlcx_ib_size', uint32_t), + ('sdmax_rlcx_skip_cntl', uint32_t), + ('sdmax_rlcx_context_status', uint32_t), + ('sdmax_rlcx_doorbell', uint32_t), + ('sdmax_rlcx_status', uint32_t), + ('sdmax_rlcx_doorbell_log', uint32_t), + ('sdmax_rlcx_watermark', uint32_t), + ('sdmax_rlcx_doorbell_offset', uint32_t), + ('sdmax_rlcx_csa_addr_lo', uint32_t), + ('sdmax_rlcx_csa_addr_hi', uint32_t), + ('sdmax_rlcx_ib_sub_remain', uint32_t), + ('sdmax_rlcx_preempt', uint32_t), + ('sdmax_rlcx_dummy_reg', uint32_t), + ('sdmax_rlcx_rb_wptr_poll_addr_hi', uint32_t), + ('sdmax_rlcx_rb_wptr_poll_addr_lo', uint32_t), + ('sdmax_rlcx_rb_aql_cntl', uint32_t), + ('sdmax_rlcx_minor_ptr_update', uint32_t), + ('sdmax_rlcx_midcmd_data0', uint32_t), + ('sdmax_rlcx_midcmd_data1', uint32_t), + ('sdmax_rlcx_midcmd_data2', uint32_t), + ('sdmax_rlcx_midcmd_data3', uint32_t), + ('sdmax_rlcx_midcmd_data4', uint32_t), + ('sdmax_rlcx_midcmd_data5', uint32_t), + ('sdmax_rlcx_midcmd_data6', uint32_t), + ('sdmax_rlcx_midcmd_data7', uint32_t), + ('sdmax_rlcx_midcmd_data8', uint32_t), + ('sdmax_rlcx_midcmd_cntl', uint32_t), + ('reserved_42', uint32_t), + ('reserved_43', uint32_t), + ('reserved_44', uint32_t), + ('reserved_45', uint32_t), + ('reserved_46', uint32_t), + ('reserved_47', uint32_t), + ('reserved_48', uint32_t), + ('reserved_49', uint32_t), + ('reserved_50', uint32_t), + ('reserved_51', uint32_t), + ('reserved_52', uint32_t), + ('reserved_53', uint32_t), + ('reserved_54', uint32_t), + ('reserved_55', uint32_t), + ('reserved_56', uint32_t), + ('reserved_57', uint32_t), + ('reserved_58', uint32_t), + ('reserved_59', uint32_t), + ('reserved_60', uint32_t), + ('reserved_61', uint32_t), + ('reserved_62', uint32_t), + ('reserved_63', uint32_t), + ('reserved_64', uint32_t), + ('reserved_65', uint32_t), + ('reserved_66', uint32_t), + ('reserved_67', uint32_t), + ('reserved_68', uint32_t), + ('reserved_69', uint32_t), + ('reserved_70', uint32_t), + ('reserved_71', uint32_t), + ('reserved_72', uint32_t), + ('reserved_73', uint32_t), + ('reserved_74', uint32_t), + ('reserved_75', uint32_t), + ('reserved_76', uint32_t), + ('reserved_77', uint32_t), + ('reserved_78', uint32_t), + ('reserved_79', uint32_t), + ('reserved_80', uint32_t), + ('reserved_81', uint32_t), + ('reserved_82', uint32_t), + ('reserved_83', uint32_t), + ('reserved_84', uint32_t), + ('reserved_85', uint32_t), + ('reserved_86', uint32_t), + ('reserved_87', uint32_t), + ('reserved_88', uint32_t), + ('reserved_89', uint32_t), + ('reserved_90', uint32_t), + ('reserved_91', uint32_t), + ('reserved_92', uint32_t), + ('reserved_93', uint32_t), + ('reserved_94', uint32_t), + ('reserved_95', uint32_t), + ('reserved_96', uint32_t), + ('reserved_97', uint32_t), + ('reserved_98', uint32_t), + ('reserved_99', uint32_t), + ('reserved_100', uint32_t), + ('reserved_101', uint32_t), + ('reserved_102', uint32_t), + ('reserved_103', uint32_t), + ('reserved_104', uint32_t), + ('reserved_105', uint32_t), + ('reserved_106', uint32_t), + ('reserved_107', uint32_t), + ('reserved_108', uint32_t), + ('reserved_109', uint32_t), + ('reserved_110', uint32_t), + ('reserved_111', uint32_t), + ('reserved_112', uint32_t), + ('reserved_113', uint32_t), + ('reserved_114', uint32_t), + ('reserved_115', uint32_t), + ('reserved_116', uint32_t), + ('reserved_117', uint32_t), + ('reserved_118', uint32_t), + ('reserved_119', uint32_t), + ('reserved_120', uint32_t), + ('reserved_121', uint32_t), + ('reserved_122', uint32_t), + ('reserved_123', uint32_t), + ('reserved_124', uint32_t), + ('reserved_125', uint32_t), + ('sdma_engine_id', uint32_t), + ('sdma_queue_id', uint32_t), +] +class struct_v9_mqd(Struct): pass +class struct_v9_mqd_0(ctypes.Union): pass +class struct_v9_mqd_0_0(Struct): pass +struct_v9_mqd_0_0._fields_ = [ + ('compute_static_thread_mgmt_se4', uint32_t), + ('compute_static_thread_mgmt_se5', uint32_t), + ('compute_static_thread_mgmt_se6', uint32_t), + ('compute_static_thread_mgmt_se7', uint32_t), +] +class struct_v9_mqd_0_1(Struct): pass +struct_v9_mqd_0_1._fields_ = [ + ('compute_current_logic_xcc_id', uint32_t), + ('compute_restart_cg_tg_id', uint32_t), + ('compute_tg_chunk_size', uint32_t), + ('compute_restore_tg_chunk_size', uint32_t), +] +struct_v9_mqd_0._anonymous_ = ['_0', '_1'] +struct_v9_mqd_0._fields_ = [ + ('_0', struct_v9_mqd_0_0), + ('_1', struct_v9_mqd_0_1), +] +class struct_v9_mqd_1(ctypes.Union): pass +class struct_v9_mqd_1_0(Struct): pass +struct_v9_mqd_1_0._fields_ = [ + ('reserved_225', uint32_t), + ('reserved_226', uint32_t), +] +class struct_v9_mqd_1_1(Struct): pass +struct_v9_mqd_1_1._fields_ = [ + ('pm4_target_xcc_in_xcp', uint32_t), + ('cp_mqd_stride_size', uint32_t), +] +struct_v9_mqd_1._anonymous_ = ['_0', '_1'] +struct_v9_mqd_1._fields_ = [ + ('_0', struct_v9_mqd_1_0), + ('_1', struct_v9_mqd_1_1), +] +struct_v9_mqd._anonymous_ = ['_0', '_1'] +struct_v9_mqd._fields_ = [ + ('header', uint32_t), + ('compute_dispatch_initiator', uint32_t), + ('compute_dim_x', uint32_t), + ('compute_dim_y', uint32_t), + ('compute_dim_z', uint32_t), + ('compute_start_x', uint32_t), + ('compute_start_y', uint32_t), + ('compute_start_z', uint32_t), + ('compute_num_thread_x', uint32_t), + ('compute_num_thread_y', uint32_t), + ('compute_num_thread_z', uint32_t), + ('compute_pipelinestat_enable', uint32_t), + ('compute_perfcount_enable', uint32_t), + ('compute_pgm_lo', uint32_t), + ('compute_pgm_hi', uint32_t), + ('compute_tba_lo', uint32_t), + ('compute_tba_hi', uint32_t), + ('compute_tma_lo', uint32_t), + ('compute_tma_hi', uint32_t), + ('compute_pgm_rsrc1', uint32_t), + ('compute_pgm_rsrc2', uint32_t), + ('compute_vmid', uint32_t), + ('compute_resource_limits', uint32_t), + ('compute_static_thread_mgmt_se0', uint32_t), + ('compute_static_thread_mgmt_se1', uint32_t), + ('compute_tmpring_size', uint32_t), + ('compute_static_thread_mgmt_se2', uint32_t), + ('compute_static_thread_mgmt_se3', uint32_t), + ('compute_restart_x', uint32_t), + ('compute_restart_y', uint32_t), + ('compute_restart_z', uint32_t), + ('compute_thread_trace_enable', uint32_t), + ('compute_misc_reserved', uint32_t), + ('compute_dispatch_id', uint32_t), + ('compute_threadgroup_id', uint32_t), + ('compute_relaunch', uint32_t), + ('compute_wave_restore_addr_lo', uint32_t), + ('compute_wave_restore_addr_hi', uint32_t), + ('compute_wave_restore_control', uint32_t), + ('_0', struct_v9_mqd_0), + ('reserved_43', uint32_t), + ('reserved_44', uint32_t), + ('reserved_45', uint32_t), + ('reserved_46', uint32_t), + ('reserved_47', uint32_t), + ('reserved_48', uint32_t), + ('reserved_49', uint32_t), + ('reserved_50', uint32_t), + ('reserved_51', uint32_t), + ('reserved_52', uint32_t), + ('reserved_53', uint32_t), + ('reserved_54', uint32_t), + ('reserved_55', uint32_t), + ('reserved_56', uint32_t), + ('reserved_57', uint32_t), + ('reserved_58', uint32_t), + ('reserved_59', uint32_t), + ('reserved_60', uint32_t), + ('reserved_61', uint32_t), + ('reserved_62', uint32_t), + ('reserved_63', uint32_t), + ('reserved_64', uint32_t), + ('compute_user_data_0', uint32_t), + ('compute_user_data_1', uint32_t), + ('compute_user_data_2', uint32_t), + ('compute_user_data_3', uint32_t), + ('compute_user_data_4', uint32_t), + ('compute_user_data_5', uint32_t), + ('compute_user_data_6', uint32_t), + ('compute_user_data_7', uint32_t), + ('compute_user_data_8', uint32_t), + ('compute_user_data_9', uint32_t), + ('compute_user_data_10', uint32_t), + ('compute_user_data_11', uint32_t), + ('compute_user_data_12', uint32_t), + ('compute_user_data_13', uint32_t), + ('compute_user_data_14', uint32_t), + ('compute_user_data_15', uint32_t), + ('cp_compute_csinvoc_count_lo', uint32_t), + ('cp_compute_csinvoc_count_hi', uint32_t), + ('reserved_83', uint32_t), + ('reserved_84', uint32_t), + ('reserved_85', uint32_t), + ('cp_mqd_query_time_lo', uint32_t), + ('cp_mqd_query_time_hi', uint32_t), + ('cp_mqd_connect_start_time_lo', uint32_t), + ('cp_mqd_connect_start_time_hi', uint32_t), + ('cp_mqd_connect_end_time_lo', uint32_t), + ('cp_mqd_connect_end_time_hi', uint32_t), + ('cp_mqd_connect_end_wf_count', uint32_t), + ('cp_mqd_connect_end_pq_rptr', uint32_t), + ('cp_mqd_connect_end_pq_wptr', uint32_t), + ('cp_mqd_connect_end_ib_rptr', uint32_t), + ('cp_mqd_readindex_lo', uint32_t), + ('cp_mqd_readindex_hi', uint32_t), + ('cp_mqd_save_start_time_lo', uint32_t), + ('cp_mqd_save_start_time_hi', uint32_t), + ('cp_mqd_save_end_time_lo', uint32_t), + ('cp_mqd_save_end_time_hi', uint32_t), + ('cp_mqd_restore_start_time_lo', uint32_t), + ('cp_mqd_restore_start_time_hi', uint32_t), + ('cp_mqd_restore_end_time_lo', uint32_t), + ('cp_mqd_restore_end_time_hi', uint32_t), + ('disable_queue', uint32_t), + ('reserved_107', uint32_t), + ('gds_cs_ctxsw_cnt0', uint32_t), + ('gds_cs_ctxsw_cnt1', uint32_t), + ('gds_cs_ctxsw_cnt2', uint32_t), + ('gds_cs_ctxsw_cnt3', uint32_t), + ('reserved_112', uint32_t), + ('reserved_113', uint32_t), + ('cp_pq_exe_status_lo', uint32_t), + ('cp_pq_exe_status_hi', uint32_t), + ('cp_packet_id_lo', uint32_t), + ('cp_packet_id_hi', uint32_t), + ('cp_packet_exe_status_lo', uint32_t), + ('cp_packet_exe_status_hi', uint32_t), + ('gds_save_base_addr_lo', uint32_t), + ('gds_save_base_addr_hi', uint32_t), + ('gds_save_mask_lo', uint32_t), + ('gds_save_mask_hi', uint32_t), + ('ctx_save_base_addr_lo', uint32_t), + ('ctx_save_base_addr_hi', uint32_t), + ('dynamic_cu_mask_addr_lo', uint32_t), + ('dynamic_cu_mask_addr_hi', uint32_t), + ('cp_mqd_base_addr_lo', uint32_t), + ('cp_mqd_base_addr_hi', uint32_t), + ('cp_hqd_active', uint32_t), + ('cp_hqd_vmid', uint32_t), + ('cp_hqd_persistent_state', uint32_t), + ('cp_hqd_pipe_priority', uint32_t), + ('cp_hqd_queue_priority', uint32_t), + ('cp_hqd_quantum', uint32_t), + ('cp_hqd_pq_base_lo', uint32_t), + ('cp_hqd_pq_base_hi', uint32_t), + ('cp_hqd_pq_rptr', uint32_t), + ('cp_hqd_pq_rptr_report_addr_lo', uint32_t), + ('cp_hqd_pq_rptr_report_addr_hi', uint32_t), + ('cp_hqd_pq_wptr_poll_addr_lo', uint32_t), + ('cp_hqd_pq_wptr_poll_addr_hi', uint32_t), + ('cp_hqd_pq_doorbell_control', uint32_t), + ('reserved_144', uint32_t), + ('cp_hqd_pq_control', uint32_t), + ('cp_hqd_ib_base_addr_lo', uint32_t), + ('cp_hqd_ib_base_addr_hi', uint32_t), + ('cp_hqd_ib_rptr', uint32_t), + ('cp_hqd_ib_control', uint32_t), + ('cp_hqd_iq_timer', uint32_t), + ('cp_hqd_iq_rptr', uint32_t), + ('cp_hqd_dequeue_request', uint32_t), + ('cp_hqd_dma_offload', uint32_t), + ('cp_hqd_sema_cmd', uint32_t), + ('cp_hqd_msg_type', uint32_t), + ('cp_hqd_atomic0_preop_lo', uint32_t), + ('cp_hqd_atomic0_preop_hi', uint32_t), + ('cp_hqd_atomic1_preop_lo', uint32_t), + ('cp_hqd_atomic1_preop_hi', uint32_t), + ('cp_hqd_hq_status0', uint32_t), + ('cp_hqd_hq_control0', uint32_t), + ('cp_mqd_control', uint32_t), + ('cp_hqd_hq_status1', uint32_t), + ('cp_hqd_hq_control1', uint32_t), + ('cp_hqd_eop_base_addr_lo', uint32_t), + ('cp_hqd_eop_base_addr_hi', uint32_t), + ('cp_hqd_eop_control', uint32_t), + ('cp_hqd_eop_rptr', uint32_t), + ('cp_hqd_eop_wptr', uint32_t), + ('cp_hqd_eop_done_events', uint32_t), + ('cp_hqd_ctx_save_base_addr_lo', uint32_t), + ('cp_hqd_ctx_save_base_addr_hi', uint32_t), + ('cp_hqd_ctx_save_control', uint32_t), + ('cp_hqd_cntl_stack_offset', uint32_t), + ('cp_hqd_cntl_stack_size', uint32_t), + ('cp_hqd_wg_state_offset', uint32_t), + ('cp_hqd_ctx_save_size', uint32_t), + ('cp_hqd_gds_resource_state', uint32_t), + ('cp_hqd_error', uint32_t), + ('cp_hqd_eop_wptr_mem', uint32_t), + ('cp_hqd_aql_control', uint32_t), + ('cp_hqd_pq_wptr_lo', uint32_t), + ('cp_hqd_pq_wptr_hi', uint32_t), + ('reserved_184', uint32_t), + ('reserved_185', uint32_t), + ('reserved_186', uint32_t), + ('reserved_187', uint32_t), + ('reserved_188', uint32_t), + ('reserved_189', uint32_t), + ('reserved_190', uint32_t), + ('reserved_191', uint32_t), + ('iqtimer_pkt_header', uint32_t), + ('iqtimer_pkt_dw0', uint32_t), + ('iqtimer_pkt_dw1', uint32_t), + ('iqtimer_pkt_dw2', uint32_t), + ('iqtimer_pkt_dw3', uint32_t), + ('iqtimer_pkt_dw4', uint32_t), + ('iqtimer_pkt_dw5', uint32_t), + ('iqtimer_pkt_dw6', uint32_t), + ('iqtimer_pkt_dw7', uint32_t), + ('iqtimer_pkt_dw8', uint32_t), + ('iqtimer_pkt_dw9', uint32_t), + ('iqtimer_pkt_dw10', uint32_t), + ('iqtimer_pkt_dw11', uint32_t), + ('iqtimer_pkt_dw12', uint32_t), + ('iqtimer_pkt_dw13', uint32_t), + ('iqtimer_pkt_dw14', uint32_t), + ('iqtimer_pkt_dw15', uint32_t), + ('iqtimer_pkt_dw16', uint32_t), + ('iqtimer_pkt_dw17', uint32_t), + ('iqtimer_pkt_dw18', uint32_t), + ('iqtimer_pkt_dw19', uint32_t), + ('iqtimer_pkt_dw20', uint32_t), + ('iqtimer_pkt_dw21', uint32_t), + ('iqtimer_pkt_dw22', uint32_t), + ('iqtimer_pkt_dw23', uint32_t), + ('iqtimer_pkt_dw24', uint32_t), + ('iqtimer_pkt_dw25', uint32_t), + ('iqtimer_pkt_dw26', uint32_t), + ('iqtimer_pkt_dw27', uint32_t), + ('iqtimer_pkt_dw28', uint32_t), + ('iqtimer_pkt_dw29', uint32_t), + ('iqtimer_pkt_dw30', uint32_t), + ('iqtimer_pkt_dw31', uint32_t), + ('_1', struct_v9_mqd_1), + ('reserved_227', uint32_t), + ('set_resources_header', uint32_t), + ('set_resources_dw1', uint32_t), + ('set_resources_dw2', uint32_t), + ('set_resources_dw3', uint32_t), + ('set_resources_dw4', uint32_t), + ('set_resources_dw5', uint32_t), + ('set_resources_dw6', uint32_t), + ('set_resources_dw7', uint32_t), + ('reserved_236', uint32_t), + ('reserved_237', uint32_t), + ('reserved_238', uint32_t), + ('reserved_239', uint32_t), + ('queue_doorbell_id0', uint32_t), + ('queue_doorbell_id1', uint32_t), + ('queue_doorbell_id2', uint32_t), + ('queue_doorbell_id3', uint32_t), + ('queue_doorbell_id4', uint32_t), + ('queue_doorbell_id5', uint32_t), + ('queue_doorbell_id6', uint32_t), + ('queue_doorbell_id7', uint32_t), + ('queue_doorbell_id8', uint32_t), + ('queue_doorbell_id9', uint32_t), + ('queue_doorbell_id10', uint32_t), + ('queue_doorbell_id11', uint32_t), + ('queue_doorbell_id12', uint32_t), + ('queue_doorbell_id13', uint32_t), + ('queue_doorbell_id14', uint32_t), + ('queue_doorbell_id15', uint32_t), + ('reserved_256', uint32_t), + ('reserved_257', uint32_t), + ('reserved_258', uint32_t), + ('reserved_259', uint32_t), + ('reserved_260', uint32_t), + ('reserved_261', uint32_t), + ('reserved_262', uint32_t), + ('reserved_263', uint32_t), + ('reserved_264', uint32_t), + ('reserved_265', uint32_t), + ('reserved_266', uint32_t), + ('reserved_267', uint32_t), + ('reserved_268', uint32_t), + ('reserved_269', uint32_t), + ('reserved_270', uint32_t), + ('reserved_271', uint32_t), + ('reserved_272', uint32_t), + ('reserved_273', uint32_t), + ('reserved_274', uint32_t), + ('reserved_275', uint32_t), + ('reserved_276', uint32_t), + ('reserved_277', uint32_t), + ('reserved_278', uint32_t), + ('reserved_279', uint32_t), + ('reserved_280', uint32_t), + ('reserved_281', uint32_t), + ('reserved_282', uint32_t), + ('reserved_283', uint32_t), + ('reserved_284', uint32_t), + ('reserved_285', uint32_t), + ('reserved_286', uint32_t), + ('reserved_287', uint32_t), + ('reserved_288', uint32_t), + ('reserved_289', uint32_t), + ('reserved_290', uint32_t), + ('reserved_291', uint32_t), + ('reserved_292', uint32_t), + ('reserved_293', uint32_t), + ('reserved_294', uint32_t), + ('reserved_295', uint32_t), + ('reserved_296', uint32_t), + ('reserved_297', uint32_t), + ('reserved_298', uint32_t), + ('reserved_299', uint32_t), + ('reserved_300', uint32_t), + ('reserved_301', uint32_t), + ('reserved_302', uint32_t), + ('reserved_303', uint32_t), + ('reserved_304', uint32_t), + ('reserved_305', uint32_t), + ('reserved_306', uint32_t), + ('reserved_307', uint32_t), + ('reserved_308', uint32_t), + ('reserved_309', uint32_t), + ('reserved_310', uint32_t), + ('reserved_311', uint32_t), + ('reserved_312', uint32_t), + ('reserved_313', uint32_t), + ('reserved_314', uint32_t), + ('reserved_315', uint32_t), + ('reserved_316', uint32_t), + ('reserved_317', uint32_t), + ('reserved_318', uint32_t), + ('reserved_319', uint32_t), + ('reserved_320', uint32_t), + ('reserved_321', uint32_t), + ('reserved_322', uint32_t), + ('reserved_323', uint32_t), + ('reserved_324', uint32_t), + ('reserved_325', uint32_t), + ('reserved_326', uint32_t), + ('reserved_327', uint32_t), + ('reserved_328', uint32_t), + ('reserved_329', uint32_t), + ('reserved_330', uint32_t), + ('reserved_331', uint32_t), + ('reserved_332', uint32_t), + ('reserved_333', uint32_t), + ('reserved_334', uint32_t), + ('reserved_335', uint32_t), + ('reserved_336', uint32_t), + ('reserved_337', uint32_t), + ('reserved_338', uint32_t), + ('reserved_339', uint32_t), + ('reserved_340', uint32_t), + ('reserved_341', uint32_t), + ('reserved_342', uint32_t), + ('reserved_343', uint32_t), + ('reserved_344', uint32_t), + ('reserved_345', uint32_t), + ('reserved_346', uint32_t), + ('reserved_347', uint32_t), + ('reserved_348', uint32_t), + ('reserved_349', uint32_t), + ('reserved_350', uint32_t), + ('reserved_351', uint32_t), + ('reserved_352', uint32_t), + ('reserved_353', uint32_t), + ('reserved_354', uint32_t), + ('reserved_355', uint32_t), + ('reserved_356', uint32_t), + ('reserved_357', uint32_t), + ('reserved_358', uint32_t), + ('reserved_359', uint32_t), + ('reserved_360', uint32_t), + ('reserved_361', uint32_t), + ('reserved_362', uint32_t), + ('reserved_363', uint32_t), + ('reserved_364', uint32_t), + ('reserved_365', uint32_t), + ('reserved_366', uint32_t), + ('reserved_367', uint32_t), + ('reserved_368', uint32_t), + ('reserved_369', uint32_t), + ('reserved_370', uint32_t), + ('reserved_371', uint32_t), + ('reserved_372', uint32_t), + ('reserved_373', uint32_t), + ('reserved_374', uint32_t), + ('reserved_375', uint32_t), + ('reserved_376', uint32_t), + ('reserved_377', uint32_t), + ('reserved_378', uint32_t), + ('reserved_379', uint32_t), + ('reserved_380', uint32_t), + ('reserved_381', uint32_t), + ('reserved_382', uint32_t), + ('reserved_383', uint32_t), + ('reserved_384', uint32_t), + ('reserved_385', uint32_t), + ('reserved_386', uint32_t), + ('reserved_387', uint32_t), + ('reserved_388', uint32_t), + ('reserved_389', uint32_t), + ('reserved_390', uint32_t), + ('reserved_391', uint32_t), + ('reserved_392', uint32_t), + ('reserved_393', uint32_t), + ('reserved_394', uint32_t), + ('reserved_395', uint32_t), + ('reserved_396', uint32_t), + ('reserved_397', uint32_t), + ('reserved_398', uint32_t), + ('reserved_399', uint32_t), + ('reserved_400', uint32_t), + ('reserved_401', uint32_t), + ('reserved_402', uint32_t), + ('reserved_403', uint32_t), + ('reserved_404', uint32_t), + ('reserved_405', uint32_t), + ('reserved_406', uint32_t), + ('reserved_407', uint32_t), + ('reserved_408', uint32_t), + ('reserved_409', uint32_t), + ('reserved_410', uint32_t), + ('reserved_411', uint32_t), + ('reserved_412', uint32_t), + ('reserved_413', uint32_t), + ('reserved_414', uint32_t), + ('reserved_415', uint32_t), + ('reserved_416', uint32_t), + ('reserved_417', uint32_t), + ('reserved_418', uint32_t), + ('reserved_419', uint32_t), + ('reserved_420', uint32_t), + ('reserved_421', uint32_t), + ('reserved_422', uint32_t), + ('reserved_423', uint32_t), + ('reserved_424', uint32_t), + ('reserved_425', uint32_t), + ('reserved_426', uint32_t), + ('reserved_427', uint32_t), + ('reserved_428', uint32_t), + ('reserved_429', uint32_t), + ('reserved_430', uint32_t), + ('reserved_431', uint32_t), + ('reserved_432', uint32_t), + ('reserved_433', uint32_t), + ('reserved_434', uint32_t), + ('reserved_435', uint32_t), + ('reserved_436', uint32_t), + ('reserved_437', uint32_t), + ('reserved_438', uint32_t), + ('reserved_439', uint32_t), + ('reserved_440', uint32_t), + ('reserved_441', uint32_t), + ('reserved_442', uint32_t), + ('reserved_443', uint32_t), + ('reserved_444', uint32_t), + ('reserved_445', uint32_t), + ('reserved_446', uint32_t), + ('reserved_447', uint32_t), + ('reserved_448', uint32_t), + ('reserved_449', uint32_t), + ('reserved_450', uint32_t), + ('reserved_451', uint32_t), + ('reserved_452', uint32_t), + ('reserved_453', uint32_t), + ('reserved_454', uint32_t), + ('reserved_455', uint32_t), + ('reserved_456', uint32_t), + ('reserved_457', uint32_t), + ('reserved_458', uint32_t), + ('reserved_459', uint32_t), + ('reserved_460', uint32_t), + ('reserved_461', uint32_t), + ('reserved_462', uint32_t), + ('reserved_463', uint32_t), + ('reserved_464', uint32_t), + ('reserved_465', uint32_t), + ('reserved_466', uint32_t), + ('reserved_467', uint32_t), + ('reserved_468', uint32_t), + ('reserved_469', uint32_t), + ('reserved_470', uint32_t), + ('reserved_471', uint32_t), + ('reserved_472', uint32_t), + ('reserved_473', uint32_t), + ('reserved_474', uint32_t), + ('reserved_475', uint32_t), + ('reserved_476', uint32_t), + ('reserved_477', uint32_t), + ('reserved_478', uint32_t), + ('reserved_479', uint32_t), + ('reserved_480', uint32_t), + ('reserved_481', uint32_t), + ('reserved_482', uint32_t), + ('reserved_483', uint32_t), + ('reserved_484', uint32_t), + ('reserved_485', uint32_t), + ('reserved_486', uint32_t), + ('reserved_487', uint32_t), + ('reserved_488', uint32_t), + ('reserved_489', uint32_t), + ('reserved_490', uint32_t), + ('reserved_491', uint32_t), + ('reserved_492', uint32_t), + ('reserved_493', uint32_t), + ('reserved_494', uint32_t), + ('reserved_495', uint32_t), + ('reserved_496', uint32_t), + ('reserved_497', uint32_t), + ('reserved_498', uint32_t), + ('reserved_499', uint32_t), + ('reserved_500', uint32_t), + ('reserved_501', uint32_t), + ('reserved_502', uint32_t), + ('reserved_503', uint32_t), + ('reserved_504', uint32_t), + ('reserved_505', uint32_t), + ('reserved_506', uint32_t), + ('reserved_507', uint32_t), + ('reserved_508', uint32_t), + ('reserved_509', uint32_t), + ('reserved_510', uint32_t), + ('reserved_511', uint32_t), +] +class struct_v9_mqd_allocation(Struct): pass +struct_v9_mqd_allocation._fields_ = [ + ('mqd', struct_v9_mqd), + ('wptr_poll_mem', uint32_t), + ('rptr_report_mem', uint32_t), + ('dynamic_cu_mask', uint32_t), + ('dynamic_rb_mask', uint32_t), +] +class struct_v9_ce_ib_state(Struct): pass +struct_v9_ce_ib_state._fields_ = [ + ('ce_ib_completion_status', uint32_t), + ('ce_constegnine_count', uint32_t), + ('ce_ibOffset_ib1', uint32_t), + ('ce_ibOffset_ib2', uint32_t), + ('ce_chainib_addrlo_ib1', uint32_t), + ('ce_chainib_addrlo_ib2', uint32_t), + ('ce_chainib_addrhi_ib1', uint32_t), + ('ce_chainib_addrhi_ib2', uint32_t), + ('ce_chainib_size_ib1', uint32_t), + ('ce_chainib_size_ib2', uint32_t), +] +class struct_v9_de_ib_state(Struct): pass +struct_v9_de_ib_state._fields_ = [ + ('ib_completion_status', uint32_t), + ('de_constEngine_count', uint32_t), + ('ib_offset_ib1', uint32_t), + ('ib_offset_ib2', uint32_t), + ('chain_ib_addrlo_ib1', uint32_t), + ('chain_ib_addrlo_ib2', uint32_t), + ('chain_ib_addrhi_ib1', uint32_t), + ('chain_ib_addrhi_ib2', uint32_t), + ('chain_ib_size_ib1', uint32_t), + ('chain_ib_size_ib2', uint32_t), + ('preamble_begin_ib1', uint32_t), + ('preamble_begin_ib2', uint32_t), + ('preamble_end_ib1', uint32_t), + ('preamble_end_ib2', uint32_t), + ('chain_ib_pream_addrlo_ib1', uint32_t), + ('chain_ib_pream_addrlo_ib2', uint32_t), + ('chain_ib_pream_addrhi_ib1', uint32_t), + ('chain_ib_pream_addrhi_ib2', uint32_t), + ('draw_indirect_baseLo', uint32_t), + ('draw_indirect_baseHi', uint32_t), + ('disp_indirect_baseLo', uint32_t), + ('disp_indirect_baseHi', uint32_t), + ('gds_backup_addrlo', uint32_t), + ('gds_backup_addrhi', uint32_t), + ('index_base_addrlo', uint32_t), + ('index_base_addrhi', uint32_t), + ('sample_cntl', uint32_t), +] +class struct_v9_gfx_meta_data(Struct): pass +struct_v9_gfx_meta_data._fields_ = [ + ('ce_payload', struct_v9_ce_ib_state), + ('reserved1', (uint32_t * 54)), + ('de_payload', struct_v9_de_ib_state), + ('DeIbBaseAddrLo', uint32_t), + ('DeIbBaseAddrHi', uint32_t), + ('reserved2', (uint32_t * 931)), +] enum_soc15_ih_clientid = CEnum(ctypes.c_uint32) SOC15_IH_CLIENTID_IH = enum_soc15_ih_clientid.define('SOC15_IH_CLIENTID_IH', 0) SOC15_IH_CLIENTID_ACP = enum_soc15_ih_clientid.define('SOC15_IH_CLIENTID_ACP', 1) diff --git a/tinygrad/runtime/autogen/am/smu_v13_0_6.py b/tinygrad/runtime/autogen/am/smu_v13_0_6.py new file mode 100644 index 0000000000..6b8f2dc169 --- /dev/null +++ b/tinygrad/runtime/autogen/am/smu_v13_0_6.py @@ -0,0 +1,531 @@ +# mypy: ignore-errors +import ctypes +from tinygrad.helpers import unwrap +from tinygrad.runtime.support.c import Struct, CEnum, _IO, _IOW, _IOR, _IOWR +PPSMC_Result = ctypes.c_uint32 +PPSMC_MSG = ctypes.c_uint32 +I2cControllerPort_e = CEnum(ctypes.c_uint32) +I2C_CONTROLLER_PORT_0 = I2cControllerPort_e.define('I2C_CONTROLLER_PORT_0', 0) +I2C_CONTROLLER_PORT_1 = I2cControllerPort_e.define('I2C_CONTROLLER_PORT_1', 1) +I2C_CONTROLLER_PORT_COUNT = I2cControllerPort_e.define('I2C_CONTROLLER_PORT_COUNT', 2) + +I2cSpeed_e = CEnum(ctypes.c_uint32) +UNSUPPORTED_1 = I2cSpeed_e.define('UNSUPPORTED_1', 0) +I2C_SPEED_STANDARD_100K = I2cSpeed_e.define('I2C_SPEED_STANDARD_100K', 1) +I2C_SPEED_FAST_400K = I2cSpeed_e.define('I2C_SPEED_FAST_400K', 2) +I2C_SPEED_FAST_PLUS_1M = I2cSpeed_e.define('I2C_SPEED_FAST_PLUS_1M', 3) +UNSUPPORTED_2 = I2cSpeed_e.define('UNSUPPORTED_2', 4) +UNSUPPORTED_3 = I2cSpeed_e.define('UNSUPPORTED_3', 5) +I2C_SPEED_COUNT = I2cSpeed_e.define('I2C_SPEED_COUNT', 6) + +I2cCmdType_e = CEnum(ctypes.c_uint32) +I2C_CMD_READ = I2cCmdType_e.define('I2C_CMD_READ', 0) +I2C_CMD_WRITE = I2cCmdType_e.define('I2C_CMD_WRITE', 1) +I2C_CMD_COUNT = I2cCmdType_e.define('I2C_CMD_COUNT', 2) + +ERR_CODE_e = CEnum(ctypes.c_uint32) +CODE_DAGB0 = ERR_CODE_e.define('CODE_DAGB0', 0) +CODE_EA0 = ERR_CODE_e.define('CODE_EA0', 5) +CODE_UTCL2_ROUTER = ERR_CODE_e.define('CODE_UTCL2_ROUTER', 10) +CODE_VML2 = ERR_CODE_e.define('CODE_VML2', 11) +CODE_VML2_WALKER = ERR_CODE_e.define('CODE_VML2_WALKER', 12) +CODE_MMCANE = ERR_CODE_e.define('CODE_MMCANE', 13) +CODE_VIDD = ERR_CODE_e.define('CODE_VIDD', 14) +CODE_VIDV = ERR_CODE_e.define('CODE_VIDV', 15) +CODE_JPEG0S = ERR_CODE_e.define('CODE_JPEG0S', 16) +CODE_JPEG0D = ERR_CODE_e.define('CODE_JPEG0D', 17) +CODE_JPEG1S = ERR_CODE_e.define('CODE_JPEG1S', 18) +CODE_JPEG1D = ERR_CODE_e.define('CODE_JPEG1D', 19) +CODE_JPEG2S = ERR_CODE_e.define('CODE_JPEG2S', 20) +CODE_JPEG2D = ERR_CODE_e.define('CODE_JPEG2D', 21) +CODE_JPEG3S = ERR_CODE_e.define('CODE_JPEG3S', 22) +CODE_JPEG3D = ERR_CODE_e.define('CODE_JPEG3D', 23) +CODE_JPEG4S = ERR_CODE_e.define('CODE_JPEG4S', 24) +CODE_JPEG4D = ERR_CODE_e.define('CODE_JPEG4D', 25) +CODE_JPEG5S = ERR_CODE_e.define('CODE_JPEG5S', 26) +CODE_JPEG5D = ERR_CODE_e.define('CODE_JPEG5D', 27) +CODE_JPEG6S = ERR_CODE_e.define('CODE_JPEG6S', 28) +CODE_JPEG6D = ERR_CODE_e.define('CODE_JPEG6D', 29) +CODE_JPEG7S = ERR_CODE_e.define('CODE_JPEG7S', 30) +CODE_JPEG7D = ERR_CODE_e.define('CODE_JPEG7D', 31) +CODE_MMSCHD = ERR_CODE_e.define('CODE_MMSCHD', 32) +CODE_SDMA0 = ERR_CODE_e.define('CODE_SDMA0', 33) +CODE_SDMA1 = ERR_CODE_e.define('CODE_SDMA1', 34) +CODE_SDMA2 = ERR_CODE_e.define('CODE_SDMA2', 35) +CODE_SDMA3 = ERR_CODE_e.define('CODE_SDMA3', 36) +CODE_HDP = ERR_CODE_e.define('CODE_HDP', 37) +CODE_ATHUB = ERR_CODE_e.define('CODE_ATHUB', 38) +CODE_IH = ERR_CODE_e.define('CODE_IH', 39) +CODE_XHUB_POISON = ERR_CODE_e.define('CODE_XHUB_POISON', 40) +CODE_SMN_SLVERR = ERR_CODE_e.define('CODE_SMN_SLVERR', 40) +CODE_WDT = ERR_CODE_e.define('CODE_WDT', 41) +CODE_UNKNOWN = ERR_CODE_e.define('CODE_UNKNOWN', 42) +CODE_COUNT = ERR_CODE_e.define('CODE_COUNT', 43) + +GC_ERROR_CODE_e = CEnum(ctypes.c_uint32) +SH_FED_CODE = GC_ERROR_CODE_e.define('SH_FED_CODE', 0) +GCEA_CODE = GC_ERROR_CODE_e.define('GCEA_CODE', 1) +SQ_CODE = GC_ERROR_CODE_e.define('SQ_CODE', 2) +LDS_CODE = GC_ERROR_CODE_e.define('LDS_CODE', 3) +GDS_CODE = GC_ERROR_CODE_e.define('GDS_CODE', 4) +SP0_CODE = GC_ERROR_CODE_e.define('SP0_CODE', 5) +SP1_CODE = GC_ERROR_CODE_e.define('SP1_CODE', 6) +TCC_CODE = GC_ERROR_CODE_e.define('TCC_CODE', 7) +TCA_CODE = GC_ERROR_CODE_e.define('TCA_CODE', 8) +TCX_CODE = GC_ERROR_CODE_e.define('TCX_CODE', 9) +CPC_CODE = GC_ERROR_CODE_e.define('CPC_CODE', 10) +CPF_CODE = GC_ERROR_CODE_e.define('CPF_CODE', 11) +CPG_CODE = GC_ERROR_CODE_e.define('CPG_CODE', 12) +SPI_CODE = GC_ERROR_CODE_e.define('SPI_CODE', 13) +RLC_CODE = GC_ERROR_CODE_e.define('RLC_CODE', 14) +SQC_CODE = GC_ERROR_CODE_e.define('SQC_CODE', 15) +TA_CODE = GC_ERROR_CODE_e.define('TA_CODE', 16) +TD_CODE = GC_ERROR_CODE_e.define('TD_CODE', 17) +TCP_CODE = GC_ERROR_CODE_e.define('TCP_CODE', 18) +TCI_CODE = GC_ERROR_CODE_e.define('TCI_CODE', 19) +GC_ROUTER_CODE = GC_ERROR_CODE_e.define('GC_ROUTER_CODE', 20) +VML2_CODE = GC_ERROR_CODE_e.define('VML2_CODE', 21) +VML2_WALKER_CODE = GC_ERROR_CODE_e.define('VML2_WALKER_CODE', 22) +ATCL2_CODE = GC_ERROR_CODE_e.define('ATCL2_CODE', 23) +GC_CANE_CODE = GC_ERROR_CODE_e.define('GC_CANE_CODE', 24) +MP5_CODE_SMN_SLVERR = GC_ERROR_CODE_e.define('MP5_CODE_SMN_SLVERR', 40) +MP5_CODE_UNKNOWN = GC_ERROR_CODE_e.define('MP5_CODE_UNKNOWN', 42) + +class SwI2cCmd_t(Struct): pass +uint8_t = ctypes.c_ubyte +SwI2cCmd_t._fields_ = [ + ('ReadWriteData', uint8_t), + ('CmdConfig', uint8_t), +] +class SwI2cRequest_t(Struct): pass +SwI2cRequest_t._fields_ = [ + ('I2CcontrollerPort', uint8_t), + ('I2CSpeed', uint8_t), + ('SlaveAddress', uint8_t), + ('NumCmds', uint8_t), + ('SwI2cCmds', (SwI2cCmd_t * 24)), +] +class SwI2cRequestExternal_t(Struct): pass +uint32_t = ctypes.c_uint32 +SwI2cRequestExternal_t._fields_ = [ + ('SwI2cRequest', SwI2cRequest_t), + ('Spare', (uint32_t * 8)), + ('MmHubPadding', (uint32_t * 8)), +] +PPCLK_e = CEnum(ctypes.c_uint32) +PPCLK_VCLK = PPCLK_e.define('PPCLK_VCLK', 0) +PPCLK_DCLK = PPCLK_e.define('PPCLK_DCLK', 1) +PPCLK_SOCCLK = PPCLK_e.define('PPCLK_SOCCLK', 2) +PPCLK_UCLK = PPCLK_e.define('PPCLK_UCLK', 3) +PPCLK_FCLK = PPCLK_e.define('PPCLK_FCLK', 4) +PPCLK_LCLK = PPCLK_e.define('PPCLK_LCLK', 5) +PPCLK_COUNT = PPCLK_e.define('PPCLK_COUNT', 6) + +GpioIntPolarity_e = CEnum(ctypes.c_uint32) +GPIO_INT_POLARITY_ACTIVE_LOW = GpioIntPolarity_e.define('GPIO_INT_POLARITY_ACTIVE_LOW', 0) +GPIO_INT_POLARITY_ACTIVE_HIGH = GpioIntPolarity_e.define('GPIO_INT_POLARITY_ACTIVE_HIGH', 1) + +UCLK_DPM_MODE_e = CEnum(ctypes.c_uint32) +UCLK_DPM_MODE_BANDWIDTH = UCLK_DPM_MODE_e.define('UCLK_DPM_MODE_BANDWIDTH', 0) +UCLK_DPM_MODE_LATENCY = UCLK_DPM_MODE_e.define('UCLK_DPM_MODE_LATENCY', 1) + +class AvfsDebugTableAid_t(Struct): pass +uint16_t = ctypes.c_uint16 +AvfsDebugTableAid_t._fields_ = [ + ('avgPsmCount', (uint16_t * 30)), + ('minPsmCount', (uint16_t * 30)), + ('avgPsmVoltage', (ctypes.c_float * 30)), + ('minPsmVoltage', (ctypes.c_float * 30)), +] +class AvfsDebugTableXcd_t(Struct): pass +AvfsDebugTableXcd_t._fields_ = [ + ('avgPsmCount', (uint16_t * 30)), + ('minPsmCount', (uint16_t * 30)), + ('avgPsmVoltage', (ctypes.c_float * 30)), + ('minPsmVoltage', (ctypes.c_float * 30)), +] +class struct_smu_hw_power_state(Struct): pass +struct_smu_hw_power_state._fields_ = [ + ('magic', ctypes.c_uint32), +] +class struct_smu_power_state(Struct): pass +enum_smu_state_ui_label = CEnum(ctypes.c_uint32) +SMU_STATE_UI_LABEL_NONE = enum_smu_state_ui_label.define('SMU_STATE_UI_LABEL_NONE', 0) +SMU_STATE_UI_LABEL_BATTERY = enum_smu_state_ui_label.define('SMU_STATE_UI_LABEL_BATTERY', 1) +SMU_STATE_UI_TABEL_MIDDLE_LOW = enum_smu_state_ui_label.define('SMU_STATE_UI_TABEL_MIDDLE_LOW', 2) +SMU_STATE_UI_LABEL_BALLANCED = enum_smu_state_ui_label.define('SMU_STATE_UI_LABEL_BALLANCED', 3) +SMU_STATE_UI_LABEL_MIDDLE_HIGHT = enum_smu_state_ui_label.define('SMU_STATE_UI_LABEL_MIDDLE_HIGHT', 4) +SMU_STATE_UI_LABEL_PERFORMANCE = enum_smu_state_ui_label.define('SMU_STATE_UI_LABEL_PERFORMANCE', 5) +SMU_STATE_UI_LABEL_BACO = enum_smu_state_ui_label.define('SMU_STATE_UI_LABEL_BACO', 6) + +enum_smu_state_classification_flag = CEnum(ctypes.c_uint32) +SMU_STATE_CLASSIFICATION_FLAG_BOOT = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_BOOT', 1) +SMU_STATE_CLASSIFICATION_FLAG_THERMAL = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_THERMAL', 2) +SMU_STATE_CLASSIFICATIN_FLAG_LIMITED_POWER_SOURCE = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATIN_FLAG_LIMITED_POWER_SOURCE', 4) +SMU_STATE_CLASSIFICATION_FLAG_RESET = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_RESET', 8) +SMU_STATE_CLASSIFICATION_FLAG_FORCED = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_FORCED', 16) +SMU_STATE_CLASSIFICATION_FLAG_USER_3D_PERFORMANCE = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_USER_3D_PERFORMANCE', 32) +SMU_STATE_CLASSIFICATION_FLAG_USER_2D_PERFORMANCE = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_USER_2D_PERFORMANCE', 64) +SMU_STATE_CLASSIFICATION_FLAG_3D_PERFORMANCE = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_3D_PERFORMANCE', 128) +SMU_STATE_CLASSIFICATION_FLAG_AC_OVERDIRVER_TEMPLATE = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_AC_OVERDIRVER_TEMPLATE', 256) +SMU_STATE_CLASSIFICATION_FLAG_UVD = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_UVD', 512) +SMU_STATE_CLASSIFICATION_FLAG_3D_PERFORMANCE_LOW = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_3D_PERFORMANCE_LOW', 1024) +SMU_STATE_CLASSIFICATION_FLAG_ACPI = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_ACPI', 2048) +SMU_STATE_CLASSIFICATION_FLAG_HD2 = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_HD2', 4096) +SMU_STATE_CLASSIFICATION_FLAG_UVD_HD = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_UVD_HD', 8192) +SMU_STATE_CLASSIFICATION_FLAG_UVD_SD = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_UVD_SD', 16384) +SMU_STATE_CLASSIFICATION_FLAG_USER_DC_PERFORMANCE = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_USER_DC_PERFORMANCE', 32768) +SMU_STATE_CLASSIFICATION_FLAG_DC_OVERDIRVER_TEMPLATE = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_DC_OVERDIRVER_TEMPLATE', 65536) +SMU_STATE_CLASSIFICATION_FLAG_BACO = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_BACO', 131072) +SMU_STATE_CLASSIFICATIN_FLAG_LIMITED_POWER_SOURCE2 = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATIN_FLAG_LIMITED_POWER_SOURCE2', 262144) +SMU_STATE_CLASSIFICATION_FLAG_ULV = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_ULV', 524288) +SMU_STATE_CLASSIFICATION_FLAG_UVD_MVC = enum_smu_state_classification_flag.define('SMU_STATE_CLASSIFICATION_FLAG_UVD_MVC', 1048576) + +class struct_smu_state_classification_block(Struct): pass +struct_smu_state_classification_block._fields_ = [ + ('ui_label', enum_smu_state_ui_label), + ('flags', enum_smu_state_classification_flag), + ('bios_index', ctypes.c_int32), + ('temporary_state', ctypes.c_bool), + ('to_be_deleted', ctypes.c_bool), +] +class struct_smu_state_pcie_block(Struct): pass +struct_smu_state_pcie_block._fields_ = [ + ('lanes', ctypes.c_uint32), +] +enum_smu_refreshrate_source = CEnum(ctypes.c_uint32) +SMU_REFRESHRATE_SOURCE_EDID = enum_smu_refreshrate_source.define('SMU_REFRESHRATE_SOURCE_EDID', 0) +SMU_REFRESHRATE_SOURCE_EXPLICIT = enum_smu_refreshrate_source.define('SMU_REFRESHRATE_SOURCE_EXPLICIT', 1) + +class struct_smu_state_display_block(Struct): pass +struct_smu_state_display_block._fields_ = [ + ('disable_frame_modulation', ctypes.c_bool), + ('limit_refreshrate', ctypes.c_bool), + ('refreshrate_source', enum_smu_refreshrate_source), + ('explicit_refreshrate', ctypes.c_int32), + ('edid_refreshrate_index', ctypes.c_int32), + ('enable_vari_bright', ctypes.c_bool), +] +class struct_smu_state_memory_block(Struct): pass +struct_smu_state_memory_block._fields_ = [ + ('dll_off', ctypes.c_bool), + ('m3arb', ctypes.c_ubyte), + ('unused', (ctypes.c_ubyte * 3)), +] +class struct_smu_state_software_algorithm_block(Struct): pass +struct_smu_state_software_algorithm_block._fields_ = [ + ('disable_load_balancing', ctypes.c_bool), + ('enable_sleep_for_timestamps', ctypes.c_bool), +] +class struct_smu_temperature_range(Struct): pass +struct_smu_temperature_range._fields_ = [ + ('min', ctypes.c_int32), + ('max', ctypes.c_int32), + ('edge_emergency_max', ctypes.c_int32), + ('hotspot_min', ctypes.c_int32), + ('hotspot_crit_max', ctypes.c_int32), + ('hotspot_emergency_max', ctypes.c_int32), + ('mem_min', ctypes.c_int32), + ('mem_crit_max', ctypes.c_int32), + ('mem_emergency_max', ctypes.c_int32), + ('software_shutdown_temp', ctypes.c_int32), + ('software_shutdown_temp_offset', ctypes.c_int32), +] +class struct_smu_state_validation_block(Struct): pass +struct_smu_state_validation_block._fields_ = [ + ('single_display_only', ctypes.c_bool), + ('disallow_on_dc', ctypes.c_bool), + ('supported_power_levels', ctypes.c_ubyte), +] +class struct_smu_uvd_clocks(Struct): pass +struct_smu_uvd_clocks._fields_ = [ + ('vclk', ctypes.c_uint32), + ('dclk', ctypes.c_uint32), +] +enum_smu_power_src_type = CEnum(ctypes.c_uint32) +SMU_POWER_SOURCE_AC = enum_smu_power_src_type.define('SMU_POWER_SOURCE_AC', 0) +SMU_POWER_SOURCE_DC = enum_smu_power_src_type.define('SMU_POWER_SOURCE_DC', 1) +SMU_POWER_SOURCE_COUNT = enum_smu_power_src_type.define('SMU_POWER_SOURCE_COUNT', 2) + +enum_smu_ppt_limit_type = CEnum(ctypes.c_uint32) +SMU_DEFAULT_PPT_LIMIT = enum_smu_ppt_limit_type.define('SMU_DEFAULT_PPT_LIMIT', 0) +SMU_FAST_PPT_LIMIT = enum_smu_ppt_limit_type.define('SMU_FAST_PPT_LIMIT', 1) + +enum_smu_ppt_limit_level = CEnum(ctypes.c_int32) +SMU_PPT_LIMIT_MIN = enum_smu_ppt_limit_level.define('SMU_PPT_LIMIT_MIN', -1) +SMU_PPT_LIMIT_CURRENT = enum_smu_ppt_limit_level.define('SMU_PPT_LIMIT_CURRENT', 0) +SMU_PPT_LIMIT_DEFAULT = enum_smu_ppt_limit_level.define('SMU_PPT_LIMIT_DEFAULT', 1) +SMU_PPT_LIMIT_MAX = enum_smu_ppt_limit_level.define('SMU_PPT_LIMIT_MAX', 2) + +enum_smu_memory_pool_size = CEnum(ctypes.c_uint32) +SMU_MEMORY_POOL_SIZE_ZERO = enum_smu_memory_pool_size.define('SMU_MEMORY_POOL_SIZE_ZERO', 0) +SMU_MEMORY_POOL_SIZE_256_MB = enum_smu_memory_pool_size.define('SMU_MEMORY_POOL_SIZE_256_MB', 268435456) +SMU_MEMORY_POOL_SIZE_512_MB = enum_smu_memory_pool_size.define('SMU_MEMORY_POOL_SIZE_512_MB', 536870912) +SMU_MEMORY_POOL_SIZE_1_GB = enum_smu_memory_pool_size.define('SMU_MEMORY_POOL_SIZE_1_GB', 1073741824) +SMU_MEMORY_POOL_SIZE_2_GB = enum_smu_memory_pool_size.define('SMU_MEMORY_POOL_SIZE_2_GB', 2147483648) + +enum_smu_clk_type = CEnum(ctypes.c_uint32) +SMU_GFXCLK = enum_smu_clk_type.define('SMU_GFXCLK', 0) +SMU_VCLK = enum_smu_clk_type.define('SMU_VCLK', 1) +SMU_DCLK = enum_smu_clk_type.define('SMU_DCLK', 2) +SMU_VCLK1 = enum_smu_clk_type.define('SMU_VCLK1', 3) +SMU_DCLK1 = enum_smu_clk_type.define('SMU_DCLK1', 4) +SMU_ECLK = enum_smu_clk_type.define('SMU_ECLK', 5) +SMU_SOCCLK = enum_smu_clk_type.define('SMU_SOCCLK', 6) +SMU_UCLK = enum_smu_clk_type.define('SMU_UCLK', 7) +SMU_DCEFCLK = enum_smu_clk_type.define('SMU_DCEFCLK', 8) +SMU_DISPCLK = enum_smu_clk_type.define('SMU_DISPCLK', 9) +SMU_PIXCLK = enum_smu_clk_type.define('SMU_PIXCLK', 10) +SMU_PHYCLK = enum_smu_clk_type.define('SMU_PHYCLK', 11) +SMU_FCLK = enum_smu_clk_type.define('SMU_FCLK', 12) +SMU_SCLK = enum_smu_clk_type.define('SMU_SCLK', 13) +SMU_MCLK = enum_smu_clk_type.define('SMU_MCLK', 14) +SMU_PCIE = enum_smu_clk_type.define('SMU_PCIE', 15) +SMU_LCLK = enum_smu_clk_type.define('SMU_LCLK', 16) +SMU_OD_CCLK = enum_smu_clk_type.define('SMU_OD_CCLK', 17) +SMU_OD_SCLK = enum_smu_clk_type.define('SMU_OD_SCLK', 18) +SMU_OD_MCLK = enum_smu_clk_type.define('SMU_OD_MCLK', 19) +SMU_OD_VDDC_CURVE = enum_smu_clk_type.define('SMU_OD_VDDC_CURVE', 20) +SMU_OD_RANGE = enum_smu_clk_type.define('SMU_OD_RANGE', 21) +SMU_OD_VDDGFX_OFFSET = enum_smu_clk_type.define('SMU_OD_VDDGFX_OFFSET', 22) +SMU_OD_FAN_CURVE = enum_smu_clk_type.define('SMU_OD_FAN_CURVE', 23) +SMU_OD_ACOUSTIC_LIMIT = enum_smu_clk_type.define('SMU_OD_ACOUSTIC_LIMIT', 24) +SMU_OD_ACOUSTIC_TARGET = enum_smu_clk_type.define('SMU_OD_ACOUSTIC_TARGET', 25) +SMU_OD_FAN_TARGET_TEMPERATURE = enum_smu_clk_type.define('SMU_OD_FAN_TARGET_TEMPERATURE', 26) +SMU_OD_FAN_MINIMUM_PWM = enum_smu_clk_type.define('SMU_OD_FAN_MINIMUM_PWM', 27) +SMU_CLK_COUNT = enum_smu_clk_type.define('SMU_CLK_COUNT', 28) + +class struct_smu_user_dpm_profile(Struct): pass +struct_smu_user_dpm_profile._fields_ = [ + ('fan_mode', ctypes.c_uint32), + ('power_limit', ctypes.c_uint32), + ('fan_speed_pwm', ctypes.c_uint32), + ('fan_speed_rpm', ctypes.c_uint32), + ('flags', ctypes.c_uint32), + ('user_od', ctypes.c_uint32), + ('clk_mask', (ctypes.c_uint32 * 28)), + ('clk_dependency', ctypes.c_uint32), +] +class struct_smu_table(Struct): pass +class struct_amdgpu_bo(Struct): pass +struct_smu_table._fields_ = [ + ('size', ctypes.c_uint64), + ('align', ctypes.c_uint32), + ('domain', ctypes.c_ubyte), + ('mc_address', ctypes.c_uint64), + ('cpu_addr', ctypes.c_void_p), + ('bo', ctypes.POINTER(struct_amdgpu_bo)), + ('version', ctypes.c_uint32), +] +enum_smu_perf_level_designation = CEnum(ctypes.c_uint32) +PERF_LEVEL_ACTIVITY = enum_smu_perf_level_designation.define('PERF_LEVEL_ACTIVITY', 0) +PERF_LEVEL_POWER_CONTAINMENT = enum_smu_perf_level_designation.define('PERF_LEVEL_POWER_CONTAINMENT', 1) + +class struct_smu_performance_level(Struct): pass +struct_smu_performance_level._fields_ = [ + ('core_clock', ctypes.c_uint32), + ('memory_clock', ctypes.c_uint32), + ('vddc', ctypes.c_uint32), + ('vddci', ctypes.c_uint32), + ('non_local_mem_freq', ctypes.c_uint32), + ('non_local_mem_width', ctypes.c_uint32), +] +class struct_smu_clock_info(Struct): pass +struct_smu_clock_info._fields_ = [ + ('min_mem_clk', ctypes.c_uint32), + ('max_mem_clk', ctypes.c_uint32), + ('min_eng_clk', ctypes.c_uint32), + ('max_eng_clk', ctypes.c_uint32), + ('min_bus_bandwidth', ctypes.c_uint32), + ('max_bus_bandwidth', ctypes.c_uint32), +] +class struct_smu_bios_boot_up_values(Struct): pass +struct_smu_bios_boot_up_values._fields_ = [ + ('revision', ctypes.c_uint32), + ('gfxclk', ctypes.c_uint32), + ('uclk', ctypes.c_uint32), + ('socclk', ctypes.c_uint32), + ('dcefclk', ctypes.c_uint32), + ('eclk', ctypes.c_uint32), + ('vclk', ctypes.c_uint32), + ('dclk', ctypes.c_uint32), + ('vddc', ctypes.c_uint16), + ('vddci', ctypes.c_uint16), + ('mvddc', ctypes.c_uint16), + ('vdd_gfx', ctypes.c_uint16), + ('cooling_id', ctypes.c_ubyte), + ('pp_table_id', ctypes.c_uint32), + ('format_revision', ctypes.c_uint32), + ('content_revision', ctypes.c_uint32), + ('fclk', ctypes.c_uint32), + ('lclk', ctypes.c_uint32), + ('firmware_caps', ctypes.c_uint32), +] +enum_smu_table_id = CEnum(ctypes.c_uint32) +SMU_TABLE_PPTABLE = enum_smu_table_id.define('SMU_TABLE_PPTABLE', 0) +SMU_TABLE_WATERMARKS = enum_smu_table_id.define('SMU_TABLE_WATERMARKS', 1) +SMU_TABLE_CUSTOM_DPM = enum_smu_table_id.define('SMU_TABLE_CUSTOM_DPM', 2) +SMU_TABLE_DPMCLOCKS = enum_smu_table_id.define('SMU_TABLE_DPMCLOCKS', 3) +SMU_TABLE_AVFS = enum_smu_table_id.define('SMU_TABLE_AVFS', 4) +SMU_TABLE_AVFS_PSM_DEBUG = enum_smu_table_id.define('SMU_TABLE_AVFS_PSM_DEBUG', 5) +SMU_TABLE_AVFS_FUSE_OVERRIDE = enum_smu_table_id.define('SMU_TABLE_AVFS_FUSE_OVERRIDE', 6) +SMU_TABLE_PMSTATUSLOG = enum_smu_table_id.define('SMU_TABLE_PMSTATUSLOG', 7) +SMU_TABLE_SMU_METRICS = enum_smu_table_id.define('SMU_TABLE_SMU_METRICS', 8) +SMU_TABLE_DRIVER_SMU_CONFIG = enum_smu_table_id.define('SMU_TABLE_DRIVER_SMU_CONFIG', 9) +SMU_TABLE_ACTIVITY_MONITOR_COEFF = enum_smu_table_id.define('SMU_TABLE_ACTIVITY_MONITOR_COEFF', 10) +SMU_TABLE_OVERDRIVE = enum_smu_table_id.define('SMU_TABLE_OVERDRIVE', 11) +SMU_TABLE_I2C_COMMANDS = enum_smu_table_id.define('SMU_TABLE_I2C_COMMANDS', 12) +SMU_TABLE_PACE = enum_smu_table_id.define('SMU_TABLE_PACE', 13) +SMU_TABLE_ECCINFO = enum_smu_table_id.define('SMU_TABLE_ECCINFO', 14) +SMU_TABLE_COMBO_PPTABLE = enum_smu_table_id.define('SMU_TABLE_COMBO_PPTABLE', 15) +SMU_TABLE_WIFIBAND = enum_smu_table_id.define('SMU_TABLE_WIFIBAND', 16) +SMU_TABLE_COUNT = enum_smu_table_id.define('SMU_TABLE_COUNT', 17) + +PPSMC_Result_OK = 0x1 +PPSMC_Result_Failed = 0xFF +PPSMC_Result_UnknownCmd = 0xFE +PPSMC_Result_CmdRejectedPrereq = 0xFD +PPSMC_Result_CmdRejectedBusy = 0xFC +PPSMC_MSG_TestMessage = 0x1 +PPSMC_MSG_GetSmuVersion = 0x2 +PPSMC_MSG_GfxDriverReset = 0x3 +PPSMC_MSG_GetDriverIfVersion = 0x4 +PPSMC_MSG_EnableAllSmuFeatures = 0x5 +PPSMC_MSG_DisableAllSmuFeatures = 0x6 +PPSMC_MSG_RequestI2cTransaction = 0x7 +PPSMC_MSG_GetMetricsVersion = 0x8 +PPSMC_MSG_GetMetricsTable = 0x9 +PPSMC_MSG_GetEccInfoTable = 0xA +PPSMC_MSG_GetEnabledSmuFeaturesLow = 0xB +PPSMC_MSG_GetEnabledSmuFeaturesHigh = 0xC +PPSMC_MSG_SetDriverDramAddrHigh = 0xD +PPSMC_MSG_SetDriverDramAddrLow = 0xE +PPSMC_MSG_SetToolsDramAddrHigh = 0xF +PPSMC_MSG_SetToolsDramAddrLow = 0x10 +PPSMC_MSG_SetSystemVirtualDramAddrHigh = 0x11 +PPSMC_MSG_SetSystemVirtualDramAddrLow = 0x12 +PPSMC_MSG_SetSoftMinByFreq = 0x13 +PPSMC_MSG_SetSoftMaxByFreq = 0x14 +PPSMC_MSG_GetMinDpmFreq = 0x15 +PPSMC_MSG_GetMaxDpmFreq = 0x16 +PPSMC_MSG_GetDpmFreqByIndex = 0x17 +PPSMC_MSG_SetPptLimit = 0x18 +PPSMC_MSG_GetPptLimit = 0x19 +PPSMC_MSG_DramLogSetDramAddrHigh = 0x1A +PPSMC_MSG_DramLogSetDramAddrLow = 0x1B +PPSMC_MSG_DramLogSetDramSize = 0x1C +PPSMC_MSG_GetDebugData = 0x1D +PPSMC_MSG_HeavySBR = 0x1E +PPSMC_MSG_SetNumBadHbmPagesRetired = 0x1F +PPSMC_MSG_DFCstateControl = 0x20 +PPSMC_MSG_GetGmiPwrDnHyst = 0x21 +PPSMC_MSG_SetGmiPwrDnHyst = 0x22 +PPSMC_MSG_GmiPwrDnControl = 0x23 +PPSMC_MSG_EnterGfxoff = 0x24 +PPSMC_MSG_ExitGfxoff = 0x25 +PPSMC_MSG_EnableDeterminism = 0x26 +PPSMC_MSG_DisableDeterminism = 0x27 +PPSMC_MSG_DumpSTBtoDram = 0x28 +PPSMC_MSG_STBtoDramLogSetDramAddrHigh = 0x29 +PPSMC_MSG_STBtoDramLogSetDramAddrLow = 0x2A +PPSMC_MSG_STBtoDramLogSetDramSize = 0x2B +PPSMC_MSG_SetSystemVirtualSTBtoDramAddrHigh = 0x2C +PPSMC_MSG_SetSystemVirtualSTBtoDramAddrLow = 0x2D +PPSMC_MSG_GfxDriverResetRecovery = 0x2E +PPSMC_MSG_TriggerVFFLR = 0x2F +PPSMC_MSG_SetSoftMinGfxClk = 0x30 +PPSMC_MSG_SetSoftMaxGfxClk = 0x31 +PPSMC_MSG_GetMinGfxDpmFreq = 0x32 +PPSMC_MSG_GetMaxGfxDpmFreq = 0x33 +PPSMC_MSG_PrepareForDriverUnload = 0x34 +PPSMC_MSG_ReadThrottlerLimit = 0x35 +PPSMC_MSG_QueryValidMcaCount = 0x36 +PPSMC_MSG_McaBankDumpDW = 0x37 +PPSMC_MSG_GetCTFLimit = 0x38 +PPSMC_MSG_ClearMcaOnRead = 0x39 +PPSMC_MSG_QueryValidMcaCeCount = 0x3A +PPSMC_MSG_McaBankCeDumpDW = 0x3B +PPSMC_MSG_SelectPLPDMode = 0x40 +PPSMC_MSG_RmaDueToBadPageThreshold = 0x43 +PPSMC_MSG_SelectPstatePolicy = 0x44 +PPSMC_MSG_SetPhsDetWRbwThreshold = 0x45 +PPSMC_MSG_SetPhsDetWRbwFreqHigh = 0x46 +PPSMC_MSG_SetPhsDetWRbwFreqLow = 0x47 +PPSMC_MSG_SetPhsDetWRbwHystDown = 0x48 +PPSMC_MSG_SetPhsDetWRbwAlpha = 0x49 +PPSMC_MSG_SetPhsDetOnOff = 0x4A +PPSMC_MSG_GetPhsDetResidency = 0x4B +PPSMC_Message_Count = 0x4C +PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET = 0x1 +PPSMC_RESET_TYPE_DRIVER_MODE_2_RESET = 0x2 +PPSMC_RESET_TYPE_DRIVER_MODE_3_RESET = 0x3 +PPSMC_THROTTLING_LIMIT_TYPE_SOCKET = 0x1 +PPSMC_THROTTLING_LIMIT_TYPE_HBM = 0x2 +PPSMC_AID_THM_TYPE = 0x1 +PPSMC_CCD_THM_TYPE = 0x2 +PPSMC_XCD_THM_TYPE = 0x3 +PPSMC_HBM_THM_TYPE = 0x4 +PPSMC_PLPD_MODE_DEFAULT = 0x1 +PPSMC_PLPD_MODE_OPTIMIZED = 0x2 +SMU13_0_6_DRIVER_IF_VERSION = 0x08042024 +NUM_I2C_CONTROLLERS = 8 +I2C_CONTROLLER_ENABLED = 1 +I2C_CONTROLLER_DISABLED = 0 +MAX_SW_I2C_COMMANDS = 24 +CMDCONFIG_STOP_BIT = 0 +CMDCONFIG_RESTART_BIT = 1 +CMDCONFIG_READWRITE_BIT = 2 +CMDCONFIG_STOP_MASK = (1 << CMDCONFIG_STOP_BIT) +CMDCONFIG_RESTART_MASK = (1 << CMDCONFIG_RESTART_BIT) +CMDCONFIG_READWRITE_MASK = (1 << CMDCONFIG_READWRITE_BIT) +IH_INTERRUPT_ID_TO_DRIVER = 0xFE +IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING = 0x7 +THROTTLER_PROCHOT_BIT = 0 +THROTTLER_PPT_BIT = 1 +THROTTLER_THERMAL_SOCKET_BIT = 2 +THROTTLER_THERMAL_VR_BIT = 3 +THROTTLER_THERMAL_HBM_BIT = 4 +ClearMcaOnRead_UE_FLAG_MASK = 0x1 +ClearMcaOnRead_CE_POLL_MASK = 0x2 +int32_t = int +SMU_THERMAL_MINIMUM_ALERT_TEMP = 0 +SMU_THERMAL_MAXIMUM_ALERT_TEMP = 255 +SMU_TEMPERATURE_UNITS_PER_CENTIGRADES = 1000 +SMU_FW_NAME_LEN = 0x24 +SMU_DPM_USER_PROFILE_RESTORE = (1 << 0) +SMU_CUSTOM_FAN_SPEED_RPM = (1 << 1) +SMU_CUSTOM_FAN_SPEED_PWM = (1 << 2) +SMU_THROTTLER_PPT0_BIT = 0 +SMU_THROTTLER_PPT1_BIT = 1 +SMU_THROTTLER_PPT2_BIT = 2 +SMU_THROTTLER_PPT3_BIT = 3 +SMU_THROTTLER_SPL_BIT = 4 +SMU_THROTTLER_FPPT_BIT = 5 +SMU_THROTTLER_SPPT_BIT = 6 +SMU_THROTTLER_SPPT_APU_BIT = 7 +SMU_THROTTLER_TDC_GFX_BIT = 16 +SMU_THROTTLER_TDC_SOC_BIT = 17 +SMU_THROTTLER_TDC_MEM_BIT = 18 +SMU_THROTTLER_TDC_VDD_BIT = 19 +SMU_THROTTLER_TDC_CVIP_BIT = 20 +SMU_THROTTLER_EDC_CPU_BIT = 21 +SMU_THROTTLER_EDC_GFX_BIT = 22 +SMU_THROTTLER_APCC_BIT = 23 +SMU_THROTTLER_TEMP_GPU_BIT = 32 +SMU_THROTTLER_TEMP_CORE_BIT = 33 +SMU_THROTTLER_TEMP_MEM_BIT = 34 +SMU_THROTTLER_TEMP_EDGE_BIT = 35 +SMU_THROTTLER_TEMP_HOTSPOT_BIT = 36 +SMU_THROTTLER_TEMP_SOC_BIT = 37 +SMU_THROTTLER_TEMP_VR_GFX_BIT = 38 +SMU_THROTTLER_TEMP_VR_SOC_BIT = 39 +SMU_THROTTLER_TEMP_VR_MEM0_BIT = 40 +SMU_THROTTLER_TEMP_VR_MEM1_BIT = 41 +SMU_THROTTLER_TEMP_LIQUID0_BIT = 42 +SMU_THROTTLER_TEMP_LIQUID1_BIT = 43 +SMU_THROTTLER_VRHOT0_BIT = 44 +SMU_THROTTLER_VRHOT1_BIT = 45 +SMU_THROTTLER_PROCHOT_CPU_BIT = 46 +SMU_THROTTLER_PROCHOT_GFX_BIT = 47 +SMU_THROTTLER_PPM_BIT = 56 +SMU_THROTTLER_FIT_BIT = 57 \ No newline at end of file diff --git a/tinygrad/runtime/ops_amd.py b/tinygrad/runtime/ops_amd.py index 3a3a59826f..c7b8764cf6 100644 --- a/tinygrad/runtime/ops_amd.py +++ b/tinygrad/runtime/ops_amd.py @@ -796,7 +796,7 @@ class PCIIface(PCIIfaceBase): gpus:ClassVar[list[str]] = [] def __init__(self, dev, dev_id): - super().__init__(dev, dev_id, vendor=0x1002, devices=[(0xffff, [0x744c, 0x7480, 0x7550, 0x7590])], bars=[0, 2, 5], vram_bar=0, + super().__init__(dev, dev_id, vendor=0x1002, devices=[(0xffff, [0x74a1, 0x744c, 0x7480, 0x7550, 0x7590])], bars=[0, 2, 5], vram_bar=0, va_start=AMMemoryManager.va_allocator.base, va_size=AMMemoryManager.va_allocator.size) self._setup_adev(self.pci_dev) self.pci_dev.write_config(pci.PCI_COMMAND, self.pci_dev.read_config(pci.PCI_COMMAND, 2) | pci.PCI_COMMAND_MASTER, 2) @@ -808,12 +808,18 @@ class PCIIface(PCIIfaceBase): self.ip_versions = self.dev_impl.ip_ver gfxver = int(f"{self.dev_impl.ip_ver[am.GC_HWIP][0]:02d}{self.dev_impl.ip_ver[am.GC_HWIP][1]:02d}{self.dev_impl.ip_ver[am.GC_HWIP][2]:02d}") - array_count = self.dev_impl.gc_info.gc_num_sa_per_se * self.dev_impl.gc_info.gc_num_se - self.props = {'cu_per_simd_array': (cu_per_sa:=2 * (self.dev_impl.gc_info.gc_num_wgp0_per_sa + self.dev_impl.gc_info.gc_num_wgp1_per_sa)), - 'simd_count': 2 * cu_per_sa * array_count, 'simd_per_cu': 2, 'array_count': array_count, 'gfx_target_version': gfxver, + if self.dev_impl.gc_info.header.version_major == 2: + cu_per_sa = self.dev_impl.gc_info.gc_num_cu_per_sh + max_sh_per_se = self.dev_impl.gc_info.gc_num_sh_per_se + else: + cu_per_sa = 2 * (self.dev_impl.gc_info.gc_num_wgp0_per_sa + self.dev_impl.gc_info.gc_num_wgp1_per_sa) + max_sh_per_se = self.dev_impl.gc_info.gc_num_sa_per_se + + array_count = max_sh_per_se * self.dev_impl.gc_info.gc_num_se * self.dev_impl.gfx.xccs + self.props = {'cu_per_simd_array': cu_per_sa, 'simd_count': 2 * cu_per_sa * array_count, 'simd_per_cu': 2, 'array_count': array_count, 'max_slots_scratch_cu': self.dev_impl.gc_info.gc_max_scratch_slots_per_cu, 'max_waves_per_simd': self.dev_impl.gc_info.gc_max_waves_per_simd, - 'simd_arrays_per_engine': self.dev_impl.gc_info.gc_num_sa_per_se, 'lds_size_in_kb': self.dev_impl.gc_info.gc_lds_size, - 'num_xcc': self.dev_impl.gfx.xccs} + 'simd_arrays_per_engine': max_sh_per_se, 'lds_size_in_kb': self.dev_impl.gc_info.gc_lds_size, 'num_xcc': self.dev_impl.gfx.xccs, + 'gfx_target_version': {90403: 90402}.get(gfxver, gfxver)} def create_queue(self, queue_type, ring, gart, rptr, wptr, eop_buffer=None, cwsr_buffer=None, ctl_stack_size=0, ctx_save_restore_size=0, xcc_id=0): assert cwsr_buffer is None, "no cwsr buffer for am" diff --git a/tinygrad/runtime/support/am/amdev.py b/tinygrad/runtime/support/am/amdev.py index d7e2d4476c..8f867fd5a6 100644 --- a/tinygrad/runtime/support/am/amdev.py +++ b/tinygrad/runtime/support/am/amdev.py @@ -41,40 +41,62 @@ class AMFirmware: self.ucode_start: dict[str, int] = {} self.descs: list[tuple[list[int], memoryview]] = [] - blob, hdr = self.load_fw(f"smu_{fmt_ver(am.MP1_HWIP)}.bin", am.struct_smc_firmware_header_v1_0) - self.smu_psp_desc = self.desc(blob, hdr.header.ucode_array_offset_bytes, hdr.header.ucode_size_bytes, am.GFX_FW_TYPE_SMU) + # SMU firmware + blob, hdr = self.load_fw(f"smu_{fmt_ver(am.MP1_HWIP)}.bin", versioned_header="struct_smc_firmware_header") + if self.adev.ip_ver[am.GC_HWIP] >= (11,0,0): + self.smu_psp_desc = self.desc(blob, hdr.v1_0.header.ucode_array_offset_bytes, hdr.v1_0.header.ucode_size_bytes, am.GFX_FW_TYPE_SMU) + else: + p2stables = (am.struct_smc_soft_pptable_entry * hdr.pptable_count).from_buffer(blob[hdr.pptable_entry_offset:]) + for p2stable in p2stables: + if p2stable.id == (__P2S_TABLE_ID_X:=0x50325358): + self.descs += [self.desc(blob, p2stable.ppt_offset_bytes, p2stable.ppt_size_bytes, am.GFX_FW_TYPE_P2S_TABLE)] # SDMA firmware - blob, hdr = self.load_fw(f"sdma_{fmt_ver(am.SDMA0_HWIP)}.bin", versioned_header='struct_sdma_firmware_header') - if hdr.header.header_version_major < 3: + blob, hdr = self.load_fw(f"sdma_{fmt_ver(am.SDMA0_HWIP)}.bin", versioned_header="struct_sdma_firmware_header") + if hdr.header.header_version_major == 1: + self.descs += [self.desc(blob, hdr.header.ucode_array_offset_bytes, hdr.header.ucode_size_bytes, am.GFX_FW_TYPE_SDMA0)] + elif hdr.header.header_version_major == 2: self.descs += [self.desc(blob, hdr.ctl_ucode_offset, hdr.ctl_ucode_size_bytes, am.GFX_FW_TYPE_SDMA_UCODE_TH1)] self.descs += [self.desc(blob, hdr.header.ucode_array_offset_bytes, hdr.ctx_ucode_size_bytes, am.GFX_FW_TYPE_SDMA_UCODE_TH0)] else: self.descs += [self.desc(blob, hdr.header.ucode_array_offset_bytes, hdr.ucode_size_bytes, am.GFX_FW_TYPE_SDMA_UCODE_TH0)] # PFP, ME, MEC firmware for (fw_name, fw_cnt) in ([('PFP', 1), ('ME', 1)] if self.adev.ip_ver[am.GC_HWIP] >= (12,0,0) else []) + [('MEC', 1)]: - blob, hdr = self.load_fw(f"gc_{fmt_ver(am.GC_HWIP)}_{fw_name.lower()}.bin", am.struct_gfx_firmware_header_v2_0) + blob, hdr = self.load_fw(f"gc_{fmt_ver(am.GC_HWIP)}_{fw_name.lower()}.bin", versioned_header="struct_gfx_firmware_header") - # Code part - self.descs += [self.desc(blob, hdr.header.ucode_array_offset_bytes, hdr.ucode_size_bytes, getattr(am, f'GFX_FW_TYPE_RS64_{fw_name}'))] - - # Stack - stack_fws = [getattr(am, f'GFX_FW_TYPE_RS64_{fw_name}_P{fwnum}_STACK') for fwnum in range(fw_cnt)] - self.descs += [self.desc(blob, hdr.data_offset_bytes, hdr.data_size_bytes, *stack_fws)] - self.ucode_start[fw_name] = hdr.ucode_start_addr_lo | (hdr.ucode_start_addr_hi << 32) + ucode_off = hdr.header.ucode_array_offset_bytes + if hdr.header.header_version_major == 1: + # Code + self.descs += [self.desc(blob, ucode_off, hdr.header.ucode_size_bytes - hdr.jt_size * 4, getattr(am, f'GFX_FW_TYPE_CP_{fw_name}'))] + # JT + self.descs += [self.desc(blob, ucode_off + hdr.jt_offset * 4, hdr.jt_size * 4, getattr(am, f'GFX_FW_TYPE_CP_{fw_name}_ME1'))] + else: + # Code + self.descs += [self.desc(blob, ucode_off, hdr.ucode_size_bytes, getattr(am, f'GFX_FW_TYPE_RS64_{fw_name}'))] + # Stack + stack_fws = [getattr(am, f'GFX_FW_TYPE_RS64_{fw_name}_P{fwnum}_STACK') for fwnum in range(fw_cnt)] + self.descs += [self.desc(blob, hdr.data_offset_bytes, hdr.data_size_bytes, *stack_fws)] + self.ucode_start[fw_name] = hdr.ucode_start_addr_lo | (hdr.ucode_start_addr_hi << 32) # IMU firmware - blob, hdr = self.load_fw(f"gc_{fmt_ver(am.GC_HWIP)}_imu.bin", am.struct_imu_firmware_header_v1_0) - imu_i_off, imu_i_sz, imu_d_sz = hdr.header.ucode_array_offset_bytes, hdr.imu_iram_ucode_size_bytes, hdr.imu_dram_ucode_size_bytes - self.descs += [self.desc(blob, imu_i_off, imu_i_sz, am.GFX_FW_TYPE_IMU_I), self.desc(blob, imu_i_off + imu_i_sz, imu_d_sz, am.GFX_FW_TYPE_IMU_D)] + if self.adev.ip_ver[am.GC_HWIP] >= (11,0,0): + blob, hdr = self.load_fw(f"gc_{fmt_ver(am.GC_HWIP)}_imu.bin", am.struct_imu_firmware_header_v1_0) + imu_i_off, imu_i_sz, imu_d_sz = hdr.header.ucode_array_offset_bytes, hdr.imu_iram_ucode_size_bytes, hdr.imu_dram_ucode_size_bytes + self.descs += [self.desc(blob, imu_i_off, imu_i_sz, am.GFX_FW_TYPE_IMU_I), self.desc(blob, imu_i_off+imu_i_sz, imu_d_sz, am.GFX_FW_TYPE_IMU_D)] # RLC firmware - blob, hdr0, _hdr1, hdr2, hdr3 = self.load_fw(f"gc_{fmt_ver(am.GC_HWIP)}_rlc.bin", am.struct_rlc_firmware_header_v2_0, + blob, hdr0, hdr1, hdr2, hdr3 = self.load_fw(f"gc_{fmt_ver(am.GC_HWIP)}_rlc.bin", am.struct_rlc_firmware_header_v2_0, am.struct_rlc_firmware_header_v2_1, am.struct_rlc_firmware_header_v2_2, am.struct_rlc_firmware_header_v2_3) - for mem,fmem in [('IRAM', 'iram'), ('DRAM_BOOT', 'dram')]: - off, sz = getattr(hdr2, f'rlc_{fmem}_ucode_offset_bytes'), getattr(hdr2, f'rlc_{fmem}_ucode_size_bytes') - self.descs += [self.desc(blob, off, sz, getattr(am, f'GFX_FW_TYPE_RLC_{mem}'))] + if hdr0.header.header_version_minor == 1: + for mem,fmem in [('LIST_SRM_CNTL', 'list_cntl'), ('LIST_GPM_MEM', 'list_gpm'), ('LIST_SRM_MEM', 'list_srm')]: + off, sz = getattr(hdr1, f'save_restore_{fmem}_offset_bytes'), getattr(hdr1, f'save_restore_{fmem}_size_bytes') + self.descs += [self.desc(blob, off, sz, getattr(am, f'GFX_FW_TYPE_RLC_RESTORE_{mem}'))] + + if hdr0.header.header_version_minor >= 2: + for mem,fmem in [('IRAM', 'iram'), ('DRAM_BOOT', 'dram')]: + off, sz = getattr(hdr2, f'rlc_{fmem}_ucode_offset_bytes'), getattr(hdr2, f'rlc_{fmem}_ucode_size_bytes') + self.descs += [self.desc(blob, off, sz, getattr(am, f'GFX_FW_TYPE_RLC_{mem}'))] if hdr0.header.header_version_minor == 3: for mem in ['P', 'V']: @@ -107,7 +129,7 @@ class AMPageTableEntry: def address(self, entry_id:int) -> int: assert self.entries[entry_id] & am.AMDGPU_PTE_SYSTEM == 0, "should not be system address" return self.adev.xgmi2paddr(self.entries[entry_id] & 0x0000FFFFFFFFF000) - def is_page(self, entry_id:int) -> bool: return self.lv == am.AMDGPU_VM_PTB or self.adev.gmc.is_pte_huge_page(self.entries[entry_id]) + def is_page(self, entry_id:int) -> bool: return self.lv == am.AMDGPU_VM_PTB or self.adev.gmc.is_pte_huge_page(self.lv, self.entries[entry_id]) def supports_huge_page(self, paddr:int): return self.lv >= am.AMDGPU_VM_PDB2 class AMMemoryManager(MemoryManager): @@ -121,7 +143,7 @@ class AMMemoryManager(MemoryManager): class AMDev(PCIDevImplBase): Version = 0xA0000006 - def __init__(self, pci_dev:PCIDevice, dma_regions:list[tuple[int, MMIOInterface]]|None=None): + def __init__(self, pci_dev:PCIDevice, dma_regions:list[tuple[int, MMIOInterface]]|None=None, reset_mode=False): self.pci_dev, self.devfmt, self.dma_regions = pci_dev, pci_dev.pcibus, dma_regions self.vram, self.doorbell64, self.mmio = self.pci_dev.map_bar(0), self.pci_dev.map_bar(2, fmt='Q'), self.pci_dev.map_bar(5, fmt='I') @@ -147,7 +169,11 @@ class AMDev(PCIDevImplBase): # Init hw for IP blocks where it is needed if not self.partial_boot: - if self.psp.is_sos_alive() and self.smu.is_smu_alive(): self.smu.mode1_reset() + if self.psp.is_sos_alive() and self.smu.is_smu_alive(): + if self.gmc.xgmi_seg_sz > 0: + if reset_mode: return # in reset mode, do not raise + raise RuntimeError("Malformed state. Use extra/amdpci/hive_reset.py to reset the hive") + self.smu.mode1_reset() for ip in [self.soc, self.gmc, self.ih, self.psp, self.smu]: ip.init_hw() if DEBUG >= 2: print(f"am {self.devfmt}: {ip.__class__.__name__} initialized") @@ -199,14 +225,14 @@ class AMDev(PCIDevImplBase): def reg(self, reg:str) -> AMRegister: return self.__dict__[reg] def rreg(self, reg:int) -> int: - val = self.indirect_rreg(reg * 4) if reg > len(self.mmio) else self.mmio[reg] + val = self.indirect_rreg(reg) if reg > len(self.mmio) else self.mmio[reg] if AM_DEBUG >= 4 and getattr(self, '_prev_rreg', None) != (reg, val): print(f"am {self.devfmt}: Reading register {reg:#x} with value {val:#x}") self._prev_rreg = (reg, val) return val def wreg(self, reg:int, val:int): if AM_DEBUG >= 4: print(f"am {self.devfmt}: Writing register {reg:#x} with value {val:#x}") - if reg > len(self.mmio): self.indirect_wreg(reg * 4, val) + if reg > len(self.mmio): self.indirect_wreg(reg, val) else: self.mmio[reg] = val def wreg_pair(self, reg_base:str, lo_suffix:str, hi_suffix:str, val:int, inst:int=0): @@ -214,13 +240,17 @@ class AMDev(PCIDevImplBase): self.reg(f"{reg_base}{hi_suffix}").write(val >> 32, inst=inst) def indirect_rreg(self, reg:int) -> int: - self.reg("regBIF_BX_PF0_RSMU_INDEX").write(reg) + self.reg("regBIF_BX_PF0_RSMU_INDEX").write(reg * 4) return self.reg("regBIF_BX_PF0_RSMU_DATA").read() def indirect_wreg(self, reg:int, val:int): - self.reg("regBIF_BX_PF0_RSMU_INDEX").write(reg) + self.reg("regBIF_BX_PF0_RSMU_INDEX").write(reg * 4) self.reg("regBIF_BX_PF0_RSMU_DATA").write(val) + def indirect_wreg_pcie(self, reg:int, val:int, aid:int=0): + self.reg("regBIF_BX0_PCIE_INDEX2").write(reg * 4 + ((((aid & 0b11) << 32) | (1 << 34)) if aid > 0 else 0)) + self.reg("regBIF_BX0_PCIE_DATA2").write(val) + def _read_vram(self, addr, size) -> bytes: assert addr % 4 == 0 and size % 4 == 0, f"Invalid address {addr:#x} or size {size:#x}" res = [] @@ -268,6 +298,7 @@ class AMDev(PCIDevImplBase): def _build_regs(self): mods = [("mp", am.MP0_HWIP), ("hdp", am.HDP_HWIP), ("gc", am.GC_HWIP), ("mmhub", am.MMHUB_HWIP), ("osssys", am.OSSSYS_HWIP), ("nbio" if self.ip_ver[am.GC_HWIP] < (12,0,0) else "nbif", am.NBIO_HWIP)] + if self.ip_ver[am.SDMA0_HWIP] == (4,4,2): mods += [("sdma", am.SDMA0_HWIP)] for prefix, hwip in mods: self.__dict__.update(import_asic_regs(prefix, self.ip_ver[hwip], cls=functools.partial(AMRegister, adev=self, bases=self.regs_offset[hwip]))) diff --git a/tinygrad/runtime/support/am/ip.py b/tinygrad/runtime/support/am/ip.py index 428a0aa876..5058f6f626 100644 --- a/tinygrad/runtime/support/am/ip.py +++ b/tinygrad/runtime/support/am/ip.py @@ -18,6 +18,7 @@ class AM_SOC(AM_IP): if self.adev.ip_ver[am.NBIO_HWIP] == (7,9,0): self.adev.regXCC_DOORBELL_FENCE.write(0x0) self.adev.regBIFC_GFX_INT_MONITOR_MASK.write(0x7ff) + self.adev.regBIFC_DOORBELL_ACCESS_EN_PF.write(0xfffff) else: self.adev.regRCC_DEV0_EPF2_STRAP2.update(strap_no_soft_reset_dev0_f2=0x0) self.adev.regRCC_DEV0_EPF0_RCC_DOORBELL_APER_EN.write(0x1) def set_clockgating_state(self): @@ -36,10 +37,10 @@ class AM_GMC(AM_IP): self.vmhubs = len(self.adev.regs_offset[am.MMHUB_HWIP]) # XGMI (for supported systems) - xgmi_phys_id = self.adev.regMMMC_VM_XGMI_LFB_CNTL.read_bitfields()['pf_lfb_region'] if hasattr(self.adev, 'regMMMC_VM_XGMI_LFB_CNTL') else 0 - xgmi_seg_sz = (self.adev.regMMMC_VM_XGMI_LFB_SIZE.read_bitfields()['pf_lfb_size'] << 24) if hasattr(self.adev, 'regMMMC_VM_XGMI_LFB_SIZE') else 0 + self.xgmi_phys_id = self.adev.regMMMC_VM_XGMI_LFB_CNTL.read_bitfields()['pf_lfb_region'] if hasattr(self.adev, 'regMMMC_VM_XGMI_LFB_CNTL') else 0 + self.xgmi_seg_sz = self.adev.regMMMC_VM_XGMI_LFB_SIZE.read_bitfields()['pf_lfb_size']<<24 if hasattr(self.adev, 'regMMMC_VM_XGMI_LFB_SIZE') else 0 - self.paddr_base = xgmi_phys_id * xgmi_seg_sz + self.paddr_base = self.xgmi_phys_id * self.xgmi_seg_sz self.fb_base = (self.adev.regMMMC_VM_FB_LOCATION_BASE.read() & 0xFFFFFF) << 24 self.fb_end = (self.adev.regMMMC_VM_FB_LOCATION_TOP.read() & 0xFFFFFF) << 24 @@ -51,6 +52,8 @@ class AM_GMC(AM_IP): self.vm_base = self.adev.mm.va_base self.vm_end = min(self.vm_base + (1 << self.adev.mm.va_bits) - 1, 0x7fffffffffff) + self.trans_futher = self.adev.ip_ver[am.GC_HWIP] < (10, 0, 0) + # GFX11/GFX12 has 44-bit address space self.address_space_mask = (1 << 44) - 1 @@ -69,32 +72,30 @@ class AM_GMC(AM_IP): # Can't issue TLB invalidation if the hub isn't initialized. if not self.hub_initted[ip]: return - if ip == "MM": wait_cond(lambda: self.adev.regMMVM_INVALIDATE_ENG17_SEM.read() & 0x1, value=1, msg="mm flush_tlb timeout") + for inst in range(self.adev.gmc.vmhubs if ip == "MM" else self.adev.gfx.xccs): + if ip == "MM": wait_cond(lambda: self.adev.regMMVM_INVALIDATE_ENG17_SEM.read(inst=inst) & 0x1, value=1, msg="mm flush_tlb timeout") - self.adev.reg(f"reg{ip}VM_INVALIDATE_ENG17_REQ").write(flush_type=flush_type, per_vmid_invalidate_req=(1 << vmid), invalidate_l2_ptes=1, - invalidate_l2_pde0=1, invalidate_l2_pde1=1, invalidate_l2_pde2=1, invalidate_l1_ptes=1, clear_protection_fault_status_addr=0) + self.adev.reg(f"reg{ip}VM_INVALIDATE_ENG17_REQ").write(flush_type=flush_type, per_vmid_invalidate_req=(1 << vmid), invalidate_l2_ptes=1, + invalidate_l2_pde0=1, invalidate_l2_pde1=1, invalidate_l2_pde2=1, invalidate_l1_ptes=1, clear_protection_fault_status_addr=0, inst=inst) - wait_cond(lambda: self.adev.reg(f"reg{ip}VM_INVALIDATE_ENG17_ACK").read() & (1 << vmid), value=(1 << vmid), msg="flush_tlb timeout") + wait_cond(lambda: self.adev.reg(f"reg{ip}VM_INVALIDATE_ENG17_ACK").read(inst=inst) & (1 << vmid), value=(1 << vmid), msg="flush_tlb timeout") - if ip == "MM": - self.adev.regMMVM_INVALIDATE_ENG17_SEM.write(0x0) - self.adev.regMMVM_L2_BANK_SELECT_RESERVED_CID2.update(reserved_cache_private_invalidation=1) + if ip == "MM": self.adev.regMMVM_INVALIDATE_ENG17_SEM.write(0x0, inst=inst) + if self.adev.ip_ver[am.GC_HWIP] >= (11,0,0) and ip == "MM": + self.adev.regMMVM_L2_BANK_SELECT_RESERVED_CID2.update(reserved_cache_private_invalidation=1, inst=inst) - # Read back the register to ensure the invalidation is complete - self.adev.regMMVM_L2_BANK_SELECT_RESERVED_CID2.read() + # Read back the register to ensure the invalidation is complete + self.adev.regMMVM_L2_BANK_SELECT_RESERVED_CID2.read(inst=inst) def enable_vm_addressing(self, page_table, ip:Literal["MM", "GC"], vmid, inst): self.adev.wreg_pair(f"reg{ip}VM_CONTEXT{vmid}_PAGE_TABLE_START_ADDR", "_LO32", "_HI32", self.vm_base >> 12, inst=inst) self.adev.wreg_pair(f"reg{ip}VM_CONTEXT{vmid}_PAGE_TABLE_END_ADDR", "_LO32", "_HI32", self.vm_end >> 12, inst=inst) self.adev.wreg_pair(f"reg{ip}VM_CONTEXT{vmid}_PAGE_TABLE_BASE_ADDR", "_LO32", "_HI32", self.adev.paddr2xgmi(page_table.paddr) | 1, inst=inst) - self.adev.reg(f"reg{ip}VM_CONTEXT{vmid}_CNTL").write(0x1800000, pde0_protection_fault_enable_interrupt=1, pde0_protection_fault_enable_default=1, - dummy_page_protection_fault_enable_interrupt=1, dummy_page_protection_fault_enable_default=1, - range_protection_fault_enable_interrupt=1, range_protection_fault_enable_default=1, - valid_protection_fault_enable_interrupt=1, valid_protection_fault_enable_default=1, - read_protection_fault_enable_interrupt=1, read_protection_fault_enable_default=1, - write_protection_fault_enable_interrupt=1, write_protection_fault_enable_default=1, - execute_protection_fault_enable_interrupt=1, execute_protection_fault_enable_default=1, - enable_context=1, page_table_depth=(3 - page_table.lv), inst=inst) + + fault_flags = {f'{x}_protection_fault_enable_interrupt':1 for x in ['pde0', 'dummy_page', 'range', 'valid', 'read', 'write', 'execute']} + en_def_flags = {f'{x}_protection_fault_enable_default':1 for x in ['pde0', 'dummy_page', 'range', 'valid', 'read', 'write', 'execute']} + self.adev.reg(f"reg{ip}VM_CONTEXT{vmid}_CNTL").write(0x1800000, **fault_flags, **en_def_flags, enable_context=1, + page_table_depth=((2 if self.trans_futher else 3) - page_table.lv), page_table_block_size=9 if self.trans_futher else 0, inst=inst) def init_hub(self, ip:Literal["MM", "GC"], inst_cnt:int): # Init system apertures @@ -112,15 +113,15 @@ class AM_GMC(AM_IP): # Init TLB and cache self.adev.reg(f"reg{ip}MC_VM_MX_L1_TLB_CNTL").update(enable_l1_tlb=1, system_access_mode=3, enable_advanced_driver_model=1, - system_aperture_unmapped_access=0, eco_bits=0, mtype=self.adev.soc.module.MTYPE_UC, inst=inst) + system_aperture_unmapped_access=0, mtype=self.adev.soc.module.MTYPE_UC, inst=inst) self.adev.reg(f"reg{ip}VM_L2_CNTL").update(enable_l2_cache=1, enable_l2_fragment_processing=0, enable_default_page_out_to_system_memory=1, l2_pde0_cache_tag_generation_mode=0, pde_fault_classification=0, context1_identity_access_mode=1, identity_mode_fragment_size=0, inst=inst) self.adev.reg(f"reg{ip}VM_L2_CNTL2").update(invalidate_all_l1_tlbs=1, invalidate_l2_cache=1, inst=inst) - self.adev.reg(f"reg{ip}VM_L2_CNTL3").write(bank_select=9, l2_cache_bigk_fragment_size=6,l2_cache_4k_associativity=1, - l2_cache_bigk_associativity=1, inst=inst) + self.adev.reg(f"reg{ip}VM_L2_CNTL3").write(l2_cache_4k_associativity=1, l2_cache_bigk_associativity=1, + bank_select=12 if self.trans_futher else 9, l2_cache_bigk_fragment_size=9 if self.trans_futher else 6, inst=inst) self.adev.reg(f"reg{ip}VM_L2_CNTL4").write(l2_cache_4k_partition_count=1, inst=inst) - self.adev.reg(f"reg{ip}VM_L2_CNTL5").write(walker_priority_client_id=0x1ff, inst=inst) + if self.adev.ip_ver[am.GC_HWIP] >= (10,0,0): self.adev.reg(f"reg{ip}VM_L2_CNTL5").write(walker_priority_client_id=0x1ff, inst=inst) self.enable_vm_addressing(self.adev.mm.root_page_table, ip, vmid=0, inst=inst) @@ -139,11 +140,18 @@ class AM_GMC(AM_IP): if self.adev.ip_ver[am.GC_HWIP] >= (12,0,0): extra |= am.AMDGPU_PTE_MTYPE_GFX12(0, self.adev.soc.module.MTYPE_UC if uncached else 0) extra |= (am.AMDGPU_PDE_PTE_GFX12 if not is_table and pte_lv != am.AMDGPU_VM_PTB else (am.AMDGPU_PTE_IS_PTE if not is_table else 0)) - else: + elif self.adev.ip_ver[am.GC_HWIP] >= (10,0,0): extra |= am.AMDGPU_PTE_MTYPE_NV10(0, self.adev.soc.module.MTYPE_UC if uncached else 0) extra |= (am.AMDGPU_PDE_PTE if not is_table and pte_lv != am.AMDGPU_VM_PTB else 0) + else: + extra |= am.AMDGPU_PTE_MTYPE_VG10(0, self.adev.soc.module.MTYPE_UC if uncached else 0) + if is_table and pte_lv == am.AMDGPU_VM_PDB1: extra |= am.AMDGPU_PDE_BFS(0x9) + if is_table and pte_lv == am.AMDGPU_VM_PDB0: extra |= am.AMDGPU_PTE_TF + if not is_table and pte_lv not in {am.AMDGPU_VM_PTB, am.AMDGPU_VM_PDB0}: extra |= am.AMDGPU_PDE_PTE return extra - def is_pte_huge_page(self, pte): return pte & (am.AMDGPU_PDE_PTE_GFX12 if self.adev.ip_ver[am.GC_HWIP] >= (12,0,0) else am.AMDGPU_PDE_PTE) + def is_pte_huge_page(self, pte_lv, pte): + if self.adev.ip_ver[am.GC_HWIP] < (10,0,0): return (pte & am.AMDGPU_PDE_PTE) if pte_lv != am.AMDGPU_VM_PDB0 else not (pte & am.AMDGPU_PTE_TF) + return pte & (am.AMDGPU_PDE_PTE_GFX12 if self.adev.ip_ver[am.GC_HWIP] >= (12,0,0) else am.AMDGPU_PDE_PTE) def on_interrupt(self): for ip in ["MM", "GC"]: @@ -168,6 +176,7 @@ class AM_SMU(AM_IP): def mode1_reset(self): if DEBUG >= 2: print(f"am {self.adev.devfmt}: mode1 reset") if self.adev.ip_ver[am.MP0_HWIP] >= (14,0,0): self._send_msg(__DEBUGSMC_MSG_Mode1Reset:=2, 0, debug=True) + elif self.adev.ip_ver[am.MP0_HWIP] == (13,0,6): self._send_msg(self.smu_mod.PPSMC_MSG_GfxDriverReset, 1) else: self._send_msg(self.smu_mod.PPSMC_MSG_Mode1Reset, 0) time.sleep(0.5) # 500ms @@ -177,6 +186,8 @@ class AM_SMU(AM_IP): def read_metrics(self): return self.read_table(self.smu_mod.SmuMetricsExternal_t, self.smu_mod.TABLE_SMU_METRICS) def set_clocks(self, level): + if self.adev.ip_ver[am.MP0_HWIP] == (13,0,6): return # TODO + if not hasattr(self, 'clcks'): self.clcks = {} for clck in [self.smu_mod.PPCLK_GFXCLK, self.smu_mod.PPCLK_UCLK, self.smu_mod.PPCLK_FCLK, self.smu_mod.PPCLK_SOCCLK]: @@ -211,18 +222,26 @@ class AM_GFX(AM_IP): # NOTE: Golden reg for gfx11. No values for this reg provided. The kernel just ors 0x20000000 to this reg. for xcc in range(self.xccs): self.adev.regTCP_CNTL.write(self.adev.regTCP_CNTL.read() | 0x20000000, inst=xcc) + for xcc in range(self.xccs): self.adev.regRLC_CNTL.write(0x1, inst=xcc) + for xcc in range(self.xccs): self.adev.regRLC_SRM_CNTL.update(srm_enable=1, auto_incr_addr=1, inst=xcc) + for xcc in range(self.xccs): self.adev.regRLC_SPM_MC_CNTL.write(0xf, inst=xcc) + if self.adev.ip_ver[am.NBIO_HWIP] != (7,9,0): self.adev.soc.doorbell_enable(port=0, awid=0x3, awaddr_31_28_value=0x3) self.adev.soc.doorbell_enable(port=3, awid=0x6, awaddr_31_28_value=0x3) for xcc in range(self.xccs): + if self.adev.ip_ver[am.GC_HWIP] == (9,4,3): + self.adev.regGB_ADDR_CONFIG.write(0x2a114042, inst=xcc) # Golden value for mi300 + self.adev.regTCP_UTCL1_CNTL2.update(spare=1, inst=xcc) + self.adev.regGRBM_CNTL.update(read_timeout=0xff, inst=xcc) for i in range(0, 16): self._grbm_select(vmid=i, inst=xcc) - self.adev.regSH_MEM_CONFIG.write(address_mode=self.adev.soc.module.SH_MEM_ADDRESS_MODE_64, - alignment_mode=self.adev.soc.module.SH_MEM_ALIGNMENT_MODE_UNALIGNED, initial_inst_prefetch=3, inst=xcc) + self.adev.regSH_MEM_CONFIG.write(**({'initial_inst_prefetch':3} if self.adev.ip_ver[am.GC_HWIP][0] >= 10 else {}), + address_mode=self.adev.soc.module.SH_MEM_ADDRESS_MODE_64, alignment_mode=self.adev.soc.module.SH_MEM_ALIGNMENT_MODE_UNALIGNED, inst=xcc) # Configure apertures: # LDS: 0x10000000'00000000 - 0x10000001'00000000 (4GB) @@ -231,27 +250,34 @@ class AM_GFX(AM_IP): self._grbm_select(inst=xcc) # Configure MEC doorbell range - self.adev.regCP_MEC_DOORBELL_RANGE_LOWER.write(0x0, inst=xcc) - self.adev.regCP_MEC_DOORBELL_RANGE_UPPER.write(0x450, inst=xcc) + self.adev.regCP_MEC_DOORBELL_RANGE_LOWER.write(0x100 * xcc, inst=xcc) + self.adev.regCP_MEC_DOORBELL_RANGE_UPPER.write(0x100 * xcc + 0xf8, inst=xcc) # Enable MEC - self.adev.regCP_MEC_RS64_CNTL.update(mec_invalidate_icache=0, mec_pipe0_reset=0, mec_pipe0_active=1, mec_halt=0, inst=xcc) + if self.adev.ip_ver[am.GC_HWIP] < (10,0,0): self.adev.regCP_MEC_CNTL.write(0x0, inst=xcc) + else: self.adev.regCP_MEC_RS64_CNTL.update(mec_invalidate_icache=0, mec_pipe0_reset=0, mec_pipe0_active=1, mec_halt=0, inst=xcc) # NOTE: Wait for MEC to be ready. The kernel does udelay here as well. time.sleep(0.05) + # Set 1 partition + if self.xccs > 1 and not self.adev.partial_boot: self.adev.psp._spatial_partition_cmd(1) + def fini_hw(self): for xcc in range(self.xccs): self._grbm_select(me=1, pipe=0, queue=0, inst=xcc) if self.adev.regCP_HQD_ACTIVE.read(inst=xcc) & 1: self.adev.regCP_HQD_DEQUEUE_REQUEST.write(0x2, inst=xcc) # 1 - DRAIN_PIPE; 2 - RESET_WAVES self._grbm_select(inst=xcc) - for xcc in range(self.xccs): self.adev.regGCVM_CONTEXT0_CNTL.write(0, inst=xcc) + + # TODO: fix warm boot on mi300 + if self.adev.ip_ver[am.GC_HWIP] != (9,4,3): + for xcc in range(self.xccs): self.adev.regGCVM_CONTEXT0_CNTL.write(0, inst=xcc) def setup_ring(self, ring_addr:int, ring_size:int, rptr_addr:int, wptr_addr:int, eop_addr:int, eop_size:int, doorbell:int, pipe:int, queue:int, aql:bool): for xcc in range(self.xccs if aql else 1): mqd = self.adev.mm.valloc(0x1000, uncached=True, contiguous=True) - struct_t = getattr(am, f"struct_v{self.adev.ip_ver[am.GC_HWIP][0]}_compute_mqd") + struct_t = getattr(am, f"struct_v{self.adev.ip_ver[am.GC_HWIP][0]}{'_compute' if self.adev.ip_ver[am.GC_HWIP][0] >= 10 else ''}_mqd") mqd_struct = struct_t(header=0xC0310800, cp_mqd_base_addr_lo=lo32(mqd.va_addr), cp_mqd_base_addr_hi=hi32(mqd.va_addr), cp_hqd_persistent_state=self.adev.regCP_HQD_PERSISTENT_STATE.encode(preload_size=0x55, preload_req=1), cp_hqd_pipe_priority=0x2, cp_hqd_queue_priority=0xf, cp_hqd_quantum=0x111, @@ -260,12 +286,13 @@ class AM_GFX(AM_IP): cp_hqd_pq_wptr_poll_addr_lo=lo32(wptr_addr), cp_hqd_pq_wptr_poll_addr_hi=hi32(wptr_addr), cp_hqd_pq_doorbell_control=self.adev.regCP_HQD_PQ_DOORBELL_CONTROL.encode(doorbell_offset=doorbell*2, doorbell_en=1), cp_hqd_pq_control=self.adev.regCP_HQD_PQ_CONTROL.encode(rptr_block_size=5, unord_dispatch=0, queue_size=(ring_size//4).bit_length()-2, - **({'queue_full_en':1, 'slot_based_wptr':2, 'no_update_rptr':1} if aql else {})), + **({'queue_full_en':1, 'slot_based_wptr':2, 'no_update_rptr':xcc==0} if aql else {})), cp_hqd_ib_control=self.adev.regCP_HQD_IB_CONTROL.encode(min_ib_avail_size=0x3), cp_hqd_hq_status0=0x20004000, cp_mqd_control=self.adev.regCP_MQD_CONTROL.encode(priv_state=1), cp_hqd_vmid=0, cp_hqd_aql_control=int(aql), cp_hqd_eop_base_addr_lo=lo32(eop_addr>>8), cp_hqd_eop_base_addr_hi=hi32(eop_addr>>8), - cp_hqd_eop_control=self.adev.regCP_HQD_EOP_CONTROL.encode(eop_size=(eop_size//4).bit_length()-2)) - for se in range(8): setattr(mqd_struct, f'compute_static_thread_mgmt_se{se}', 0xffffffff) + cp_hqd_eop_control=self.adev.regCP_HQD_EOP_CONTROL.encode(eop_size=(eop_size//4).bit_length()-2), + **({'compute_tg_chunk_size':1, 'compute_current_logic_xcc_id':xcc} if aql and self.xccs > 1 else {})) + for se in range(8 if self.adev.ip_ver[am.GC_HWIP][0] >= 10 else 4): setattr(mqd_struct, f'compute_static_thread_mgmt_se{se}', 0xffffffff) # Copy mqd into memory self.adev.vram.view(mqd.paddrs[0][0], ctypes.sizeof(mqd_struct))[:] = memoryview(mqd_struct).cast('B') @@ -293,11 +320,13 @@ class AM_GFX(AM_IP): self.adev.regCP_RB_WPTR_POLL_CNTL.update(poll_frequency=0x100, idle_poll_count=0x90, inst=xcc) self.adev.regCP_INT_CNTL.update(cntx_busy_int_enable=1, cntx_empty_int_enable=1, cmp_busy_int_enable=1, gfx_idle_int_enable=1, inst=xcc) - self.adev.regSDMA0_RLC_CGCG_CTRL.update(cgcg_int_enable=1, inst=xcc) - self.adev.regSDMA1_RLC_CGCG_CTRL.update(cgcg_int_enable=1, inst=xcc) + if self.adev.ip_ver[am.GC_HWIP] >= (10,0,0): + self.adev.regSDMA0_RLC_CGCG_CTRL.update(cgcg_int_enable=1, inst=xcc) + self.adev.regSDMA1_RLC_CGCG_CTRL.update(cgcg_int_enable=1, inst=xcc) - self.adev.regRLC_CGTT_MGCG_OVERRIDE.update(perfmon_clock_state=1, gfxip_fgcg_override=0, gfxip_repeater_fgcg_override=0, - grbm_cgtt_sclk_override=0, rlc_cgtt_sclk_override=0, gfxip_mgcg_override=0, gfxip_cgls_override=0, gfxip_cgcg_override=0, inst=xcc) + feats_gfx11 = {'perfmon_clock_state':1, 'gfxip_repeater_fgcg_override':0} if self.adev.ip_ver[am.GC_HWIP] >= (11,0,0) else {} + self.adev.regRLC_CGTT_MGCG_OVERRIDE.update(**feats_gfx11, gfxip_fgcg_override=0, grbm_cgtt_sclk_override=0, rlc_cgtt_sclk_override=0, + gfxip_mgcg_override=0, gfxip_cgls_override=0, gfxip_cgcg_override=0, inst=xcc) self.adev.regRLC_SAFE_MODE.write(message=0, cmd=1, inst=xcc) @@ -314,10 +343,13 @@ class AM_GFX(AM_IP): self.adev.reg(f"regCP_{cntl_reg}_CNTL").update(**{f"{eng_name.lower()}_pipe{pipe}_reset": 0 for pipe in range(pipe_cnt)}, inst=xcc) for xcc in range(self.adev.gfx.xccs): + if self.adev.ip_ver[am.GC_HWIP] < (10,0,0): + self.adev.regCP_MEC_CNTL.update(mec_invalidate_icache=1, mec_me1_pipe0_reset=1, mec_me2_pipe0_reset=1, mec_me1_halt=1,mec_me2_halt=1,inst=xcc) if self.adev.ip_ver[am.GC_HWIP] >= (12,0,0): _config_helper(eng_name="PFP", cntl_reg="ME", eng_reg="PFP", pipe_cnt=1, xcc=xcc) _config_helper(eng_name="ME", cntl_reg="ME", eng_reg="ME", pipe_cnt=1, xcc=xcc) - _config_helper(eng_name="MEC", cntl_reg="MEC_RS64", eng_reg="MEC_RS64", pipe_cnt=1, me=1, xcc=xcc) + if self.adev.ip_ver[am.GC_HWIP] >= (10,0,0): + _config_helper(eng_name="MEC", cntl_reg="MEC_RS64", eng_reg="MEC_RS64", pipe_cnt=1, me=1, xcc=xcc) class AM_IH(AM_IP): def init_sw(self): @@ -339,9 +371,10 @@ class AM_IH(AM_IP): self.adev.reg(f"regIH_DOORBELL_RPTR{suf}").write(offset=(am.AMDGPU_NAVI10_DOORBELL_IH + ring_id) * 2, enable=1) - self.adev.regIH_STORM_CLIENT_LIST_CNTL.update(client18_is_storm_client=1) - self.adev.regIH_INT_FLOOD_CNTL.update(flood_cntl_enable=1) - self.adev.regIH_MSI_STORM_CTRL.update(delay=3) + if self.adev.ip_ver[am.OSSSYS_HWIP] != (4,4,2): + self.adev.regIH_STORM_CLIENT_LIST_CNTL.update(client18_is_storm_client=1) + self.adev.regIH_INT_FLOOD_CNTL.update(flood_cntl_enable=1) + self.adev.regIH_MSI_STORM_CTRL.update(delay=3) # toggle interrupts for _, rwptr_vm, suf, ring_id in self.rings: @@ -363,14 +396,19 @@ class AM_IH(AM_IP): class AM_SDMA(AM_IP): def init_sw(self): self.sdma_name = "F32" if self.adev.ip_ver[am.SDMA0_HWIP] < (7,0,0) else "MCU" def init_hw(self): - for pipe in range(2): - self.adev.reg(f"regSDMA{pipe}_WATCHDOG_CNTL").update(queue_hang_count=100) # 10s, 100ms per unit - self.adev.reg(f"regSDMA{pipe}_UTCL1_CNTL").update(resp_mode=3, redo_delay=9) + for pipe_id in range(1): + pipe = "" if self.adev.ip_ver[am.SDMA0_HWIP] < (5,0,0) else str(pipe_id) - # rd=noa, wr=bypass - self.adev.reg(f"regSDMA{pipe}_UTCL1_PAGE").update(rd_l2_policy=0x2, wr_l2_policy=0x3, **({'llc_noalloc':1} if self.sdma_name == "F32" else {})) - self.adev.reg(f"regSDMA{pipe}_{self.sdma_name}_CNTL").update(halt=0, **{f"{'th1_' if self.sdma_name == 'F32' else ''}reset":0}) - self.adev.reg(f"regSDMA{pipe}_CNTL").update(ctxempty_int_enable=1, trap_enable=1) + if self.adev.ip_ver[am.SDMA0_HWIP] >= (6,0,0): + self.adev.reg(f"regSDMA{pipe}_WATCHDOG_CNTL").update(queue_hang_count=100) # 10s, 100ms per unit + self.adev.reg(f"regSDMA{pipe}_UTCL1_CNTL").update(resp_mode=3, redo_delay=9) + + # rd=noa, wr=bypass + self.adev.reg(f"regSDMA{pipe}_UTCL1_PAGE").update(rd_l2_policy=2, wr_l2_policy=3, **({'llc_noalloc':1} if self.sdma_name == "F32" else {})) + self.adev.reg(f"regSDMA{pipe}_{self.sdma_name}_CNTL").update(halt=0, **{f"{'th1_' if self.sdma_name == 'F32' else ''}reset":0}) + + self.adev.reg(f"regSDMA{pipe}_CNTL").update(ctxempty_int_enable=1, trap_enable=1, + **({'utc_l1_enable':1} if self.adev.ip_ver[am.SDMA0_HWIP] <= (5,2,0) else {})) if self.adev.ip_ver[am.NBIO_HWIP] == (7,9,0): self.adev.regDOORBELL0_CTRL_ENTRY_1.write(bif_doorbell1_range_offset_entry=am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0*2, @@ -379,26 +417,31 @@ class AM_SDMA(AM_IP): else: self.adev.soc.doorbell_enable(port=2, awid=0xe, awaddr_31_28_value=0x3, offset=am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0*2, size=4) def fini_hw(self): - self.adev.regSDMA0_QUEUE0_RB_CNTL.update(rb_enable=0) - self.adev.regSDMA0_QUEUE0_IB_CNTL.update(ib_enable=0) - self.adev.regGRBM_SOFT_RESET.write(soft_reset_sdma0=1) - time.sleep(0.01) - self.adev.regGRBM_SOFT_RESET.write(0x0) + reg, inst = ("regSDMA_GFX", 0) if self.adev.ip_ver[am.SDMA0_HWIP] == (4,4,2) else ("regSDMA0_QUEUE0", 0) + + self.adev.reg(f"{reg}_RB_CNTL").update(rb_enable=0, inst=inst) + self.adev.reg(f"{reg}_IB_CNTL").update(ib_enable=0, inst=inst) + if self.adev.ip_ver[am.SDMA0_HWIP] >= (6,0,0): + self.adev.regGRBM_SOFT_RESET.write(soft_reset_sdma0=1) + time.sleep(0.01) + self.adev.regGRBM_SOFT_RESET.write(0x0) def setup_ring(self, ring_addr:int, ring_size:int, rptr_addr:int, wptr_addr:int, doorbell:int, pipe:int, queue:int): # Setup the ring - self.adev.reg(f"regSDMA{pipe}_QUEUE{queue}_MINOR_PTR_UPDATE").write(0x1) - self.adev.wreg_pair(f"regSDMA{pipe}_QUEUE{queue}_RB_RPTR", "", "_HI", 0) - self.adev.wreg_pair(f"regSDMA{pipe}_QUEUE{queue}_RB_WPTR", "", "_HI", 0) - self.adev.wreg_pair(f"regSDMA{pipe}_QUEUE{queue}_RB_BASE", "", "_HI", ring_addr >> 8) - self.adev.wreg_pair(f"regSDMA{pipe}_QUEUE{queue}_RB_RPTR_ADDR", "_LO", "_HI", rptr_addr) - self.adev.wreg_pair(f"regSDMA{pipe}_QUEUE{queue}_RB_WPTR_POLL_ADDR", "_LO", "_HI", wptr_addr) - self.adev.reg(f"regSDMA{pipe}_QUEUE{queue}_DOORBELL_OFFSET").update(offset=doorbell * 2) - self.adev.reg(f"regSDMA{pipe}_QUEUE{queue}_DOORBELL").update(enable=1) - self.adev.reg(f"regSDMA{pipe}_QUEUE{queue}_MINOR_PTR_UPDATE").write(0x0) - self.adev.reg(f"regSDMA{pipe}_QUEUE{queue}_RB_CNTL").write(rb_vmid=0, rptr_writeback_enable=1, rptr_writeback_timer=4, - **{f'{self.sdma_name.lower()}_wptr_poll_enable':1}, rb_size=(ring_size//4).bit_length()-1, rb_enable=1, rb_priv=1) - self.adev.reg(f"regSDMA{pipe}_QUEUE{queue}_IB_CNTL").update(ib_enable=1) + reg, inst = ("regSDMA_GFX", pipe*4+queue) if self.adev.ip_ver[am.SDMA0_HWIP] == (4,4,2) else (f"regSDMA{pipe}_QUEUE{queue}", 0) + + self.adev.reg(f"{reg}_MINOR_PTR_UPDATE").write(0x1, inst=inst) + self.adev.wreg_pair(f"{reg}_RB_RPTR", "", "_HI", 0, inst=inst) + self.adev.wreg_pair(f"{reg}_RB_WPTR", "", "_HI", 0, inst=inst) + self.adev.wreg_pair(f"{reg}_RB_BASE", "", "_HI", ring_addr >> 8, inst=inst) + self.adev.wreg_pair(f"{reg}_RB_RPTR_ADDR", "_LO", "_HI", rptr_addr, inst=inst) + self.adev.wreg_pair(f"{reg}_RB_WPTR_POLL_ADDR", "_LO", "_HI", wptr_addr, inst=inst) + self.adev.reg(f"{reg}_DOORBELL_OFFSET").update(offset=doorbell * 2, inst=inst) + self.adev.reg(f"{reg}_DOORBELL").update(enable=1, inst=inst) + self.adev.reg(f"{reg}_MINOR_PTR_UPDATE").write(0x0, inst=inst) + self.adev.reg(f"{reg}_RB_CNTL").write(**({f'{self.sdma_name.lower()}_wptr_poll_enable':1} if self.adev.ip_ver[am.SDMA0_HWIP] != (4,4,2) else {}), + rb_vmid=0, rptr_writeback_enable=1, rptr_writeback_timer=4, rb_enable=1, rb_priv=1, rb_size=(ring_size//4).bit_length()-1, inst=inst) + self.adev.reg(f"{reg}_IB_CNTL").update(ib_enable=1, inst=inst) class AM_PSP(AM_IP): def init_sw(self): @@ -435,14 +478,16 @@ class AM_PSP(AM_IP): while not self.is_sos_alive(): time.sleep(0.01) self._ring_create() - self._tmr_init() + if am.PSP_FW_TYPE_PSP_TOC in self.adev.fw.sos_fw: self._tmr_init() # SMU fw should be loaded before TMR. if hasattr(self.adev.fw, 'smu_psp_desc'): self._load_ip_fw_cmd(*self.adev.fw.smu_psp_desc) if not self.boot_time_tmr or not self.autoload_tmr: self._tmr_load_cmd() for psp_desc in self.adev.fw.descs: self._load_ip_fw_cmd(*psp_desc) - self._rlc_autoload_cmd() + + if self.adev.ip_ver[am.GC_HWIP] >= (11,0,0): self._rlc_autoload_cmd() + else: self._load_ip_fw_cmd([am.GFX_FW_TYPE_REG_LIST], self.adev.fw.sos_fw[am.PSP_FW_TYPE_PSP_RL]) def is_sos_alive(self): return self.adev.reg(f"{self.reg_pref}_81").read() != 0x0 @@ -537,4 +582,9 @@ class AM_PSP(AM_IP): cmd.cmd.cmd_load_toc.toc_size = toc_size return self._ring_submit(cmd) + def _spatial_partition_cmd(self, mode): + cmd = am.struct_psp_gfx_cmd_resp(cmd_id=am.GFX_CMD_ID_SRIOV_SPATIAL_PART) + cmd.cmd.cmd_spatial_part.mode = mode + return self._ring_submit(cmd) + def _rlc_autoload_cmd(self): return self._ring_submit(am.struct_psp_gfx_cmd_resp(cmd_id=am.GFX_CMD_ID_AUTOLOAD_RLC)) diff --git a/tinygrad/runtime/support/amd.py b/tinygrad/runtime/support/amd.py index 8a478db8c5..67b966b792 100644 --- a/tinygrad/runtime/support/amd.py +++ b/tinygrad/runtime/support/amd.py @@ -79,7 +79,12 @@ def import_pmc(ip) -> dict[str, tuple[str, int]]: def import_asic_regs(prefix:str, version:tuple[int, ...], cls=AMDReg) -> dict[str, AMDReg]: def _split_name(name): return name[:(pos:=next((i for i,c in enumerate(name) if c.isupper()), len(name)))], name[pos:] def _extract_regs(txt): - return {m.group(1): int(m.group(2), 0) for line in txt.splitlines() if (m:=re.match(r'#define\s+(\S+)\s+(0x[\da-fA-F]+|\d+)', line))} + x = {} + for k,v in {m.group(1): int(m.group(2), 0) for line in txt.splitlines() if (m:=re.match(r'#define\s+(\S+)\s+(0x[\da-fA-F]+|\d+)', line))}.items(): + if k.startswith('VM_') or k.startswith('MC_'): x[prefix.upper()[:2]+k] = v + elif k.startswith('regVM_') or k.startswith('regMC_'): x["reg"+prefix.upper()[:2]+k[3:]] = v + else: x[k] = v + return x def _download_file(ver, suff) -> str: dir_prefix = {"osssys": "oss"}.get(prefix, prefix) fetch_name, file_name = f"{prefix}_{'_'.join(map(str, ver))}_{suff}.h", f"{prefix}_{'_'.join(map(str, version))}_{suff}.h" @@ -98,6 +103,7 @@ def import_asic_regs(prefix:str, version:tuple[int, ...], cls=AMDReg) -> dict[st for field_name, field_mask in sh_masks.items(): if not ('__' in field_name and field_name.endswith('_MASK')): continue reg_name, reg_field_name = field_name[:-len('_MASK')].split('__') + if reg_name.startswith('MC_') or reg_name.startswith('VM_'): reg_name = f"{prefix.upper()[:2]}{reg_name}" fields[reg_name][reg_field_name.lower()] = ((field_mask & -field_mask).bit_length()-1, field_mask.bit_length()-1) # NOTE: Some registers like regGFX_IMU_FUSESTRAP in gc_11_0_0 are missing base idx, just skip them