diff --git a/extra/nv_gpu_driver/vbios.h b/extra/nv_gpu_driver/vbios.h index 6721caf7e4..18dfec63d8 100644 --- a/extra/nv_gpu_driver/vbios.h +++ b/extra/nv_gpu_driver/vbios.h @@ -27,6 +27,20 @@ #include "gpu/vbios/bios_types.h" #define FALCON_APPLICATION_INTERFACE_ENTRY_ID_DMEMMAPPER (0x4) +typedef struct +{ + NvU8 version; + NvU8 headerSize; + NvU8 entrySize; + NvU8 entryCount; +} __attribute__((packed)) FALCON_APPLICATION_INTERFACE_HEADER_V1; + +typedef struct +{ + NvU32 id; + NvU32 dmemOffset; +} __attribute__((packed)) FALCON_APPLICATION_INTERFACE_ENTRY_V1; + typedef struct { NvU32 signature; diff --git a/tinygrad/runtime/autogen/nv/nv.py b/tinygrad/runtime/autogen/nv/nv.py index d7695c7582..992c72556b 100644 --- a/tinygrad/runtime/autogen/nv/nv.py +++ b/tinygrad/runtime/autogen/nv/nv.py @@ -7157,6 +7157,28 @@ BCRT30_RSA3K_SIG_SIZE = 384 # macro FWSECLIC_READ_VBIOS_STRUCT_FLAGS = (2) # macro FWSECLIC_FRTS_REGION_MEDIA_FB = (2) # macro FWSECLIC_FRTS_REGION_SIZE_1MB_IN_4K = (0x100) # macro +class struct_c__SA_FALCON_APPLICATION_INTERFACE_HEADER_V1(Structure): + pass + +struct_c__SA_FALCON_APPLICATION_INTERFACE_HEADER_V1._pack_ = 1 # source:True +struct_c__SA_FALCON_APPLICATION_INTERFACE_HEADER_V1._fields_ = [ + ('version', ctypes.c_ubyte), + ('headerSize', ctypes.c_ubyte), + ('entrySize', ctypes.c_ubyte), + ('entryCount', ctypes.c_ubyte), +] + +FALCON_APPLICATION_INTERFACE_HEADER_V1 = struct_c__SA_FALCON_APPLICATION_INTERFACE_HEADER_V1 +class struct_c__SA_FALCON_APPLICATION_INTERFACE_ENTRY_V1(Structure): + pass + +struct_c__SA_FALCON_APPLICATION_INTERFACE_ENTRY_V1._pack_ = 1 # source:True +struct_c__SA_FALCON_APPLICATION_INTERFACE_ENTRY_V1._fields_ = [ + ('id', ctypes.c_uint32), + ('dmemOffset', ctypes.c_uint32), +] + +FALCON_APPLICATION_INTERFACE_ENTRY_V1 = struct_c__SA_FALCON_APPLICATION_INTERFACE_ENTRY_V1 class struct_c__SA_FALCON_APPLICATION_INTERFACE_DMEM_MAPPER_V3(Structure): pass @@ -7345,6 +7367,8 @@ __all__ = \ 'FALCON_APPLICATION_INTERFACE_DMEM_MAPPER_V3_CMD_FRTS', 'FALCON_APPLICATION_INTERFACE_DMEM_MAPPER_V3_CMD_SB', 'FALCON_APPLICATION_INTERFACE_ENTRY_ID_DMEMMAPPER', + 'FALCON_APPLICATION_INTERFACE_ENTRY_V1', + 'FALCON_APPLICATION_INTERFACE_HEADER_V1', 'FALCON_UCODE_DESC_HEADER', 'FALCON_UCODE_DESC_HEADER_FORMAT', 'FALCON_UCODE_DESC_V3', 'FALCON_UCODE_DESC_V3_44_FMT', 'FALCON_UCODE_DESC_V3_SIZE_44', @@ -8326,6 +8350,8 @@ __all__ = \ 'struct_c__SA_BIT_DATA_FALCON_DATA_V2', 'struct_c__SA_BUSINFO', 'struct_c__SA_EcidManufacturingInfo', 'struct_c__SA_FALCON_APPLICATION_INTERFACE_DMEM_MAPPER_V3', + 'struct_c__SA_FALCON_APPLICATION_INTERFACE_ENTRY_V1', + 'struct_c__SA_FALCON_APPLICATION_INTERFACE_HEADER_V1', 'struct_c__SA_FALCON_UCODE_DESC_HEADER', 'struct_c__SA_FALCON_UCODE_DESC_V3', 'struct_c__SA_FALCON_UCODE_TABLE_ENTRY_V1', diff --git a/tinygrad/runtime/support/nv/ip.py b/tinygrad/runtime/support/nv/ip.py index 751160d2b3..b53d5a18c1 100644 --- a/tinygrad/runtime/support/nv/ip.py +++ b/tinygrad/runtime/support/nv/ip.py @@ -121,8 +121,13 @@ class NV_FLCN(NV_IP): def __patch(cmd_id, cmd): patched_image = bytearray(image) + hdr = nv.FALCON_APPLICATION_INTERFACE_HEADER_V1.from_buffer_copy(image[(app_hdr_off:=self.desc_v3.IMEMLoadSize+self.desc_v3.InterfaceOffset):]) + ents = (nv.FALCON_APPLICATION_INTERFACE_ENTRY_V1 * hdr.entryCount).from_buffer_copy(image[app_hdr_off + ctypes.sizeof(hdr):]) + for i in range(hdr.entryCount): + if ents[i].id == nv.FALCON_APPLICATION_INTERFACE_ENTRY_ID_DMEMMAPPER: dmem_offset = ents[i].dmemOffset + # Patch image - dmem = nv.FALCON_APPLICATION_INTERFACE_DMEM_MAPPER_V3.from_buffer_copy(image[(dmem_mapper_offset:=self.desc_v3.IMEMLoadSize+0xae0):]) + dmem = nv.FALCON_APPLICATION_INTERFACE_DMEM_MAPPER_V3.from_buffer_copy(image[(dmem_mapper_offset:=self.desc_v3.IMEMLoadSize+dmem_offset):]) dmem.init_cmd = cmd_id patched_image[dmem_mapper_offset : dmem_mapper_offset+len(bytes(dmem))] = bytes(dmem) patched_image[(cmd_off:=self.desc_v3.IMEMLoadSize+dmem.cmd_in_buffer_offset) : cmd_off+len(cmd)] = cmd @@ -164,7 +169,7 @@ class NV_FLCN(NV_IP): mbx = self.execute_hs(self.sec2, self.booter_image_sysmem[0], code_off=self.booter_code_off, data_off=self.booter_data_off, imemPa=0x0, imemVa=self.booter_code_off, imemSz=self.booter_code_sz, dmemPa=0x0, dmemVa=0x0, dmemSz=self.booter_data_sz, pkc_off=0x10, engid=1, ucodeid=3, mailbox=self.nvdev.gsp.wpr_meta_sysmem) - assert mbx[0] == 0x0, f"Booster failed to execute, mailbox is {mbx[0]:08x}, {mbx[1]:08x}" + assert mbx[0] == 0x0, f"Booter failed to execute, mailbox is {mbx[0]:08x}, {mbx[1]:08x}" self.nvdev.NV_PFALCON_FALCON_OS.with_base(self.falcon).write(0x0) assert self.nvdev.NV_PRISCV_RISCV_CPUCTL.with_base(self.falcon).read_bitfields()['active_stat'] == 1, "GSP Core is not active"