mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-02-09 14:15:22 -05:00
amd: correct handling of relocations (#11863)
* amd: correct handling of relocations
* ops
* add
This commit is contained in:
@@ -464,14 +464,14 @@ class AMDProgram(HCQProgram):
|
||||
# TODO: this API needs the type signature of the function and global_size/local_size
|
||||
self.dev, self.name, self.lib = dev, name, lib
|
||||
|
||||
image, sections, _ = elf_loader(self.lib)
|
||||
image, sections, relocs = elf_loader(self.lib)
|
||||
|
||||
rodata_entry = next((sh.header.sh_addr for sh in sections if sh.name == ".rodata"), -1)
|
||||
text_entry = next((sh.header.sh_addr for sh in sections if sh.name == ".text"), -1)
|
||||
assert rodata_entry >= 0 and text_entry >= 0, ".text or .rodata section not found"
|
||||
assert rodata_entry >= 0, ".rodata section not found"
|
||||
|
||||
# Relocation of kernel_code_entry_byte_offset for AMD_LLVM. Comgr doesn't need it, but keep a shared code path.
|
||||
image[rodata_entry+0x10:rodata_entry+0x10+8] = struct.pack('<q', text_entry - rodata_entry)
|
||||
for apply_image_offset, rel_sym_offset, typ, addent in relocs:
|
||||
if typ == 5: image[apply_image_offset:apply_image_offset+8] = struct.pack('<q', rel_sym_offset - apply_image_offset + addent) # R_AMDGPU_REL64
|
||||
else: raise RuntimeError(f"unknown AMD reloc {typ}")
|
||||
|
||||
self.lib_gpu = self.dev.allocator.alloc(round_up(image.nbytes, 0x1000), buf_spec:=BufferSpec(cpu_access=True, nolru=True))
|
||||
self.dev.allocator._copyin(self.lib_gpu, image)
|
||||
|
||||
Reference in New Issue
Block a user