amd: correct handling of relocations (#11863)

* amd: correct handling of relocations
* ops
* add
2026-02-09 14:15:22 -05:00 · 2025-08-27 01:26:45 +03:00
parent d261458ecd
commit 62df6c39af
1 changed files with 5 additions and 5 deletions
--- a/tinygrad/runtime/ops_amd.py
+++ b/tinygrad/runtime/ops_amd.py
@@ -464,14 +464,14 @@ class AMDProgram(HCQProgram):
    # TODO; this API needs the type signature of the function and global_size/local_size
    self.dev, self.name, self.lib = dev, name, lib

-    image, sections, _ = elf_loader(self.lib)
+    image, sections, relocs = elf_loader(self.lib)

    rodata_entry = next((sh.header.sh_addr for sh in sections if sh.name == ".rodata"), -1)
-    text_entry = next((sh.header.sh_addr for sh in sections if sh.name == ".text"), -1)
-    assert rodata_entry >= 0 and text_entry >= 0, ".text or .rodata section not found"
+    assert rodata_entry >= 0, ".rodata section not found"

-    # Relo for kernel_code_entry_byte_offset for AMD_LLVM. Comgr doesn't need that, but keep shared code path.
-    image[rodata_entry+0x10:rodata_entry+0x10+8] = struct.pack('<q', text_entry - rodata_entry)
+    for apply_image_offset, rel_sym_offset, typ, addent in relocs:
+      if typ == 5: image[apply_image_offset:apply_image_offset+8] = struct.pack('<q', rel_sym_offset - apply_image_offset + addent) # R_AMDGPU_REL64
+      else: raise RuntimeError(f"unknown AMD reloc {typ}")

    self.lib_gpu = self.dev.allocator.alloc(round_up(image.nbytes, 0x1000), buf_spec:=BufferSpec(cpu_access=True, nolru=True))
    self.dev.allocator._copyin(self.lib_gpu, image)