amd: correct handling of relocations (#11863)

* amd: correct handling of relocations

* ops

* add
This commit is contained in:
nimlgen
2025-08-27 01:26:45 +03:00
committed by GitHub
parent d261458ecd
commit 62df6c39af

View File

@@ -464,14 +464,14 @@ class AMDProgram(HCQProgram):
# TODO: this API needs the type signature of the function and global_size/local_size
self.dev, self.name, self.lib = dev, name, lib
image, sections, _ = elf_loader(self.lib)
image, sections, relocs = elf_loader(self.lib)
rodata_entry = next((sh.header.sh_addr for sh in sections if sh.name == ".rodata"), -1)
text_entry = next((sh.header.sh_addr for sh in sections if sh.name == ".text"), -1)
assert rodata_entry >= 0 and text_entry >= 0, ".text or .rodata section not found"
assert rodata_entry >= 0, ".rodata section not found"
# Relo for kernel_code_entry_byte_offset for AMD_LLVM. Comgr doesn't need that, but keep shared code path.
image[rodata_entry+0x10:rodata_entry+0x10+8] = struct.pack('<q', text_entry - rodata_entry)
for apply_image_offset, rel_sym_offset, typ, addent in relocs:
if typ == 5: image[apply_image_offset:apply_image_offset+8] = struct.pack('<q', rel_sym_offset - apply_image_offset + addent) # R_AMDGPU_REL64
else: raise RuntimeError(f"unknown AMD reloc {typ}")
self.lib_gpu = self.dev.allocator.alloc(round_up(image.nbytes, 0x1000), buf_spec:=BufferSpec(cpu_access=True, nolru=True))
self.dev.allocator._copyin(self.lib_gpu, image)