remove smem_prefix_for_cast for amd (#9651)

This commit is contained in:
Ignacio Sica
2025-03-31 23:03:35 +08:00
committed by GitHub
parent f7cb2e8da3
commit f277f407f2

View File

@@ -442,6 +442,7 @@ class AMDRenderer(CStyleLanguage):
Ops.EXP2: lambda x,dtype: f"__ocml_exp2_f{ {dtypes.half:16, dtypes.double:64}.get(dtype, 32)}({x})",
Ops.SQRT: lambda x,dtype: f"__ocml_sqrt_f{ {dtypes.half:16, dtypes.double:64}.get(dtype, 32)}({x})" }
# Attribute text this renderer stores as its shared-memory prefix.
smem_prefix = "__attribute__((shared))"
# Turned off for AMD.
# NOTE(review): presumably decides whether smem_prefix is also emitted in casts - confirm in the base class.
smem_prefix_for_cast: bool = False
# Barrier snippet as one string: a release fence, then s_barrier, then an acquire fence;
# both fences name the "workgroup" scope. Adjacent literals concatenate at compile time.
barrier = (
  '__builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");'
  '__builtin_amdgcn_s_barrier();'
  '__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");'
)
# NOTE(review): looks like the helper name used to build float4 values in generated code - confirm against the base class.
float4 = "make_float4"