share hip and cuda (#2972)

This commit is contained in:
qazal
2024-01-02 16:34:24 +02:00
committed by GitHub
parent bd6a0c90a0
commit 01cdd6596f

View File

@@ -264,17 +264,13 @@ class CUDALanguage(CStyleLanguage):
"""
CUDARenderer = functools.partial(uops_to_cstyle, CUDALanguage())
class HIPLanguage(CStyleLanguage):
class HIPLanguage(CUDALanguage):
kernel_prefix = "#include <hip/hip_common.h>\n#define INFINITY (__builtin_inff())\n#define NAN (__builtin_nanf(\"\"))" + """
typedef float float8 __attribute__((ext_vector_type(8)));
__device__ float8 make_float8(float x, float y, float z, float w, float a, float b, float c, float d) { return {x, y, z, w, a, b, c, d}; }
extern "C" __global__
"""
launch_bounds = True
smem_prefix = "__shared__ "
smem_prefix_for_cast=False
barrier = "__syncthreads();"
float4 = "make_float4"
uses_ptr_arithmetic=True
half_prekernel = "#include <hip/hip_fp16.h>\n" + """
typedef union { struct { half x, y, z, w; } __attribute__((aligned(8))); half data[4]; } half4;
@@ -286,10 +282,6 @@ __device__ half16 make_half16(half x, half y, half z, half w, half a, half b, ha
half e, half f, half g, half h, half i, half j, half k, half l) {
return {x, y, z, w, a, b, c, d, e, f, g, h, i, j, k, l}; }
"""
gid = [f'blockIdx.{chr(120+i)}' for i in range(3)]
lid = [f'threadIdx.{chr(120+i)}' for i in range(3)]
xid = [f'(blockIdx.{chr(120+i)}*blockDim.{chr(120+i)}+threadIdx.{chr(120+i)})' for i in range(3)]
code_for_op = {**CStyleLanguage().code_for_op, **code_for_op_half}
HIPRenderer = functools.partial(uops_to_cstyle, HIPLanguage())
# TODO: how much of this can be merged with above?