mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-02-18 18:35:12 -05:00
Fix cuda (#836)
* Disabled float4 ALU ops for CUDA; small fix to add half_prekernel before kernel_prefix
* Added the supports_float4_alu option, and disabled it for ops_cuda
This commit is contained in:
@@ -146,7 +146,7 @@ def uops_to_cstyle(uops:List[UOp], bufs:List[Union[LocalBuffer,LazyBuffer]], lan
|
||||
val = f"vload_half({args.idx.render(render_cl)}, {bufnames[args.i]})"
|
||||
else:
|
||||
if newvar.ltype == LocalTypes.float4:
|
||||
val = f"{lang.float4}((({lang.smem_prefix if isinstance(bufs[args.i], LocalBuffer) else lang.buffer_prefix}{bufs[args.i].dtype.name}4*){bufnames[args.i]})[{(args.idx//4).render(render_cl)}])"
|
||||
val = f"({newvar.ltype.name})((({lang.smem_prefix if isinstance(bufs[args.i], LocalBuffer) else lang.buffer_prefix}{bufs[args.i].dtype.name}4*){bufnames[args.i]})[{(args.idx//4).render(render_cl)}])"
|
||||
else:
|
||||
val = f"{bufnames[args.i]}[{args.idx.render(render_cl)}]"
|
||||
# NOTE: if min and max are both 0, it should be a CONST in the Linearizer
|
||||
@@ -182,12 +182,15 @@ def uops_to_cstyle(uops:List[UOp], bufs:List[Union[LocalBuffer,LazyBuffer]], lan
|
||||
[', '.join([f'{t} {bufnames[i]}' for i,t in buftypes] + lang.extra_args)] +
|
||||
[") {\n"] + list(prekernel) + ['\n'.join(kernel), "\n}"])
|
||||
|
||||
if lang.half_prekernel:
|
||||
prg =''.join([f"{lang.half_prekernel}", "\n", prg])
|
||||
return prg, global_size, local_size
|
||||
|
||||
class CStyleCodegen(Linearizer):
|
||||
lang: ClassVar[CStyleLanguage] = CStyleLanguage()
|
||||
supports_constant_folding: bool = True
|
||||
supports_float4: bool = True
|
||||
supports_float4_alu: bool = True
|
||||
|
||||
# for renaming
|
||||
kernel_cnt: Final[DefaultDict[str, int]] = collections.defaultdict(int)
|
||||
|
||||
Reference in New Issue
Block a user