Fix cuda errors when running llama example (#749)

Author: Jan Henrik Høiland
Committed by: GitHub, 2023-04-16 22:52:10 +02:00
parent 0b5a0b9ba4
commit 4e17d27d09
2 changed files with 3 additions and 1 deletion

@@ -116,6 +116,8 @@ def uops_to_cstyle(uops:List[UOp], bufs:List[Union[LocalBuffer,LazyBuffer]], lan
     assert newvar is not None
     if args == -math.inf:
       kk(f"{newvar.render(True)} = -INFINITY;")
+    elif newvar.ltype == LocalTypes.float4:
+      kk(f"{newvar.render(True)} = {{ {args}f, {args}f, {args}f, {args}f }};")
     else:
       kk(f"{newvar.render(True)} = {args}f;")
   elif uop == UOps.ALU:
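
A minimal runnable sketch of the patched UOps.CONST branch, with stand-in Token and LocalTypes classes that are assumptions for illustration, not the real tinygrad types. The point of the new elif: a float4 constant now renders as a brace initializer ("float4 acc0 = { 0.0f, 0.0f, 0.0f, 0.0f };") where the old code emitted a scalar assignment ("float4 acc0 = 0.0f;"), which nvcc rejects and is presumably the CUDA error the title refers to.

import math

# Stand-ins for the names the diff references (assumptions, not tinygrad's classes).
class LocalTypes:
  float = "float"
  float4 = "float4"

class Token:
  def __init__(self, name, ltype): self.name, self.ltype = name, ltype
  def render(self, with_type=False):
    # "float4 acc0" when declaring, just "acc0" otherwise
    return f"{self.ltype} {self.name}" if with_type else self.name

lines = []
def kk(s): lines.append(s)

def render_const(newvar, args):
  # Mirrors the patched UOps.CONST branch: -inf becomes -INFINITY, float4
  # constants get a 4-wide brace initializer, plain floats a scalar literal.
  assert newvar is not None
  if args == -math.inf:
    kk(f"{newvar.render(True)} = -INFINITY;")
  elif newvar.ltype == LocalTypes.float4:
    kk(f"{newvar.render(True)} = {{ {args}f, {args}f, {args}f, {args}f }};")
  else:
    kk(f"{newvar.render(True)} = {args}f;")

render_const(Token("acc0", LocalTypes.float4), 0.0)
print(lines[0])  # float4 acc0 = { 0.0f, 0.0f, 0.0f, 0.0f };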

@@ -32,7 +32,7 @@ class CUDAProgram:
     if wait:
       start, end = cuda.Event(), cuda.Event()
       start.record()
-    self.prg(*[x._cl for x in args], block=tuple(local_size), grid=tuple(global_size))
+    self.prg(*[x._buf for x in args], block=tuple(local_size), grid=tuple(global_size))
     if wait:
       end.record()
       end.synchronize()
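
The fix swaps x._cl, presumably a leftover from the OpenCL backend where buffers carry a ._cl attribute, for x._buf, the raw CUDA device allocation that pycuda's launcher actually accepts. For context, a hedged sketch of the same pycuda event-timing launch pattern this hunk wraps around the call; the kernel, buffer, and sizes here are illustrative, not taken from tinygrad.

import numpy as np
import pycuda.autoinit  # noqa: F401  (creates a CUDA context on import)
import pycuda.driver as cuda
from pycuda.compiler import SourceModule

mod = SourceModule("""
__global__ void scale(float *x) { x[threadIdx.x] *= 2.0f; }
""")
prg = mod.get_function("scale")

# A device allocation like the ._buf the diff unpacks into the launch.
buf = cuda.mem_alloc(16 * 4)
cuda.memcpy_htod(buf, np.arange(16, dtype=np.float32))

start, end = cuda.Event(), cuda.Event()
start.record()
# Kernel arguments must be device pointers, hence x._buf rather than x._cl.
prg(buf, block=(16, 1, 1), grid=(1, 1))
end.record()
end.synchronize()
print(f"kernel took {start.time_till(end):.3f} ms")  # time_till returns milliseconds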