From 4ed0f216b5a8b816919e707b8251be634b6f295a Mon Sep 17 00:00:00 2001
From: wozeparrot
Date: Mon, 3 Nov 2025 18:09:09 -0800
Subject: [PATCH] fix: make max_matmul run again (#13085)

---
 extra/gemm/max_matmul.py | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/extra/gemm/max_matmul.py b/extra/gemm/max_matmul.py
index 5041497839..0d1bb9e7c5 100644
--- a/extra/gemm/max_matmul.py
+++ b/extra/gemm/max_matmul.py
@@ -1,17 +1,11 @@
 import numpy as np, os
 from tinygrad.helpers import getenv, flat_mv
 from tinygrad import dtypes
-from typing import Optional, List, Tuple, cast, Dict, Final, DefaultDict, Self
 from tinygrad.engine.realize import get_program
 
 # for copied uops
-from tinygrad.codegen.opt.kernel import Kernel, KernelOptError
-from tinygrad.uop.ops import UOp, Ops, BinaryOps, UnaryOps, TernaryOps, KernelInfo
-from tinygrad.codegen.opt.search import Opt, OptOps
-from tinygrad import Device, dtypes, Tensor
-from tinygrad.dtype import PtrDType, DType, DTYPES_DICT
-from tinygrad.shape.shapetracker import ShapeTracker
-from tinygrad.shape.view import View
+from tinygrad import dtypes
+from tinygrad.dtype import DTYPES_DICT
 
 script_dir = os.path.dirname(os.path.abspath(__file__))
 
@@ -53,12 +47,6 @@ def randoms():
     nc = nc.astype(np.bfloat16 if DTYPE_IN == dtypes.bfloat16 else np.float16)
   return na, nb, nc
 
-def ast_to_cuda_prog(compiler, ast, opts):
-  k = Kernel(ast)
-  k.apply_opts(opts)
-  p = get_program(k.ast, k.opts, k.applied_opts)
-  return CUDAProgram(device, p.function_name, compiler.compile(p.src))
-
 if __name__ == "__main__":
   print(f"gemm variation: {GEMM_VARIATION=} {M=} {N=} {K=} {DTYPE_IN=} {DTYPE_OUT=} {DTYPE_ACC=}")
   prog, global_size, local_size = None, None, None
@@ -189,11 +177,11 @@ if __name__ == "__main__":
 
   tms = []
   na, nb, nc = randoms()
-  cudaalloc.copyin(a, bytearray(na))
-  cudaalloc.copyin(b, bytearray(nb))
+  cudaalloc._copyin(a, memoryview(bytearray(na)))
+  cudaalloc._copyin(b, memoryview(bytearray(nb)))
   for i in range(CNT): tms.append(prog(*args, **kwargs))
-  cudaalloc.copyout(flat_mv(nc.data), c)
+  cudaalloc._copyout(flat_mv(nc.data), c)
 
   comp = na.astype(np.float32) @ nb.astype(np.float32)
   result = nc.reshape(M, N).astype(np.float32)
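
Note (not part of the patch): the functional change above swaps the public allocator calls copyin/copyout for the internal _copyin/_copyout, which take a host-side memoryview. Below is a minimal sketch of that copy path, assuming a CUDA backend is available and that Device["CUDA"].allocator exposes alloc/_copyin/_copyout the way the patched script uses them; the names and shapes are illustrative, not taken from the script.

  # illustrative sketch only, not part of the patch
  import numpy as np
  from tinygrad import Device
  from tinygrad.helpers import flat_mv

  alloc = Device["CUDA"].allocator           # raw allocator, analogous to `cudaalloc` in the script

  na = np.random.default_rng(0).random((4, 4), dtype=np.float32)   # host input
  nc = np.zeros((4, 4), dtype=np.float32)                          # host output

  a = alloc.alloc(na.nbytes)                 # device buffer for the input
  c = alloc.alloc(nc.nbytes)                 # device buffer for the output

  alloc._copyin(a, memoryview(bytearray(na)))   # host -> device, expects a memoryview
  # ... launch the compiled GEMM kernel that reads `a` and writes `c` ...
  alloc._copyout(flat_mv(nc.data), c)           # device -> host, into nc's backing memory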