mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
remove ACCUM_FP32 in simple_matmul.py (#3045)
* remove ACCUM_FP32 in simple_matmul.py; accumulate for half inputs is always in float * move test LLaMA compile speed to Metal
This commit is contained in:
4
.github/workflows/test.yml
vendored
4
.github/workflows/test.yml
vendored
@@ -254,8 +254,6 @@ jobs:
|
||||
# run: npm install puppeteer
|
||||
# - name: Run WEBGPU Efficientnet
|
||||
# run: node test/web/test_webgpu.js
|
||||
# - name: Test LLaMA compile speed
|
||||
# run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py
|
||||
|
||||
testmetal:
|
||||
name: Metal Tests
|
||||
@@ -291,6 +289,8 @@ jobs:
|
||||
run: METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py
|
||||
- name: Test tensor core ops
|
||||
run: METAL=1 TC=2 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm
|
||||
- name: Test LLaMA compile speed
|
||||
run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py
|
||||
|
||||
testhipcompilation:
|
||||
name: HIP Compilation Tests
|
||||
|
||||
@@ -8,7 +8,8 @@ a, b = Tensor.rand(N, N, dtype=dtype_in).realize(), Tensor.rand(N, N, dtype=dtyp
|
||||
for i in range(CNT):
|
||||
if i > 0 and getenv("RAND", 0) != 0:
|
||||
a, b = Tensor.rand(N, N, dtype=dtype_in).realize(), Tensor.rand(N, N, dtype=dtype_in).realize()
|
||||
c = (a.reshape(N, 1, N) * b.permute(1,0).reshape(1, N, N)).float().sum(axis=2).realize() if getenv("ACCUM_FP32") else (a @ b).realize()
|
||||
# NOTE: accumulate is in float32
|
||||
c = (a @ b).realize()
|
||||
comp = a.numpy().astype(np.float32) @ b.numpy().astype(np.float32)
|
||||
nc = c.numpy()
|
||||
np.testing.assert_allclose(nc, comp, atol=1e-4, rtol=3e-2)
|
||||
|
||||
Reference in New Issue
Block a user