remove ACCUM_FP32 in simple_matmul.py (#3045)

* remove ACCUM_FP32 in simple_matmul.py

accumulation for half inputs is always done in float32

* move test llama compile speed to metal
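
A minimal sketch of the claim (not part of the commit; assumes a recent tinygrad with top-level Tensor/dtypes imports): with half inputs, a plain matmul matches the removed explicit float32-accumulation path, which is why the ACCUM_FP32 branch was redundant.

import numpy as np
from tinygrad import Tensor, dtypes

N = 64
a = Tensor.rand(N, N, dtype=dtypes.half).realize()
b = Tensor.rand(N, N, dtype=dtypes.half).realize()

# plain matmul: the reduce already accumulates in float32
c = (a @ b).realize()

# the removed ACCUM_FP32 branch: cast to float32 before the reduce
c_fp32 = (a.reshape(N, 1, N) * b.permute(1, 0).reshape(1, N, N)).float().sum(axis=2).realize()

# both paths agree within half-precision rounding of the inputs
np.testing.assert_allclose(c.numpy(), c_fp32.numpy(), atol=1e-4, rtol=3e-2)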
Author: chenyu
Date: 2024-01-08 17:37:57 -05:00
Committed by: GitHub
Parent: 47d67da830
Commit: 1d730b8853
2 changed files with 4 additions and 3 deletions

.github/workflows/test.yml

@@ -254,8 +254,6 @@ jobs:
 #    run: npm install puppeteer
 #  - name: Run WEBGPU Efficientnet
 #    run: node test/web/test_webgpu.js
-#  - name: Test LLaMA compile speed
-#    run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py
   testmetal:
     name: Metal Tests
@@ -291,6 +289,8 @@ jobs:
       run: METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py
     - name: Test tensor core ops
       run: METAL=1 TC=2 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm
+    - name: Test LLaMA compile speed
+      run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py
   testhipcompilation:
     name: HIP Compilation Tests

extra/gemm/simple_matmul.py

@@ -8,7 +8,8 @@ a, b = Tensor.rand(N, N, dtype=dtype_in).realize(), Tensor.rand(N, N, dtype=dtype_in).realize()
 for i in range(CNT):
   if i > 0 and getenv("RAND", 0) != 0:
     a, b = Tensor.rand(N, N, dtype=dtype_in).realize(), Tensor.rand(N, N, dtype=dtype_in).realize()
-  c = (a.reshape(N, 1, N) * b.permute(1,0).reshape(1, N, N)).float().sum(axis=2).realize() if getenv("ACCUM_FP32") else (a @ b).realize()
+  # NOTE: accumulation is in float32
+  c = (a @ b).realize()
   comp = a.numpy().astype(np.float32) @ b.numpy().astype(np.float32)
   nc = c.numpy()
   np.testing.assert_allclose(nc, comp, atol=1e-4, rtol=3e-2)
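
A hypothetical invocation after this change (only RAND is visible in the hunk above; HALF, N, and CNT are assumed to be the script's other getenv knobs): the check against the float32 numpy reference should pass for half inputs too, since accumulation is always float32.

# hypothetical: the HALF/N/CNT names are assumptions, not shown in this diff
HALF=1 N=4096 CNT=8 RAND=1 METAL=1 python extra/gemm/simple_matmul.py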