[OPS] enable flash_attention_v2 TMA (#2544)

This commit is contained in:
runseny
2023-10-26 14:31:17 +08:00
committed by GitHub
parent 2323adb387
commit 4c816c2f59
3 changed files with 48 additions and 40 deletions

View File

@@ -165,7 +165,7 @@ flash_attention_data = {
(4, 48, 4096, 64, False, False, 'forward', 'bfloat16'): 0.266,
(4, 48, 1024, 16, False, False, 'forward', 'float32'): 0.098,
(4, 48, 4096, 64, False, False, 'backward', 'float16'): 0.159,
- (4, 48, 4096, 64, False, False, 'backward', 'bfloat16'): 0.136,
+ (4, 48, 4096, 64, False, False, 'backward', 'bfloat16'): 0.159,
(4, 48, 1024, 16, False, False, 'backward', 'float32'): 0.088,
}
}

View File

@@ -43,8 +43,6 @@ def test_op(Z, H, N_CTX, D_HEAD, dtype, causal, seq_par):
ref_dq, q.grad = q.grad.clone(), None
# # triton implementation
tri_out = triton.ops.attention(q, k, v, causal, sm_scale, seq_par)
- # temporary env var control begin
- os.putenv("ENABLE_TMA", "0")
tri_out.backward(dout)
tri_dv, v.grad = v.grad.clone(), None
tri_dk, k.grad = k.grad.clone(), None
@@ -55,5 +53,3 @@ def test_op(Z, H, N_CTX, D_HEAD, dtype, causal, seq_par):
torch.testing.assert_close(ref_dv, tri_dv, atol=atol, rtol=0)
torch.testing.assert_close(ref_dk, tri_dk, atol=atol, rtol=0)
torch.testing.assert_close(ref_dq, tri_dq, atol=atol, rtol=0)
- # temporary env var control end
- os.putenv("ENABLE_TMA", enable_tma)