[OPTIMIZER] Tweak warpsPerCTA based on the shape of MMA output (#2485) (#2525)

Reverts openai/triton#2497
This commit is contained in:
Philippe Tillet
2023-10-24 06:50:58 +02:00
committed by GitHub
parent 50add54334
commit 8f467f1ea9
2 changed files with 27 additions and 14 deletions

View File

@@ -146,15 +146,15 @@ flash_attention_data = {
(4, 48, 4096, 64, True, True, 'forward', 'float16'): 0.542,
(4, 48, 4096, 64, True, True, 'forward', 'bfloat16'): 0.471,
(4, 48, 1024, 16, True, True, 'forward', 'float32'): 0.155,
-(4, 48, 4096, 64, True, True, 'backward', 'float16'): 0.203,
-(4, 48, 4096, 64, True, True, 'backward', 'bfloat16'): 0.202,
-(4, 48, 1024, 16, True, True, 'backward', 'float32'): 0.108,
+(4, 48, 4096, 64, True, True, 'backward', 'float16'): 0.235,
+(4, 48, 4096, 64, True, True, 'backward', 'bfloat16'): 0.230,
+(4, 48, 1024, 16, True, True, 'backward', 'float32'): 0.146,
(4, 48, 4096, 64, True, False, 'forward', 'float16'): 0.306,
(4, 48, 4096, 64, True, False, 'forward', 'bfloat16'): 0.266,
(4, 48, 1024, 16, True, False, 'forward', 'float32'): 0.098,
(4, 48, 4096, 64, True, False, 'backward', 'float16'): 0.134,
(4, 48, 4096, 64, True, False, 'backward', 'bfloat16'): 0.135,
-(4, 48, 1024, 16, True, False, 'backward', 'float32'): 0.066,
+(4, 48, 1024, 16, True, False, 'backward', 'float32'): 0.098,
(4, 48, 4096, 64, False, True, 'forward', 'float16'): 0.541,
(4, 48, 4096, 64, False, True, 'forward', 'bfloat16'): 0.471,
(4, 48, 1024, 16, False, True, 'forward', 'float32'): 0.150,