Add OptimizeEpilogue pass. (#346)

* optimize_epilogue

* Add config

* Remove licenses

* Comment out Hopper-specific parameters when printing out configs

* Add benchmark parameters from flash-attention repo

* Add Z and H to the autotuner key

---------

Co-authored-by: Lixun Zhang <lixun.zhang@amd.com>
This commit is contained in:
oplavsic
2023-11-03 22:46:24 +01:00
committed by GitHub
parent cb02a0b346
commit c65f1e6211
5 changed files with 40 additions and 46 deletions

View File

@@ -208,11 +208,12 @@ class Config:
for k, v in self.kwargs.items():
res.append(f'{k}: {v}')
res.append(f'num_warps: {self.num_warps}')
res.append(f'num_ctas: {self.num_ctas}')
## Comment out Hopper specific parameters
#res.append(f'num_ctas: {self.num_ctas}')
res.append(f'num_stages: {self.num_stages}')
res.append(
f'enable_warp_specialization: {self.enable_warp_specialization}')
res.append(f'enable_persistent: {self.enable_persistent}')
#res.append(
# f'enable_warp_specialization: {self.enable_warp_specialization}')
#res.append(f'enable_persistent: {self.enable_persistent}')
return ', '.join(res)