Relocate quantized matmul reassociation flag (#2047)

* Remove quantized matmul reassociation flag

This flag should be a model- or use-case-specific addition, not a default CPU compile flag.
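
To illustrate the intended pattern in plain Python (the helper and the illustrative default flag below are assumptions, not code from this repo; only the reassociation flag itself comes from this commit):

    # Default CPU compile flags stay lean; the reassociation flag is
    # no longer part of them.
    DEFAULT_CPU_FLAGS = [
        "--iree-llvmcpu-target-cpu-features=host",  # illustrative default, an assumption
    ]

    QUANT_MATMUL_FLAG = "--iree-global-opt-enable-quantized-matmul-reassociation"

    def compile_flags(quantized_llm: bool) -> list[str]:
        # A quantized-LLM use case opts into reassociation explicitly.
        flags = list(DEFAULT_CPU_FLAGS)
        if quantized_llm:
            flags.append(QUANT_MATMUL_FLAG)
        return flags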
Ean Garvey
2023-12-20 14:48:40 -06:00
committed by GitHub
parent 788cc9157c
commit fa95ed30d1
2 changed files with 1 addition and 1 deletion


@@ -106,6 +106,7 @@ class LanguageModel:
             frontend="torch",
             external_weight_file=self.external_weight_file,
             write_to=self.vmfb_name,
+            extra_args=["--iree-global-opt-enable-quantized-matmul-reassociation"],
         )
         # TODO: delete the temp file


@@ -43,7 +43,6 @@ def get_iree_device_args(device, extra_args=[]):
         get_iree_cpu_args()
         + u_kernel_flag
         + stack_size_flag
-        + ["--iree-global-opt-enable-quantized-matmul-reassociation"]
     )
     if device == "cuda":
         from shark.iree_utils.gpu_utils import get_iree_gpu_args
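
After this change, a CPU caller that still wants reassociation appends the flag on top of the device args itself; a minimal sketch, assuming get_iree_device_args is importable from shark.iree_utils._common and returns a flat list of flags:

    from shark.iree_utils._common import get_iree_device_args  # assumed module path

    # The default CPU args no longer carry the flag (see the hunk above),
    # so a quantized model adds it explicitly.
    compile_args = get_iree_device_args("cpu") + [
        "--iree-global-opt-enable-quantized-matmul-reassociation"
    ]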