mirror of
https://github.com/nod-ai/SHARK-Studio.git
synced 2026-01-08 21:38:04 -05:00
Relocate quantized matmul reassociation flag (#2047)
* Remove quantized matmul reassociation flag. This flag should be a model/use-case-specific addition, not a default CPU compile flag.
This commit is contained in:
@@ -106,6 +106,7 @@ class LanguageModel:
|
||||
frontend="torch",
|
||||
external_weight_file=self.external_weight_file,
|
||||
write_to=self.vmfb_name,
|
||||
+ extra_args=["--iree-global-opt-enable-quantized-matmul-reassociation"],
|
||||
)
|
||||
# TODO: delete the temp file
|
||||
|
||||
|
||||
@@ -43,7 +43,6 @@ def get_iree_device_args(device, extra_args=[]):
|
||||
get_iree_cpu_args()
|
||||
+ u_kernel_flag
|
||||
+ stack_size_flag
|
||||
- + ["--iree-global-opt-enable-quantized-matmul-reassociation"]
|
||||
)
|
||||
if device == "cuda":
|
||||
from shark.iree_utils.gpu_utils import get_iree_gpu_args
|
||||
|
||||
Reference in New Issue
Block a user