Relocate quantized matmul reassociation flag (#2047)

* Remove quantized matmul reassociation flag. This flag should be a model/use-case-specific addition, not a default CPU compile flag.
2026-04-03 03:00:17 -04:00 · 2023-12-20 14:48:40 -06:00
parent 788cc9157c
commit fa95ed30d1
2 changed files with 1 addition and 1 deletion
--- a/apps/shark_studio/api/llm.py
+++ b/apps/shark_studio/api/llm.py
@@ -106,6 +106,7 @@ class LanguageModel:
            frontend="torch",
            external_weight_file=self.external_weight_file,
            write_to=self.vmfb_name,
+            extra_args=["--iree-global-opt-enable-quantized-matmul-reassociation"],
        )
        # TODO: delete the temp file