Update quantized-matmul reassociation flags for CPU llama2: pass each flag to `extra_args.extend` as a single-item list instead of a bare string (extending with a string would append its individual characters).

This commit is contained in:
Ean Garvey
2024-01-10 14:57:44 -06:00
committed by GitHub
parent d35288e893
commit 97e112091f

View File

@@ -2076,8 +2076,8 @@ class UnshardedVicuna(VicunaBase):
f"{'://' + str(self.device_id) if self.device_id is not None else ''}"
)
if "cpu" in self.device:
self.extra_args.extend("--iree-llvmcpu-enable-quantized-matmul-reassociation")
self.extra_args.extend("--iree-global-opt-enable-quantized-matmul-reassociation")
self.extra_args.extend(["--iree-llvmcpu-enable-quantized-matmul-reassociation"])
self.extra_args.extend(["--iree-global-opt-enable-quantized-matmul-reassociation"])
shark_module = SharkInference(
mlir_module=combined_module,