Mirror of https://github.com/nod-ai/AMD-SHARK-Studio.git, last synced 2026-04-03 03:00:17 -04:00.
Update matmul reassoc flags for cpu llama2.
This commit is contained in:
@@ -2076,8 +2076,8 @@ class UnshardedVicuna(VicunaBase):
             f"{'://' + str(self.device_id) if self.device_id is not None else ''}"
         )
         if "cpu" in self.device:
-            self.extra_args.extend("--iree-llvmcpu-enable-quantized-matmul-reassociation")
-            self.extra_args.extend("--iree-global-opt-enable-quantized-matmul-reassociation")
+            self.extra_args.extend(["--iree-llvmcpu-enable-quantized-matmul-reassociation"])
+            self.extra_args.extend(["--iree-global-opt-enable-quantized-matmul-reassociation"])

         shark_module = SharkInference(
             mlir_module=combined_module,
|
||||
Reference in New Issue
Block a user