fix quant imports

2026-04-03 03:00:17 -04:00 · 2023-08-24 23:28:09 -05:00
parent 8298865bda
commit a638d1d5d4
1 changed file with 5 additions and 0 deletions
--- a/apps/language_models/src/model_wrappers/vicuna_model.py
+++ b/apps/language_models/src/model_wrappers/vicuna_model.py
@@ -314,6 +314,11 @@ class SecondVicuna13B(torch.nn.Module):
            model_path, low_cpu_mem_usage=True, **kwargs
        )
        if precision in ["int4", "int8"]:
+            from brevitas_examples.llm.llm_quant.quantize import quantize_model
+            from brevitas_examples.llm.llm_quant.run_utils import (
+                get_model_impl,
+            )
+
            print("Second Vicuna applying weight quantization..")
            weight_bit_width = 4 if precision == "int4" else 8
            quantize_model(