Fix quantization imports

This commit is contained in:
Ean Garvey
2023-08-24 23:28:09 -05:00
parent 8298865bda
commit a638d1d5d4

View File

@@ -314,6 +314,11 @@ class SecondVicuna13B(torch.nn.Module):
model_path, low_cpu_mem_usage=True, **kwargs
)
if precision in ["int4", "int8"]:
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import (
get_model_impl,
)
print("Second Vicuna applying weight quantization..")
weight_bit_width = 4 if precision == "int4" else 8
quantize_model(