Update brevitas requirement (#1677)

also clean up useless args

Co-authored-by: powderluv <powderluv@users.noreply.github.com>
This commit is contained in:
jinchen62
2023-07-19 22:03:32 -07:00
committed by GitHub
parent db3f222933
commit 3662224c04
4 changed files with 5 additions and 28 deletions

View File

@@ -706,20 +706,13 @@ class ShardedVicuna(SharkLLMBase):
quantize_model(
get_model_impl(vicuna_model).layers,
dtype=torch.float32,
weight_quant_type="asym",
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=self.weight_group_size,
quantize_weight_zero_point=False,
input_bit_width=None,
input_scale_type="float",
input_param_method="stats",
input_quant_type="asym",
input_quant_granularity="per_tensor",
quantize_input_zero_point=False,
seqlen=2048,
)
print("Weight quantization applied.")

View File

@@ -26,20 +26,13 @@ class FirstVicuna(torch.nn.Module):
quantize_model(
get_model_impl(self.model).layers,
dtype=torch.float32,
weight_quant_type="asym",
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=weight_group_size,
quantize_weight_zero_point=False,
input_bit_width=None,
input_scale_type="float",
input_param_method="stats",
input_quant_type="asym",
input_quant_granularity="per_tensor",
quantize_input_zero_point=False,
seqlen=2048,
)
print("Weight quantization applied.")
@@ -75,20 +68,13 @@ class SecondVicuna(torch.nn.Module):
quantize_model(
get_model_impl(self.model).layers,
dtype=torch.float32,
weight_quant_type="asym",
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=weight_group_size,
quantize_weight_zero_point=False,
input_bit_width=None,
input_scale_type="float",
input_param_method="stats",
input_quant_type="asym",
input_quant_granularity="per_tensor",
quantize_input_zero_point=False,
seqlen=2048,
)
print("Weight quantization applied.")

View File

@@ -39,5 +39,5 @@ joblib # for langchain
pefile
pyinstaller
# low precision vicuna
brevitas @ git+https://github.com/Xilinx/brevitas.git@llm
# vicuna quantization
brevitas @ git+https://github.com/Xilinx/brevitas.git@dev

View File

@@ -159,5 +159,3 @@ if [[ -z "${CONDA_PREFIX}" && "$SKIP_VENV" != "1" ]]; then
echo "${Green}Before running examples activate venv with:"
echo " ${Green}source $VENV_DIR/bin/activate"
fi
$PYTHON -m pip install git+https://github.com/Xilinx/brevitas.git@llm