Mirror of https://github.com/nod-ai/SHARK-Studio.git (synced 2026-01-09 22:07:55 -05:00)
Update brevitas requirement (#1677)

Also clean up useless args.

Co-authored-by: powderluv <powderluv@users.noreply.github.com>
This commit is contained in:
@@ -706,20 +706,13 @@ class ShardedVicuna(SharkLLMBase):
 quantize_model(
 get_model_impl(vicuna_model).layers,
 dtype=torch.float32,
-weight_quant_type="asym",
 weight_bit_width=weight_bit_width,
 weight_param_method="stats",
 weight_scale_precision="float",
+weight_quant_type="asym",
 weight_quant_granularity="per_group",
 weight_group_size=self.weight_group_size,
 quantize_weight_zero_point=False,
-input_bit_width=None,
-input_scale_type="float",
-input_param_method="stats",
-input_quant_type="asym",
-input_quant_granularity="per_tensor",
-quantize_input_zero_point=False,
-seqlen=2048,
 )
 print("Weight quantization applied.")
 
|
|||||||
@@ -26,20 +26,13 @@ class FirstVicuna(torch.nn.Module):
 quantize_model(
 get_model_impl(self.model).layers,
 dtype=torch.float32,
-weight_quant_type="asym",
 weight_bit_width=weight_bit_width,
 weight_param_method="stats",
 weight_scale_precision="float",
+weight_quant_type="asym",
 weight_quant_granularity="per_group",
 weight_group_size=weight_group_size,
 quantize_weight_zero_point=False,
-input_bit_width=None,
-input_scale_type="float",
-input_param_method="stats",
-input_quant_type="asym",
-input_quant_granularity="per_tensor",
-quantize_input_zero_point=False,
-seqlen=2048,
 )
 print("Weight quantization applied.")
 
@@ -75,20 +68,13 @@ class SecondVicuna(torch.nn.Module):
 quantize_model(
 get_model_impl(self.model).layers,
 dtype=torch.float32,
-weight_quant_type="asym",
 weight_bit_width=weight_bit_width,
 weight_param_method="stats",
 weight_scale_precision="float",
+weight_quant_type="asym",
 weight_quant_granularity="per_group",
 weight_group_size=weight_group_size,
 quantize_weight_zero_point=False,
-input_bit_width=None,
-input_scale_type="float",
-input_param_method="stats",
-input_quant_type="asym",
-input_quant_granularity="per_tensor",
-quantize_input_zero_point=False,
-seqlen=2048,
 )
 print("Weight quantization applied.")
 
|
|||||||
@@ -39,5 +39,5 @@ joblib # for langchain
 pefile
 pyinstaller
 
-# low precision vicuna
-brevitas @ git+https://github.com/Xilinx/brevitas.git@llm
+# vicuna quantization
+brevitas @ git+https://github.com/Xilinx/brevitas.git@dev
|
|||||||
@@ -159,5 +159,3 @@ if [[ -z "${CONDA_PREFIX}" && "$SKIP_VENV" != "1" ]]; then
 echo "${Green}Before running examples activate venv with:"
 echo " ${Green}source $VENV_DIR/bin/activate"
 fi
-
-$PYTHON -m pip install git+https://github.com/Xilinx/brevitas.git@llm
|
|||||||
Reference in New Issue
Block a user