mirror of
https://github.com/acon96/home-llm.git
synced 2026-01-10 06:07:58 -05:00
add quantization scripts
This commit is contained in:
22
convert_and_quantize.sh
Normal file
22
convert_and_quantize.sh
Normal file
@@ -0,0 +1,22 @@
|
||||
|
||||
# Convert a Hugging Face model directory under ./models/ into an f16 GGUF file
# and then quantize that file to every type listed in DESIRED_QUANTS.
#
# Usage: convert_and_quantize.sh MODEL_NAME
#   MODEL_NAME  name of a directory under ./models/
#
# Uses convert-hf-to-gguf.py and build/bin/quantize from the llama.cpp
# checkout at $LLAMA_CPP.
#
# Exit status: 0 on success, 1 on bad arguments, a failed conversion, or if
# any quantization step failed.

LLAMA_CPP=../llama.cpp
MODEL_NAME=${1:-}

# Without this guard an empty argument would pass the directory check below,
# because "./models/" itself is a directory.
if [[ -z "$MODEL_NAME" ]]; then
  echo "Usage: $0 MODEL_NAME" >&2
  exit 1
fi

MODEL_DIR="./models/$MODEL_NAME"
F16_MODEL="$MODEL_DIR/$MODEL_NAME.f16.gguf"

if [[ ! -d "$MODEL_DIR" ]]; then
  echo "Unknown model $MODEL_NAME" >&2
  exit 1
fi

echo "Converting to GGUF..."
# Stop here on failure: every quantization below reads the f16 file.
if ! "$LLAMA_CPP/convert-hf-to-gguf.py" --outfile "$F16_MODEL" --outtype f16 "$MODEL_DIR/"; then
  echo "Conversion of '$MODEL_NAME' to GGUF failed" >&2
  exit 1
fi

DESIRED_QUANTS=("Q8_0" "Q5_K_M" "Q4_K_M" "Q3_K_M" "Q2_K")
STATUS=0

for QUANT in "${DESIRED_QUANTS[@]}"; do
  # Output files use the lower-cased quantization name as their suffix.
  QUANT_LOWER=$(printf '%s' "$QUANT" | tr '[:upper:]' '[:lower:]')
  QUANT_MODEL="$MODEL_DIR/$MODEL_NAME.$QUANT_LOWER.gguf"

  if [[ ! -f "$QUANT_MODEL" ]]; then
    # Keep going with the remaining types, but remember the failure so the
    # script does not report success.
    if ! "$LLAMA_CPP/build/bin/quantize" "$F16_MODEL" "$QUANT_MODEL" "$QUANT"; then
      echo "Quantization to '$QUANT' failed" >&2
      STATUS=1
    fi
  else
    echo "Quantized model for '$QUANT' already exists. Skipping..."
  fi
done

exit "$STATUS"
|
||||
24
data/default_test_prompt.txt
Normal file
24
data/default_test_prompt.txt
Normal file
@@ -0,0 +1,24 @@
|
||||
You are 'Al', a helpful AI Assistant that controls the devices in a house. Complete the following task as instructed with the information provided only.
|
||||
Services: cover.close_cover, cover.open_cover, cover.stop_cover, cover.toggle, fan.decrease_speed, fan.increase_speed, fan.toggle, fan.turn_off, fan.turn_on, cover.close_cover, cover.open_cover, cover.stop_cover, cover.toggle, light.toggle, light.turn_off, light.turn_on
|
||||
Devices:
|
||||
light.back_lounge_warm 'Back Lounge Warm Light' = off
|
||||
fan.indoor_gym 'Indoor Gym Fan' = on
|
||||
fan.dyson_pure 'Dyson Pure Fan' = on
|
||||
fan.nursery 'Nursery Fan' = off
|
||||
light.office_1 'Office Light' = on
|
||||
light.upstairs_lounge_zigbee 'Upstairs Lounge Light' = off
|
||||
light.front_mancave_ge 'Front Man Cave Light' = on
|
||||
light.front_lounge_ge 'Front Lounge Light' = off
|
||||
cover.side_2 'Side garage door' = closed
|
||||
fan.study_2 'Study fan' = off
|
||||
light.christmas_tree_white 'Tree lights (white)' = off
|
||||
light.christmas_tree_colors 'Tree lights (color)' = off
|
||||
cover.hallway_1 'First hallway blinds' = open
|
||||
cover.nursery 'Nursery Blinds' = open
|
||||
light.upstairs_entryway_zwave 'Upstairs Entryway Light' = off
|
||||
cover.shop 'Workshop Garage Door' = closed
|
||||
light.downstairs_entryway_mqtt 'Downstairs Entryway Light' = off
|
||||
cover.somfy_living 'Living Room Blinds' = closed
|
||||
light.kitchen_winecellar_warm 'Kitchen Wine Cellar Warm Light' = off
|
||||
light.driveway 'driveway' = off
|
||||
light.shed 'Shed Light' = on
|
||||
12
generate.sh
Normal file
12
generate.sh
Normal file
@@ -0,0 +1,12 @@
|
||||
# Run llama.cpp's main binary in ChatML mode against a model under ./models/,
# using the contents of a prompt file as the prompt.
#
# Usage: generate.sh MODEL_NAME [PROMPT_SRC] [QUANT_TYPE]
#   MODEL_NAME  name of a directory under ./models/
#   PROMPT_SRC  file holding the prompt text
#               (default: ./data/default_test_prompt.txt)
#   QUANT_TYPE  selects ./models/MODEL_NAME/MODEL_NAME.QUANT_TYPE.gguf
#               (default: f16)
#
# Exit status: 1 on bad arguments or missing input files, otherwise the exit
# status of the llama.cpp binary.

LLAMA_CPP=../llama.cpp
MODEL_NAME=${1:-}
PROMPT_SRC=${2:-./data/default_test_prompt.txt}
QUANT_TYPE=${3:-f16}

# Without this guard an empty argument would pass the directory check below,
# because "./models/" itself is a directory.
if [[ -z "$MODEL_NAME" ]]; then
  echo "Usage: $0 MODEL_NAME [PROMPT_SRC] [QUANT_TYPE]" >&2
  exit 1
fi

if [[ ! -d "./models/$MODEL_NAME" ]]; then
  echo "Unknown model $MODEL_NAME" >&2
  exit 1
fi

MODEL_FILE="./models/$MODEL_NAME/$MODEL_NAME.$QUANT_TYPE.gguf"
if [[ ! -f "$MODEL_FILE" ]]; then
  echo "Model file $MODEL_FILE not found" >&2
  exit 1
fi

# Fail early instead of silently running with an empty prompt.
if [[ ! -r "$PROMPT_SRC" ]]; then
  echo "Cannot read prompt file $PROMPT_SRC" >&2
  exit 1
fi

PROMPT=$(<"$PROMPT_SRC")
"$LLAMA_CPP/build/bin/main" --model "$MODEL_FILE" --chatml --prompt "$PROMPT"
|
||||
50
train.py
50
train.py
@@ -15,17 +15,33 @@ Phi Modules: fc1,fc2,Wqkv,out_proj,wte,lm_head.linear
|
||||
|
||||
"""
|
||||
python3 train.py \
|
||||
--run_name home-llm-rev11 \
|
||||
--run_name home-llm-rev11_1 \
|
||||
--base_model microsoft/phi-2 \
|
||||
--add_pad_token \
|
||||
--add_chatml_tokens \
|
||||
--bf16 \
|
||||
--train_dataset data/home_assistant_alpaca_merged_train.json \
|
||||
--test_dataset data/home_assistant_alpaca_merged_test.json \
|
||||
--learning_rate 1e-6 \
|
||||
--learning_rate 1e-5 \
|
||||
--save_steps 1000 \
|
||||
--micro_batch_size 2 --gradient_checkpointing \
|
||||
--use_lora --lora_rank 16 --lora_modules fc1,fc2,Wqkv,out_proj --lora_modules_to_save wte,lm_head.linear --lora_merge
|
||||
--ctx_size 2048 \
|
||||
--use_lora --lora_rank 32 --lora_alpha 64 --lora_modules fc1,fc2,Wqkv,out_proj --lora_modules_to_save wte,lm_head.linear --lora_merge
|
||||
"""
|
||||
|
||||
"""
|
||||
python3 train.py \
|
||||
--run_name home-llm-rev10_8 \
|
||||
--base_model microsoft/phi-2 \
|
||||
--add_pad_token \
|
||||
--add_chatml_tokens \
|
||||
--bf16 \
|
||||
--train_dataset data/home_assistant_train.json \
|
||||
--test_dataset data/home_assistant_test.json \
|
||||
--learning_rate 5e-6 \
|
||||
--save_steps 1000 \
|
||||
--micro_batch_size 2 --gradient_checkpointing \
|
||||
--use_lora --lora_rank 16 --lora_alpha 32 --lora_modules fc1,fc2,Wqkv,out_proj --lora_modules_to_save wte,lm_head.linear --lora_merge
|
||||
"""
|
||||
|
||||
"""
|
||||
@@ -65,6 +81,7 @@ class TrainingRunArguments:
|
||||
resume_from_checkpoint: str = field(default="", metadata={"help": "The name of the checkpoint to resume training from"})
|
||||
eval_steps: int = field(default=100, metadata={"help": "The number of steps in between evaluations of the model"})
|
||||
save_steps: int = field(default=-1, metadata={"help": "The number of steps in between model checkpoints; set to -1 to save every epoch"})
|
||||
group_by_length: bool = field(default=False, metadata={"help": "If enabled, the training data will be grouped by length to optimize use of padding"})
|
||||
|
||||
# Quantization
|
||||
load_in_8bit: bool = field(default=False, metadata={"help": "Set to load the base model in 8-bit mode using bitsandbytes"})
|
||||
@@ -105,10 +122,10 @@ elif training_run_args.load_as_gptq:
|
||||
model_kwargs["quantization_config"] = GPTQConfig(bits=4, disable_exllama=True)
|
||||
|
||||
|
||||
# if training_run_args.bf16:
|
||||
# model_kwargs["torch_dtype"] = torch.bfloat16
|
||||
# else:
|
||||
# model_kwargs["torch_dtype"] = torch.float16
|
||||
if training_run_args.bf16:
|
||||
model_kwargs["torch_dtype"] = torch.bfloat16
|
||||
else:
|
||||
model_kwargs["torch_dtype"] = torch.float16
|
||||
|
||||
def find_max_vram(min_buffer_mib=800):
|
||||
total_mem = (torch.cuda.get_device_properties(0).total_memory / (1024 * 1024))
|
||||
@@ -175,13 +192,14 @@ if training_run_args.use_lora:
|
||||
base_dir = "loras" if training_run_args.use_lora else "models"
|
||||
model_dir = f"./{base_dir}/{training_run_args.run_name}"
|
||||
|
||||
# TODO: eval is broken (returning NaN for loss)
|
||||
training_args = TrainingArguments(
|
||||
per_device_train_batch_size=training_run_args.micro_batch_size,
|
||||
per_device_eval_batch_size=training_run_args.micro_batch_size,
|
||||
gradient_accumulation_steps=training_run_args.batch_size/training_run_args.micro_batch_size,
|
||||
# per_device_eval_batch_size=training_run_args.micro_batch_size,
|
||||
gradient_accumulation_steps=training_run_args.batch_size//training_run_args.micro_batch_size,
|
||||
gradient_checkpointing=training_run_args.gradient_checkpointing,
|
||||
evaluation_strategy="steps",
|
||||
eval_steps=training_run_args.eval_steps,
|
||||
# evaluation_strategy="steps",
|
||||
# eval_steps=training_run_args.eval_steps,
|
||||
save_strategy=("steps" if training_run_args.save_steps != -1 else "epoch"),
|
||||
save_steps=(training_run_args.save_steps if training_run_args.save_steps != -1 else None),
|
||||
logging_steps=5,
|
||||
@@ -194,7 +212,8 @@ training_args = TrainingArguments(
|
||||
lr_scheduler_type=training_run_args.learning_rate_schedule,
|
||||
log_level="info",
|
||||
bf16=training_run_args.bf16,
|
||||
bf16_full_eval=training_run_args.bf16,
|
||||
# bf16_full_eval=training_run_args.bf16,
|
||||
group_by_length=training_run_args.group_by_length
|
||||
)
|
||||
|
||||
@dataclass
|
||||
@@ -268,6 +287,9 @@ class RandomEvalSubsetTrainer(Trainer):
|
||||
return SequentialSampler(subset_eval_dataset)
|
||||
|
||||
def _get_train_sampler(self):
|
||||
if self.args.group_by_length:
|
||||
return super()._get_train_sampler()
|
||||
|
||||
return RandomSampler(self.train_dataset, generator=torch.Generator(device='cpu'))
|
||||
|
||||
trainer = RandomEvalSubsetTrainer(
|
||||
@@ -276,7 +298,7 @@ trainer = RandomEvalSubsetTrainer(
|
||||
# train_dataset=tokenized_train_dataset,
|
||||
# eval_dataset=tokenized_test_dataset,
|
||||
train_dataset=datasets["train"],
|
||||
eval_dataset=datasets["test"],
|
||||
# eval_dataset=datasets["test"],
|
||||
data_collator=data_collator,
|
||||
)
|
||||
|
||||
@@ -296,7 +318,7 @@ try:
|
||||
else:
|
||||
trainer.train()
|
||||
|
||||
trainer.evaluate_all()
|
||||
# trainer.evaluate_all()
|
||||
|
||||
trainer.save_model()
|
||||
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
#!/bin/bash
|
||||
source ../textgen-conda.sh
|
||||
tensorboard --logdir ./models
|
||||
tensorboard --logdir ./models --logdir ./loras
|
||||
|
||||
Reference in New Issue
Block a user