mirror of
https://github.com/acon96/home-llm.git
synced 2026-01-08 05:14:02 -05:00
add imatrix quants and k8s training job
This commit is contained in:
@@ -1,36 +1,53 @@
|
||||
|
||||
#!/bin/bash
# Convert a Hugging Face checkpoint to GGUF, compute an importance matrix for
# it, and write imatrix-guided quantizations next to the source model.
#
# Usage: <this script> MODEL_NAME [OUT_TYPE] [MODELS_DIR] [LLAMA_CPP]
#   MODEL_NAME  model directory name under MODELS_DIR (required)
#   OUT_TYPE    value passed to convert_hf_to_gguf.py --outtype (default: f16)
#   MODELS_DIR  directory that contains the model folders (default: ./models)
#   LLAMA_CPP   llama.cpp checkout; its build/bin tools are invoked below
#               (default: ./llama.cpp)
#
# Every stage is skipped when its output file already exists, so the script
# can be re-run after a partial failure.
set -e

MODEL_NAME=$1
OUT_TYPE=${2:-"f16"}
MODELS_DIR=${3:-"./models"}
LLAMA_CPP=${4:-"./llama.cpp"}

# An empty MODEL_NAME would otherwise pass the directory test below, because
# "$MODELS_DIR/" itself exists.
if [[ -z "$MODEL_NAME" ]]; then
    echo "Usage: $0 MODEL_NAME [OUT_TYPE] [MODELS_DIR] [LLAMA_CPP]"
    exit -1
fi

MODEL_DIR="$MODELS_DIR/$MODEL_NAME"
BASE_GGUF="$MODEL_DIR/$MODEL_NAME.$OUT_TYPE.gguf"
IMATRIX_FILE="$MODEL_DIR/$MODEL_NAME.imatrix.gguf"

if [[ ! -d "$MODEL_DIR" ]]; then
    echo "Unknown model $MODEL_NAME"
    exit -1
fi

# Optional per-model metadata overrides; kept as an array so a path containing
# spaces is still passed as a single argument.
OVERRIDES=()
if [ -f "$MODEL_DIR/gguf_overrides.json" ]; then
    OVERRIDES=(--metadata "$MODEL_DIR/gguf_overrides.json")
    echo "Using metadata from $MODEL_DIR/gguf_overrides.json"
fi

echo "Converting to GGUF..."
if [ ! -f "$BASE_GGUF" ]; then
    "$LLAMA_CPP/convert_hf_to_gguf.py" --outfile "$BASE_GGUF" --outtype "$OUT_TYPE" "$MODEL_DIR/" "${OVERRIDES[@]}"
else
    echo "Converted model for '$OUT_TYPE' already exists. Skipping..."
fi

echo "Generate imatrix for model..."
# Calibration text for llama-imatrix; downloaded once into the working directory.
if [ ! -f "groups_merged.txt" ]; then
    echo "Downloading groups_merged.txt..."
    wget https://huggingface.co/datasets/froggeric/imatrix/resolve/main/groups_merged.txt
fi

if [ ! -f "$IMATRIX_FILE" ]; then
    "$LLAMA_CPP/build/bin/llama-imatrix" -m "$BASE_GGUF" -ngl 999 -c 512 -f groups_merged.txt -o "$IMATRIX_FILE"
else
    echo "Imatrix model already exists. Skipping..."
fi

DESIRED_QUANTS=("Q8_0" "Q6_K" "Q5_K_M" "Q4_0" "Q4_1" "Q3_K_M" "IQ4_NL" "IQ4_XS")
for QUANT in "${DESIRED_QUANTS[@]}"
do
    echo "Quantizing to $QUANT..."
    # Output files use the lower-cased quant name, e.g. model.q8_0.gguf
    QUANT_LOWER=$(echo "$QUANT" | awk '{print tolower($0)}')
    QUANT_FILE="$MODEL_DIR/$MODEL_NAME.$QUANT_LOWER.gguf"
    if [ ! -f "$QUANT_FILE" ]; then
        "$LLAMA_CPP/build/bin/llama-quantize" --imatrix "$IMATRIX_FILE" "$BASE_GGUF" "$QUANT_FILE" "$QUANT"
    else
        echo "Quantized model for '$QUANT' already exists. Skipping..."
    fi
done

echo "All done!"
|
||||
71
train/training-job.yml
Normal file
71
train/training-job.yml
Normal file
@@ -0,0 +1,71 @@
|
||||
---
# Kubernetes Job that runs an axolotl training run on GPU nodes, with a
# TensorBoard container in the same pod reading the shared training-runs volume.
apiVersion: batch/v1
kind: Job
metadata:
  # generateName gives every submission a unique suffix; submit with
  # `kubectl create -f`, since `kubectl apply` does not support generateName.
  generateName: training-job-
  namespace: ai
  labels:
    app: training-job
spec:
  template:
    metadata:
      labels:
        app: training-job
    spec:
      restartPolicy: OnFailure
      runtimeClassName: nvidia
      # Only nodes carrying this label are eligible for scheduling.
      nodeSelector:
        nvidia.com/gpu: 'true'
      containers:
        # Trainer: runs `axolotl train` against a config from the configs volume.
        - name: axolotl
          image: axolotlai/axolotl-cloud:main-py3.11-cu128-2.8.0
          imagePullPolicy: IfNotPresent
          command:
            - axolotl
            - train
            - /workspace/configs/gemma3-270m.yml
          env:
            - name: AXOLOTL_DO_NOT_TRACK
              value: '1'
          resources:
            limits:
              nvidia.com/gpu: 2
          volumeMounts:
            - name: training-runs
              mountPath: /workspace/data/training-runs
            - name: training-data
              mountPath: /workspace/data/datasets
            - name: training-configs
              mountPath: /workspace/configs
            - name: hf-cache
              mountPath: /workspace/data/huggingface-cache
        # TensorBoard: installs tensorboard at start-up and serves the run
        # directory on port 8080.
        # NOTE(review): this server does not exit on its own, so the pod keeps
        # running after training finishes; confirm that is intended for a Job.
        - name: tensorboard
          image: python:3.11-slim
          imagePullPolicy: IfNotPresent
          command:
            - bash
            - -c
            - >-
              pip3 install tensorboard &&
              tensorboard --logdir=/workspace/data/training-runs
              --host=0.0.0.0 --port 8080
          ports:
            - name: tensorboard
              containerPort: 8080
              protocol: TCP
          volumeMounts:
            - name: training-runs
              mountPath: /workspace/data/training-runs
      # All storage is hostPath on the scheduled node; DirectoryOrCreate
      # creates a missing directory instead of failing the mount.
      volumes:
        - name: training-runs
          hostPath:
            path: /mnt/data/training-runs
            type: DirectoryOrCreate
        - name: training-data
          hostPath:
            path: /mnt/data/training-data
            type: DirectoryOrCreate
        - name: training-configs
          hostPath:
            path: /mnt/data/training-configs
            type: DirectoryOrCreate
        - name: hf-cache
          hostPath:
            path: /mnt/data/hf-cache
            type: DirectoryOrCreate
|
||||
Reference in New Issue
Block a user