#!/bin/bash
# Convert a HuggingFace model directory to GGUF, generate an importance
# matrix (imatrix), and produce a set of quantized variants with llama.cpp.
#
# Usage: convert_and_quantize.sh MODEL_NAME [OUT_TYPE] [MODELS_DIR] [LLAMA_CPP]
#   MODEL_NAME  subdirectory of MODELS_DIR holding the HF model (required)
#   OUT_TYPE    base conversion dtype, default "f16"
#   MODELS_DIR  root directory of models, default "./models"
#   LLAMA_CPP   llama.cpp checkout (with built binaries), default "./llama.cpp"
set -euo pipefail

MODEL_NAME=${1:?usage: $0 MODEL_NAME [OUT_TYPE] [MODELS_DIR] [LLAMA_CPP]}
OUT_TYPE=${2:-f16}
MODELS_DIR=${3:-./models}
LLAMA_CPP=${4:-./llama.cpp}

# Hoist the repeated path expressions so they cannot drift apart.
MODEL_DIR="$MODELS_DIR/$MODEL_NAME"
BASE_GGUF="$MODEL_DIR/$MODEL_NAME.$OUT_TYPE.gguf"
IMATRIX_GGUF="$MODEL_DIR/$MODEL_NAME.imatrix.gguf"

if [[ ! -d "$MODEL_DIR" ]]; then
  echo "Unknown model $MODEL_NAME" >&2
  # 'exit -1' is out of the valid 0-255 range (wraps to 255); use 1.
  exit 1
fi

# Optional per-model GGUF metadata overrides. Build the extra argv as an
# array so the flag and its path stay intact even with spaces in paths
# (a plain string relied on unquoted word-splitting).
OVERRIDES=()
if [[ -f "$MODEL_DIR/gguf_overrides.json" ]]; then
  OVERRIDES=(--metadata "$MODEL_DIR/gguf_overrides.json")
  echo "Using metadata from $MODEL_DIR/gguf_overrides.json"
fi

echo "Converting to GGUF..."
if [[ ! -f "$BASE_GGUF" ]]; then
  # ${OVERRIDES[@]+...} keeps 'set -u' happy on bash < 4.4 when the array is empty.
  "$LLAMA_CPP/convert_hf_to_gguf.py" \
    --outfile "$BASE_GGUF" --outtype "$OUT_TYPE" "$MODEL_DIR/" \
    ${OVERRIDES[@]+"${OVERRIDES[@]}"}
else
  echo "Converted model already exists. Skipping..."
fi

echo "Generate imatrix for model..."
# Calibration text used to weight the importance matrix.
if [[ ! -f groups_merged.txt ]]; then
  echo "Downloading groups_merged.txt..."
  wget https://huggingface.co/datasets/froggeric/imatrix/resolve/main/groups_merged.txt
fi

if [[ ! -f "$IMATRIX_GGUF" ]]; then
  "$LLAMA_CPP/build/bin/llama-imatrix" \
    -m "$BASE_GGUF" -ngl 999 -c 512 -f groups_merged.txt -o "$IMATRIX_GGUF"
else
  echo "Imatrix model already exists. Skipping..."
fi

DESIRED_QUANTS=("Q8_0" "Q6_K" "Q5_K_M" "Q4_0" "Q4_1" "Q3_K_M" "IQ4_NL" "IQ4_XS")
for QUANT in "${DESIRED_QUANTS[@]}"; do
  echo "Quantizing to $QUANT..."
  QUANT_LOWER=${QUANT,,}  # bash 4+ lowercase; replaces an echo|awk pipeline
  OUT_GGUF="$MODEL_DIR/$MODEL_NAME.$QUANT_LOWER.gguf"
  if [[ ! -f "$OUT_GGUF" ]]; then
    "$LLAMA_CPP/build/bin/llama-quantize" \
      --imatrix "$IMATRIX_GGUF" "$BASE_GGUF" "$OUT_GGUF" "$QUANT"
  else
    echo "Quantized model for '$QUANT' already exists. Skipping..."
  fi
done

echo "All done!"
# Kubernetes batch Job that runs an axolotl fine-tuning pass with a
# TensorBoard sidecar for monitoring. Volumes are hostPath-backed, so this
# assumes the pod lands on the node that owns /mnt/data — presumably a
# single-GPU-node cluster; TODO confirm before reusing on multi-node.
apiVersion: batch/v1
kind: Job
metadata:
  # generateName (not name): each submission gets a unique suffix, so use
  # `kubectl create -f`, not `kubectl apply -f`.
  generateName: training-job-
  namespace: ai
  labels:
    app: training-job
spec:
  template:
    metadata:
      labels:
        app: training-job
    spec:
      containers:
        # Main training container: runs `axolotl train` on a config baked
        # into the mounted configs volume.
        - name: axolotl
          image: axolotlai/axolotl-cloud:main-py3.11-cu128-2.8.0
          imagePullPolicy: IfNotPresent
          command:
            - axolotl
            - train
            - /workspace/configs/gemma3-270m.yml
          env:
            # Opt out of axolotl usage telemetry.
            - name: AXOLOTL_DO_NOT_TRACK
              value: "1"
          volumeMounts:
            - name: training-runs
              mountPath: /workspace/data/training-runs
            - name: training-data
              mountPath: /workspace/data/datasets
            - name: training-configs
              mountPath: /workspace/configs
            # Shared HF cache so repeated jobs skip re-downloading weights.
            - name: hf-cache
              mountPath: /workspace/data/huggingface-cache
          resources:
            limits:
              nvidia.com/gpu: 2
        # Sidecar: serves TensorBoard over the shared training-runs volume.
        # NOTE(review): pip-installs tensorboard at container start, so
        # startup needs network access; consider a prebuilt image.
        - name: tensorboard
          image: python:3.11-slim
          imagePullPolicy: IfNotPresent
          command:
            - bash
            - -c
            - "pip3 install tensorboard && tensorboard --logdir=/workspace/data/training-runs --host=0.0.0.0 --port 8080"
          ports:
            - containerPort: 8080
              name: tensorboard
              protocol: TCP
          volumeMounts:
            - name: training-runs
              mountPath: /workspace/data/training-runs
      # GPU runtime + node pinning: requires the NVIDIA runtime class and a
      # node labeled nvidia.com/gpu=true.
      runtimeClassName: nvidia
      nodeSelector:
        nvidia.com/gpu: "true"
      restartPolicy: OnFailure
      # hostPath volumes (DirectoryOrCreate creates the dir on first use);
      # data lives on the node, not in cluster storage.
      volumes:
        - name: training-runs
          hostPath:
            path: /mnt/data/training-runs
            type: DirectoryOrCreate
        - name: training-data
          hostPath:
            path: /mnt/data/training-data
            type: DirectoryOrCreate
        - name: training-configs
          hostPath:
            path: /mnt/data/training-configs
            type: DirectoryOrCreate
        - name: hf-cache
          hostPath:
            path: /mnt/data/hf-cache
            type: DirectoryOrCreate