diff --git a/TODO.md b/TODO.md index 0ff83ec..7d71b82 100644 --- a/TODO.md +++ b/TODO.md @@ -1,6 +1,7 @@ # TODO -- [ ] new model based on qwen3 0.6b, 1.7b and 4b - [ ] add examples of 'fixing' a failed tool call to the dataset +- [ ] add proper 'refusals' to the dataset (i.e. tool/device not available or device is already in the desired state) +- [ ] new model based on qwen3 0.6b, 1.7b and 4b - [x] new model based on gemma3 270m - [x] support AI task API - [ ] vision support for remote backends diff --git a/repository.yaml b/repository.yaml deleted file mode 100644 index 060ec16..0000000 --- a/repository.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# https://developers.home-assistant.io/docs/add-ons/repository#repository-configuration -name: text-generation-webui - Home Assistant Add-on -url: 'https://github.com/acon96/home-llm' -maintainer: acon96 diff --git a/train/README.md b/train/README.md new file mode 100644 index 0000000..09026b3 --- /dev/null +++ b/train/README.md @@ -0,0 +1,3 @@ +# Training Home LLM Models + +This directory contains resources and instructions for training Home LLM models. Currently, it is recommended to use axolotl via a Docker container for training. There are various examples of model configurations provided in the `configs/` folder. Additionally, you can refer to the [Axolotl documentation](https://docs.axolotl.ai/) for more detailed guidance on setting up and running training sessions. 
\ No newline at end of file diff --git a/train/functiongemma-270m.yml b/train/configs/functiongemma-270m.yml similarity index 100% rename from train/functiongemma-270m.yml rename to train/configs/functiongemma-270m.yml diff --git a/train/gemma3-270m.yml b/train/configs/gemma3-270m.yml similarity index 100% rename from train/gemma3-270m.yml rename to train/configs/gemma3-270m.yml diff --git a/evaluate.py b/train/evaluate.py similarity index 100% rename from evaluate.py rename to train/evaluate.py diff --git a/train/run.sh b/train/run.sh deleted file mode 100644 index b0a061e..0000000 --- a/train/run.sh +++ /dev/null @@ -1,8 +0,0 @@ -docker run -d --rm \ - --gpus all \ - -p 8888:8888 \ - -v /mnt/data/training-runs:/workspace/data/axolotl-artifacts \ - -v /mnt/data/training-data:/workspace/data/datasets \ - -v /mnt/data/training-configs:/workspace/configs \ - -v /mnt/data/hf-cache:/workspace/data/huggingface-cache \ - axolotlai/axolotl-cloud:main-py3.11-cu128-2.8.0 \ No newline at end of file diff --git a/train/train.sh b/train/train.sh new file mode 100644 index 0000000..58c965f --- /dev/null +++ b/train/train.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +MODEL_NAME=${1} +REMOTE_SERVER=${2} + +if [ -z "$MODEL_NAME" ] || [ -z "$REMOTE_SERVER" ]; then + echo "Usage: $0 <model_name> <remote_server>" + exit 1 +fi + +scp "configs/${MODEL_NAME}.yml" "${REMOTE_SERVER}:/mnt/data/training-configs/" +sed "s/MODEL_NAME/${MODEL_NAME}/g" training-job.yml | kubectl create -f - diff --git a/train/training-job.yml b/train/training-job.yml index 8860dc3..9c305b8 100644 --- a/train/training-job.yml +++ b/train/training-job.yml @@ -1,11 +1,12 @@ apiVersion: batch/v1 kind: Job metadata: - generateName: training-job- + generateName: training-job-MODEL_NAME- namespace: ai labels: app: training-job spec: + ttlSecondsAfterFinished: 604800 # 7 days (7 * 24 * 60 * 60) template: metadata: labels: @@ -18,7 +19,7 @@ spec: command: - axolotl - train - - /workspace/configs/functiongemma-270m.yml + - /workspace/configs/MODEL_NAME.yml 
env: - name: AXOLOTL_DO_NOT_TRACK value: "1" @@ -38,7 +39,7 @@ spec: mountPath: /workspace/data/huggingface-cache resources: limits: - nvidia.com/gpu: 2 + nvidia.com/gpu: 2 # number of GPUs to assign to this pod initContainers: - name: tensorboard image: python:3.11-slim