{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "469a9a97-0f6b-475f-8aef-a796c1c5244f",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -r requirements.txt\n",
    "\n",
    "import os, re\n",
    "\n",
    "# Point the Hugging Face cache at the workspace volume BEFORE importing `train`:\n",
    "# huggingface_hub resolves HF_HOME when it is first imported, so assigning it\n",
    "# afterwards has no effect on where models are cached.\n",
    "# NOTE(review): assumes `train` imports the Hugging Face stack at module level.\n",
    "os.environ[\"HF_HOME\"] = \"/workspace/\"\n",
    "\n",
    "from train import TrainingRunArguments, do_training_run\n",
    "\n",
    "def get_next_run_name(model):\n",
    "    \"\"\"Return the next unused run name of the form ``<model>-rev<N>``.\n",
    "\n",
    "    Scans ``models/`` for entries named exactly ``<model>-rev<number>`` and\n",
    "    returns the name with the highest revision found plus one.\n",
    "\n",
    "    Args:\n",
    "        model: Run name prefix, e.g. ``\"Home-Llama-3.2-1B\"``.\n",
    "\n",
    "    Returns:\n",
    "        ``f\"{model}-rev{N}\"``; ``N`` is 1 when no matching entry exists or\n",
    "        when ``models/`` has not been created yet.\n",
    "    \"\"\"\n",
    "    # Escape the prefix: model names contain regex metacharacters such as \".\"\n",
    "    # (\"3.2\"), which would otherwise match any character.\n",
    "    pattern = re.compile(re.escape(model) + r\"-rev(\\d+)\")\n",
    "    max_rev = 0\n",
    "\n",
    "    # A fresh checkout may not have a models/ directory yet.\n",
    "    folders = os.listdir(\"models/\") if os.path.isdir(\"models/\") else []\n",
    "    for folder in folders:\n",
    "        # fullmatch so that only \"<model>-rev<N>\" counts, not names that merely end with it.\n",
    "        match = pattern.fullmatch(folder)\n",
    "        if match:\n",
    "            max_rev = max(max_rev, int(match.group(1)))\n",
    "\n",
    "    return f\"{model}-rev{max_rev + 1}\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ed0807bf",
   "metadata": {},
   "source": [
    "## Generate Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aaafce74",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -r data/requirements.txt\n",
    "# Imported here rather than in the first cell; presumably it needs the packages installed above.\n",
    "from data.generate_home_assistant_data import main as generate_data\n",
    "\n",
    "generate_data([\"--train\", \"--test\", \"--large\", \"--sharegpt\", \"--language\", \"english\", \"german\", \"french\", \"spanish\", \"polish\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ff011772",
   "metadata": {},
   "source": [
    "## Llama 3.2 1B"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "48839ce2-1939-4d7f-817c-97b047bafd42",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Equivalent command-line invocation:\n",
    "# python3 train.py \\\n",
    "#     --run_name Home-Llama-3.2-1B-rev1 \\\n",
    "#     --base_model meta-llama/Llama-3.2-1B-Instruct \\\n",
    "#     --bf16 \\\n",
    "#     --train_dataset data/home_assistant_train.jsonl \\\n",
    "#     --test_dataset data/home_assistant_test.jsonl \\\n",
    "#     --learning_rate 2e-5 --learning_rate_warmup 0.03 --batch_size 64 --epochs 1 \\\n",
    "#     --micro_batch_size 2 \\\n",
    "#     --ctx_size 2048 \\\n",
    "#     --save_steps 200 --save_total_limit 1 --eval_steps 200 --logging_steps 2\n",
    "\n",
    "do_training_run(TrainingRunArguments(\n",
    "    run_name=get_next_run_name(\"Home-Llama-3.2-1B\"),\n",
    "    base_model=\"meta-llama/Llama-3.2-1B-Instruct\",\n",
    "    bf16=True,\n",
    "    train_dataset=\"data/home_assistant_train.jsonl\",\n",
    "    test_dataset=\"data/home_assistant_test.jsonl\",\n",
    "    learning_rate=2e-5, learning_rate_warmup=0.03,\n",
    "    batch_size=64, micro_batch_size=2, epochs=1,\n",
    "    ctx_size=2048,\n",
    "    save_steps=200, save_total_limit=1, eval_steps=200, logging_steps=2,\n",
    "))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e71572c4",
   "metadata": {},
   "source": [
    "## Qwen3 1.7B"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fdf2b998",
   "metadata": {},
   "outputs": [],
   "source": [
    "# HYPERPARAMETERS ARE NOT TUNED YET\n",
    "# TODO: implement no think in template\n",
    "do_training_run(TrainingRunArguments(\n",
    "    run_name=get_next_run_name(\"Home-Qwen-3-1.7B\"),\n",
    "    base_model=\"Qwen/Qwen3-1.7B\",\n",
    "    bf16=True,\n",
    "    train_dataset=\"data/home_assistant_train.jsonl\",\n",
    "    test_dataset=\"data/home_assistant_test.jsonl\",\n",
    "    learning_rate=2e-5, learning_rate_warmup=0.03,\n",
    "    batch_size=64, micro_batch_size=2, epochs=1,\n",
    "    ctx_size=2048,\n",
    "    save_steps=200, save_total_limit=1, eval_steps=200, logging_steps=2,\n",
    "))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f2f49f10",
   "metadata": {},
   "source": [
    "## Qwen 2.5 0.5B & 1.5B"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ca9e2dec",
   "metadata": {},
   "outputs": [],
   "source": [
    "# HYPERPARAMETERS ARE NOT TUNED YET\n",
    "# NOTE(review): the smallest Qwen2.5 checkpoint is 0.5B (0.6B is a Qwen3 size);\n",
    "# confirm this is the intended base model.\n",
    "do_training_run(TrainingRunArguments(\n",
    "    run_name=get_next_run_name(\"Home-Qwen-2.5-0.5B\"),\n",
    "    base_model=\"Qwen/Qwen2.5-0.5B-Instruct\",\n",
    "    bf16=True,\n",
    "    train_dataset=\"data/home_assistant_train.jsonl\",\n",
    "    test_dataset=\"data/home_assistant_test.jsonl\",\n",
    "    learning_rate=2e-5, learning_rate_warmup=0.03,\n",
    "    batch_size=64, micro_batch_size=2, epochs=1,\n",
    "    ctx_size=2048,\n",
    "    save_steps=200, save_total_limit=1, eval_steps=200, logging_steps=2,\n",
    "))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d875365",
   "metadata": {},
   "outputs": [],
   "source": [
    "# HYPERPARAMETERS ARE NOT TUNED YET\n",
    "do_training_run(TrainingRunArguments(\n",
    "    run_name=get_next_run_name(\"Home-Qwen-2.5-1.5B\"),\n",
    "    base_model=\"Qwen/Qwen2.5-1.5B-Instruct\",\n",
    "    bf16=True,\n",
    "    train_dataset=\"data/home_assistant_train.jsonl\",\n",
    "    test_dataset=\"data/home_assistant_test.jsonl\",\n",
    "    learning_rate=2e-5, learning_rate_warmup=0.03,\n",
    "    batch_size=64, micro_batch_size=2, epochs=1,\n",
    "    ctx_size=2048,\n",
    "    save_steps=200, save_total_limit=1, eval_steps=200, logging_steps=2,\n",
    "))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "21865d91",
   "metadata": {},
   "source": [
    "## Gemma 3 1B"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "27db01c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# HYPERPARAMETERS ARE NOT TUNED YET\n",
    "do_training_run(TrainingRunArguments(\n",
    "    run_name=get_next_run_name(\"Home-Gemma-3-1B\"),\n",
    "    base_model=\"google/gemma-3-1b-it\",\n",
    "    bf16=True,\n",
    "    train_dataset=\"data/home_assistant_train.jsonl\",\n",
    "    test_dataset=\"data/home_assistant_test.jsonl\",\n",
    "    learning_rate=2e-5, learning_rate_warmup=0.03,\n",
    "    batch_size=64, micro_batch_size=2, epochs=1,\n",
    "    ctx_size=2048,\n",
    "    save_steps=200, save_total_limit=1, eval_steps=200, logging_steps=2,\n",
    "))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}