Files
home-llm/train.ipynb
2025-06-05 00:14:24 -04:00

221 lines
6.4 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "469a9a97-0f6b-475f-8aef-a796c1c5244f",
"metadata": {},
"outputs": [],
"source": [
"%pip install -r requirements.txt\n",
"\n",
"import os, re\n",
"\n",
"# Point the Hugging Face cache at the workspace volume. This is assigned\n",
"# before importing `train` because huggingface_hub reads HF_HOME once, at\n",
"# import time; if `train` imports those libraries at module level, setting\n",
"# the variable afterwards would have no effect.\n",
"os.environ[\"HF_HOME\"] = \"/workspace/\"\n",
"\n",
"from train import TrainingRunArguments, do_training_run\n",
"\n",
"def get_next_run_name(model, models_dir=\"models/\"):\n",
"    \"\"\"Return the next unused `<model>-rev<N>` run name.\n",
"\n",
"    Scans `models_dir` for entries whose name ends in `<model>-rev<digits>`\n",
"    and returns `<model>-rev<highest + 1>`. Returns `<model>-rev1` when no\n",
"    entry matches or when `models_dir` does not exist.\n",
"\n",
"    Args:\n",
"        model: Base run name, e.g. \"Home-Llama-3.2-1B\".\n",
"        models_dir: Directory to scan for earlier revisions.\n",
"    \"\"\"\n",
"    # Escape the model name: names such as \"Home-Llama-3.2-1B\" contain `.`,\n",
"    # which would otherwise match any character and count unrelated folders.\n",
"    pattern = re.compile(re.escape(model) + r\"-rev(\\d+)$\")\n",
"\n",
"    try:\n",
"        folders = os.listdir(models_dir)\n",
"    except FileNotFoundError:\n",
"        # The directory is missing, so there are no earlier revisions.\n",
"        folders = []\n",
"\n",
"    matches = (pattern.search(folder) for folder in folders)\n",
"    max_rev = max((int(m.group(1)) for m in matches if m), default=0)\n",
"\n",
"    return f\"{model}-rev{max_rev + 1}\""
]
},
{
"cell_type": "markdown",
"id": "ed0807bf",
"metadata": {},
"source": [
"## Generate Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aaafce74",
"metadata": {},
"outputs": [],
"source": [
"%pip install -r data/requirements.txt\n",
"from data.generate_home_assistant_data import main as generate_data\n",
"\n",
"# Call the generator's main() with an explicit argv-style list:\n",
"# one flag per line, followed by the list of languages.\n",
"generate_data([\n",
"    \"--train\",\n",
"    \"--test\",\n",
"    \"--large\",\n",
"    \"--sharegpt\",\n",
"    \"--language\", \"english\", \"german\", \"french\", \"spanish\", \"polish\",\n",
"])"
]
},
{
"cell_type": "markdown",
"id": "ff011772",
"metadata": {},
"source": [
"## Llama 3.2 1B"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "48839ce2-1939-4d7f-817c-97b047bafd42",
"metadata": {},
"outputs": [],
"source": [
"# Reference command-line form of this run (same settings, fixed run name):\n",
"#\n",
"# python3 train.py \\\n",
"# --run_name Home-Llama-3.2-1B-rev1 \\\n",
"# --base_model meta-llama/Llama-3.2-1B-Instruct \\\n",
"# --bf16 \\\n",
"# --train_dataset data/home_assistant_train.jsonl \\\n",
"# --test_dataset data/home_assistant_test.jsonl \\\n",
"# --learning_rate 2e-5 --learning_rate_warmup 0.03 --batch_size 64 --epochs 1 \\\n",
"# --micro_batch_size 2 \\\n",
"# --ctx_size 2048 \\\n",
"# --save_steps 200 --save_total_limit 1 --eval_steps 200 --logging_steps 2\n",
"\n",
"# In the notebook the run name is derived from the folders already present,\n",
"# so each execution of this cell gets the next `-revN` suffix.\n",
"do_training_run(\n",
"    TrainingRunArguments(\n",
"        run_name=get_next_run_name(\"Home-Llama-3.2-1B\"),\n",
"        base_model=\"meta-llama/Llama-3.2-1B-Instruct\",\n",
"        bf16=True,\n",
"        train_dataset=\"data/home_assistant_train.jsonl\",\n",
"        test_dataset=\"data/home_assistant_test.jsonl\",\n",
"        learning_rate=2e-5,\n",
"        learning_rate_warmup=0.03,\n",
"        batch_size=64,\n",
"        micro_batch_size=2,\n",
"        epochs=1,\n",
"        ctx_size=2048,\n",
"        save_steps=200,\n",
"        save_total_limit=1,\n",
"        eval_steps=200,\n",
"        logging_steps=2,\n",
"    )\n",
")"
]
},
{
"cell_type": "markdown",
"id": "e71572c4",
"metadata": {},
"source": [
"# Qwen3 1.7b"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fdf2b998",
"metadata": {},
"outputs": [],
"source": [
"# HYPERPARAMETERS ARE NOT TUNED YET\n",
"# TODO: implement no-think in the template\n",
"do_training_run(\n",
"    TrainingRunArguments(\n",
"        run_name=get_next_run_name(\"Home-Qwen-3-1.7B\"),\n",
"        base_model=\"Qwen/Qwen3-1.7B\",\n",
"        bf16=True,\n",
"        train_dataset=\"data/home_assistant_train.jsonl\",\n",
"        test_dataset=\"data/home_assistant_test.jsonl\",\n",
"        learning_rate=2e-5,\n",
"        learning_rate_warmup=0.03,\n",
"        batch_size=64,\n",
"        micro_batch_size=2,\n",
"        epochs=1,\n",
"        ctx_size=2048,\n",
"        save_steps=200,\n",
"        save_total_limit=1,\n",
"        eval_steps=200,\n",
"        logging_steps=2,\n",
"    )\n",
")"
]
},
{
"cell_type": "markdown",
"id": "f2f49f10",
"metadata": {},
"source": [
"## Qwen 2.5 0.5B & 1.5B"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ca9e2dec",
"metadata": {},
"outputs": [],
"source": [
"# HYPERPARAMETERS ARE NOT TUNED YET\n",
"# Qwen2.5 is not published in a 0.6B size (0.6B is a Qwen3 size); the\n",
"# smallest Qwen2.5 instruct checkpoint is 0.5B, so that is the one used here.\n",
"do_training_run(TrainingRunArguments(\n",
"    run_name=get_next_run_name(\"Home-Qwen-2.5-0.5B\"),\n",
"    base_model=\"Qwen/Qwen2.5-0.5B-Instruct\",\n",
"    bf16=True,\n",
"    train_dataset=\"data/home_assistant_train.jsonl\",\n",
"    test_dataset=\"data/home_assistant_test.jsonl\",\n",
"    learning_rate=2e-5, learning_rate_warmup=0.03,\n",
"    batch_size=64, micro_batch_size=2, epochs=1,\n",
"    ctx_size=2048,\n",
"    save_steps=200, save_total_limit=1, eval_steps=200, logging_steps=2,\n",
"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6d875365",
"metadata": {},
"outputs": [],
"source": [
"# HYPERPARAMETERS ARE NOT TUNED YET\n",
"# The run name picks up the next free `-revN` suffix on every execution.\n",
"do_training_run(\n",
"    TrainingRunArguments(\n",
"        run_name=get_next_run_name(\"Home-Qwen-2.5-1.5B\"),\n",
"        base_model=\"Qwen/Qwen2.5-1.5B-Instruct\",\n",
"        bf16=True,\n",
"        train_dataset=\"data/home_assistant_train.jsonl\",\n",
"        test_dataset=\"data/home_assistant_test.jsonl\",\n",
"        learning_rate=2e-5,\n",
"        learning_rate_warmup=0.03,\n",
"        batch_size=64,\n",
"        micro_batch_size=2,\n",
"        epochs=1,\n",
"        ctx_size=2048,\n",
"        save_steps=200,\n",
"        save_total_limit=1,\n",
"        eval_steps=200,\n",
"        logging_steps=2,\n",
"    )\n",
")"
]
},
{
"cell_type": "markdown",
"id": "21865d91",
"metadata": {},
"source": [
"# Gemma 3 1B"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "27db01c0",
"metadata": {},
"outputs": [],
"source": [
"# HYPERPARAMETERS ARE NOT TUNED YET\n",
"# The run name picks up the next free `-revN` suffix on every execution.\n",
"do_training_run(\n",
"    TrainingRunArguments(\n",
"        run_name=get_next_run_name(\"Home-Gemma-3-1B\"),\n",
"        base_model=\"google/gemma-3-1b-it\",\n",
"        bf16=True,\n",
"        train_dataset=\"data/home_assistant_train.jsonl\",\n",
"        test_dataset=\"data/home_assistant_test.jsonl\",\n",
"        learning_rate=2e-5,\n",
"        learning_rate_warmup=0.03,\n",
"        batch_size=64,\n",
"        micro_batch_size=2,\n",
"        epochs=1,\n",
"        ctx_size=2048,\n",
"        save_steps=200,\n",
"        save_total_limit=1,\n",
"        eval_steps=200,\n",
"        logging_steps=2,\n",
"    )\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}