mirror of
https://github.com/acon96/home-llm.git
synced 2026-01-08 21:28:05 -05:00
221 lines
6.4 KiB
Plaintext
221 lines
6.4 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "469a9a97-0f6b-475f-8aef-a796c1c5244f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%pip install -r requirements.txt\n",
|
|
"\n",
|
|
"import os, re\n",
|
|
"from train import TrainingRunArguments, do_training_run\n",
|
|
"\n",
|
|
"def get_next_run_name(model):\n",
|
|
" pattern = re.compile(model + r\"-rev(\\d+)$\")\n",
|
|
" max_rev = 0\n",
|
|
"\n",
|
|
" for folder in os.listdir(\"models/\"):\n",
|
|
" match = pattern.search(folder)\n",
|
|
" if match:\n",
|
|
" max_rev = max(max_rev, int(match.group(1)))\n",
|
|
"\n",
|
|
" return f\"{model}-rev{max_rev + 1}\"\n",
|
|
"\n",
|
|
"os.environ[\"HF_HOME\"] = \"/workspace/\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "ed0807bf",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Generate Data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "aaafce74",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%pip install -r data/requirements.txt\n",
|
|
"from data.generate_home_assistant_data import main as generate_data\n",
|
|
"\n",
|
|
"generate_data([\"--train\", \"--test\", \"--large\", \"--sharegpt\", \"--language\", \"english\", \"german\", \"french\", \"spanish\", \"polish\"])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "ff011772",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Llama 3.2 1B"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "48839ce2-1939-4d7f-817c-97b047bafd42",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# python3 train.py \\\n",
|
|
"# --run_name Home-Llama-3.2-1B-rev1 \\\n",
|
|
"# --base_model meta-llama/Llama-3.2-1B-Instruct \\\n",
|
|
"# --bf16 \\\n",
|
|
"# --train_dataset data/home_assistant_train.jsonl \\\n",
|
|
"# --test_dataset data/home_assistant_test.jsonl \\\n",
|
|
"# --learning_rate 2e-5 --learning_rate_warmup 0.03 --batch_size 64 --epochs 1 \\\n",
|
|
"# --micro_batch_size 2 \\\n",
|
|
"# --ctx_size 2048 \\\n",
|
|
"# --save_steps 200 --save_total_limit 1 --eval_steps 200 --logging_steps 2\n",
|
|
"\n",
|
|
"do_training_run(TrainingRunArguments(\n",
|
|
" run_name=get_next_run_name(\"Home-Llama-3.2-1B\"),\n",
|
|
" base_model=\"meta-llama/Llama-3.2-1B-Instruct\",\n",
|
|
" bf16=True,\n",
|
|
" train_dataset=\"data/home_assistant_train.jsonl\",\n",
|
|
" test_dataset=\"data/home_assistant_test.jsonl\",\n",
|
|
" learning_rate=2e-5, learning_rate_warmup=0.03, \n",
|
|
" batch_size=64, micro_batch_size=2, epochs=1,\n",
|
|
" ctx_size=2048,\n",
|
|
" save_steps=200, save_total_limit=1, eval_steps=200, logging_steps=2,\n",
|
|
"))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "e71572c4",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Qwen3 1.7B"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "fdf2b998",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# HYPERPARAMETERS ARE NOT TUNED YET\n",
|
|
"# TODO: implement no think in template\n",
|
|
"do_training_run(TrainingRunArguments(\n",
|
|
" run_name=get_next_run_name(\"Home-Qwen-3-1.7B\"),\n",
|
|
" base_model=\"Qwen/Qwen3-1.7B\",\n",
|
|
" bf16=True,\n",
|
|
" train_dataset=\"data/home_assistant_train.jsonl\",\n",
|
|
" test_dataset=\"data/home_assistant_test.jsonl\",\n",
|
|
" learning_rate=2e-5, learning_rate_warmup=0.03, \n",
|
|
" batch_size=64, micro_batch_size=2, epochs=1,\n",
|
|
" ctx_size=2048,\n",
|
|
" save_steps=200, save_total_limit=1, eval_steps=200, logging_steps=2,\n",
|
|
"))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "f2f49f10",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Qwen 2.5 0.6B & 1.5B"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ca9e2dec",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# HYPERPARAMETERS ARE NOT TUNED YET\n",
|
|
"do_training_run(TrainingRunArguments(\n",
|
|
" run_name=get_next_run_name(\"Home-Qwen-2.5-0.6B\"),\n",
|
|
" base_model=\"Qwen/Qwen2.5-0.6B-Instruct\",\n",
|
|
" bf16=True,\n",
|
|
" train_dataset=\"data/home_assistant_train.jsonl\",\n",
|
|
" test_dataset=\"data/home_assistant_test.jsonl\",\n",
|
|
" learning_rate=2e-5, learning_rate_warmup=0.03, \n",
|
|
" batch_size=64, micro_batch_size=2, epochs=1,\n",
|
|
" ctx_size=2048,\n",
|
|
" save_steps=200, save_total_limit=1, eval_steps=200, logging_steps=2,\n",
|
|
"))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6d875365",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# HYPERPARAMETERS ARE NOT TUNED YET\n",
|
|
"do_training_run(TrainingRunArguments(\n",
|
|
" run_name=get_next_run_name(\"Home-Qwen-2.5-1.5B\"),\n",
|
|
" base_model=\"Qwen/Qwen2.5-1.5B-Instruct\",\n",
|
|
" bf16=True,\n",
|
|
" train_dataset=\"data/home_assistant_train.jsonl\",\n",
|
|
" test_dataset=\"data/home_assistant_test.jsonl\",\n",
|
|
" learning_rate=2e-5, learning_rate_warmup=0.03, \n",
|
|
" batch_size=64, micro_batch_size=2, epochs=1,\n",
|
|
" ctx_size=2048,\n",
|
|
" save_steps=200, save_total_limit=1, eval_steps=200, logging_steps=2,\n",
|
|
"))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "21865d91",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Gemma 3 1B"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "27db01c0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# HYPERPARAMETERS ARE NOT TUNED YET\n",
|
|
"do_training_run(TrainingRunArguments(\n",
|
|
" run_name=get_next_run_name(\"Home-Gemma-3-1B\"),\n",
|
|
" base_model=\"google/gemma-3-1b-it\",\n",
|
|
" bf16=True,\n",
|
|
" train_dataset=\"data/home_assistant_train.jsonl\",\n",
|
|
" test_dataset=\"data/home_assistant_test.jsonl\",\n",
|
|
" learning_rate=2e-5, learning_rate_warmup=0.03, \n",
|
|
" batch_size=64, micro_batch_size=2, epochs=1,\n",
|
|
" ctx_size=2048,\n",
|
|
" save_steps=200, save_total_limit=1, eval_steps=200, logging_steps=2,\n",
|
|
"))"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.12"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|