diff --git a/train.ipynb b/train.ipynb index fb5f823..41e6d15 100644 --- a/train.ipynb +++ b/train.ipynb @@ -168,7 +168,7 @@ }, { "cell_type": "markdown", - "id": "21865d91", + "id": "a4297f4e", "metadata": {}, "source": [ "# Gemma 3 1B" @@ -194,6 +194,35 @@ " save_steps=200, save_total_limit=1, eval_steps=200, logging_steps=2,\n", "))" ] + }, + { + "cell_type": "markdown", + "id": "21865d91", + "metadata": {}, + "source": [ + "# Gemma 3 270m" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c6e8bea", + "metadata": {}, + "outputs": [], + "source": [ + "# Training run for google/gemma-3-270m. NOTE: the hyperparameters below are NOT tuned yet.\n", + "do_training_run(TrainingRunArguments(\n", + " run_name=get_next_run_name(\"Home-Gemma-3-270m\"),\n", + " base_model=\"google/gemma-3-270m\",\n", + " bf16=True,\n", + " train_dataset=\"data/home_assistant_train.jsonl\",\n", + " test_dataset=\"data/home_assistant_test.jsonl\",\n", + " learning_rate=2e-5, learning_rate_warmup=0.03, \n", + " batch_size=64, micro_batch_size=2, epochs=1,\n", + " ctx_size=8192,\n", + " save_steps=200, save_total_limit=1, eval_steps=200, logging_steps=2,\n", + "))" + ] } ], "metadata": {