# Axolotl fine-tuning config: Home-LLM assistant on top of google/gemma-3-270m-it.
base_model: google/gemma-3-270m-it
model_type: Gemma3ForCausalLM

# gemma3 doesn't seem to play nice with ddp
ddp_find_unused_parameters: true

# Custom chat template: Gemma-style <start_of_turn>/<end_of_turn> turns plus
# <tool_call>/<tool_result> markers for tool use. System messages are folded
# into the "user" role; assistant messages use Gemma's "model" role.
# FIX(review): the <tool_call> payload previously ended with '"}</tool_call>',
# appending a stray double quote in both branches (string args became
# "args""} and tojson args became {...}"}) — i.e. malformed JSON on every
# serialized tool call. It now closes with '}</tool_call>'.
chat_template: jinja
chat_template_jinja: |
  {{ bos_token }}
  {%- if not tools or tools | length == 0 %}No tools were provided. If the user requests you interact with a device, tell them you are unable to do so.{% else %}
  Tools:
  {% for tool in tools %}
  - {{ tool['function']['name'] }}({{ tool['function']['parameters']['properties'].keys() | join(', ') }}): {{ tool['function']['description'] }}
  {% endfor -%}
  {%- endif -%}
  {%- for message in messages -%}
  {%- if (message['role'] == 'assistant') -%}
  {%- set role = "model" -%}
  {%- elif message['role'] == 'system' -%}
  {%- set role = "user" -%}
  {%- else -%}
  {%- set role = message['role'] -%}
  {%- endif -%}
  {{ '<start_of_turn>' + role + '\n' }}
  {%- if role == "tool" -%}
  {{ '<tool_result>' }}
  {%- endif -%}

  {%- if message['content'] is string -%}
  {{ message['content'] | trim }}
  {%- elif message['content'] is iterable -%}
  {%- for item in message['content'] -%}
  {%- if item['type'] == 'image' -%}
  {{ '<start_of_image>' }}
  {%- elif item['type'] == 'text' -%}
  {{ item['text'] | trim }}
  {%- endif -%}
  {%- if not loop.last -%}
  {{ '</tool_result>\n<tool_result>' }}
  {%- endif -%}
  {%- endfor -%}
  {%- else -%}
  {{ raise_exception("Invalid content type") }}
  {%- endif -%}

  {%- if role == "tool" -%}
  {{ '</tool_result>' }}
  {%- endif -%}
  {%- if message['tool_calls'] is defined and message['tool_calls'] | length > 0 %}
  {%- for tool_call in message["tool_calls"] -%}
  {{ '\n<tool_call>{"name": "' + tool_call['function']['name'] + '", "arguments": ' + ('"' + tool_call['function']['arguments'] + '"' if tool_call['function']['arguments'] is string else tool_call['function']['arguments'] | tojson) + '}</tool_call>' }}
  {%- endfor %}
  {%- endif -%}
  {{ '<end_of_turn>\n' }}
  {%- endfor -%}
  {%- if add_generation_prompt -%}
  {{'<start_of_turn>model\n'}}
  {%- endif -%}

special_tokens:
  # NOTE(review): indentation was lost in the mirrored copy; this nesting of
  # eot_tokens follows the source line order, but verify against the axolotl
  # schema — eot_tokens may belong at the top level instead.
  eot_tokens:
    - <end_of_turn>
  eos_token: <end_of_turn>
  additional_special_tokens:
    - <tool_call>
    - </tool_call>
    - <tool_result>
    - </tool_result>

datasets:
  - path: /workspace/data/datasets/sample.jsonl
    ds_type: json
    type: chat_template
    # Only assistant turns contribute to the training loss.
    roles_to_train:
      - assistant

val_set_size: 0.0
output_dir: /workspace/data/training-runs/Home-Gemma3-270m

sequence_len: 4096
sample_packing: true
eval_sample_packing: false

use_tensorboard: true

# batch size = 16
gradient_accumulation_steps: 16
micro_batch_size: 1
num_epochs: 1
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

bf16: true

gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
# Empty: start fresh; set to a checkpoint path to resume training.
resume_from_checkpoint:
logging_steps: 1
flash_attention: true

warmup_ratio: 0.1
# Empty: no evals (val_set_size is 0.0 anyway).
evals_per_epoch:
saves_per_epoch: 1
weight_decay: 0.0