base_model: google/gemma-3-270m-it
model_type: Gemma3ForCausalLM

# gemma3 doesn't seem to play nice with ddp
ddp_find_unused_parameters: true

chat_template: jinja
chat_template_jinja: |
  {{ bos_token }}
  {%- if not tools or tools | length == 0 %}No tools were provided. If the user requests you interact with a device, tell them you are unable to do so.{% else %}
  Tools:
  {% for tool in tools %}
  - {{ tool['function']['name'] }}({{ tool['function']['parameters']['properties'].keys() | join(', ') }}): {{ tool['function']['description'] }}
  {% endfor -%}
  {%- endif -%}
  {%- for message in messages -%}
  {%- if (message['role'] == 'assistant') -%}
  {%- set role = "model" -%}
  {%- elif message['role'] == 'system' -%}
  {%- set role = "user" -%}
  {%- else -%}
  {%- set role = message['role'] -%}
  {%- endif -%}
  {{ '<start_of_turn>' + role + '\n' }}
  {%- if role == "tool" -%}
  {{ '<tool_response>' }}
  {%- endif -%}
  {%- if message['content'] is string -%}
  {{ message['content'] | trim }}
  {%- elif message['content'] is iterable -%}
  {%- for item in message['content'] -%}
  {%- if item['type'] == 'image' -%}
  {{ '<start_of_image>' }}
  {%- elif item['type'] == 'text' -%}
  {{ item['text'] | trim }}
  {%- endif -%}
  {%- if not loop.last -%}
  {{ '\n' }}
  {%- endif -%}
  {%- endfor -%}
  {%- else -%}
  {{ raise_exception("Invalid content type") }}
  {%- endif -%}
  {%- if role == "tool" -%}
  {{ '</tool_response>' }}
  {%- endif -%}
  {%- if message['tool_calls'] is defined and message['tool_calls'] | length > 0 %}
  {%- for tool_call in message["tool_calls"] -%}
  {{ '<tool_call>\n{"name": "' + tool_call['function']['name'] + '", "arguments": ' + ('"' + tool_call['function']['arguments'] + '"' if tool_call['function']['arguments'] is string else tool_call['function']['arguments'] | tojson) + '}\n</tool_call>' }}
  {%- endfor %}
  {%- endif -%}
  {{ '<end_of_turn>\n' }}
  {%- endfor -%}
  {%- if add_generation_prompt -%}
  {{'<start_of_turn>model\n'}}
  {%- endif -%}

special_tokens:
  eot_tokens:
    - <end_of_turn>
  eos_token: <end_of_turn>
  additional_special_tokens:
    - <tool_call>
    - </tool_call>
    - <tool_response>
    - </tool_response>

datasets:
  - path: /workspace/data/datasets/sample.jsonl
    ds_type: json
    type: chat_template
    roles_to_train:
      - assistant
val_set_size: 0.0
output_dir: /workspace/data/training-runs/Home-Gemma3-270m

sequence_len: 4096
sample_packing: true
eval_sample_packing: false

use_tensorboard: true

# effective batch size = gradient_accumulation_steps * micro_batch_size = 16
gradient_accumulation_steps: 16
micro_batch_size: 1
num_epochs: 1
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

bf16: true

gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false

resume_from_checkpoint:
logging_steps: 1
flash_attention: true

warmup_ratio: 0.1
evals_per_epoch:
saves_per_epoch: 1
weight_decay: 0.0
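
# For reference: a sketch of one row of sample.jsonl for the `type: chat_template`
# dataset above. The field shapes are inferred from the chat template (OpenAI-style
# `tools` and `tool_calls`); the tool name `light_turn_on` and its `entity_id`
# parameter are hypothetical placeholders, not taken from the actual dataset:
#
# {"messages": [
#   {"role": "user", "content": "Turn on the kitchen light"},
#   {"role": "assistant", "tool_calls": [{"function": {"name": "light_turn_on", "arguments": {"entity_id": "light.kitchen"}}}]},
#   {"role": "tool", "content": "{\"status\": \"ok\"}"},
#   {"role": "assistant", "content": "The kitchen light is on."}],
#  "tools": [{"function": {"name": "light_turn_on", "description": "Turn on a light entity", "parameters": {"properties": {"entity_id": {"type": "string"}}}}}]}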