mirror of
https://github.com/acon96/home-llm.git
synced 2026-01-08 21:28:05 -05:00
let's try functiongemma instead
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
{%- if not tools or tools | length == 0 %}No tools were provided. If the user requests you interact with a device, tell them you are unable to do so.{% else %}
|
||||
Tools:
|
||||
{% for tool in tools %}
|
||||
- {{ tool['name'] }}({{ tool['parameters']['properties'].keys() | join(', ') }}): {{ tool['description'] }}
|
||||
- {{ tool['function']['name'] }}({{ tool['function']['parameters']['properties'].keys() | join(', ') }}): {{ tool['function']['description'] }}
|
||||
{% endfor -%}
|
||||
{%- endif -%}
|
||||
{%- for message in messages -%}
|
||||
@@ -13,8 +13,7 @@ Tools:
|
||||
{%- else -%}
|
||||
{%- set role = message['role'] -%}
|
||||
{%- endif -%}
|
||||
{{ '<start_of_turn>' + role + '
|
||||
' }}
|
||||
{{ '<start_of_turn>' + role + '\n' }}
|
||||
{%- if role == "tool" -%}
|
||||
{{ '<tool_result>' }}
|
||||
{%- endif -%}
|
||||
@@ -41,13 +40,7 @@ Tools:
|
||||
{%- endif -%}
|
||||
{%- if message['tool_calls'] is defined and message['tool_calls'] | length > 0 %}
|
||||
{%- for tool_call in message["tool_calls"] -%}
|
||||
{{ '\n<tool_call>{"name": "' + tool_call['name'] + '", "arguments": ' + ('"' + tool_call['arguments'] + '"' if tool_call['arguments'] is string else tool_call['arguments'] | tojson) + '"}</tool_call>' }}
|
||||
{#- Emit one <tool_call> JSON object per call. "arguments" is wrapped in quotes when it is already a string, otherwise serialized with tojson. The closing literal must be just '}' so the object stays balanced. -#}
{{ '\n<tool_call>{"name": "' + tool_call['function']['name'] + '", "arguments": ' + ('"' + tool_call['function']['arguments'] + '"' if tool_call['function']['arguments'] is string else tool_call['function']['arguments'] | tojson) + '}</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif -%}
|
||||
{{ '<end_of_turn>
|
||||
' }}
|
||||
{%- endfor -%}
|
||||
{%- if add_generation_prompt -%}
|
||||
{{'<start_of_turn>model
|
||||
'}}
|
||||
{%- endif -%}
|
||||
{{ '<end_of_turn>\n' }}
|
||||
43
train/functiongemma-270m.yml
Normal file
43
train/functiongemma-270m.yml
Normal file
@@ -0,0 +1,43 @@
|
||||
# Fine-tuning config for the FunctionGemma 270M instruction-tuned base model.
# NOTE(review): keys look like the Axolotl config schema — confirm against the trainer in use.
base_model: google/functiongemma-270m-it
model_type: Gemma3ForCausalLM

# gemma3 doesn't seem to play nice with ddp
ddp_find_unused_parameters: true

datasets:
  - path: /workspace/data/datasets/sample.jsonl
    ds_type: json
    type: chat_template
    # Only assistant turns are listed as training targets.
    roles_to_train:
      - assistant

# presumably 0.0 means no validation split is held out — evals_per_epoch is left unset below
val_set_size: 0.0
# NOTE(review): directory name still says "Gemma3" although base_model is functiongemma —
# confirm this does not collide with another run's output directory.
output_dir: /workspace/data/training-runs/Home-Gemma3-270m

sequence_len: 4096
sample_packing: true
eval_sample_packing: false

use_tensorboard: true

# batch size = 16 (gradient_accumulation_steps 16 x micro_batch_size 1)
gradient_accumulation_steps: 16
micro_batch_size: 1
num_epochs: 1
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

bf16: true

gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
# Explicit null: no checkpoint to resume from.
resume_from_checkpoint: null
logging_steps: 1
flash_attention: true

warmup_ratio: 0.1
# Explicit null: left unset, as in the original bare `evals_per_epoch:`.
evals_per_epoch: null
saves_per_epoch: 1
weight_decay: 0.0
|
||||
@@ -10,7 +10,7 @@ chat_template_jinja: |
|
||||
{%- if not tools or tools | length == 0 %}No tools were provided. If the user requests you interact with a device, tell them you are unable to do so.{% else %}
|
||||
Tools:
|
||||
{% for tool in tools %}
|
||||
- {{ tool['name'] }}({{ tool['parameters']['properties'].keys() | join(', ') }}): {{ tool['description'] }}
|
||||
- {{ tool['function']['name'] }}({{ tool['function']['parameters']['properties'].keys() | join(', ') }}): {{ tool['function']['description'] }}
|
||||
{% endfor -%}
|
||||
{%- endif -%}
|
||||
{%- for message in messages -%}
|
||||
@@ -21,8 +21,7 @@ chat_template_jinja: |
|
||||
{%- else -%}
|
||||
{%- set role = message['role'] -%}
|
||||
{%- endif -%}
|
||||
{{ '<start_of_turn>' + role + '
|
||||
' }}
|
||||
{{ '<start_of_turn>' + role + '\n' }}
|
||||
{%- if role == "tool" -%}
|
||||
{{ '<tool_result>' }}
|
||||
{%- endif -%}
|
||||
@@ -49,15 +48,13 @@ chat_template_jinja: |
|
||||
{%- endif -%}
|
||||
{%- if message['tool_calls'] is defined and message['tool_calls'] | length > 0 %}
|
||||
{%- for tool_call in message["tool_calls"] -%}
|
||||
{{ '\n<tool_call>{"name": "' + tool_call['name'] + '", "arguments": ' + ('"' + tool_call['arguments'] + '"' if tool_call['arguments'] is string else tool_call['arguments'] | tojson) + '"}</tool_call>' }}
|
||||
{#- Emit one <tool_call> JSON object per call. "arguments" is wrapped in quotes when it is already a string, otherwise serialized with tojson. The closing literal must be just '}' so the object stays balanced. -#}
{{ '\n<tool_call>{"name": "' + tool_call['function']['name'] + '", "arguments": ' + ('"' + tool_call['function']['arguments'] + '"' if tool_call['function']['arguments'] is string else tool_call['function']['arguments'] | tojson) + '}</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif -%}
|
||||
{{ '<end_of_turn>
|
||||
' }}
|
||||
{{ '<end_of_turn>\n' }}
|
||||
{%- endfor -%}
|
||||
{%- if add_generation_prompt -%}
|
||||
{{'<start_of_turn>model
|
||||
'}}
|
||||
{{'<start_of_turn>model\n'}}
|
||||
{%- endif -%}
|
||||
special_tokens:
|
||||
eot_tokens:
|
||||
|
||||
Reference in New Issue
Block a user