From e1284324cdacbdddf0172d18bafc82357ce62bd3 Mon Sep 17 00:00:00 2001
From: Witold Gren
Date: Sat, 17 Aug 2024 02:01:23 +0200
Subject: [PATCH] fix typo in script arguments (#205)

---
 docs/Training.md | 24 +++++++++++++++++++++++-
 train.py         | 42 ++++++++++++++++++++++++------------------
 2 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/docs/Training.md b/docs/Training.md
index e5e9bf2..a9dca60 100644
--- a/docs/Training.md
+++ b/docs/Training.md
@@ -325,7 +325,23 @@ tokens with leading whitespace: [220, 128009, 128006, 78191, 128007, 271]
 tokens with leading newline: [198, 128009, 128006, 78191, 128007, 271]
 ```
 
-after that you shuld add extra parameters to you script for properly set those params. This is Supervised Fine Tuning params for correctly hide requests for the model. Few examples you can find below:
+after that you should add extra parameters to your script to set those params properly. These are Supervised Fine Tuning params that let the data collator correctly hide the request tokens from the model during training. The script prints several example token sequences; it is worth looking for the indicated tokens in your log and checking which values are correct (you should be able to find them). In the case above, the correct values are:
+
+```
+prefix_ids = [128006, 78191, 128007, 271]
+suffix_ids = [128009, 128006, 78191, 128007, 271]
+```
+
+to run the script you should use these params:
+
+```
+python3 train.py \
+    ...
+    --prefix_ids 128006,78191,128007,271 \
+    --suffix_ids 128009,128006,78191,128007,271
+```
+
+A few other examples can be found below:
 
 tinyllama:
 ```console
@@ -350,3 +366,9 @@ python3 train.py \
     --prefix_ids 43883,20255,13 \
     --suffix_ids 43882,29871,13
 ```
+
+## Worth reading:
+
+* [Fine-tune Llama 3.1 Ultra-Efficiently with Unsloth](https://mlabonne.github.io/blog/posts/2024-07-29_Finetune_Llama31.html)
+* [Fine-tune Mistral-7b with Direct Preference Optimization](https://mlabonne.github.io/blog/posts/Fine_tune_Mistral_7b_with_DPO.html)
+* [Quantize Llama models with GGUF and llama.cpp](https://mlabonne.github.io/blog/posts/Quantize_Llama_2_models_using_ggml.html)
\ No newline at end of file
diff --git a/train.py b/train.py
index 4847c36..ac4921b 100644
--- a/train.py
+++ b/train.py
@@ -53,10 +53,10 @@ class TrainingRunArguments:
 
     # lora config
     use_lora: bool = field(default=False, metadata={"help": "If set, then the trained model will be a LoRA"})
-    lora_rank: int = field(default=4)
-    lora_alpha: int = field(default=32)
+    lora_rank: int = field(default=4, metadata={"help": "Rank determines the LoRA matrix size. Rank typically starts at 8 but can go up to 256. Higher ranks can store more information but increase the computational and memory cost of LoRA."})
+    lora_alpha: int = field(default=32, metadata={"help": "Alpha is a scaling factor for the updates. Alpha directly impacts the adapter's contribution and is often set to 1x or 2x the rank value."})
     lora_dropout: float = field(default=0.05)
-    lora_modules: str = field(default=None)
+    lora_modules: str = field(default=None, metadata={"help": "Target modules: LoRA can be applied to various model components, including attention mechanisms (Q, K, V matrices), output projections, feed-forward blocks, and linear output layers. While initially focused on attention mechanisms, extending LoRA to other components has shown benefits. However, adapting more modules increases the number of trainable parameters and memory needs."})
     lora_modules_to_save: str = field(default=None, metadata={"help": "Additional modules to save"})
     lora_merge: bool = field(default=False, metadata={"help": "If set, the Lora will be merged back into the base model an saved"})
 
@@ -72,8 +72,8 @@ class TrainingRunArguments:
     sync_to_bucket: str = field(default=None, metadata={"help": "If set, checkpoints will be synced to the s3 bucket specified by this argument"})
     flops_baseline: str = field(default=None, metadata={"help": "The baseline flops for the GPUs used for the training run. Outputs MFU"})
 
-    prefix_ids = str = field(default=None, metadata={"help": "Determine the prefix tokens that surround the response from the assistant for SFT if model can not correctly recognise response."})
-    suffix_ids = str = field(default=None, metadata={"help": "Determine the suffix tokens that surround the response from the assistant for SFT if model can not correctly recognise response."})
+    prefix_ids: str = field(default=None, metadata={"help": "Determine the prefix tokens that surround the response from the assistant for SFT if model can not correctly recognise response."})
+    suffix_ids: str = field(default=None, metadata={"help": "Determine the suffix tokens that surround the response from the assistant for SFT if model can not correctly recognise response."})
 
 
 class UploadToS3Callback(TrainerCallback):
@@ -307,7 +307,9 @@ class DataCollatorForSupervisedFineTuning(object):
         self.tokenizer = tokenizer
 
         if not prefix_ids and not suffix_ids:
-            assistant_prompt = tokenizer.apply_chat_template(conversation=[{"role": "assistant", "content": r"%%%%%%%%%%%%%%%%"}], tokenize=False).split( r"%%%%%%%%%%%%%%%%")
+            assistant_prompt = tokenizer.apply_chat_template(
+                conversation=[{"role": "assistant", "content": r"%%%%%%%%%%%%%%%%"}],
+                tokenize=False).split( r"%%%%%%%%%%%%%%%%")
             self.response_prefix = assistant_prompt[0]
             self.response_suffix = assistant_prompt[1]
 
@@ -422,11 +424,13 @@ def tokenize_sharegpt_example(batch):
     result = []
     for example in batch["conversations"]:
         conversation = [ { "role": x["from"], "content": x["value"] } for x in example ]
-        result.append(tokenizer.apply_chat_template(
-            conversation=conversation,
-            max_length=training_run_args.ctx_size,
-            truncation=True,
-        ))
+        result.append(
+            tokenizer.apply_chat_template(
+                conversation=conversation,
+                max_length=training_run_args.ctx_size,
+                truncation=True,
+            )
+        )
 
     return {"input_ids": result}
 
@@ -438,13 +442,15 @@ def template_dpo_example(batch):
             { "role": "system", "content": example[0] },
             { "role": "user", "content": example[1] },
         ]
-        result.append(tokenizer.apply_chat_template(
-            conversation=conversation,
-            max_length=training_run_args.ctx_size,
-            truncation=True,
-            tokenize=False,
-            add_generation_prompt=True
-        ))
+        result.append(
+            tokenizer.apply_chat_template(
+                conversation=conversation,
+                max_length=training_run_args.ctx_size,
+                truncation=True,
+                tokenize=False,
+                add_generation_prompt=True
+            )
+        )
 
     return {"prompt": result}
 
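
Note for readers following the patch: the hunks above declare `--prefix_ids` and `--suffix_ids` as comma-separated strings but do not show how they are turned into lists of token IDs for the data collator. A minimal sketch of that conversion, assuming a plain comma-split (the `parse_id_list` helper below is a hypothetical illustration, not code from train.py):

```python
def parse_id_list(raw: str) -> list[int]:
    """Turn a CLI value such as "128006,78191,128007,271" into a list of token IDs."""
    return [int(token) for token in raw.split(",") if token.strip()]


# Hypothetical usage with the values suggested in docs/Training.md above.
prefix_ids = parse_id_list("128006,78191,128007,271")
suffix_ids = parse_id_list("128009,128006,78191,128007,271")
assert prefix_ids == [128006, 78191, 128007, 271]
assert suffix_ids == [128009, 128006, 78191, 128007, 271]
```

In train.py this kind of parsing would happen before DataCollatorForSupervisedFineTuning is constructed; the exact wiring is outside the hunks shown here.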