support many checkpoints in evaluate

Alex O'Connell
2024-03-23 10:09:42 -04:00
parent d51c172c07
commit 0a6b41d5bc
3 changed files with 137 additions and 83 deletions

View File

@@ -138,4 +138,32 @@
- 550:
- 600: 0.9473684210526315
- 650: 0.9387651821862348
- Final: 0.9463562753036437
## stablelm-2-1_6b-zephyr
# rev3
- full fine tune
- epochs: 1
- 2048 train ctx
- batch size 32
- learning rate 1e-5
- weight decay 0.1
- gradient clipping 1.0
- dataset size: medium
- evaluation results:
- 100: 0.35779352226720645
- 200: 0.5247975708502024
- 300: 0.5339068825910931
- 400: 0.6280364372469636
- 500: 0.6923076923076923
- 600: 0.7064777327935222
- 700: 0.7135627530364372
- 800: 0.7044534412955465
- 900: 0.707995951417004
- 1000: 0.718117408906882
- Final: 0.7145748987854251
# rev4
- dataset size: large
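
As an aside, these per-checkpoint accuracy lists are easy to post-process; a minimal sketch for picking the best checkpoint (illustrative only, assuming the `- <step>: <accuracy>` line format used above):

```python
import re

def best_checkpoint(log_text: str):
    # Parse "- <step>: <accuracy>" lines into (step, accuracy) pairs.
    pairs = [(int(s), float(a)) for s, a in re.findall(r"- (\d+): ([0-9.]+)", log_text)]
    # Return the step with the highest accuracy, e.g. (1000, 0.7181...) for rev3.
    return max(pairs, key=lambda p: p[1])
```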

View File: evaluate.py

@@ -10,23 +10,11 @@ from tqdm import tqdm
CTX_SIZE = 2048
"""
python3 evaluate.py stablehome-3b-rev7/checkpoint-50 --batch-size 4 --lora && \
python3 evaluate.py stablehome-3b-rev7/checkpoint-100 --batch-size 4 --lora && \
python3 evaluate.py stablehome-3b-rev7/checkpoint-150 --batch-size 4 --lora && \
python3 evaluate.py stablehome-3b-rev7/checkpoint-200 --batch-size 4 --lora && \
python3 evaluate.py stablehome-3b-rev7/checkpoint-250 --batch-size 4 --lora && \
python3 evaluate.py stablehome-3b-rev7/checkpoint-300 --batch-size 4 --lora && \
python3 evaluate.py stablehome-3b-rev7/checkpoint-350 --batch-size 4 --lora && \
python3 evaluate.py stablehome-3b-rev7/checkpoint-400 --batch-size 4 --lora && \
python3 evaluate.py stablehome-3b-rev7/checkpoint-450 --batch-size 4 --lora && \
python3 evaluate.py stablehome-3b-rev7/checkpoint-500 --batch-size 4 --lora && \
python3 evaluate.py stablehome-3b-rev7/checkpoint-550 --batch-size 4 --lora && \
python3 evaluate.py stablehome-3b-rev7/checkpoint-600 --batch-size 4 --lora && \
python3 evaluate.py stablehome-3b-rev7/checkpoint-650 --batch-size 4 --lora && \
python3 evaluate.py stablehome-3b-rev7 --batch-size 4 --lora
python3 evaluate.py stablehome-1_6b-rev3 --batch-size 8 --all-checkpoints
python3 evaluate.py tinyhome-1b-rev1 --batch-size 8 --all-checkpoints
"""
# TODO: auto detect all the checkpoints to run
service_call_regex = re.compile(r"```homeassistant\n([\S \t\n]*?)```")
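# Illustrative example (not part of the original file) of what service_call_regex
# captures from a model response:
#   text = "```homeassistant\n{\"service\": \"light.turn_on\"}\n```"
#   service_call_regex.search(text).group(1)  # -> '{"service": "light.turn_on"}\n'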
def tokenize(tokenizer, prompt):
return tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=CTX_SIZE)
@@ -38,72 +26,10 @@ def generate(model, tokenizer, prompts):
text = tokenizer.batch_decode(outputs)
return text
def main():
parser = argparse.ArgumentParser(description="Evaluate the function calling for a model")
parser.add_argument("model")
parser.add_argument("--dataset_file", default="./data/home_assistant_test.jsonl")
parser.add_argument("--batch-size", default=8)
parser.add_argument("--lora", default=False,action='store_const', const=True)
args = parser.parse_args()
lora_folder = f"./loras/{args.model}"
model_folder = f"./models/{args.model}"
dataset = load_dataset("json", data_files={ "train": args.dataset_file })["train"]
print(f"Got {len(dataset)} examples to test")
# filter out examples that are status requests
if "text" in dataset:
dataset = dataset.filter(lambda example: "```homeassistant" in example["text"])
else:
dataset = dataset.filter(lambda example: "```homeassistant" in example["conversations"][2]["value"])
service_call_regex = re.compile(r"```homeassistant\n([\S \t\n]*?)```")
torch.set_default_device("cuda")
if args.lora:
adapter_config = PeftConfig.from_pretrained(lora_folder)
base_model_name = adapter_config.base_model_name_or_path
print(f"Loading lora from {lora_folder} ({base_model_name})...")
base_model = AutoModelForCausalLM.from_pretrained(
base_model_name,
trust_remote_code=True,
torch_dtype=torch.bfloat16,
)
trained_tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True, padding_side='left')
trained_model = PeftModel.from_pretrained(base_model, lora_folder, trust_remote_code=True, torch_dtype=torch.bfloat16)
output_folder = lora_folder
else:
print(f"Loading model from {model_folder}...")
trained_model = AutoModelForCausalLM.from_pretrained(
model_folder,
trust_remote_code=True,
torch_dtype=torch.bfloat16,
)
trained_tokenizer = AutoTokenizer.from_pretrained(model_folder, trust_remote_code=True, padding_side='left')
output_folder = model_folder
trained_model.generation_config = GenerationConfig(
max_new_tokens=128,
use_cache=True,
do_sample=True,
temperature=0.1,
top_k=40,
top_p=1.0,
repetition_penalty=1.15,
eos_token_id=trained_model.config.eos_token_id,
pad_token_id=trained_model.config.pad_token_id if trained_model.config.pad_token_id else trained_model.config.eos_token_id,
)
def evaluate(output_folder, trained_model, trained_tokenizer, dataset, batch_size):
split = trained_tokenizer.apply_chat_template(conversation=[{"role": "assistant", "content": r"%%%%%%%%%%%%%%%%"}], tokenize=False).split(r"%%%%%%%%%%%%%%%%")[0]
print("Evaluating...")
batch_size = int(batch_size)  # args is not in scope here; coerce the parameter instead
correct_answers = 0
total_answers = 0
color_mismatches = 0
@@ -193,6 +119,94 @@ def main():
"failed_examples": failed_examples,
}, f, indent=4)
def load_model(model_name, is_lora, checkpoint_name):
lora_folder = f"./loras/{model_name}/"
model_folder = f"./models/{model_name}/"
# tokenizer isn't saved into checkpoint folders
tokenizer_folder = lora_folder if is_lora else model_folder
if checkpoint_name:
lora_folder = lora_folder + f"{checkpoint_name}/"
model_folder = model_folder + f"{checkpoint_name}/"
if is_lora:
adapter_config = PeftConfig.from_pretrained(lora_folder)
base_model_name = adapter_config.base_model_name_or_path
print(f"Loading lora from {lora_folder} ({base_model_name})...")
base_model = AutoModelForCausalLM.from_pretrained(
base_model_name,
trust_remote_code=True,
torch_dtype=torch.bfloat16,
)
trained_model = PeftModel.from_pretrained(base_model, lora_folder, trust_remote_code=True, torch_dtype=torch.bfloat16)
output_folder = lora_folder
else:
print(f"Loading model from {model_folder}...")
trained_model = AutoModelForCausalLM.from_pretrained(
model_folder,
trust_remote_code=True,
torch_dtype=torch.bfloat16,
)
output_folder = model_folder
trained_tokenizer = AutoTokenizer.from_pretrained(tokenizer_folder, trust_remote_code=True, padding_side='left')
trained_model.generation_config = GenerationConfig(
max_new_tokens=128,
use_cache=True,
do_sample=True,
temperature=0.1,
top_k=40,
top_p=1.0,
repetition_penalty=1.15,
eos_token_id=trained_model.config.eos_token_id,
pad_token_id=trained_model.config.pad_token_id if trained_model.config.pad_token_id is not None else trained_model.config.eos_token_id,
)
return trained_model, trained_tokenizer, output_folder
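# Hypothetical usage of load_model, for illustration:
#   load_model("stablehome-1_6b-rev3", is_lora=False, checkpoint_name=None) loads the
#   final model from ./models/stablehome-1_6b-rev3/, while passing
#   checkpoint_name="checkpoint-100" loads that checkpoint but still reads the
#   tokenizer from the run's root folder.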
def main():
parser = argparse.ArgumentParser(description="Evaluate the function calling for a model")
parser.add_argument("model")
parser.add_argument("--dataset_file", default="./data/home_assistant_test.jsonl")
parser.add_argument("--batch-size", default=8)
parser.add_argument("--lora", default=False, action='store_const', const=True)
parser.add_argument("--all-checkpoints", default=False, action='store_const', const=True)
args = parser.parse_args()
batch_size = int(args.batch_size)
dataset = load_dataset("json", data_files={ "train": args.dataset_file })["train"]
print(f"Got {len(dataset)} examples to test")
# filter out examples that are status requests
if "text" in dataset:
dataset = dataset.filter(lambda example: "```homeassistant" in example["text"])
else:
dataset = dataset.filter(lambda example: "```homeassistant" in example["conversations"][2]["value"])
torch.set_default_device("cuda")
if not args.all_checkpoints:
checkpoints = [None]
else:
if args.lora:
ckpt_folder = f"./loras/{args.model}"
else:
ckpt_folder = f"./models/{args.model}"
checkpoints = [x for x in os.listdir(ckpt_folder) if os.path.isdir(os.path.join(ckpt_folder, x)) and "checkpoint" in x]
checkpoints.append(None)
print(f"Found {len(checkpoints) - 1} checkpoints to test (plus the final model)")
for ckpt in checkpoints:
trained_model, trained_tokenizer, output_folder = load_model(args.model, args.lora, ckpt)
evaluate(output_folder, trained_model, trained_tokenizer, dataset, batch_size)
if __name__ == "__main__":
main()
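
One caveat in the checkpoint discovery above: os.listdir returns entries in arbitrary filesystem order, so checkpoints are not necessarily evaluated chronologically. A minimal sketch of a numeric sort, assuming the Hugging Face Trainer checkpoint-<step> naming these folders use:

import os

def sorted_checkpoints(folder):
    # Keep only checkpoint-<step> subfolders, ordered by training step.
    names = [n for n in os.listdir(folder)
             if os.path.isdir(os.path.join(folder, n)) and n.startswith("checkpoint-")]
    return sorted(names, key=lambda n: int(n.split("-")[-1]))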

View File: train.py

@@ -55,14 +55,14 @@ python3 train.py \
"""
python3 train.py \
--run_name stablehome-1_6b-rev2 \
--run_name stablehome-1_6b-rev3 \
--base_model stabilityai/stablelm-2-zephyr-1_6b \
--bf16 \
--train_dataset data/home_assistant_train.jsonl \
--test_dataset data/home_assistant_test.jsonl \
--learning_rate 1e-5 \
--micro_batch_size 2 --gradient_checkpointing \
--ctx_size 2048 --save_steps 200 --save_total_limit 6
--learning_rate 1e-5 --batch_size 32 \
--micro_batch_size 2 --gradient_checkpointing --group_by_length \
--ctx_size 2048 --save_steps 100 --save_total_limit 20
"""
"""
@@ -89,6 +89,18 @@ python3 train.py \
--add_pad_token --bf16 --micro_batch_size 4 --learning_rate 2e-5
"""
"""
python3 train.py \
--run_name tinyhome-rev1 \
--base_model TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
--bf16 \
--train_dataset data/home_assistant_train.jsonl \
--test_dataset data/home_assistant_test.jsonl \
--learning_rate 5e-7 --batch_size 32 \
--micro_batch_size 2 --gradient_checkpointing --group_by_length \
--ctx_size 2048 --save_steps 100 --save_total_limit 10
"""
@dataclass
class TrainingRunArguments:
run_name: str = field(metadata={"help": "The folder to save the output model under"})