From b186f7fe37a5563b91771dbe6abff2238e645af8 Mon Sep 17 00:00:00 2001 From: Alex O'Connell Date: Thu, 18 Apr 2024 22:23:19 -0400 Subject: [PATCH] add eval results --- docs/experiment-notes-stablelm.md | 14 ++++++++++++++ evaluate.py | 3 ++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/docs/experiment-notes-stablelm.md b/docs/experiment-notes-stablelm.md index f790d55..d5a3dd6 100644 --- a/docs/experiment-notes-stablelm.md +++ b/docs/experiment-notes-stablelm.md @@ -139,6 +139,20 @@ - 600: 0.9473684210526315 - 650: 0.9387651821862348 - Final: 0.9463562753036437 + - german: 0.5758754863813229 + - french: 0.6490034030140982 + - spanish: 0.6481391976800387 + +# rev9 +- full fine-tune +- epochs: 1 +- batch size: 64 +- dataset size: medium w/ 4 languages +- eval results: + - english: 0.9961183891314895 + - german: 0.9571984435797666 + - french: 0.9484686436558094 + - spanish: 0.9685838569357177 ## stablelm-2-1_6b-zephyr diff --git a/evaluate.py b/evaluate.py index 6fcdd3a..288511f 100644 --- a/evaluate.py +++ b/evaluate.py @@ -191,7 +191,7 @@ def load_model(model_name, is_lora, checkpoint_name): def main(): parser = argparse.ArgumentParser(description="Evaluate the function calling for a model") parser.add_argument("model") - parser.add_argument("--dataset_file", default="./data/home_assistant_test.jsonl") + parser.add_argument("--dataset-file", default="./data/home_assistant_test.jsonl") parser.add_argument("--batch-size", default=8) parser.add_argument("--lora", default=False, action='store_const', const=True) parser.add_argument("--all-checkpoints", default=False, action='store_const', const=True) @@ -205,6 +205,7 @@ def main(): print(f"Got {len(dataset)} examples to test") # filter out examples that are status requests + # TODO: instead of filtering out, validate that it doesn't produce a service call if "text" in dataset: dataset = dataset.filter(lambda example: "```homeassistant" in example["text"]) else: