add eval results

This commit is contained in:
Alex O'Connell
2024-04-18 22:23:19 -04:00
parent bdda97eb45
commit b186f7fe37
2 changed files with 16 additions and 1 deletions

View File

@@ -139,6 +139,20 @@
- 600: 0.9473684210526315
- 650: 0.9387651821862348
- Final: 0.9463562753036437
- german: 0.5758754863813229
- french: 0.6490034030140982
- spanish: 0.6481391976800387
# rev9
- full fine-tune
- epochs: 1
- batch size: 64
- dataset size: medium w/ 4 languages
+ eval results:
- english: 0.9961183891314895
- german: 0.9571984435797666
- french: 0.9484686436558094
- spanish: 0.9685838569357177
## stablelm-2-1_6b-zephyr

View File

@@ -191,7 +191,7 @@ def load_model(model_name, is_lora, checkpoint_name):
def main():
parser = argparse.ArgumentParser(description="Evaluate the function calling for a model")
parser.add_argument("model")
parser.add_argument("--dataset_file", default="./data/home_assistant_test.jsonl")
parser.add_argument("--dataset-file", default="./data/home_assistant_test.jsonl")
parser.add_argument("--batch-size", default=8)
parser.add_argument("--lora", default=False, action='store_const', const=True)
parser.add_argument("--all-checkpoints", default=False, action='store_const', const=True)
@@ -205,6 +205,7 @@ def main():
print(f"Got {len(dataset)} examples to test")
# filter out examples that are status requests
# TODO: instead of filtering out, validate that it doesn't produce a service call
if "text" in dataset:
dataset = dataset.filter(lambda example: "```homeassistant" in example["text"])
else: