mirror of
https://github.com/acon96/home-llm.git
synced 2026-01-09 21:58:00 -05:00
add eval results
This commit is contained in:
@@ -139,6 +139,20 @@
|
|||||||
- 600: 0.9473684210526315
|
- 600: 0.9473684210526315
|
||||||
- 650: 0.9387651821862348
|
- 650: 0.9387651821862348
|
||||||
- Final: 0.9463562753036437
|
- Final: 0.9463562753036437
|
||||||
|
- german: 0.5758754863813229
|
||||||
|
- french: 0.6490034030140982
|
||||||
|
- spanish: 0.6481391976800387
|
||||||
|
|
||||||
|
# rev9
|
||||||
|
- full fine-tune
|
||||||
|
- epochs: 1
|
||||||
|
- batch size: 64
|
||||||
|
- dataset size: medium w/ 4 languages
|
||||||
|
+ eval results:
|
||||||
|
- english: 0.9961183891314895
|
||||||
|
- german: 0.9571984435797666
|
||||||
|
- french: 0.9484686436558094
|
||||||
|
- spanish: 0.9685838569357177
|
||||||
|
|
||||||
|
|
||||||
## stablelm-2-1_6b-zephyr
|
## stablelm-2-1_6b-zephyr
|
||||||
|
|||||||
@@ -191,7 +191,7 @@ def load_model(model_name, is_lora, checkpoint_name):
|
|||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(description="Evaluate the function calling for a model")
|
parser = argparse.ArgumentParser(description="Evaluate the function calling for a model")
|
||||||
parser.add_argument("model")
|
parser.add_argument("model")
|
||||||
parser.add_argument("--dataset_file", default="./data/home_assistant_test.jsonl")
|
parser.add_argument("--dataset-file", default="./data/home_assistant_test.jsonl")
|
||||||
parser.add_argument("--batch-size", default=8)
|
parser.add_argument("--batch-size", default=8)
|
||||||
parser.add_argument("--lora", default=False, action='store_const', const=True)
|
parser.add_argument("--lora", default=False, action='store_const', const=True)
|
||||||
parser.add_argument("--all-checkpoints", default=False, action='store_const', const=True)
|
parser.add_argument("--all-checkpoints", default=False, action='store_const', const=True)
|
||||||
@@ -205,6 +205,7 @@ def main():
|
|||||||
print(f"Got {len(dataset)} examples to test")
|
print(f"Got {len(dataset)} examples to test")
|
||||||
|
|
||||||
# filter out examples that are status requests
|
# filter out examples that are status requests
|
||||||
|
# TODO: instead of filtering out, validate that it doesn't produce a service call
|
||||||
if "text" in dataset:
|
if "text" in dataset:
|
||||||
dataset = dataset.filter(lambda example: "```homeassistant" in example["text"])
|
dataset = dataset.filter(lambda example: "```homeassistant" in example["text"])
|
||||||
else:
|
else:
|
||||||
|
|||||||
Reference in New Issue
Block a user