fix dataset generation

This commit is contained in:
Alex O'Connell
2024-02-05 21:05:28 -05:00
parent cc2c21cab5
commit 3bf674ae29
3 changed files with 71 additions and 39 deletions

View File

@@ -274,7 +274,9 @@ def get_included_vars(response: str):
pile_of_responses["contains_vars"] = pile_of_responses["response"].apply(get_included_vars)
def get_random_response(*, service: str, language: str, persona: str, required_vars: list[str], short: bool) -> str:
def get_random_response(*, service: str, language: str, persona: str, question_template: str, short: bool) -> str:
required_vars = list(set([var for var in var_pattern.findall(question_template) if "device_name" not in var]))
possible_results = pile_of_responses.loc[(pile_of_responses['service']==service) &
(pile_of_responses['language']==language) &
@@ -284,7 +286,7 @@ def get_random_response(*, service: str, language: str, persona: str, required_v
]
if len(possible_results) == 0:
raise Exception(f"No responses matched the provided filters: {service}, {language}, {persona}, {required_vars}, {short}")
raise Exception(f"No responses matched the provided filters: {service}, {language}, {persona}, {question_template}, {short}")
return possible_results.sample()["response"].values[0]
@@ -355,10 +357,10 @@ def random_device_list(max_devices: int, avoid_device_names: list[str]):
return device_lines, list(device_types), list(extra_exposed_attributes)
def generate_static_example(action: dict, max_devices: int = 32):
def generate_static_example(action: dict, language: str, persona: str, max_devices: int = 32):
question = action["english_phrase"]
device_type = service_name.split(".")[0]
service_name = action["service_name"]
device_type = service_name.split(".")[0]
target_device = f"{device_type}.{action['device_name']}"
friendly_name = target_device.split(".")[1].replace("_", " ").title()
@@ -384,10 +386,12 @@ def generate_static_example(action: dict, max_devices: int = 32):
service=action["service_name"],
language="en",
persona="assistant",
required_vars=[],
question_template="",
short=False
).lower()
response = response.replace("<device_name>", friendly_name)
return {
"states": device_list,
"available_services": list(available_services),
@@ -396,7 +400,7 @@ def generate_static_example(action: dict, max_devices: int = 32):
"service_calls": [ { "service": service_name, "target_device": target_device } ]
}
def generate_templated_example(template: dict, max_devices: int = 32):
def generate_templated_example(template: dict, language: str, persona: str, max_devices: int = 32):
template_device_types: list[str] = template["device_type"].split("|")
service_names: list[str] = [ f"{x}.{y}" for x, y in zip(template_device_types, template["service"].split("|")) ]
question_template: str = template["english_phrase"]
@@ -423,6 +427,8 @@ def generate_templated_example(template: dict, max_devices: int = 32):
extra_exposed_attributes.append("temperature")
if "<humidity>" in question_template and "humidity" not in extra_exposed_attributes:
extra_exposed_attributes.append("humidity")
if "<fan_mode>" in question_template and "fan_mode" not in extra_exposed_attributes:
extra_exposed_attributes.append("fan_mode")
state = SUPPORTED_DEVICES[device_dict["type"]].get_random_state(extra_exposed_attributes=extra_exposed_attributes)
device_name = device_dict["device_name"]
@@ -442,19 +448,32 @@ def generate_templated_example(template: dict, max_devices: int = 32):
# pick an appropriate response and generate the question
if len(template_device_types) == 1:
# TODO: pick correct response here (also probably need to pass in language and persona)
answer_template: str = get_random_response(
service=service_name
answer_template = get_random_response(
service=service_names[0],
language=language,
persona=persona,
question_template=question_template,
short=False
)
question = question_template.replace("<device_name>", chosen_devices[0]["description"])
answer = answer_template.replace("<device_name>", chosen_devices[0]["description"])
else:
# TODO: pick correct response here (also probably need to pass in language and persona)
else:
question = question_template
answer = answer_template
answers = []
for i in range(len(template_device_types)):
question = question.replace(f"<device_name{(i + 1)}>", chosen_devices[i]["description"])
answer = answer.replace(f"<device_name{(i + 1)}>", chosen_devices[i]["description"])
answer = get_random_response(
service=service_names[i],
language=language,
persona=persona,
question_template=question_template,
short=True
)
answers.append(answer.replace(f"<device_name>", chosen_devices[i]["description"]))
# TODO: support different "and" words per language
answer = " and ".join(answers)
# generate the list of service calls and answers
service_calls = []
@@ -462,6 +481,18 @@ def generate_templated_example(template: dict, max_devices: int = 32):
service_calls.append({ "service": service, "target_device": device_dict["device_name"] })
if any(["climate" in service for service in service_names ]):
if "<hvac_mode>" in question:
hvac_mode = random.choice(["heat", "cool", "heat_cool", "off", "auto", "fan_only"])
question = question.replace("<hvac_mode>", hvac_mode)
answer = answer.replace("<hvac_mode>", hvac_mode)
service_calls = [ { **call, "hvac_mode": hvac_mode} for call in service_calls ]
if "<fan_mode>" in question:
fan_mode = random.choice(["On Low", "On High", "Auto Low", "Auto High", "Off"])
question = question.replace("<fan_mode>", fan_mode)
answer = answer.replace("<fan_mode>", fan_mode)
service_calls = [ { **call, "fan_mode": fan_mode} for call in service_calls ]
if "<temp_f>" in question:
temp_f = random.randint(60, 80)
question = question.replace("<temp_f>", str(temp_f))
@@ -505,7 +536,7 @@ def generate_templated_example(template: dict, max_devices: int = 32):
"service_calls": service_calls
}
def generate_status_request(template: dict, max_devices: int = 32):
def generate_status_request(template: dict, language: str, persona: str, max_devices: int = 32):
device_type: str = template["device_type"]
state_name: str = template["state"]
question_template: str = template["english_phrase"]
@@ -630,7 +661,7 @@ def format_example_sharegpt(example):
return { "conversations": conversation }
def generate_example_file(filename: str, seed: int, format_func: Callable, *, static_factor: int, template_factor: int, status_request_factor: int):
def generate_example_file(filename: str, seed: int, format_func: Callable, language: str, persona: str, *, static_factor: int, template_factor: int, status_request_factor: int):
random.seed(seed)
np.random.seed(seed)
@@ -639,10 +670,10 @@ def generate_example_file(filename: str, seed: int, format_func: Callable, *, st
def run_factor_times(func, examples, data, factor):
if factor >= 1:
for i in range(factor):
examples.append(format_func(func(data)))
examples.append(format_func(func(data, language, persona)))
else:
if random.random() < factor:
examples.append(format_func(func(data)))
examples.append(format_func(func(data, language, persona)))
generated_examples = []
for action in tqdm(pile_of_specific_actions):
@@ -731,6 +762,9 @@ def main():
args = parser.parse_args()
language = "en"
persona = "assistant"
if not args.sample and not args.train and not args.test and not args.merge:
parser.print_usage()
@@ -740,20 +774,20 @@ def main():
format_func = format_example_sharegpt
if args.sample:
generate_example_file("sample", 42, format_func, static_factor=1, template_factor=1, status_request_factor=1)
generate_example_file("sample", 42, format_func, language, persona, static_factor=1, template_factor=1, status_request_factor=1)
if args.train:
if args.size == "small":
generate_example_file("home_assistant_train", 42, format_func, static_factor=1, template_factor=10, status_request_factor=8)
generate_example_file("home_assistant_train", 42, format_func, language, persona, static_factor=1, template_factor=10, status_request_factor=8)
elif args.size == "medium":
generate_example_file("home_assistant_train", 42, format_func, static_factor=5, template_factor=15, status_request_factor=12)
generate_example_file("home_assistant_train", 42, format_func, language, persona, static_factor=5, template_factor=15, status_request_factor=12)
elif args.size == "large":
generate_example_file("home_assistant_train", 42, format_func, static_factor=5, template_factor=20, status_request_factor=15)
generate_example_file("home_assistant_train", 42, format_func, language, persona, static_factor=5, template_factor=20, status_request_factor=15)
elif args.size == "xl":
generate_example_file("home_assistant_train", 42, format_func, static_factor=7, template_factor=25, status_request_factor=18)
generate_example_file("home_assistant_train", 42, format_func, language, persona, static_factor=7, template_factor=25, status_request_factor=18)
else:
raise Exception(f"Unrecognized dataset size: {args.size}")
if args.test:
generate_example_file("home_assistant_test", 12345, format_func, static_factor=0.25, template_factor=3, status_request_factor=2)
generate_example_file("home_assistant_test", 12345, format_func, language, persona, static_factor=0.25, template_factor=3, status_request_factor=2)
if args.merge == "alpaca":
merge_with_dataset("yahma/alpaca-cleaned", 42, "alpaca", format_alpaca, ["input", "output", "instruction"], format_func)

View File

@@ -29,12 +29,12 @@ blinds.toggle,Toggling <device_name>,en,assistant,0
climate.set_humidity,Increasing humidity to <humidity>.,en,assistant,0
climate.set_humidity,Setting humidity to <humidity> percent.,en,assistant,0
climate.set_humidity,Adjusting humidity to <humidity>%.,en,assistant,0
climate.set_fan_mode,Setting the fan to high speed.,en,assistant,0
climate.set_fan_mode,Putting the fan on low.,en,assistant,0
climate.set_fan_mode,Changing the fan to medium setting.,en,assistant,0
climate.set_hvac_mode,Switching to cooling mode.,en,assistant,0
climate.set_hvac_mode,Setting the HVAC to heat.,en,assistant,0
climate.set_hvac_mode,Changing HVAC to automatic mode.,en,assistant,0
climate.set_fan_mode,Setting the fan to <fan_mode> speed.,en,assistant,0
climate.set_fan_mode,Putting the fan on <fan_mode>.,en,assistant,0
climate.set_fan_mode,Changing the fan to <fan_mode> setting.,en,assistant,0
climate.set_hvac_mode,Switching to <hvac_mode> mode.,en,assistant,0
climate.set_hvac_mode,Setting the HVAC to <hvac_mode>.,en,assistant,0
climate.set_hvac_mode,Changing HVAC to <hvac_mode> mode.,en,assistant,0
climate.set_temperature,Setting temperature to <temp_f> degrees.,en,assistant,0
climate.set_temperature,Changing temperature to <temp_c> Celsius.,en,assistant,0
climate.set_temperature,Setting the room to <temp_f> degrees Fahrenheit.,en,assistant,0
1 service response language persona short
29 climate.set_humidity Increasing humidity to <humidity>. en assistant 0
30 climate.set_humidity Setting humidity to <humidity> percent. en assistant 0
31 climate.set_humidity Adjusting humidity to <humidity>%. en assistant 0
32 climate.set_fan_mode Setting the fan to high speed. Setting the fan to <fan_mode> speed. en assistant 0
33 climate.set_fan_mode Putting the fan on low. Putting the fan on <fan_mode>. en assistant 0
34 climate.set_fan_mode Changing the fan to medium setting. Changing the fan to <fan_mode> setting. en assistant 0
35 climate.set_hvac_mode Switching to cooling mode. Switching to <hvac_mode> mode. en assistant 0
36 climate.set_hvac_mode Setting the HVAC to heat. Setting the HVAC to <hvac_mode>. en assistant 0
37 climate.set_hvac_mode Changing HVAC to automatic mode. Changing HVAC to <hvac_mode> mode. en assistant 0
38 climate.set_temperature Setting temperature to <temp_f> degrees. en assistant 0
39 climate.set_temperature Changing temperature to <temp_c> Celsius. en assistant 0
40 climate.set_temperature Setting the room to <temp_f> degrees Fahrenheit. en assistant 0

View File

@@ -179,19 +179,17 @@ climate,set_temperature,"Set the temperature to <temp_f> degrees.",8
climate,set_temperature,"Can you change the temperature to <temp_c> Celsius?",8
climate,set_temperature,"I'd like the room at <temp_f> degrees Fahrenheit",8
climate,set_temperature,"Please adjust the temperature to <temp_f> degrees.",8
climate,set_temperature,"I want the room cooler,8
climate,set_temperature,"Make it warmer,8
climate,set_temperature,"Can you lower the temperature to <temp_c>?",8
climate,set_temperature,"Raise the temperature to <temp_f> degrees,8
climate,set_temperature,"Raise the temperature to <temp_f> degrees",8
climate,set_humidity,"Increase the humidity to <humidity>.",8
climate,set_humidity,"Set the humidity level to <humidity> percent.",8
climate,set_humidity,"Can you adjust the humidity to <humidity> percent?",8
climate,set_fan_mode,"Set the fan to high speed.",8
climate,set_fan_mode,"Please put the fan on low.",8
climate,set_fan_mode,"Change the fan setting to medium.",8
climate,set_hvac_mode,"Switch the system to cooling mode.",8
climate,set_hvac_mode,"Can we set the HVAC to heat?",8
climate,set_hvac_mode,"Change the HVAC to automatic.",8
climate,set_fan_mode,"Set the climate fan to <fan_mode> speed.",8
climate,set_fan_mode,"Please put the climate fan on <fan_mode>.",8
climate,set_fan_mode,"Change the air conditioning fan to <fan_mode>.",8
climate,set_hvac_mode,"Switch the system to <hvac_mode> mode.",8
climate,set_hvac_mode,"Can we set the HVAC to <hvac_mode>?",8
climate,set_hvac_mode,"Change the HVAC to <hvac_mode>.",8
light,turn_on,"Set the brightness of <device_name> to <brightness>%.",8
light,turn_on,"Dim <device_name> to <brightness> percent brightness.",8
light,turn_on,"Brighten <device_name> to <brightness>.",8
@@ -199,12 +197,12 @@ light,turn_on,"Adjust <device_name> brightness to <brightness>.",8
light,turn_on,"Increase <device_name>'s brightness to <brightness>.",8
light,turn_on,"Lower the brightness of <device_name> to <brightness>.",8
light,turn_on,"Can you set <device_name>'s brightness level to <brightness> percent?",8
light,turn_on,"I'd like <device_name> at <brightness> percent brightness,8
light,turn_on,"I'd like <device_name> at <brightness> percent brightness",8
light,turn_on,"Can you make <device_name> <color>?",8
light,turn_on,"Change the color of <device_name> to <color>.",8
light,turn_on,"Change <device_name> to a <color> hue.",8
light,turn_on,"Set <device_name> to be <color>.",8
light,turn_on,"I want <device_name> to be <color>,8
light,turn_on,"I want <device_name> to be <color>",8
light,turn_on,"Can you make <device_name> shine in <color>?",8
light,turn_on,"Turn <device_name> to a <color> shade.",8
light,turn_on,"Turn <device_name> <color>.",8
Can't render this file because it contains an unexpected character in line 138 and column 17.