Mirror of https://github.com/acon96/home-llm.git (synced 2026-01-10 06:07:58 -05:00)
detect all required features for default build + disable f16c on noavx build
.github/workflows/create-release.yml (vendored): 4 lines changed
@@ -25,7 +25,7 @@ jobs:
           - home_assistant_version: "2024.2.1"
             arch: "amd64"
             suffix: "-noavx"
-            extra_defines: "-DLLAMA_NATIVE=OFF -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF"
+            extra_defines: "-DLLAMA_NATIVE=OFF -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DLLAMA_F16C=OFF"
           - home_assistant_version: "2024.2.1"
             arch: "amd64"
             suffix: "-avx512"
@@ -33,7 +33,7 @@ jobs:
           - home_assistant_version: "2024.2.1"
             arch: "i386"
             suffix: "-noavx"
-            extra_defines: "-DLLAMA_NATIVE=OFF -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF"
+            extra_defines: "-DLLAMA_NATIVE=OFF -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DLLAMA_F16C=OFF"
           - home_assistant_version: "2024.2.1"
             arch: "i386"
             suffix: "-avx512"
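These defines are llama.cpp CMake options that control which x86 SIMD paths get compiled into the wheel; adding -DLLAMA_F16C=OFF keeps the noavx variant from emitting F16C instructions on older CPUs. As a minimal sketch of how such a variant could be built, assuming the wheel is produced with pip and llama-cpp-python's CMAKE_ARGS environment variable (neither of which is shown in this diff):

import os
import subprocess

# Illustrative only: build a llama-cpp-python wheel with all x86 SIMD
# extensions disabled, mirroring the "-noavx" matrix entries above.
# llama-cpp-python forwards CMAKE_ARGS to llama.cpp's CMake configure step.
NOAVX_DEFINES = (
    "-DLLAMA_NATIVE=OFF -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF "
    "-DLLAMA_FMA=OFF -DLLAMA_F16C=OFF"
)

env = dict(os.environ, CMAKE_ARGS=NOAVX_DEFINES)

# Build a wheel (rather than installing) so it can be attached to the release.
subprocess.run(
    ["pip", "wheel", "llama-cpp-python", "--no-deps", "--wheel-dir", "dist/"],
    env=env,
    check=True,
)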
@@ -90,7 +90,7 @@ def install_llama_cpp_python(config_dir: str):
             cpu_features = [ line for line in f.readlines() if line.startswith("Features") or line.startswith("flags")][0]
             if "avx512f" in cpu_features and "avx512bw" in cpu_features:
                 instruction_extensions_suffix = "-avx512"
-            elif "avx2" not in cpu_features:
+            elif "avx2" not in cpu_features or "avx" not in cpu_features or "f16c" not in cpu_features or "fma" not in cpu_features or not ("sse3" in cpu_features or "ssse3" in cpu_features):
                 instruction_extensions_suffix = "-noavx"
     except Exception as ex:
         _LOGGER.debug(f"Couldn't detect CPU features: {ex}")
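This widened elif is the "detect all required features" half of the commit: the default wheel is built with AVX, AVX2, FMA, F16C and SSE3/SSSE3, so if any one of those flags is missing from /proc/cpuinfo the installer now falls back to the -noavx wheel instead of only checking for AVX2. A self-contained sketch of that selection logic (the function name and return convention here are illustrative, not the component's actual API):

import logging

_LOGGER = logging.getLogger(__name__)

def detect_wheel_suffix() -> str:
    """Pick a llama-cpp-python wheel variant from /proc/cpuinfo flags.

    Illustrative re-implementation of the selection logic in the hunk above:
    use the AVX-512 build only when avx512f and avx512bw are present, and
    fall back to the no-SIMD build if any feature the default build needs
    is missing.
    """
    suffix = ""  # default build: needs avx, avx2, fma, f16c and sse3/ssse3
    try:
        with open("/proc/cpuinfo") as f:
            cpu_features = [
                line for line in f.readlines()
                if line.startswith("Features") or line.startswith("flags")
            ][0]

        if "avx512f" in cpu_features and "avx512bw" in cpu_features:
            suffix = "-avx512"
        elif ("avx2" not in cpu_features
              or "avx" not in cpu_features
              or "f16c" not in cpu_features
              or "fma" not in cpu_features
              or not ("sse3" in cpu_features or "ssse3" in cpu_features)):
            suffix = "-noavx"
    except Exception as ex:
        # non-Linux hosts have no /proc/cpuinfo; keep the default suffix
        _LOGGER.debug(f"Couldn't detect CPU features: {ex}")

    return suffix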
evaluate.py: 10 lines changed
@@ -52,13 +52,15 @@ def icl_example_generator(num_examples, entity_names, service_names):
         print(f"Attempted to generate {num_examples} ICL examples for conversation, but only {len(selected_in_context_examples)} are available!")
 
     results = []
-    for x in range(num_examples_to_generate):
+    while len(results) < num_examples_to_generate:
+        if len(selected_in_context_examples) == 0:
+            break
+
         chosen_example = selected_in_context_examples.pop()
         chosen_service = chosen_example["service"]
         potential_devices = [ x for x in entity_names if x.split(".")[0] == chosen_service.split(".")[0] ]
 
         if len(potential_devices) == 0:
             selected_in_context_examples.append(chosen_example)
             continue
         else:
             example = {
@@ -66,7 +68,7 @@ def icl_example_generator(num_examples, entity_names, service_names):
                 "service": chosen_service,
                 "target_device": potential_devices[0],
             }
-            results.append(json.dumps(example))
+            results.insert(0, json.dumps(example))
 
     return "\n".join(results)
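Together, these two hunks change the ICL example generator from a fixed-count for loop into a while loop that keeps drawing examples until enough usable ones are collected, skips examples whose service has no matching entity in the prompt, and prepends rather than appends each rendered example. A self-contained sketch of the resulting behavior (the example pool and its field names are passed in and invented here for illustration; evaluate.py builds the pool from its own data and re-queues unusable examples, where this sketch simply drops them so the loop always terminates):

import json

def icl_example_generator(num_examples, entity_names, service_names, in_context_examples):
    # keep only examples whose service is actually offered in the prompt
    selected = [x for x in in_context_examples if x["service"] in service_names]

    num_to_generate = min(num_examples, len(selected))
    results = []
    while len(results) < num_to_generate:
        if len(selected) == 0:
            break

        chosen = selected.pop()
        service = chosen["service"]
        # only target devices from the same domain as the chosen service
        potential_devices = [e for e in entity_names if e.split(".")[0] == service.split(".")[0]]

        if len(potential_devices) == 0:
            continue  # evaluate.py re-queues the example instead of dropping it

        example = {
            "to_say": chosen["response"],
            "service": service,
            "target_device": potential_devices[0],
        }
        # insert(0, ...) makes later draws appear first in the rendered block
        results.insert(0, json.dumps(example))

    return "\n".join(results)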
@@ -112,7 +114,7 @@ def evaluate(output_folder, trained_model, trained_tokenizer, dataset, batch_size):
         for turn in conversation:
             if turn["role"] == "system":
                 entity_names = entity_ids_regex.findall(turn["content"])
-                service_names = service_names_regex.findall(turn["content"])
+                service_names = [ x.split("(")[0] for x in service_names_regex.findall(turn["content"]) ]
                 icl_examples = icl_example_generator(5, entity_names, service_names)
                 turn["content"] = turn["content"] + "Respond to the following user instruction by responding in the same format as the following examples:\n" + icl_examples
             new_conversation.append(turn)
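The last hunk strips the parenthesized argument list from each service name scraped out of the system prompt, so the ICL generator receives bare domain.service names. A tiny illustration, assuming the prompt lists services in a "domain.service(args)" form (the exact prompt format is not shown in this diff):

# Hypothetical output of service_names_regex.findall() on a system prompt
# that lists services as "domain.service(arguments)".
raw_matches = ["light.turn_on(rgb_color,brightness)", "fan.toggle()"]

# The updated line keeps only the text before "(", i.e. the bare service name.
service_names = [x.split("(")[0] for x in raw_matches]
print(service_names)  # ['light.turn_on', 'fan.toggle']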