mirror of https://github.com/zama-ai/concrete.git
tests(benchmarks): reduce number of test cases to speed up execution
Bench only one compilation option in the automatic benchmarks. Only the 'loop' option is kept, to take advantage of hardware with many available CPUs; running benchmarks with the 'default' option is suboptimal on such hardware, since it uses only one CPU. The time-consuming MNIST test is also removed, as it belongs in the ML benchmarks. Finally, the Makefile is fixed to use the provided Python executable, instead of relying on the system one, to generate the MLIR YAML files.
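As a usage sketch (the interpreter path here is illustrative; the variable and targets come from the Makefile changes below), a runner can now pin the interpreter explicitly instead of picking up whatever `python` happens to be on the PATH:

    cd compiler
    # Python3_EXECUTABLE now defaults to python3 and can be overridden per invocation.
    make Python3_EXECUTABLE=/usr/bin/python3 BINDINGS_PYTHON_ENABLED=OFF run-benchmarks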
.github/workflows/benchmark.yml
@@ -123,6 +123,12 @@ jobs:
           cd compiler
           make BINDINGS_PYTHON_ENABLED=OFF run-benchmarks
 
+      - name: Upload raw results artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ github.sha }}_raw
+          path: compiler/benchmarks_results.json
+
       - name: Parse results
         shell: bash
         run: |
@@ -131,13 +137,12 @@ jobs:
             --series-name compiler_end_to_end_benchmarks \
             --series-help "Concrete compiler end-to-end benchmarks timings" \
             --series-tags "{\"compiler_hash\": \"${{ github.sha }}\", \"branch\": \"${{ github.ref_name }}\", \"optimizer_hash\": \"${OPTIMIZER_HASH}\", \"hardware\": \"aws ${{ env.EC2_INSTANCE_TYPE }}\"}"
-          gzip -k ${{ env.RESULTS_FILENAME }}
 
-      - name: Upload compressed results artifact
+      - name: Upload parsed results artifact
         uses: actions/upload-artifact@v3
         with:
           name: ${{ github.sha }}
-          path: ${{ env.RESULTS_FILENAME }}.gz
+          path: ${{ env.RESULTS_FILENAME }}
 
       - name: Checkout Slab repo
         uses: actions/checkout@v3
@@ -38,10 +38,6 @@ def parse_results(raw_results):
     result_values = list()
     raw_results = json.loads(raw_results.read_text())
     for res in raw_results["benchmarks"]:
-        if not res.get("aggregate_name", None):
-            # Skipping iterations and focus only on aggregated results.
-            continue
-
         bench_class, action, option_class, application = res["run_name"].split("/")
 
         for measurement in ("real_time", "cpu_time"):
@@ -49,7 +45,6 @@ def parse_results(raw_results):
                     "action": action,
                     "option_class": option_class,
                     "application": application,
-                    "stat": res["aggregate_name"],
                     "measurement": measurement}
             result_values.append({"value": res[measurement], "tags": tags})
 
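For reference, a minimal sketch of the simplified parser under the Google Benchmark JSON layout that the run-benchmarks target emits (the "bench_class" tag key is an assumption, since the hunk above begins mid-dictionary):

    import json
    from pathlib import Path

    def parse_results(raw_results: Path):
        # Every entry is now a plain timing: with repetitions gone there are
        # no aggregate entries to filter out, and no "stat" tag to record.
        result_values = []
        raw = json.loads(raw_results.read_text())
        for res in raw["benchmarks"]:
            # run_name has the form "<bench_class>/<action>/<option_class>/<application>".
            bench_class, action, option_class, application = res["run_name"].split("/")
            for measurement in ("real_time", "cpu_time"):
                tags = {"bench_class": bench_class,  # assumed key name
                        "action": action,
                        "option_class": option_class,
                        "application": application,
                        "measurement": measurement}
                result_values.append({"value": res[measurement], "tags": tags})
        return result_values

This pairs with the Makefile change below: once --benchmark_repetitions=10 and --benchmark_report_aggregates_only=true are dropped, the old aggregate-only filter would skip every entry, so it has to go.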
@@ -1,6 +1,6 @@
 BUILD_TYPE?=Release
 BUILD_DIR?=./build
-Python3_EXECUTABLE?=
+Python3_EXECUTABLE?=python3
 BINDINGS_PYTHON_ENABLED=ON
 DATAFLOW_EXECUTION_ENABLED=OFF
 TIMING_ENABLED=OFF
@@ -253,13 +253,13 @@ run-end-to-end-gpu-tests: build-end-to-end-gpu-tests
 # benchmark
 
 generate-benchmarks:
-	python ./tests/end_to_end_fixture/end_to_end_linalg_apply_lookup_table_gen.py &> tests/end_to_end_fixture/end_to_end_linalg_apply_lookup_table.yaml
+	$(Python3_EXECUTABLE) ./tests/end_to_end_fixture/end_to_end_linalg_apply_lookup_table_gen.py > tests/end_to_end_fixture/end_to_end_linalg_apply_lookup_table.yaml
 
 build-benchmarks: build-initialized
 	cmake --build $(BUILD_DIR) --target end_to_end_benchmark
 
 run-benchmarks: build-benchmarks generate-benchmarks
-	$(BUILD_DIR)/bin/end_to_end_benchmark --benchmark_out=benchmarks_results.json --benchmark_out_format=json --benchmark_repetitions=10 --benchmark_report_aggregates_only=true
+	$(BUILD_DIR)/bin/end_to_end_benchmark --benchmark_out=benchmarks_results.json --benchmark_out_format=json
 
 build-mlbench: build-initialized
 	cmake --build $(BUILD_DIR) --target end_to_end_mlbench
@@ -270,7 +270,7 @@ generate-mlbench:
 	unzip tests/end_to_end_benchmarks/mlbench.zip -d tests/end_to_end_benchmarks/mlbench
 	rm -f tests/end_to_end_benchmarks/mlbench/**/*\=*
 	find tests/end_to_end_benchmarks/mlbench -name "*.mlir" -exec sed -e '1d' -e 's/ func / func.func /g' -e 's/ linalg.tensor_/ tensor./g' -e '$$d' -i {} \;
-	python tests/end_to_end_benchmarks/generate_bench_yaml.py tests/end_to_end_benchmarks/mlbench tests/end_to_end_benchmarks/mlbench/end_to_end_mlbench
+	$(Python3_EXECUTABLE) tests/end_to_end_benchmarks/generate_bench_yaml.py tests/end_to_end_benchmarks/mlbench tests/end_to_end_benchmarks/mlbench/end_to_end_mlbench
 
 run-mlbench: build-mlbench generate-mlbench
 	tests/end_to_end_benchmarks/end_to_end_mlbench.sh tests/end_to_end_benchmarks/mlbench/ $(BUILD_DIR)/bin/end_to_end_mlbench
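Beyond swapping in $(Python3_EXECUTABLE), note that generate-benchmarks also changes the redirect from &> to >. With &>, stderr is written into the generated YAML as well, so any warning from the generator corrupts the fixture; with >, only stdout lands in the file. A minimal shell illustration (script and file names are hypothetical):

    python3 gen.py &> out.yaml   # stdout AND stderr end up in out.yaml
    python3 gen.py  > out.yaml   # only stdout; warnings stay on the CI log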
@@ -132,9 +132,12 @@ static int registerEndToEndTestFromFile(std::string prefix, std::string path,
   setCurrentStackLimit(stackSizeRequirement);
   mlir::concretelang::CompilationOptions defaul;
+  // Run only parallelized benchmarks to take advantage of hardware with lots of
+  // CPU cores.
+  defaul.loopParallelize = true;
   registe("default", defaul);
-  mlir::concretelang::CompilationOptions loop;
-  loop.loopParallelize = true;
-  registe("loop", loop);
+  // mlir::concretelang::CompilationOptions loop;
+  // loop.loopParallelize = true;
+  // registe("loop", loop);
 #ifdef CONCRETELANG_CUDA_SUPPORT
   mlir::concretelang::CompilationOptions gpu;
   gpu.emitGPUOps = true;
@@ -155,9 +158,6 @@ auto _ = {
         "tests/end_to_end_fixture/end_to_end_encrypted_tensor.yaml"),
     registerEndToEndTestFromFile(
         "FHELinalg", "tests/end_to_end_fixture/end_to_end_fhelinalg.yaml"),
-    registerEndToEndTestFromFile(
-        "FHELinalg", "tests/end_to_end_fixture/end_to_end_programs.yaml",
-        0x8000000),
     registerEndToEndTestFromFile(
         "FHETLU",
         "tests/end_to_end_fixture/end_to_end_linalg_apply_lookup_table.yaml"),
@@ -3,7 +3,7 @@ import numpy as np
 
 MIN_PRECISON = 1
 MAX_PRECISION = 16
-N_CT = [1, 10, 100, 1000, 10000]
+N_CT = [1, 10, 100, 200]
 
 
 def main():
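To see why trimming N_CT matters, here is a minimal sketch of the assumed generator structure (the real script also emits the MLIR programs and test inputs; only the case count is shown): the benchmark matrix is precisions × ciphertext counts, so dropping the 1000 and 10000 sizes removes the slowest cases outright.

    # Sketch only; constants copied from the source (including the
    # MIN_PRECISON spelling), the loop body is assumed.
    MIN_PRECISON = 1
    MAX_PRECISION = 16
    N_CT = [1, 10, 100, 200]

    def main():
        # One benchmark description per (precision, n_ct) pair:
        # 16 precisions x 4 sizes = 64 cases instead of the previous 80.
        for precision in range(MIN_PRECISON, MAX_PRECISION + 1):
            for n_ct in N_CT:
                # Stand-in for the real YAML emission (assumed structure).
                print(f"precision: {precision}, n_ct: {n_ct}")

    if __name__ == "__main__":
        main()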
File diff suppressed because one or more lines are too long