chore(ci): compute operations throughput on benchmarks results

In addition to values currently parsed and sent to database, now throughput is also computed. This computation is done only on mean values. New results are: - operations per ms - operations per dollar spent which depends on the machine used to perform the benchmarks
2026-04-17 03:00:54 -04:00 · 2023-02-24 16:13:55 +01:00
parent 291019ba0f
commit 8575435b3e
3 changed files with 73 additions and 10 deletions
--- a/.github/workflows/compiler_benchmark.yml
+++ b/.github/workflows/compiler_benchmark.yml
@@ -121,7 +121,8 @@ jobs:
          --project-version ${COMMIT_HASH} \
          --branch ${{ github.ref_name }} \
          --commit-date ${COMMIT_DATE} \
-          --bench-date "${{ env.BENCH_DATE }}"
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --throughput

      - name: Upload parsed results artifact
        uses: actions/upload-artifact@v3
--- a/ci/benchmark_parser.py
+++ b/ci/benchmark_parser.py
@@ -7,7 +7,9 @@ Parse benchmark raw results.
 import argparse
 import pathlib
 import json
+import sys

+ONE_HOUR_IN_NANOSECONDS = 3600E9

 parser = argparse.ArgumentParser()
 parser.add_argument('results_path',
@@ -28,36 +30,57 @@ parser.add_argument('--commit-date', dest='commit_date', required=True,
                    help='Timestamp of commit hash used in project_version')
 parser.add_argument('--bench-date', dest='bench_date', required=True,
                    help='Timestamp when benchmark was run')
+parser.add_argument('--throughput', dest='throughput', action='store_true',
+                    help='Compute and append number of operations per millisecond and'
+                         'operations per dollar, only on mean values')


-def parse_results(raw_results):
+def parse_results(raw_results, compute_throughput=False, hardware_hourly_cost=None):
    """
    Parse raw benchmark results.

    :param raw_results: path to file that contains raw results as :class:`pathlib.Path`
+    :param compute_throughput: compute number of operations per millisecond and operations per
+        dollar on mean values
+    :param hardware_hourly_cost: hourly cost of the hardware used in dollar

    :return: :class:`list` of data points
    """
    raw_results = json.loads(raw_results.read_text())
-    return [
-        {"value": res["cpu_time"], "test": res["name"]}
-        for res in raw_results["benchmarks"]
-    ]
+    parsed_results = []
+    for res in raw_results["benchmarks"]:
+        test_name = res["name"]
+        value = res["cpu_time"]
+        parsed_results.append({"value": value, "test": test_name})
+        if test_name.endswith("_mean") and compute_throughput:
+            parsed_results.append({
+                "value": compute_ops_per_millisecond(value),
+                "test": "_".join([test_name, "ops_per_ms"])})
+
+            if hardware_hourly_cost is not None:
+                parsed_results.append({
+                    "value": compute_ops_per_dollar(value, hardware_hourly_cost),
+                    "test": "_".join([test_name, "ops_per_dollar"])})
+
+    return parsed_results


-def recursive_parse(directory):
+def recursive_parse(directory, compute_throughput=False, hardware_hourly_cost=None):
    """
    Parse all the benchmark results in a directory. It will attempt to parse all the files having a
    .json extension at the top-level of this directory.

    :param directory: path to directory that contains raw results as :class:`pathlib.Path`
+    :param compute_throughput: compute number of operations per millisecond and operations per
+        dollar
+    :param hardware_hourly_cost: hourly cost of the hardware used in dollar

    :return: :class:`list` of data points
    """
    result_values = []
    for file in directory.glob('*.json'):
        try:
-            result_values.extend(parse_results(file))
+            result_values.extend(parse_results(file, compute_throughput, hardware_hourly_cost))
        except KeyError as err:
            print(f"Failed to parse '{file.resolve()}': {repr(err)}")

@@ -85,15 +108,50 @@ def dump_results(parsed_results, filename, input_args):
    filename.write_text(json.dumps(series))


+def compute_ops_per_dollar(data_point, product_hourly_cost):
+    """
+    Compute numbers of operations per dollar for a given ``data_point``.
+
+    :param data_point: timing value measured during benchmark in nanoseconds
+    :param product_hourly_cost: cost in dollar per hour of hardware used
+
+    :return: number of operations per dollar
+    """
+    return ONE_HOUR_IN_NANOSECONDS / (product_hourly_cost * data_point)
+
+
+def compute_ops_per_millisecond(data_point):
+    """
+    Compute numbers of operations per millisecond for a given ``data_point``.
+
+    :param data_point: timing value measured during benchmark in nanoseconds
+
+    :return: number of operations per millisecond
+    """
+    return 1E6 / data_point
+
+
 if __name__ == "__main__":
    args = parser.parse_args()

+    hardware_cost = None
+    if args.throughput:
+        print("Throughput computation enabled")
+        ec2_costs = json.loads(
+            pathlib.Path("ci/ec2_products_cost.json").read_text(encoding="utf-8"))
+        try:
+            hardware_cost = abs(ec2_costs[args.hardware])
+            print(f"Hardware hourly cost: {hardware_cost} $/h")
+        except KeyError:
+            print(f"Cannot find hardware hourly cost for '{args.hardware}'")
+            sys.exit(1)
+
    results_path = pathlib.Path(args.results_path)
    print("Parsing benchmark results... ")
    if results_path.is_dir():
-        results = recursive_parse(results_path)
+        results = recursive_parse(results_path, args.throughput, hardware_cost)
    else:
-        results = parse_results(results_path)
+        results = parse_results(results_path, args.throughput, hardware_cost)
    print("Parsing results done")

    output_file = pathlib.Path(args.output_file)
--- a/ci/ec2_products_cost.json
+++ b/ci/ec2_products_cost.json
@@ -0,0 +1,4 @@
+{
+  "m6i.metal": 7.168,
+  "c6a.metal": 7.344
+}