chore(ci): add throughput and hpu support to data extractor

Throughput results can now be fetched, and the HPU backend is now supported for integer formatting.
2026-01-09 22:57:59 -05:00 · 2025-10-27 12:59:42 +01:00
parent b02a3b16ff
commit fd6323b311
7 changed files with 166 additions and 43 deletions
--- a/ci/benchmark_parser.py
+++ b/ci/benchmark_parser.py
@@ -89,12 +89,12 @@ parser.add_argument(
    dest="bench_type",
    choices=["latency", "throughput"],
    default="latency",
-    help="Compute and append number of operations per second and"
+    help="Fetch results for latency or throughput benchmarks",
    "operations per dollar",
 )
 parser.add_argument(
    "--backend",
    dest="backend",
    choices=["cpu", "gpu", "hpu"],
    default="cpu",
    help="Backend on which benchmarks have run",
 )
--- a/ci/data_extractor/src/benchmark_specs.py
+++ b/ci/data_extractor/src/benchmark_specs.py
@@ -10,6 +10,7 @@ class Backend(enum.StrEnum):
    CPU = "cpu"
    GPU = "gpu"
    HPU = "hpu"
    @staticmethod
    def from_str(backend_name):
@@ -18,6 +19,8 @@ class Backend(enum.StrEnum):
                return Backend.CPU
            case "gpu":
                return Backend.GPU
            case "hpu":
                return Backend.HPU
            case _:
                raise NotImplementedError
@@ -83,7 +86,7 @@ class CoreCryptoOperation(enum.StrEnum):
    KeySwitch = "KS"
    PBS = "PBS"
    MultiBitPBS = "MB-PBS"
-    KeyswitchPBS = "KS - PBS"
+    KeySwitchPBS = "KS - PBS"
    KeySwitchMultiBitPBS = "KS - MB-PBS"
    @staticmethod
@@ -96,7 +99,7 @@ class CoreCryptoOperation(enum.StrEnum):
            case "multi_bit_pbs" | "multi_bit_deterministic_pbs":
                return CoreCryptoOperation.MultiBitPBS
            case "ks_pbs":
-                return CoreCryptoOperation.KeyswitchPBS
+                return CoreCryptoOperation.KeySwitchPBS
            case "multi_bit_ks_pbs" | "multi_bit_deterministic_ks_pbs":
                return CoreCryptoOperation.KeySwitchMultiBitPBS
            case _:
@@ -119,7 +122,7 @@ class CoreCryptoOperation(enum.StrEnum):
                return "pbs"
            case CoreCryptoOperation.MultiBitPBS:
                return "pbs"
-            case CoreCryptoOperation.KeyswitchPBS:
+            case CoreCryptoOperation.KeySwitchPBS:
                return "ks-pbs"
            case CoreCryptoOperation.KeySwitchMultiBitPBS:
                return "ks-pbs"
@@ -236,6 +239,21 @@ class ErrorFailureProbability(enum.IntEnum):
                )
 class BenchType(enum.Enum):
    """
    Kind of benchmark measurement that can be requested.

    ``Latency`` has value 0 and ``Throughput`` has value 1.
    """

    Latency = 0
    Throughput = 1

    @staticmethod
    def from_str(bench_type):
        """
        Build a :class:`BenchType` from its textual name.

        The lookup is case-insensitive.

        :param bench_type: name of the benchmark type ("latency" or "throughput")
        :type bench_type: str

        :return: matching benchmark type
        :rtype: BenchType

        :raises ValueError: if the name does not match any known benchmark type
        """
        normalized_name = bench_type.lower()
        if normalized_name == "latency":
            return BenchType.Latency
        if normalized_name == "throughput":
            return BenchType.Throughput
        # The message reports the caller's original spelling, not the lowered one.
        raise ValueError(f"BenchType '{bench_type}' not supported")
 class ParamsDefinition:
    """
    Represents a parameter definition for specific cryptographic settings.
@@ -449,7 +467,7 @@ class BenchDetails:
        match self.layer:
            case Layer.Integer:
-                op_name_index = 2 if parts[1] == "cuda" else 1
+                op_name_index = 2 if parts[1] in ["cuda", "hpu"] else 1
                if parts[op_name_index] == "signed":
                    op_name_index += 1
                    self.sign_flavor = SignFlavor.Signed
@@ -470,7 +488,7 @@ class BenchDetails:
            case Layer.CoreCrypto:
                self.operation_name = parts[2] if parts[1] == "cuda" else parts[1]
            case Layer.HLApi:
-                if parts[1] == "cuda":
+                if parts[1] in ["cuda", "hpu"]:
                    self.operation_name = "::".join(parts[2:-1])
                else:
                    self.operation_name = "::".join(parts[1:-1])
--- a/ci/data_extractor/src/config.py
+++ b/ci/data_extractor/src/config.py
@@ -1,7 +1,7 @@
 import argparse
 import pathlib
-from benchmark_specs import Backend, Layer, PBSKind
+from benchmark_specs import Backend, BenchType, Layer, PBSKind
 class UserConfig:
@@ -31,6 +31,8 @@ class UserConfig:
        self.bench_date = input_args.bench_date
        self.time_span_days = input_args.time_span_days
        self.bench_type = BenchType.from_str(input_args.bench_type.lower())
        self.layer = Layer.from_str(input_args.layer.lower())
        self.pbs_kind = PBSKind.from_str(input_args.pbs_kind)
        self.grouping_factor = input_args.grouping_factor
--- a/ci/data_extractor/src/connector.py
+++ b/ci/data_extractor/src/connector.py
@@ -4,7 +4,14 @@ import os
 import pathlib
 import psycopg2
-from benchmark_specs import BenchDetails, Layer, OperandType, PBSKind
+from benchmark_specs import (
    BenchDetails,
    Backend,
    BenchType,
    Layer,
    OperandType,
    PBSKind,
 )
 from config import UserConfig
 from exceptions import NoDataFound
@@ -179,10 +186,14 @@ class PostgreConnector:
        filters.append(f"b.name = '{branch}'")
        name_suffix = f"\\{name_suffix}"
-        if backend == "cpu":
+        match backend:
-            filters.append(f"test.name LIKE '{layer}::%{name_suffix}'")
+            case Backend.CPU:
-        elif backend == "gpu":
+                filters.append(f"test.name LIKE '{layer}::%{name_suffix}'")
-            filters.append(f"test.name LIKE '{layer}::cuda::%{name_suffix}'")
+            case Backend.GPU:
                filters.append(f"test.name LIKE '{layer}::cuda::%{name_suffix}'")
            case Backend.HPU:
                name_suffix = f"_mean"
                filters.append(f"test.name LIKE '{layer}::hpu::%{name_suffix}'")
        if version:
            filters.append(f"pv.name = '{version}'")
@@ -212,8 +223,11 @@ class PostgreConnector:
            ]
            filters.append("({})".format(" OR ".join(conditions)))
-        # Throughput is not supported yet
+        match user_config.bench_type:
-        filters.append("test.name NOT SIMILAR TO '%::throughput::%'")
+            case BenchType.Latency:
                filters.append("test.name NOT SIMILAR TO '%::throughput::%'")
            case BenchType.Throughput:
                filters.append("test.name LIKE '%::throughput::%'")
        select_parts = (
            "SELECT",
--- a/ci/data_extractor/src/data_extractor.py
+++ b/ci/data_extractor/src/data_extractor.py
@@ -18,13 +18,12 @@ import argparse
 import datetime
 import formatter
 import sys
-from formatter import (CSVFormatter, GenericFormatter, MarkdownFormatter,
+from formatter import CSVFormatter, GenericFormatter, MarkdownFormatter, SVGFormatter
                       SVGFormatter)
 import config
 import connector
 import regression
-from benchmark_specs import Backend, Layer, OperandType, PBSKind, RustType
+from benchmark_specs import BenchType, Layer, OperandType, RustType
 import utils
@@ -87,7 +86,7 @@ parser.add_argument(
 parser.add_argument(
    "--backend",
    dest="backend",
-    choices=["cpu", "gpu"],
+    choices=["cpu", "gpu", "hpu"],
    default="cpu",
    help="Backend on which benchmarks have run",
 )
@@ -118,6 +117,13 @@ parser.add_argument(
    default=30,
    help="Numbers of days prior of `bench_date` we search for results in the database",
 )
 parser.add_argument(
    "--bench-type",
    dest="bench_type",
    choices=["latency", "throughput"],
    default="latency",
    help="Type of benchmark to filter against",
 )
 parser.add_argument(
    "--regression-profiles",
    dest="regression_profiles",
@@ -212,13 +218,17 @@ def perform_hardware_comparison(
        results.append(res)
        match user_config.bench_type:
            case BenchType.Latency:
                conversion_func = utils.convert_latency_value_to_readable_text
            case BenchType.Throughput:
                conversion_func = utils.convert_throughput_value_to_readable_text
        output_filename = "".join(
            [user_config.output_file, "_", hw, "_", operand_type.lower(), ".csv"]
        )
        csv_formatter = CSVFormatter(layer, user_config.backend, user_config.pbs_kind)
-        formatted_data = csv_formatter.format_data(
+        formatted_data = csv_formatter.format_data(res, conversion_func)
            res, utils.convert_value_to_readable_text
        )
        utils.write_to_csv(
            csv_formatter.generate_csv(formatted_data),
            output_filename,
@@ -289,12 +299,18 @@ def perform_data_extraction(
        print(f"Failed to fetch benchmark data: {err}")
        sys.exit(2)
    match user_config.bench_type:
        case BenchType.Latency:
            conversion_func = utils.convert_latency_value_to_readable_text
        case BenchType.Throughput:
            conversion_func = utils.convert_throughput_value_to_readable_text
    generic_formatter = GenericFormatter(
        layer, user_config.backend, user_config.pbs_kind, user_config.grouping_factor
    )
    formatted_results = generic_formatter.format_data(
        res,
-        utils.convert_value_to_readable_text,
+        conversion_func,
    )
    file_suffix = f"_{operand_type.lower()}"
--- a/ci/data_extractor/src/formatter.py
+++ b/ci/data_extractor/src/formatter.py
@@ -6,10 +6,18 @@ import xml.dom.minidom
 from collections.abc import Callable
 import svg
-from benchmark_specs import (ALL_RUST_TYPES, Backend, BenchDetails,
+from benchmark_specs import (
-                             CoreCryptoOperation, ErrorFailureProbability,
+    ALL_RUST_TYPES,
-                             Layer, NoiseDistribution, OperandType, PBSKind,
+    Backend,
-                             RustType)
+    BenchDetails,
    CoreCryptoOperation,
    ErrorFailureProbability,
    Layer,
    NoiseDistribution,
    OperandType,
    PBSKind,
    RustType,
 )
 from py_markdown_table.markdown_table import markdown_table
@@ -259,12 +267,6 @@ class GenericFormatter:
                    f"{prefix}_if_then_else_parallelized",
                ]
            case Backend.GPU:
                match operand_type:
                    case OperandType.CipherText:
                        prefix = "cuda"
                    case OperandType.PlainText:
                        prefix = "cuda_scalar"
                operations = [
                    f"{prefix}_neg",
                    f"{prefix}_add",
@@ -280,6 +282,42 @@ class GenericFormatter:
                    f"{prefix}_ilog2",
                    f"{prefix}_if_then_else",
                ]
            case Backend.HPU:
                operations = [
                    f"{prefix}_neg",
                    (
                        f"{prefix}_add"
                        if operand_type == OperandType.CipherText
                        else f"{prefix}_adds"
                    ),
                    (
                        f"{prefix}_mul"
                        if operand_type == OperandType.CipherText
                        else f"{prefix}_muls"
                    ),
                    f"{prefix}_cmp_eq",
                    f"{prefix}_cmp_gt",
                    f"{prefix}_max",
                    f"{prefix}_bw_and",
                    (
                        f"{prefix}_div"
                        if operand_type == OperandType.CipherText
                        else f"{prefix}_divs"
                    ),
                    (
                        f"{prefix}_shift_l"
                        if operand_type == OperandType.CipherText
                        else f"{prefix}_shifts_l"
                    ),
                    (
                        f"{prefix}_rot_l"
                        if operand_type == OperandType.CipherText
                        else f"{prefix}_rots_l"
                    ),
                    f"{prefix}_lead0",
                    f"{prefix}_ilog2",
                    f"{prefix}_if_then_else",
                ]
            case _:
                raise NotImplementedError(
                    f"backend '{self.backend}' not supported yet for integer formatting"
@@ -309,7 +347,7 @@ class GenericFormatter:
        first_column_header = "Operation \\ Size"
        # Adapt list to plaintext benchmarks results.
-        if operand_type == OperandType.PlainText:
+        if operand_type == OperandType.PlainText and self.backend != Backend.HPU:
            if self.backend == Backend.CPU:
                div_name = f"{prefix}_div_parallelized"
                rem_name = f"{prefix}_rem_parallelized"
@@ -319,7 +357,7 @@ class GenericFormatter:
            operations.insert(8, div_name)
            operations.insert(9, rem_name)
-            operations.pop(7)  # Remove div_rem_parallelized
+            operations.pop(7)
            display_names.insert(
                8,
@@ -404,8 +442,6 @@ class GenericFormatter:
            NoiseDistribution.TUniform,
        ]
        operation_displays = [op.value for op in OPERATIONS_DISPLAYS]
        sorted_results = self._build_results_dict(
            supported_pfails,
            noise_distributions,
@@ -435,6 +471,12 @@ class GenericFormatter:
                ) and param_definition.pbs_kind != PBSKind.MultiBit:
                    # Skip this operation since a multi-bit operation cannot be done with any other parameters type.
                    continue
                elif (
                    formatted_name == CoreCryptoOperation.PBS
                    or formatted_name == CoreCryptoOperation.KeySwitchPBS
                ) and param_definition.pbs_kind != PBSKind.Classical:
                    # Skip this operation since a classical operation cannot be done with any other parameters type.
                    continue
                grouping_factor = param_definition.grouping_factor
                if (
@@ -443,10 +485,7 @@ class GenericFormatter:
                ):
                    continue
-                if (
+                if param_definition.details["variation"] not in ["", "BENCH"]:
                    param_definition.details["variation"]
                    or param_definition.details["trailing_details"]
                ):
                    continue
                try:
@@ -484,7 +523,7 @@ OPERATIONS_DISPLAYS = [
    # CoreCryptoOperation.KeySwitch, # Uncomment this line to get keyswitch in the tables
    CoreCryptoOperation.PBS,
    CoreCryptoOperation.MultiBitPBS,
-    CoreCryptoOperation.KeyswitchPBS,
+    CoreCryptoOperation.KeySwitchPBS,
    CoreCryptoOperation.KeySwitchMultiBitPBS,
 ]
--- a/ci/data_extractor/src/utils.py
+++ b/ci/data_extractor/src/utils.py
@@ -10,8 +10,11 @@ SECONDS_IN_NANO = 1e9
 MILLISECONDS_IN_NANO = 1e6
 MICROSECONDS_IN_NANO = 1e3
 THOUSAND_ELEMENTS = 1e3
 MILLION_ELEMENTS = 1e6
-def convert_value_to_readable_text(value: int, max_digits: int = 3) -> str:
+
 def convert_latency_value_to_readable_text(value: int, max_digits: int = 3) -> str:
    """
    Convert timing in nanoseconds to the highest unit usable.
@@ -40,6 +43,37 @@ def convert_value_to_readable_text(value: int, max_digits: int = 3) -> str:
    return f"{round(converted_parts[0], rounding_digit)} {converted_parts[1]}"
 def convert_throughput_value_to_readable_text(value: int, max_digits: int = 3) -> str:
    """
    Convert a throughput in elements per second to the highest unit usable.

    The value is scaled to millions ("M.ops/s") or thousands ("k.ops/s") of
    operations per second when it is large enough, otherwise it is reported
    as plain "ops/s".

    :param value: throughput value, in elements per second
    :type value: int
    :param max_digits: number of digits to keep in the final representation of the value
    :type max_digits: int, optional

    :return: human-readable value with unit
    :rtype: str
    """
    # Bounds are inclusive so that an exact multiple is promoted to the larger
    # unit: 1_000_000 reads "1.0 M.ops/s" instead of "1000 k.ops/s", which
    # would carry more digits than `max_digits` allows.
    if value >= MILLION_ELEMENTS:
        converted_parts = (value / MILLION_ELEMENTS), "M.ops/s"
    elif value >= THOUSAND_ELEMENTS:
        converted_parts = (value / THOUSAND_ELEMENTS), "k.ops/s"
    else:
        converted_parts = value, "ops/s"

    if 0 < converted_parts[0] < 100.0:
        # Keep `max_digits` significant digits by rounding after the decimal point.
        power_of_10 = math.floor(math.log10(converted_parts[0]))
        rounding_digit = max_digits - (power_of_10 + 1)
    else:
        # Zero and negative values have no logarithm, and values of 100 or more
        # are displayed as whole numbers: round(x, None) returns an integer.
        rounding_digit = None

    return f"{round(converted_parts[0], rounding_digit)} {converted_parts[1]}"
 def convert_gain_to_text(value: float) -> str:
    """
    Convert gains as :class:`float` to :class:`str`