refactor: rename 'dataset' to the clearer 'inputset'

closes #340
Benoit Chevallier-Mames
2021-09-14 17:01:30 +02:00
committed by Benoit Chevallier
parent efaf72880c
commit 8522e58280
13 changed files with 68 additions and 68 deletions

@@ -137,15 +137,15 @@ def main():
def function_to_compile(x_0):
return table[(x_0 + zp_x) * w_0]
dataset = []
inputset = []
for x_i in x_q:
dataset.append((int(x_i[0]),))
inputset.append((int(x_i[0]),))
# Measure: Compilation Time (ms)
engine = hnp.compile_numpy_function(
function_to_compile,
{"x_0": hnp.EncryptedScalar(hnp.UnsignedInteger(input_bits))},
iter(dataset),
iter(inputset),
)
# Measure: End

@@ -203,9 +203,9 @@ def main():
def function_to_compile(x_0, x_1):
return table[((x_0 + zp_x) * w_0) + ((x_1 + zp_x) * w_1)]
dataset = []
inputset = []
for x_i in x_q:
dataset.append((int(x_i[0]), int(x_i[1])))
inputset.append((int(x_i[0]), int(x_i[1])))
# Measure: Compilation Time (ms)
engine = hnp.compile_numpy_function(
@@ -214,7 +214,7 @@ def main():
"x_0": hnp.EncryptedScalar(hnp.UnsignedInteger(input_bits)),
"x_1": hnp.EncryptedScalar(hnp.UnsignedInteger(input_bits)),
},
iter(dataset),
iter(inputset),
)
# Measure: End

@@ -1,2 +1,2 @@
"""Bounds measurement module."""
from . import dataset_eval
from . import inputset_eval

@@ -1,4 +1,4 @@
"""Code to evaluate the IR graph on datasets."""
"""Code to evaluate the IR graph on inputsets."""
from typing import Any, Callable, Dict, Iterator, Tuple
@@ -7,20 +7,20 @@ from ..operator_graph import OPGraph
from ..representation.intermediate import IntermediateNode
def eval_op_graph_bounds_on_dataset(
def eval_op_graph_bounds_on_inputset(
op_graph: OPGraph,
dataset: Iterator[Tuple[Any, ...]],
inputset: Iterator[Tuple[Any, ...]],
min_func: Callable[[Any, Any], Any] = min,
max_func: Callable[[Any, Any], Any] = max,
) -> Dict[IntermediateNode, Dict[str, Any]]:
"""Evaluate the bounds with a dataset.
"""Evaluate the bounds with a inputset.
Evaluate the bounds for all output values of the operators in the graph op_graph over data
coming from the dataset
coming from the inputset
Args:
op_graph (OPGraph): The graph for which we want to determine the bounds
dataset (Iterator[Tuple[Any, ...]]): The dataset over which op_graph is evaluated. It
inputset (Iterator[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. It
needs to be an iterator over tuples of the same length as the number of
parameters in the function, and in the same order as those parameters
min_func (Callable[[Any, Any], Any], optional): custom function to compute a scalar minimum
@@ -35,11 +35,11 @@ def eval_op_graph_bounds_on_dataset(
op_graph, stored with the node as key and a dict with keys "min" and "max" as value.
"""
def check_dataset_input_len_is_valid(data_to_check):
def check_inputset_input_len_is_valid(data_to_check):
custom_assert(
len(data_to_check) == len(op_graph.input_nodes),
(
f"Got input data from dataset of len: {len(data_to_check)}, "
f"Got input data from inputset of len: {len(data_to_check)}, "
f"function being evaluated has {len(op_graph.input_nodes)} inputs, please make "
f"sure your data generator returns valid tuples of input values"
),
@@ -48,8 +48,8 @@ def eval_op_graph_bounds_on_dataset(
# TODO: do we want to check coherence between the input data type and the corresponding Input IR
# node's expected data type? Not considering bit_width as they may not make sense at this stage
first_input_data = dict(enumerate(next(dataset)))
check_dataset_input_len_is_valid(first_input_data.values())
first_input_data = dict(enumerate(next(inputset)))
check_inputset_input_len_is_valid(first_input_data.values())
first_output = op_graph.evaluate(first_input_data)
# We evaluate the min and max func to be able to resolve the tensors min and max rather than
@@ -59,9 +59,9 @@ def eval_op_graph_bounds_on_dataset(
for node, value in first_output.items()
}
for input_data in dataset:
for input_data in inputset:
current_input_data = dict(enumerate(input_data))
check_dataset_input_len_is_valid(current_input_data.values())
check_inputset_input_len_is_valid(current_input_data.values())
current_output = op_graph.evaluate(current_input_data)
for node, value in current_output.items():
node_bounds[node]["min"] = min_func(node_bounds[node]["min"], value)

@@ -58,7 +58,7 @@ class LookupTable:
if x < 0 or x >= len(table):
raise ValueError(
f"Lookup table with {len(table)} entries cannot be indexed with {x} "
f"(you should check your dataset)",
f"(you should check your inputset)",
)
return table[x]
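To illustrate the guard above: indexing past the end of the table raises at evaluation time. A hypothetical construction (the `LookupTable` constructor signature is assumed):

```python
# Hypothetical illustration; the LookupTable([...]) constructor signature is an assumption.
table = LookupTable([2, 1, 3, 0])

table[3]  # fine, returns 0
table[4]  # raises ValueError: "Lookup table with 4 entries cannot be indexed
          # with 4 (you should check your inputset)"
```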

@@ -6,7 +6,7 @@ from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple
import numpy
from zamalang import CompilerEngine
from ..common.bounds_measurement.dataset_eval import eval_op_graph_bounds_on_dataset
from ..common.bounds_measurement.inputset_eval import eval_op_graph_bounds_on_inputset
from ..common.common_helpers import check_op_graph_is_integer_program
from ..common.compilation import CompilationArtifacts, CompilationConfiguration
from ..common.mlir import V0_OPSET_CONVERSION_FUNCTIONS, MLIRConverter
@@ -54,7 +54,7 @@ def numpy_min_func(lhs: Any, rhs: Any) -> Any:
def _compile_numpy_function_into_op_graph_internal(
function_to_compile: Callable,
function_parameters: Dict[str, BaseValue],
dataset: Iterator[Tuple[Any, ...]],
inputset: Iterator[Tuple[Any, ...]],
compilation_configuration: CompilationConfiguration,
compilation_artifacts: CompilationArtifacts,
) -> OPGraph:
@@ -64,7 +64,7 @@ def _compile_numpy_function_into_op_graph_internal(
function_to_compile (Callable): The function to compile
function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the
function is, e.g. an EncryptedScalar holding a 7-bit unsigned integer
dataset (Iterator[Tuple[Any, ...]]): The dataset over which op_graph is evaluated. It
inputset (Iterator[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. It
needs to be an iterator over tuples of the same length as the number of
parameters in the function, and in the same order as those parameters
compilation_artifacts (CompilationArtifacts): Artifacts object to fill
@@ -105,10 +105,10 @@ def _compile_numpy_function_into_op_graph_internal(
f"{', '.join(str(node) for node in offending_non_integer_nodes)}"
)
# Find bounds with the dataset
node_bounds = eval_op_graph_bounds_on_dataset(
# Find bounds with the inputset
node_bounds = eval_op_graph_bounds_on_inputset(
op_graph,
dataset,
inputset,
min_func=numpy_min_func,
max_func=numpy_max_func,
)
@@ -139,7 +139,7 @@ def _compile_numpy_function_into_op_graph_internal(
def compile_numpy_function_into_op_graph(
function_to_compile: Callable,
function_parameters: Dict[str, BaseValue],
dataset: Iterator[Tuple[Any, ...]],
inputset: Iterator[Tuple[Any, ...]],
compilation_configuration: Optional[CompilationConfiguration] = None,
compilation_artifacts: Optional[CompilationArtifacts] = None,
) -> OPGraph:
@@ -149,7 +149,7 @@ def compile_numpy_function_into_op_graph(
function_to_compile (Callable): The function to compile
function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the
function is, e.g. an EncryptedScalar holding a 7-bit unsigned integer
dataset (Iterator[Tuple[Any, ...]]): The dataset over which op_graph is evaluated. It
inputset (Iterator[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. It
needs to be an iterator over tuples of the same length as the number of
parameters in the function, and in the same order as those parameters
compilation_configuration (Optional[CompilationConfiguration]): Configuration object to use
@@ -177,7 +177,7 @@ def compile_numpy_function_into_op_graph(
return _compile_numpy_function_into_op_graph_internal(
function_to_compile,
function_parameters,
dataset,
inputset,
compilation_configuration,
compilation_artifacts,
)
@@ -201,7 +201,7 @@ def compile_numpy_function_into_op_graph(
def _compile_numpy_function_internal(
function_to_compile: Callable,
function_parameters: Dict[str, BaseValue],
dataset: Iterator[Tuple[Any, ...]],
inputset: Iterator[Tuple[Any, ...]],
compilation_configuration: CompilationConfiguration,
compilation_artifacts: CompilationArtifacts,
show_mlir: bool,
@@ -212,7 +212,7 @@ def _compile_numpy_function_internal(
function_to_compile (Callable): The function you want to compile
function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the
function is, e.g. an EncryptedScalar holding a 7-bit unsigned integer
dataset (Iterator[Tuple[Any, ...]]): The dataset over which op_graph is evaluated. It
inputset (Iterator[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. It
needs to be an iterator over tuples of the same length as the number of
parameters in the function, and in the same order as those parameters
compilation_configuration (CompilationConfiguration): Configuration object to use
@@ -230,7 +230,7 @@ def _compile_numpy_function_internal(
op_graph = _compile_numpy_function_into_op_graph_internal(
function_to_compile,
function_parameters,
dataset,
inputset,
compilation_configuration,
compilation_artifacts,
)
@@ -256,7 +256,7 @@ def _compile_numpy_function_internal(
def compile_numpy_function(
function_to_compile: Callable,
function_parameters: Dict[str, BaseValue],
dataset: Iterator[Tuple[Any, ...]],
inputset: Iterator[Tuple[Any, ...]],
compilation_configuration: Optional[CompilationConfiguration] = None,
compilation_artifacts: Optional[CompilationArtifacts] = None,
show_mlir: bool = False,
@@ -267,7 +267,7 @@ def compile_numpy_function(
function_to_compile (Callable): The function to compile
function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the
function is, e.g. an EncryptedScalar holding a 7-bit unsigned integer
dataset (Iterator[Tuple[Any, ...]]): The dataset over which op_graph is evaluated. It
inputset (Iterator[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. It
needs to be an iterator over tuples of the same length as the number of
parameters in the function, and in the same order as those parameters
compilation_configuration (Optional[CompilationConfiguration]): Configuration object to use
@@ -297,7 +297,7 @@ def compile_numpy_function(
return _compile_numpy_function_internal(
function_to_compile,
function_parameters,
dataset,
inputset,
compilation_configuration,
compilation_artifacts,
show_mlir,

@@ -129,20 +129,20 @@ Let's take a closer look at the options we provide today.
### Dataset Evaluation
This is the simplest approach, but it requires a dataset to be provided by the user.
This is the simplest approach, but it requires an inputset to be provided by the user.
The dataset is not the dataset in the usual sense of ML as it doesn't require labels.
The inputset is not to be confused with a dataset in the classical ML sense, as it doesn't require labels.
Rather, it is a set of values which are typical inputs of the function.
The idea is to evaluate each input in the dataset and record the result of each operation in the operation graph.
The idea is to evaluate each input in the inputset and record the result of each operation in the operation graph.
Then we compare the evaluation results with the current minimum/maximum values of each node and update the minimum/maximum accordingly.
After the entire dataset is evaluated, we assign a data type to each node using the minimum and the maximum value it contained.
After the entire inputset is evaluated, we assign a data type to each node using the minimum and the maximum value it contained.
Here is an example, given this operation graph where `x` is encrypted:
![](../../_static/compilation-pipeline/two_x_plus_three.png)
and this dataset:
and this inputset:
```
[2, 3, 1]
```
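To make the procedure concrete, here is a minimal plain-Python sketch (not the library's actual implementation) of tracking the min/max of each node of `2 * x + 3` over that inputset:

```python
# Plain-Python sketch of bounds tracking; not the library's actual code.
inputset = [2, 3, 1]

# Track min/max for each node of the graph: x, 2*x, and 2*x + 3.
bounds = {}
for x in inputset:
    values = {"x": x, "2*x": 2 * x, "2*x+3": 2 * x + 3}
    for node, value in values.items():
        if node not in bounds:
            bounds[node] = {"min": value, "max": value}
        else:
            bounds[node]["min"] = min(bounds[node]["min"], value)
            bounds[node]["max"] = max(bounds[node]["max"], value)

print(bounds)
# {'x': {'min': 1, 'max': 3}, '2*x': {'min': 2, 'max': 6}, '2*x+3': {'min': 5, 'max': 9}}
```

For example, `2*x + 3` stays within `[5, 9]`, so a 4-bit unsigned integer would suffice for that node.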

@@ -28,10 +28,10 @@ y = hnp.EncryptedScalar(hnp.UnsignedInteger(3))
In this configuration, both `x` and `y` are 3-bit unsigned integers, so they have the range `[0, 2**3 - 1]`.
We also need a dataset. However, it's not the dataset used in training as it doesn't contain any labels. It is to determine the bit-widths of the intermediate results so only the inputs are necessary. It should be an iterable yielding tuples in the same order as the inputs of the function to compile.
We also need an inputset. The latter is not to be confused with the dataset used in training, which contains labels. The inputset is only used to determine the bit-widths of the intermediate results, so only the inputs are necessary. It should be an iterable yielding tuples in the same order as the inputs of the function to compile.
```python
dataset = [(2, 3), (0, 0), (1, 6), (7, 7), (7, 1)]
inputset = [(2, 3), (0, 0), (1, 6), (7, 7), (7, 1)]
```
Finally, we can compile our function to its homomorphic equivalent.
@@ -39,7 +39,7 @@ Finally, we can compile our function to its homomorphic equivalent.
```python
engine = hnp.compile_numpy_function(
f, {"x": x, "y": y},
dataset=iter(dataset),
inputset=iter(inputset),
)
```
@@ -59,7 +59,7 @@ You can use `.run(...)` method of `engine` returned by `hnp.compile_numpy_functi
```
Be careful about the inputs, though.
If you were to run with values outside the range of the dataset, the result might not be correct.
If you were to run with values outside the range of the inputset, the result might not be correct.
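For instance, sticking to the inputset above, a call like the hypothetical one below stays within the covered ranges:

```python
# Hypothetical call; (3, 4) falls within the ranges covered by the inputset above,
# so the homomorphic result should match f(3, 4).
result = engine.run(3, 4)
```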
## Further reading

@@ -54,7 +54,7 @@
"id": "27f67e43",
"metadata": {},
"source": [
"### We need a dataset, a handcrafted one for simplicity"
"### We need an inputset, a handcrafted one for simplicity"
]
},
{
@@ -73,7 +73,7 @@
"id": "fba2eecb",
"metadata": {},
"source": [
"### Let's visualize our dataset to get a grasp of it"
"### Let's visualize our inputset to get a grasp of it"
]
},
{
@@ -640,14 +640,14 @@
"metadata": {},
"outputs": [],
"source": [
"dataset = []\n",
"inputset = []\n",
"for x_i in x_q:\n",
" dataset.append((int(x_i[0]),))\n",
" inputset.append((int(x_i[0]),))\n",
"\n",
"homomorphic_model = hnp.compile_numpy_function_into_op_graph(\n",
" infer,\n",
" {\"x_0\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False))},\n",
" iter(dataset),\n",
" iter(inputset),\n",
")"
]
},
@@ -723,7 +723,7 @@
"engine = hnp.compile_numpy_function(\n",
" infer,\n",
" {\"x_0\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False))},\n",
" iter(dataset),\n",
" iter(inputset),\n",
")"
]
},

@@ -55,7 +55,7 @@
"id": "c7a0cc5f",
"metadata": {},
"source": [
"### We need a dataset, a handcrafted one for simplicity"
"### We need an inputset, a handcrafted one for simplicity"
]
},
{
@@ -74,7 +74,7 @@
"id": "2d522cb0",
"metadata": {},
"source": [
"### Let's visualize our dataset to get a grasp of it"
"### Let's visualize our inputset to get a grasp of it"
]
},
{
@@ -744,9 +744,9 @@
"metadata": {},
"outputs": [],
"source": [
"dataset = []\n",
"inputset = []\n",
"for x_i in x_q:\n",
" dataset.append((int(x_i[0]), int(x_i[1])))\n",
" inputset.append((int(x_i[0]), int(x_i[1])))\n",
" \n",
"homomorphic_model = hnp.compile_numpy_function_into_op_graph(\n",
" infer,\n",
@@ -754,7 +754,7 @@
" \"x_0\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False)),\n",
" \"x_1\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False)),\n",
" },\n",
" iter(dataset),\n",
" iter(inputset),\n",
")"
]
},
@@ -839,7 +839,7 @@
" \"x_0\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False)),\n",
" \"x_1\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False)),\n",
" },\n",
" iter(dataset),\n",
" iter(inputset),\n",
")"
]
},

@@ -1,11 +1,11 @@
"""Test file for bounds evaluation with a dataset"""
"""Test file for bounds evaluation with a inputset"""
from typing import Tuple
import pytest
from concrete.common.bounds_measurement.dataset_eval import (
eval_op_graph_bounds_on_dataset,
from concrete.common.bounds_measurement.inputset_eval import (
eval_op_graph_bounds_on_inputset,
)
from concrete.common.data_types.floats import Float
from concrete.common.data_types.integers import Integer
@@ -207,15 +207,15 @@ from concrete.numpy.tracing import trace_numpy_function
),
],
)
def test_eval_op_graph_bounds_on_dataset(
def test_eval_op_graph_bounds_on_inputset(
function,
input_ranges,
expected_output_bounds,
expected_output_data_type: Integer,
):
"""Test function for eval_op_graph_bounds_on_dataset"""
"""Test function for eval_op_graph_bounds_on_inputset"""
test_eval_op_graph_bounds_on_dataset_multiple_output(
test_eval_op_graph_bounds_on_inputset_multiple_output(
function,
input_ranges,
(expected_output_bounds,),
@@ -264,13 +264,13 @@ def test_eval_op_graph_bounds_on_dataset(
),
],
)
def test_eval_op_graph_bounds_on_dataset_multiple_output(
def test_eval_op_graph_bounds_on_inputset_multiple_output(
function,
input_ranges,
expected_output_bounds,
expected_output_data_type: Tuple[Integer],
):
"""Test function for eval_op_graph_bounds_on_dataset"""
"""Test function for eval_op_graph_bounds_on_inputset"""
op_graph = trace_numpy_function(
function, {"x": EncryptedScalar(Integer(64, True)), "y": EncryptedScalar(Integer(64, True))}
@@ -281,7 +281,7 @@ def test_eval_op_graph_bounds_on_dataset_multiple_output(
for y_gen in range_y:
yield (x_gen, y_gen)
node_bounds = eval_op_graph_bounds_on_dataset(
node_bounds = eval_op_graph_bounds_on_inputset(
op_graph, data_gen(*tuple(range(x[0], x[1] + 1) for x in input_ranges))
)
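Here, `data_gen` yields the cartesian product of the per-parameter ranges. A standalone illustration of the same pattern:

```python
# Standalone illustration of the data_gen pattern used in the test above.
def data_gen(range_x, range_y):
    for x_gen in range_x:
        for y_gen in range_y:
            yield (x_gen, y_gen)

print(list(data_gen(range(0, 2), range(0, 2))))
# [(0, 0), (0, 1), (1, 0), (1, 1)]
```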

@@ -204,8 +204,8 @@ def datagen(*args):
)
def test_mlir_converter(func, args_dict, args_ranges):
"""Test the conversion to MLIR by calling the parser from the compiler"""
dataset = datagen(*args_ranges)
result_graph = compile_numpy_function_into_op_graph(func, args_dict, dataset)
inputset = datagen(*args_ranges)
result_graph = compile_numpy_function_into_op_graph(func, args_dict, inputset)
converter = MLIRConverter(V0_OPSET_CONVERSION_FUNCTIONS)
mlir_result = converter.convert(result_graph)
# testing that this doesn't raise an error

@@ -211,7 +211,7 @@ def test_print_and_draw_graph_with_dot(lambda_f, params, ref_graph_str):
# Note that the bitwidths are not necessarily correct (e.g., a MUL of a 17b times a 23b
# returning 23b), since they are replaced later by the real bitwidths computed on the
# dataset
# inputset
@pytest.mark.parametrize(
"lambda_f,x_y,ref_graph_str",
[