diff --git a/benchmarks/linear_regression.py b/benchmarks/linear_regression.py index 34b51b38a..68f20444d 100644 --- a/benchmarks/linear_regression.py +++ b/benchmarks/linear_regression.py @@ -137,15 +137,15 @@ def main(): def function_to_compile(x_0): return table[(x_0 + zp_x) * w_0] - dataset = [] + inputset = [] for x_i in x_q: - dataset.append((int(x_i[0]),)) + inputset.append((int(x_i[0]),)) # Measure: Compilation Time (ms) engine = hnp.compile_numpy_function( function_to_compile, {"x_0": hnp.EncryptedScalar(hnp.UnsignedInteger(input_bits))}, - iter(dataset), + iter(inputset), ) # Measure: End diff --git a/benchmarks/logistic_regression.py b/benchmarks/logistic_regression.py index c6f1be799..899a874b1 100644 --- a/benchmarks/logistic_regression.py +++ b/benchmarks/logistic_regression.py @@ -203,9 +203,9 @@ def main(): def function_to_compile(x_0, x_1): return table[((x_0 + zp_x) * w_0) + ((x_1 + zp_x) * w_1)] - dataset = [] + inputset = [] for x_i in x_q: - dataset.append((int(x_i[0]), int(x_i[1]))) + inputset.append((int(x_i[0]), int(x_i[1]))) # Measure: Compilation Time (ms) engine = hnp.compile_numpy_function( @@ -214,7 +214,7 @@ def main(): "x_0": hnp.EncryptedScalar(hnp.UnsignedInteger(input_bits)), "x_1": hnp.EncryptedScalar(hnp.UnsignedInteger(input_bits)), }, - iter(dataset), + iter(inputset), ) # Measure: End diff --git a/concrete/common/bounds_measurement/__init__.py b/concrete/common/bounds_measurement/__init__.py index 9bd6c5c7a..a1ea8260d 100644 --- a/concrete/common/bounds_measurement/__init__.py +++ b/concrete/common/bounds_measurement/__init__.py @@ -1,2 +1,2 @@ """Bounds measurement module.""" -from . import dataset_eval +from . 
import inputset_eval diff --git a/concrete/common/bounds_measurement/dataset_eval.py b/concrete/common/bounds_measurement/inputset_eval.py similarity index 79% rename from concrete/common/bounds_measurement/dataset_eval.py rename to concrete/common/bounds_measurement/inputset_eval.py index e8662f462..2103760af 100644 --- a/concrete/common/bounds_measurement/dataset_eval.py +++ b/concrete/common/bounds_measurement/inputset_eval.py @@ -1,4 +1,4 @@ -"""Code to evaluate the IR graph on datasets.""" +"""Code to evaluate the IR graph on inputsets.""" from typing import Any, Callable, Dict, Iterator, Tuple @@ -7,20 +7,20 @@ from ..operator_graph import OPGraph from ..representation.intermediate import IntermediateNode -def eval_op_graph_bounds_on_dataset( +def eval_op_graph_bounds_on_inputset( op_graph: OPGraph, - dataset: Iterator[Tuple[Any, ...]], + inputset: Iterator[Tuple[Any, ...]], min_func: Callable[[Any, Any], Any] = min, max_func: Callable[[Any, Any], Any] = max, ) -> Dict[IntermediateNode, Dict[str, Any]]: - """Evaluate the bounds with a dataset. + """Evaluate the bounds with an inputset. Evaluate the bounds for all output values of the operators in the graph op_graph over data - coming from the dataset + coming from the inputset Args: op_graph (OPGraph): The graph for which we want to determine the bounds - dataset (Iterator[Tuple[Any, ...]]): The dataset over which op_graph is evaluated. It + inputset (Iterator[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. It needs to be an iterator on tuples which are of the same length than the number of parameters in the function, and in the same order than these same parameters min_func (Callable[[Any, Any], Any], optional): custom function to compute a scalar minimum @@ -35,11 +35,11 @@ def eval_op_graph_bounds_on_dataset( op_graph, stored with the node as key and a dict with keys "min" and "max" as value. 
""" - def check_dataset_input_len_is_valid(data_to_check): + def check_inputset_input_len_is_valid(data_to_check): custom_assert( len(data_to_check) == len(op_graph.input_nodes), ( - f"Got input data from dataset of len: {len(data_to_check)}, " + f"Got input data from inputset of len: {len(data_to_check)}, " f"function being evaluated has {len(op_graph.input_nodes)} inputs, please make " f"sure your data generator returns valid tuples of input values" ), @@ -48,8 +48,8 @@ def eval_op_graph_bounds_on_dataset( # TODO: do we want to check coherence between the input data type and the corresponding Input ir # node expected data type ? Not considering bit_width as they may not make sense at this stage - first_input_data = dict(enumerate(next(dataset))) - check_dataset_input_len_is_valid(first_input_data.values()) + first_input_data = dict(enumerate(next(inputset))) + check_inputset_input_len_is_valid(first_input_data.values()) first_output = op_graph.evaluate(first_input_data) # We evaluate the min and max func to be able to resolve the tensors min and max rather than @@ -59,9 +59,9 @@ def eval_op_graph_bounds_on_dataset( for node, value in first_output.items() } - for input_data in dataset: + for input_data in inputset: current_input_data = dict(enumerate(input_data)) - check_dataset_input_len_is_valid(current_input_data.values()) + check_inputset_input_len_is_valid(current_input_data.values()) current_output = op_graph.evaluate(current_input_data) for node, value in current_output.items(): node_bounds[node]["min"] = min_func(node_bounds[node]["min"], value) diff --git a/concrete/common/extensions/table.py b/concrete/common/extensions/table.py index 5326a8f6c..8fc3eac87 100644 --- a/concrete/common/extensions/table.py +++ b/concrete/common/extensions/table.py @@ -58,7 +58,7 @@ class LookupTable: if x < 0 or x >= len(table): raise ValueError( f"Lookup table with {len(table)} entries cannot be indexed with {x} " - f"(you should check your dataset)", + f"(you should check 
your inputset)", ) return table[x] diff --git a/concrete/numpy/compile.py b/concrete/numpy/compile.py index 333e73d10..fd9cd6359 100644 --- a/concrete/numpy/compile.py +++ b/concrete/numpy/compile.py @@ -6,7 +6,7 @@ from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple import numpy from zamalang import CompilerEngine -from ..common.bounds_measurement.dataset_eval import eval_op_graph_bounds_on_dataset +from ..common.bounds_measurement.inputset_eval import eval_op_graph_bounds_on_inputset from ..common.common_helpers import check_op_graph_is_integer_program from ..common.compilation import CompilationArtifacts, CompilationConfiguration from ..common.mlir import V0_OPSET_CONVERSION_FUNCTIONS, MLIRConverter @@ -54,7 +54,7 @@ def numpy_min_func(lhs: Any, rhs: Any) -> Any: def _compile_numpy_function_into_op_graph_internal( function_to_compile: Callable, function_parameters: Dict[str, BaseValue], - dataset: Iterator[Tuple[Any, ...]], + inputset: Iterator[Tuple[Any, ...]], compilation_configuration: CompilationConfiguration, compilation_artifacts: CompilationArtifacts, ) -> OPGraph: @@ -64,7 +64,7 @@ def _compile_numpy_function_into_op_graph_internal( function_to_compile (Callable): The function to compile function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the function is e.g. an EncryptedScalar holding a 7bits unsigned Integer - dataset (Iterator[Tuple[Any, ...]]): The dataset over which op_graph is evaluated. It + inputset (Iterator[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. 
It needs to be an iterator on tuples which are of the same length than the number of parameters in the function, and in the same order than these same parameters compilation_artifacts (CompilationArtifacts): Artifacts object to fill @@ -105,10 +105,10 @@ def _compile_numpy_function_into_op_graph_internal( f"{', '.join(str(node) for node in offending_non_integer_nodes)}" ) - # Find bounds with the dataset - node_bounds = eval_op_graph_bounds_on_dataset( + # Find bounds with the inputset + node_bounds = eval_op_graph_bounds_on_inputset( op_graph, - dataset, + inputset, min_func=numpy_min_func, max_func=numpy_max_func, ) @@ -139,7 +139,7 @@ def _compile_numpy_function_into_op_graph_internal( def compile_numpy_function_into_op_graph( function_to_compile: Callable, function_parameters: Dict[str, BaseValue], - dataset: Iterator[Tuple[Any, ...]], + inputset: Iterator[Tuple[Any, ...]], compilation_configuration: Optional[CompilationConfiguration] = None, compilation_artifacts: Optional[CompilationArtifacts] = None, ) -> OPGraph: @@ -149,7 +149,7 @@ def compile_numpy_function_into_op_graph( function_to_compile (Callable): The function to compile function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the function is e.g. an EncryptedScalar holding a 7bits unsigned Integer - dataset (Iterator[Tuple[Any, ...]]): The dataset over which op_graph is evaluated. It + inputset (Iterator[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. 
It needs to be an iterator on tuples which are of the same length than the number of parameters in the function, and in the same order than these same parameters compilation_configuration (Optional[CompilationConfiguration]): Configuration object to use @@ -177,7 +177,7 @@ def compile_numpy_function_into_op_graph( return _compile_numpy_function_into_op_graph_internal( function_to_compile, function_parameters, - dataset, + inputset, compilation_configuration, compilation_artifacts, ) @@ -201,7 +201,7 @@ def compile_numpy_function_into_op_graph( def _compile_numpy_function_internal( function_to_compile: Callable, function_parameters: Dict[str, BaseValue], - dataset: Iterator[Tuple[Any, ...]], + inputset: Iterator[Tuple[Any, ...]], compilation_configuration: CompilationConfiguration, compilation_artifacts: CompilationArtifacts, show_mlir: bool, @@ -212,7 +212,7 @@ def _compile_numpy_function_internal( function_to_compile (Callable): The function you want to compile function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the function is e.g. an EncryptedScalar holding a 7bits unsigned Integer - dataset (Iterator[Tuple[Any, ...]]): The dataset over which op_graph is evaluated. It + inputset (Iterator[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. 
It needs to be an iterator on tuples which are of the same length than the number of parameters in the function, and in the same order than these same parameters compilation_configuration (CompilationConfiguration): Configuration object to use @@ -230,7 +230,7 @@ def _compile_numpy_function_internal( op_graph = _compile_numpy_function_into_op_graph_internal( function_to_compile, function_parameters, - dataset, + inputset, compilation_configuration, compilation_artifacts, ) @@ -256,7 +256,7 @@ def _compile_numpy_function_internal( def compile_numpy_function( function_to_compile: Callable, function_parameters: Dict[str, BaseValue], - dataset: Iterator[Tuple[Any, ...]], + inputset: Iterator[Tuple[Any, ...]], compilation_configuration: Optional[CompilationConfiguration] = None, compilation_artifacts: Optional[CompilationArtifacts] = None, show_mlir: bool = False, @@ -267,7 +267,7 @@ def compile_numpy_function( function_to_compile (Callable): The function to compile function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the function is e.g. an EncryptedScalar holding a 7bits unsigned Integer - dataset (Iterator[Tuple[Any, ...]]): The dataset over which op_graph is evaluated. It + inputset (Iterator[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. 
It needs to be an iterator on tuples which are of the same length than the number of parameters in the function, and in the same order than these same parameters compilation_configuration (Optional[CompilationConfiguration]): Configuration object to use @@ -297,7 +297,7 @@ def compile_numpy_function( return _compile_numpy_function_internal( function_to_compile, function_parameters, - dataset, + inputset, compilation_configuration, compilation_artifacts, show_mlir, diff --git a/docs/dev/explanation/COMPILATION.md b/docs/dev/explanation/COMPILATION.md index b715e70d4..de333d6df 100644 --- a/docs/dev/explanation/COMPILATION.md +++ b/docs/dev/explanation/COMPILATION.md @@ -129,20 +129,20 @@ Let's take a closer look at the options we provide today. ### Dataset Evaluation -This is the simplest approach, but it requires a dataset to be provided by the user. +This is the simplest approach, but it requires an inputset to be provided by the user. -The dataset is not the dataset in the usual sense of ML as it doesn't require labels. +The inputset is not to be confused with the dataset which is classical in ML, as it doesn't require labels. Rather, it is a set of values which are typical inputs of the function. -The idea is to evaluate each input in the dataset and record the result of each operation in the operation graph. +The idea is to evaluate each input in the inputset and record the result of each operation in the operation graph. Then we compare the evaluation results with the current minimum/maximum values of each node and update the minimum/maximum accordingly. -After the entire dataset is evaluated, we assign a data type to each node using the minimum and the maximum value it contained. +After the entire inputset is evaluated, we assign a data type to each node using the minimum and the maximum value it contained. 
Here is an example, given this operation graph where `x` is encrypted: ![](../../_static/compilation-pipeline/two_x_plus_three.png) -and this dataset: +and this inputset: ``` [2, 3, 1] diff --git a/docs/user/howto/COMPILING_AND_EXECUTING.md b/docs/user/howto/COMPILING_AND_EXECUTING.md index edd2c39c8..834bb68e1 100644 --- a/docs/user/howto/COMPILING_AND_EXECUTING.md +++ b/docs/user/howto/COMPILING_AND_EXECUTING.md @@ -28,10 +28,10 @@ y = hnp.EncryptedScalar(hnp.UnsignedInteger(3)) In this configuration, both `x` and `y` are 3-bit unsigned integers, so they have the range of `[0, 2**3 - 1]` -We also need a dataset. However, it's not the dataset used in traning as it doesn't contain any labels. It is to determine the bit-widths of the intermediate results so only the inputs are necessary. It should be an iterable yielding tuples in the same order as the inputs of the function to compile. +We also need an inputset. It is not to be confused with the dataset, which is used in training and contains labels. The inputset is used to determine the bit-widths of the intermediate results, so only the inputs are necessary. It should be an iterable yielding tuples in the same order as the inputs of the function to compile. ```python -dataset = [(2, 3), (0, 0), (1, 6), (7, 7), (7, 1)] +inputset = [(2, 3), (0, 0), (1, 6), (7, 7), (7, 1)] ``` Finally, we can compile our function to its homomorphic equivalent. @@ -39,7 +39,7 @@ Finally, we can compile our function to its homomorphic equivalent. ```python engine = hnp.compile_numpy_function( f, {"x": x, "y": y}, - dataset=iter(dataset), + inputset=iter(inputset), ) ``` @@ -59,7 +59,7 @@ You can use `.run(...)` method of `engine` returned by `hnp.compile_numpy_functi ``` Be careful about the inputs, though. -If you were to run with values outside the range of the dataset, the result might not be correct. +If you were to run with values outside the range of the inputset, the result might not be correct. 
## Further reading diff --git a/examples/QuantizedLinearRegression.ipynb b/examples/QuantizedLinearRegression.ipynb index c6ef9331a..c38c6bac5 100644 --- a/examples/QuantizedLinearRegression.ipynb +++ b/examples/QuantizedLinearRegression.ipynb @@ -54,7 +54,7 @@ "id": "27f67e43", "metadata": {}, "source": [ - "### We need a dataset, a handcrafted one for simplicity" + "### We need an inputset, a handcrafted one for simplicity" ] }, { @@ -73,7 +73,7 @@ "id": "fba2eecb", "metadata": {}, "source": [ - "### Let's visualize our dataset to get a grasp of it" + "### Let's visualize our inputset to get a grasp of it" ] }, { @@ -640,14 +640,14 @@ "metadata": {}, "outputs": [], "source": [ - "dataset = []\n", + "inputset = []\n", "for x_i in x_q:\n", - " dataset.append((int(x_i[0]),))\n", + " inputset.append((int(x_i[0]),))\n", "\n", "homomorphic_model = hnp.compile_numpy_function_into_op_graph(\n", " infer,\n", " {\"x_0\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False))},\n", - " iter(dataset),\n", + " iter(inputset),\n", ")" ] }, @@ -723,7 +723,7 @@ "engine = hnp.compile_numpy_function(\n", " infer,\n", " {\"x_0\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False))},\n", - " iter(dataset),\n", + " iter(inputset),\n", ")" ] }, diff --git a/examples/QuantizedLogisticRegression.ipynb b/examples/QuantizedLogisticRegression.ipynb index f4ddcad4a..230a195a1 100644 --- a/examples/QuantizedLogisticRegression.ipynb +++ b/examples/QuantizedLogisticRegression.ipynb @@ -55,7 +55,7 @@ "id": "c7a0cc5f", "metadata": {}, "source": [ - "### We need a dataset, a handcrafted one for simplicity" + "### We need an inputset, a handcrafted one for simplicity" ] }, { @@ -74,7 +74,7 @@ "id": "2d522cb0", "metadata": {}, "source": [ - "### Let's visualize our dataset to get a grasp of it" + "### Let's visualize our inputset to get a grasp of it" ] }, { @@ -744,9 +744,9 @@ "metadata": {}, "outputs": [], "source": [ - "dataset = []\n", + "inputset = []\n", "for x_i in x_q:\n", 
- " dataset.append((int(x_i[0]), int(x_i[1])))\n", + " inputset.append((int(x_i[0]), int(x_i[1])))\n", " \n", "homomorphic_model = hnp.compile_numpy_function_into_op_graph(\n", " infer,\n", @@ -754,7 +754,7 @@ " \"x_0\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False)),\n", " \"x_1\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False)),\n", " },\n", - " iter(dataset),\n", + " iter(inputset),\n", ")" ] }, @@ -839,7 +839,7 @@ " \"x_0\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False)),\n", " \"x_1\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False)),\n", " },\n", - " iter(dataset),\n", + " iter(inputset),\n", ")" ] }, diff --git a/tests/common/bounds_measurement/test_dataset_eval.py b/tests/common/bounds_measurement/test_inputset_eval.py similarity index 94% rename from tests/common/bounds_measurement/test_dataset_eval.py rename to tests/common/bounds_measurement/test_inputset_eval.py index e22687d95..8fa937c22 100644 --- a/tests/common/bounds_measurement/test_dataset_eval.py +++ b/tests/common/bounds_measurement/test_inputset_eval.py @@ -1,11 +1,11 @@ -"""Test file for bounds evaluation with a dataset""" +"""Test file for bounds evaluation with an inputset""" from typing import Tuple import pytest -from concrete.common.bounds_measurement.dataset_eval import ( - eval_op_graph_bounds_on_dataset, +from concrete.common.bounds_measurement.inputset_eval import ( + eval_op_graph_bounds_on_inputset, ) from concrete.common.data_types.floats import Float from concrete.common.data_types.integers import Integer @@ -207,15 +207,15 @@ from concrete.numpy.tracing import trace_numpy_function ), ], ) -def test_eval_op_graph_bounds_on_dataset( +def test_eval_op_graph_bounds_on_inputset( function, input_ranges, expected_output_bounds, expected_output_data_type: Integer, ): - """Test function for eval_op_graph_bounds_on_dataset""" + """Test function for eval_op_graph_bounds_on_inputset""" - 
test_eval_op_graph_bounds_on_dataset_multiple_output( + test_eval_op_graph_bounds_on_inputset_multiple_output( function, input_ranges, (expected_output_bounds,), @@ -264,13 +264,13 @@ def test_eval_op_graph_bounds_on_dataset( ), ], ) -def test_eval_op_graph_bounds_on_dataset_multiple_output( +def test_eval_op_graph_bounds_on_inputset_multiple_output( function, input_ranges, expected_output_bounds, expected_output_data_type: Tuple[Integer], ): - """Test function for eval_op_graph_bounds_on_dataset""" + """Test function for eval_op_graph_bounds_on_inputset""" op_graph = trace_numpy_function( function, {"x": EncryptedScalar(Integer(64, True)), "y": EncryptedScalar(Integer(64, True))} @@ -281,7 +281,7 @@ def test_eval_op_graph_bounds_on_dataset_multiple_output( for y_gen in range_y: yield (x_gen, y_gen) - node_bounds = eval_op_graph_bounds_on_dataset( + node_bounds = eval_op_graph_bounds_on_inputset( op_graph, data_gen(*tuple(range(x[0], x[1] + 1) for x in input_ranges)) ) diff --git a/tests/common/mlir/test_mlir_converter.py b/tests/common/mlir/test_mlir_converter.py index cca5d84db..f1378a0b8 100644 --- a/tests/common/mlir/test_mlir_converter.py +++ b/tests/common/mlir/test_mlir_converter.py @@ -204,8 +204,8 @@ def datagen(*args): ) def test_mlir_converter(func, args_dict, args_ranges): """Test the conversion to MLIR by calling the parser from the compiler""" - dataset = datagen(*args_ranges) - result_graph = compile_numpy_function_into_op_graph(func, args_dict, dataset) + inputset = datagen(*args_ranges) + result_graph = compile_numpy_function_into_op_graph(func, args_dict, inputset) converter = MLIRConverter(V0_OPSET_CONVERSION_FUNCTIONS) mlir_result = converter.convert(result_graph) # testing that this doesn't raise an error diff --git a/tests/numpy/test_debugging.py b/tests/numpy/test_debugging.py index 3f32797ea..a7342c485 100644 --- a/tests/numpy/test_debugging.py +++ b/tests/numpy/test_debugging.py @@ -211,7 +211,7 @@ def 
test_print_and_draw_graph_with_dot(lambda_f, params, ref_graph_str): # Remark that the bitwidths are not particularly correct (eg, a MUL of a 17b times 23b # returning 23b), since they are replaced later by the real bitwidths computed on the -# dataset +# inputset @pytest.mark.parametrize( "lambda_f,x_y,ref_graph_str", [