refactor: rename 'dataset' to the clearer 'inputset'

closes #340
Benoit Chevallier-Mames
2021-09-14 17:01:30 +02:00
committed by Benoit Chevallier
parent efaf72880c
commit 8522e58280
13 changed files with 68 additions and 68 deletions

@@ -137,15 +137,15 @@ def main():
def function_to_compile(x_0):
return table[(x_0 + zp_x) * w_0]
dataset = []
inputset = []
for x_i in x_q:
dataset.append((int(x_i[0]),))
inputset.append((int(x_i[0]),))
# Measure: Compilation Time (ms)
engine = hnp.compile_numpy_function(
function_to_compile,
{"x_0": hnp.EncryptedScalar(hnp.UnsignedInteger(input_bits))},
iter(dataset),
iter(inputset),
)
# Measure: End

@@ -203,9 +203,9 @@ def main():
def function_to_compile(x_0, x_1):
return table[((x_0 + zp_x) * w_0) + ((x_1 + zp_x) * w_1)]
dataset = []
inputset = []
for x_i in x_q:
dataset.append((int(x_i[0]), int(x_i[1])))
inputset.append((int(x_i[0]), int(x_i[1])))
# Measure: Compilation Time (ms)
engine = hnp.compile_numpy_function(
@@ -214,7 +214,7 @@ def main():
"x_0": hnp.EncryptedScalar(hnp.UnsignedInteger(input_bits)),
"x_1": hnp.EncryptedScalar(hnp.UnsignedInteger(input_bits)),
},
iter(dataset),
iter(inputset),
)
# Measure: End

@@ -1,2 +1,2 @@
"""Bounds measurement module."""
from . import dataset_eval
from . import inputset_eval

@@ -1,4 +1,4 @@
"""Code to evaluate the IR graph on datasets."""
"""Code to evaluate the IR graph on inputsets."""
from typing import Any, Callable, Dict, Iterator, Tuple
@@ -7,20 +7,20 @@ from ..operator_graph import OPGraph
from ..representation.intermediate import IntermediateNode
def eval_op_graph_bounds_on_dataset(
def eval_op_graph_bounds_on_inputset(
op_graph: OPGraph,
dataset: Iterator[Tuple[Any, ...]],
inputset: Iterator[Tuple[Any, ...]],
min_func: Callable[[Any, Any], Any] = min,
max_func: Callable[[Any, Any], Any] = max,
) -> Dict[IntermediateNode, Dict[str, Any]]:
"""Evaluate the bounds with a dataset.
"""Evaluate the bounds with a inputset.
Evaluate the bounds for all output values of the operators in the graph op_graph over data
coming from the dataset
coming from the inputset
Args:
op_graph (OPGraph): The graph for which we want to determine the bounds
dataset (Iterator[Tuple[Any, ...]]): The dataset over which op_graph is evaluated. It
inputset (Iterator[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. It
needs to be an iterator over tuples of the same length as the number of
parameters in the function, and in the same order as those parameters
min_func (Callable[[Any, Any], Any], optional): custom function to compute a scalar minimum
@@ -35,11 +35,11 @@ def eval_op_graph_bounds_on_dataset(
op_graph, stored with the node as key and a dict with keys "min" and "max" as value.
"""
def check_dataset_input_len_is_valid(data_to_check):
def check_inputset_input_len_is_valid(data_to_check):
custom_assert(
len(data_to_check) == len(op_graph.input_nodes),
(
f"Got input data from dataset of len: {len(data_to_check)}, "
f"Got input data from inputset of len: {len(data_to_check)}, "
f"function being evaluated has {len(op_graph.input_nodes)} inputs, please make "
f"sure your data generator returns valid tuples of input values"
),
@@ -48,8 +48,8 @@ def eval_op_graph_bounds_on_dataset(
# TODO: do we want to check coherence between the input data type and the corresponding Input IR
# node's expected data type? Not considering bit_width as they may not make sense at this stage
first_input_data = dict(enumerate(next(dataset)))
check_dataset_input_len_is_valid(first_input_data.values())
first_input_data = dict(enumerate(next(inputset)))
check_inputset_input_len_is_valid(first_input_data.values())
first_output = op_graph.evaluate(first_input_data)
# We evaluate the min and max func to be able to resolve the tensors min and max rather than
@@ -59,9 +59,9 @@ def eval_op_graph_bounds_on_dataset(
for node, value in first_output.items()
}
for input_data in dataset:
for input_data in inputset:
current_input_data = dict(enumerate(input_data))
check_dataset_input_len_is_valid(current_input_data.values())
check_inputset_input_len_is_valid(current_input_data.values())
current_output = op_graph.evaluate(current_input_data)
for node, value in current_output.items():
node_bounds[node]["min"] = min_func(node_bounds[node]["min"], value)

@@ -58,7 +58,7 @@ class LookupTable:
if x < 0 or x >= len(table):
raise ValueError(
f"Lookup table with {len(table)} entries cannot be indexed with {x} "
f"(you should check your dataset)",
f"(you should check your inputset)",
)
return table[x]
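To illustrate the guard above: indexing past the end of the table raises at evaluation time. A hypothetical construction (the `LookupTable` constructor signature is assumed):

```python
# Hypothetical illustration; the LookupTable([...]) constructor signature is an assumption.
table = LookupTable([2, 1, 3, 0])

table[3]  # fine, returns 0
table[4]  # raises ValueError: "Lookup table with 4 entries cannot be indexed
          # with 4 (you should check your inputset)"
```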

@@ -6,7 +6,7 @@ from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple
import numpy
from zamalang import CompilerEngine
from ..common.bounds_measurement.dataset_eval import eval_op_graph_bounds_on_dataset
from ..common.bounds_measurement.inputset_eval import eval_op_graph_bounds_on_inputset
from ..common.common_helpers import check_op_graph_is_integer_program
from ..common.compilation import CompilationArtifacts, CompilationConfiguration
from ..common.mlir import V0_OPSET_CONVERSION_FUNCTIONS, MLIRConverter
@@ -54,7 +54,7 @@ def numpy_min_func(lhs: Any, rhs: Any) -> Any:
def _compile_numpy_function_into_op_graph_internal(
function_to_compile: Callable,
function_parameters: Dict[str, BaseValue],
dataset: Iterator[Tuple[Any, ...]],
inputset: Iterator[Tuple[Any, ...]],
compilation_configuration: CompilationConfiguration,
compilation_artifacts: CompilationArtifacts,
) -> OPGraph:
@@ -64,7 +64,7 @@ def _compile_numpy_function_into_op_graph_internal(
function_to_compile (Callable): The function to compile
function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the
function is, e.g. an EncryptedScalar holding a 7-bit unsigned integer
dataset (Iterator[Tuple[Any, ...]]): The dataset over which op_graph is evaluated. It
inputset (Iterator[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. It
needs to be an iterator over tuples of the same length as the number of
parameters in the function, and in the same order as those parameters
compilation_artifacts (CompilationArtifacts): Artifacts object to fill
@@ -105,10 +105,10 @@ def _compile_numpy_function_into_op_graph_internal(
f"{', '.join(str(node) for node in offending_non_integer_nodes)}"
)
# Find bounds with the dataset
node_bounds = eval_op_graph_bounds_on_dataset(
# Find bounds with the inputset
node_bounds = eval_op_graph_bounds_on_inputset(
op_graph,
dataset,
inputset,
min_func=numpy_min_func,
max_func=numpy_max_func,
)
@@ -139,7 +139,7 @@ def _compile_numpy_function_into_op_graph_internal(
def compile_numpy_function_into_op_graph(
function_to_compile: Callable,
function_parameters: Dict[str, BaseValue],
dataset: Iterator[Tuple[Any, ...]],
inputset: Iterator[Tuple[Any, ...]],
compilation_configuration: Optional[CompilationConfiguration] = None,
compilation_artifacts: Optional[CompilationArtifacts] = None,
) -> OPGraph:
@@ -149,7 +149,7 @@ def compile_numpy_function_into_op_graph(
function_to_compile (Callable): The function to compile
function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the
function is, e.g. an EncryptedScalar holding a 7-bit unsigned integer
dataset (Iterator[Tuple[Any, ...]]): The dataset over which op_graph is evaluated. It
inputset (Iterator[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. It
needs to be an iterator over tuples of the same length as the number of
parameters in the function, and in the same order as those parameters
compilation_configuration (Optional[CompilationConfiguration]): Configuration object to use
@@ -177,7 +177,7 @@ def compile_numpy_function_into_op_graph(
return _compile_numpy_function_into_op_graph_internal(
function_to_compile,
function_parameters,
dataset,
inputset,
compilation_configuration,
compilation_artifacts,
)
@@ -201,7 +201,7 @@ def compile_numpy_function_into_op_graph(
def _compile_numpy_function_internal(
function_to_compile: Callable,
function_parameters: Dict[str, BaseValue],
dataset: Iterator[Tuple[Any, ...]],
inputset: Iterator[Tuple[Any, ...]],
compilation_configuration: CompilationConfiguration,
compilation_artifacts: CompilationArtifacts,
show_mlir: bool,
@@ -212,7 +212,7 @@ def _compile_numpy_function_internal(
function_to_compile (Callable): The function you want to compile
function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the
function is, e.g. an EncryptedScalar holding a 7-bit unsigned integer
dataset (Iterator[Tuple[Any, ...]]): The dataset over which op_graph is evaluated. It
inputset (Iterator[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. It
needs to be an iterator over tuples of the same length as the number of
parameters in the function, and in the same order as those parameters
compilation_configuration (CompilationConfiguration): Configuration object to use
@@ -230,7 +230,7 @@ def _compile_numpy_function_internal(
op_graph = _compile_numpy_function_into_op_graph_internal(
function_to_compile,
function_parameters,
dataset,
inputset,
compilation_configuration,
compilation_artifacts,
)
@@ -256,7 +256,7 @@ def _compile_numpy_function_internal(
def compile_numpy_function(
function_to_compile: Callable,
function_parameters: Dict[str, BaseValue],
dataset: Iterator[Tuple[Any, ...]],
inputset: Iterator[Tuple[Any, ...]],
compilation_configuration: Optional[CompilationConfiguration] = None,
compilation_artifacts: Optional[CompilationArtifacts] = None,
show_mlir: bool = False,
@@ -267,7 +267,7 @@ def compile_numpy_function(
function_to_compile (Callable): The function to compile
function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the
function is, e.g. an EncryptedScalar holding a 7-bit unsigned integer
dataset (Iterator[Tuple[Any, ...]]): The dataset over which op_graph is evaluated. It
inputset (Iterator[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. It
needs to be an iterator over tuples of the same length as the number of
parameters in the function, and in the same order as those parameters
compilation_configuration (Optional[CompilationConfiguration]): Configuration object to use
@@ -297,7 +297,7 @@ def compile_numpy_function(
return _compile_numpy_function_internal(
function_to_compile,
function_parameters,
dataset,
inputset,
compilation_configuration,
compilation_artifacts,
show_mlir,

@@ -129,20 +129,20 @@ Let's take a closer look at the options we provide today.
### Dataset Evaluation
This is the simplest approach, but it requires a dataset to be provided by the user.
This is the simplest approach, but it requires an inputset to be provided by the user.
The dataset is not the dataset in the usual sense of ML as it doesn't require labels.
The inputset is not to be confused with a dataset in the classical ML sense, as it doesn't require labels.
Rather, it is a set of values which are typical inputs of the function.
The idea is to evaluate each input in the dataset and record the result of each operation in the operation graph.
The idea is to evaluate each input in the inputset and record the result of each operation in the operation graph.
Then we compare the evaluation results with the current minimum/maximum values of each node and update the minimum/maximum accordingly.
After the entire dataset is evaluated, we assign a data type to each node using the minimum and the maximum value it contained.
After the entire inputset is evaluated, we assign a data type to each node using the minimum and the maximum value it contained.
Here is an example, given this operation graph where `x` is encrypted:
![](../../_static/compilation-pipeline/two_x_plus_three.png)
and this dataset:
and this inputset:
```
[2, 3, 1]
```
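To make the procedure concrete, here is a minimal plain-Python sketch (not the library's actual implementation) of tracking the min/max of each node of `2 * x + 3` over that inputset:

```python
# Plain-Python sketch of bounds tracking; not the library's actual code.
inputset = [2, 3, 1]

# Track min/max for each node of the graph: x, 2*x, and 2*x + 3.
bounds = {}
for x in inputset:
    values = {"x": x, "2*x": 2 * x, "2*x+3": 2 * x + 3}
    for node, value in values.items():
        if node not in bounds:
            bounds[node] = {"min": value, "max": value}
        else:
            bounds[node]["min"] = min(bounds[node]["min"], value)
            bounds[node]["max"] = max(bounds[node]["max"], value)

print(bounds)
# {'x': {'min': 1, 'max': 3}, '2*x': {'min': 2, 'max': 6}, '2*x+3': {'min': 5, 'max': 9}}
```

For example, `2*x + 3` stays within `[5, 9]`, so a 4-bit unsigned integer would suffice for that node.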

@@ -28,10 +28,10 @@ y = hnp.EncryptedScalar(hnp.UnsignedInteger(3))
In this configuration, both `x` and `y` are 3-bit unsigned integers, so they have the range `[0, 2**3 - 1]`.
We also need a dataset. However, it's not the dataset used in training as it doesn't contain any labels. It is to determine the bit-widths of the intermediate results so only the inputs are necessary. It should be an iterable yielding tuples in the same order as the inputs of the function to compile.
We also need an inputset. The latter is not to be confused with the dataset used in training, which contains labels. The inputset is only used to determine the bit-widths of the intermediate results, so only the inputs are necessary. It should be an iterable yielding tuples in the same order as the inputs of the function to compile.
```python
dataset = [(2, 3), (0, 0), (1, 6), (7, 7), (7, 1)]
inputset = [(2, 3), (0, 0), (1, 6), (7, 7), (7, 1)]
```
Finally, we can compile our function to its homomorphic equivalent.
@@ -39,7 +39,7 @@ Finally, we can compile our function to its homomorphic equivalent.
```python
engine = hnp.compile_numpy_function(
f, {"x": x, "y": y},
dataset=iter(dataset),
inputset=iter(inputset),
)
```
@@ -59,7 +59,7 @@ You can use `.run(...)` method of `engine` returned by `hnp.compile_numpy_functi
```
Be careful about the inputs, though.
If you were to run with values outside the range of the dataset, the result might not be correct.
If you were to run with values outside the range of the inputset, the result might not be correct.
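For instance, sticking to the inputset above, a call like the hypothetical one below stays within the covered ranges:

```python
# Hypothetical call; (3, 4) falls within the ranges covered by the inputset above,
# so the homomorphic result should match f(3, 4).
result = engine.run(3, 4)
```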
## Further reading

@@ -54,7 +54,7 @@
"id": "27f67e43",
"metadata": {},
"source": [
"### We need a dataset, a handcrafted one for simplicity"
"### We need an inputset, a handcrafted one for simplicity"
]
},
{
@@ -73,7 +73,7 @@
"id": "fba2eecb",
"metadata": {},
"source": [
"### Let's visualize our dataset to get a grasp of it"
"### Let's visualize our inputset to get a grasp of it"
]
},
{
@@ -640,14 +640,14 @@
"metadata": {},
"outputs": [],
"source": [
"dataset = []\n",
"inputset = []\n",
"for x_i in x_q:\n",
" dataset.append((int(x_i[0]),))\n",
" inputset.append((int(x_i[0]),))\n",
"\n",
"homomorphic_model = hnp.compile_numpy_function_into_op_graph(\n",
" infer,\n",
" {\"x_0\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False))},\n",
" iter(dataset),\n",
" iter(inputset),\n",
")"
]
},
@@ -723,7 +723,7 @@
"engine = hnp.compile_numpy_function(\n",
" infer,\n",
" {\"x_0\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False))},\n",
" iter(dataset),\n",
" iter(inputset),\n",
")"
]
},

@@ -55,7 +55,7 @@
"id": "c7a0cc5f",
"metadata": {},
"source": [
"### We need a dataset, a handcrafted one for simplicity"
"### We need an inputset, a handcrafted one for simplicity"
]
},
{
@@ -74,7 +74,7 @@
"id": "2d522cb0",
"metadata": {},
"source": [
"### Let's visualize our dataset to get a grasp of it"
"### Let's visualize our inputset to get a grasp of it"
]
},
{
@@ -744,9 +744,9 @@
"metadata": {},
"outputs": [],
"source": [
"dataset = []\n",
"inputset = []\n",
"for x_i in x_q:\n",
" dataset.append((int(x_i[0]), int(x_i[1])))\n",
" inputset.append((int(x_i[0]), int(x_i[1])))\n",
" \n",
"homomorphic_model = hnp.compile_numpy_function_into_op_graph(\n",
" infer,\n",
@@ -754,7 +754,7 @@
" \"x_0\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False)),\n",
" \"x_1\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False)),\n",
" },\n",
" iter(dataset),\n",
" iter(inputset),\n",
")"
]
},
@@ -839,7 +839,7 @@
" \"x_0\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False)),\n",
" \"x_1\": hnp.EncryptedScalar(hnp.Integer(input_bits, is_signed=False)),\n",
" },\n",
" iter(dataset),\n",
" iter(inputset),\n",
")"
]
},

@@ -1,11 +1,11 @@
"""Test file for bounds evaluation with a dataset"""
"""Test file for bounds evaluation with a inputset"""
from typing import Tuple
import pytest
from concrete.common.bounds_measurement.dataset_eval import (
eval_op_graph_bounds_on_dataset,
from concrete.common.bounds_measurement.inputset_eval import (
eval_op_graph_bounds_on_inputset,
)
from concrete.common.data_types.floats import Float
from concrete.common.data_types.integers import Integer
@@ -207,15 +207,15 @@ from concrete.numpy.tracing import trace_numpy_function
),
],
)
def test_eval_op_graph_bounds_on_dataset(
def test_eval_op_graph_bounds_on_inputset(
function,
input_ranges,
expected_output_bounds,
expected_output_data_type: Integer,
):
"""Test function for eval_op_graph_bounds_on_dataset"""
"""Test function for eval_op_graph_bounds_on_inputset"""
test_eval_op_graph_bounds_on_dataset_multiple_output(
test_eval_op_graph_bounds_on_inputset_multiple_output(
function,
input_ranges,
(expected_output_bounds,),
@@ -264,13 +264,13 @@ def test_eval_op_graph_bounds_on_dataset(
),
],
)
def test_eval_op_graph_bounds_on_dataset_multiple_output(
def test_eval_op_graph_bounds_on_inputset_multiple_output(
function,
input_ranges,
expected_output_bounds,
expected_output_data_type: Tuple[Integer],
):
"""Test function for eval_op_graph_bounds_on_dataset"""
"""Test function for eval_op_graph_bounds_on_inputset"""
op_graph = trace_numpy_function(
function, {"x": EncryptedScalar(Integer(64, True)), "y": EncryptedScalar(Integer(64, True))}
@@ -281,7 +281,7 @@ def test_eval_op_graph_bounds_on_dataset_multiple_output(
for y_gen in range_y:
yield (x_gen, y_gen)
node_bounds = eval_op_graph_bounds_on_dataset(
node_bounds = eval_op_graph_bounds_on_inputset(
op_graph, data_gen(*tuple(range(x[0], x[1] + 1) for x in input_ranges))
)
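Here, `data_gen` yields the cartesian product of the per-parameter ranges. A standalone illustration of the same pattern:

```python
# Standalone illustration of the data_gen pattern used in the test above.
def data_gen(range_x, range_y):
    for x_gen in range_x:
        for y_gen in range_y:
            yield (x_gen, y_gen)

print(list(data_gen(range(0, 2), range(0, 2))))
# [(0, 0), (0, 1), (1, 0), (1, 1)]
```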

@@ -204,8 +204,8 @@ def datagen(*args):
)
def test_mlir_converter(func, args_dict, args_ranges):
"""Test the conversion to MLIR by calling the parser from the compiler"""
dataset = datagen(*args_ranges)
result_graph = compile_numpy_function_into_op_graph(func, args_dict, dataset)
inputset = datagen(*args_ranges)
result_graph = compile_numpy_function_into_op_graph(func, args_dict, inputset)
converter = MLIRConverter(V0_OPSET_CONVERSION_FUNCTIONS)
mlir_result = converter.convert(result_graph)
# testing that this doesn't raise an error

@@ -211,7 +211,7 @@ def test_print_and_draw_graph_with_dot(lambda_f, params, ref_graph_str):
# Note that the bitwidths are not necessarily correct (e.g., a MUL of a 17b times a 23b
# returning 23b), since they are replaced later by the real bitwidths computed on the
# dataset
# inputset
@pytest.mark.parametrize(
"lambda_f,x_y,ref_graph_str",
[