feat(compilation): provide a way to automatically generate a random inputset

This commit is contained in:
Umut
2021-10-22 16:17:15 +03:00
parent 9459675cfb
commit 70fbac7188
5 changed files with 341 additions and 10 deletions

View File

@@ -8,6 +8,8 @@ class CompilationConfiguration:
enable_topological_optimizations: bool
check_every_input_in_inputset: bool
treat_warnings_as_errors: bool
enable_unsafe_features: bool
random_inputset_samples: int
def __init__(
self,
@@ -15,8 +17,12 @@ class CompilationConfiguration:
enable_topological_optimizations: bool = True,
check_every_input_in_inputset: bool = False,
treat_warnings_as_errors: bool = False,
enable_unsafe_features: bool = False,
random_inputset_samples: int = 30,
):
self.dump_artifacts_on_unexpected_failures = dump_artifacts_on_unexpected_failures
self.enable_topological_optimizations = enable_topological_optimizations
self.check_every_input_in_inputset = check_every_input_in_inputset
self.treat_warnings_as_errors = treat_warnings_as_errors
self.enable_unsafe_features = enable_unsafe_features
self.random_inputset_samples = random_inputset_samples

View File

@@ -2,7 +2,7 @@
import sys
import traceback
from typing import Any, Callable, Dict, Iterable, Optional, Tuple
from typing import Any, Callable, Dict, Iterable, Optional, Tuple, Union
import numpy
from zamalang import CompilerEngine
@@ -28,6 +28,7 @@ from .np_dtypes_helpers import (
get_base_value_for_numpy_or_python_constant_data,
get_constructor_for_numpy_or_python_constant_data,
)
from .np_inputset_helpers import _check_special_inputset_availability, _generate_random_inputset
from .np_mlir_converter import NPMLIRConverter
@@ -158,7 +159,7 @@ def _compile_numpy_function_into_op_graph_internal(
def compile_numpy_function_into_op_graph(
function_to_compile: Callable,
function_parameters: Dict[str, BaseValue],
inputset: Iterable[Tuple[Any, ...]],
inputset: Union[Iterable[Tuple[Any, ...]], str],
compilation_configuration: Optional[CompilationConfiguration] = None,
compilation_artifacts: Optional[CompilationArtifacts] = None,
) -> OPGraph:
@@ -168,9 +169,11 @@ def compile_numpy_function_into_op_graph(
function_to_compile (Callable): The function to compile
function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the
function is e.g. an EncryptedScalar holding a 7bits unsigned Integer
inputset (Iterable[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. It
needs to be an iterable on tuples which are of the same length than the number of
parameters in the function, and in the same order than these same parameters
inputset (Union[Iterable[Tuple[Any, ...]], str]): The inputset over which op_graph
is evaluated. It needs to be an iterable on tuples which are of the same length than
the number of parameters in the function, and in the same order than these same
parameters. Alternatively, it can be "random" but that's an unstable feature and should
not be used in production.
compilation_configuration (Optional[CompilationConfiguration]): Configuration object to use
during compilation
compilation_artifacts (Optional[CompilationArtifacts]): Artifacts object to fill
@@ -191,6 +194,11 @@ def compile_numpy_function_into_op_graph(
if compilation_artifacts is None:
compilation_artifacts = CompilationArtifacts()
# Generate random inputset if it is requested and available
if isinstance(inputset, str):
_check_special_inputset_availability(inputset, compilation_configuration)
inputset = _generate_random_inputset(function_parameters, compilation_configuration)
# Try to compile the function and save partial artifacts on failure
try:
# Use context manager to restore numpy error handling
@@ -306,7 +314,7 @@ def _compile_numpy_function_internal(
def compile_numpy_function(
function_to_compile: Callable,
function_parameters: Dict[str, BaseValue],
inputset: Iterable[Tuple[Any, ...]],
inputset: Union[Iterable[Tuple[Any, ...]], str],
compilation_configuration: Optional[CompilationConfiguration] = None,
compilation_artifacts: Optional[CompilationArtifacts] = None,
show_mlir: bool = False,
@@ -317,9 +325,11 @@ def compile_numpy_function(
function_to_compile (Callable): The function to compile
function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the
function is e.g. an EncryptedScalar holding a 7bits unsigned Integer
inputset (Iterable[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. It
needs to be an iterable on tuples which are of the same length than the number of
parameters in the function, and in the same order than these same parameters
inputset (Union[Iterable[Tuple[Any, ...]], str]): The inputset over which op_graph
is evaluated. It needs to be an iterable on tuples which are of the same length than
the number of parameters in the function, and in the same order than these same
parameters. Alternatively, it can be "random" but that's an unstable feature and should
not be used in production.
compilation_configuration (Optional[CompilationConfiguration]): Configuration object to use
during compilation
compilation_artifacts (Optional[CompilationArtifacts]): Artifacts object to fill
@@ -342,6 +352,11 @@ def compile_numpy_function(
if compilation_artifacts is None:
compilation_artifacts = CompilationArtifacts()
# Generate random inputset if it is requested and available
if isinstance(inputset, str):
_check_special_inputset_availability(inputset, compilation_configuration)
inputset = _generate_random_inputset(function_parameters, compilation_configuration)
# Try to compile the function and save partial artifacts on failure
try:
# Use context manager to restore numpy error handling

View File

@@ -0,0 +1,157 @@
"""Helpers for numpy inputset related functionality."""
import random
from typing import Any, Dict, Iterable, Tuple
import numpy
from ..common.compilation import CompilationConfiguration
from ..common.data_types import Float, Integer
from ..common.values import BaseValue, TensorValue
def _generate_random_integer_scalar(dtype: Integer) -> int:
    """Draw a single random integer that fits the given integer data type.

    Args:
        dtype (Integer): the data type providing the lower and upper bounds

    Returns:
        int: a random value within the range [dtype.min_value(), dtype.max_value()]
    """

    lower_bound = dtype.min_value()
    upper_bound = dtype.max_value()

    # random.randint includes both endpoints, so the bounds are used as they are
    return random.randint(lower_bound, upper_bound)
def _generate_random_integer_tensor(dtype: Integer, shape: Tuple[int, ...]) -> numpy.ndarray:
    """Draw a random integer tensor whose elements fit the given integer data type.

    Args:
        dtype (Integer): the data type providing the bounds and the signedness
        shape (Tuple[int, ...]): the shape of the generated tensor

    Returns:
        numpy.ndarray: a random array of the specified shape where each value of it
            is within the range [dtype.min_value(), dtype.max_value()]
    """

    lowest = dtype.min_value()
    # numpy excludes the upper bound, so go one past max_value() to make it reachable
    one_past_highest = dtype.max_value() + 1
    element_type = numpy.int64 if dtype.is_signed else numpy.uint64

    return numpy.random.randint(  # type: ignore
        lowest,
        one_past_highest,
        size=shape,
        dtype=element_type,
    )
def _generate_random_float_scalar() -> float:
    """Draw a single random Python float.

    Returns:
        float: a random value within the range [0, 1)
    """

    sample = random.random()
    return sample
def _generate_random_float_tensor(dtype: Float, shape: Tuple[int, ...]) -> numpy.ndarray:
    """Generate a random float tensor.

    Args:
        dtype (Float): the data type to extract resulting numpy data type
            (numpy.float32 when its bit width is 32, numpy.float64 otherwise)
        shape (Tuple[int, ...]): the shape of the generated tensor

    Returns:
        numpy.ndarray: a random array of the specified shape where each value of it
            is within the range [0, 1)
    """

    numpy_dtype = numpy.float32 if dtype.bit_width == 32 else numpy.float64

    # random_sample(size=shape) draws the same values as rand(*shape) but, unlike rand,
    # it still returns an ndarray (0-dimensional) when shape is the empty tuple
    result = numpy.random.random_sample(size=shape).astype(numpy_dtype)

    # Narrowing float64 samples to float32 can round values just below 1.0 up to exactly 1.0,
    # so clamp to the largest representable value below 1.0 to keep the range half-open.
    # Passing `out` keeps the result an ndarray even for 0-dimensional arrays.
    largest_below_one = numpy.nextafter(numpy_dtype(1), numpy_dtype(0))
    return numpy.minimum(result, largest_below_one, out=result)
def _generate_random_inputset(
    function_parameters: Dict[str, BaseValue],
    compilation_configuration: CompilationConfiguration,
) -> Iterable[Tuple[Any, ...]]:
    """Generate a random inputset from function parameters.

    Using this function is not a good practice since the randomly generated inputset
    might not reflect real world data. We have it to speed up our development workflow
    and we also don't use it in any of our tests, benchmarks, or examples.

    Args:
        function_parameters (Dict[str, BaseValue]): the function parameters
            to extract data types and shapes
        compilation_configuration (CompilationConfiguration): the compilation configuration
            to extract the sample size of the resulting inputset

    Raises:
        ValueError: if the provided function arguments cannot be used for random inputset generation

    Returns:
        Iterable[Tuple[Any, ...]]: a list of
            compilation_configuration.random_inputset_samples tuples, each holding one
            random value per function parameter, in the order of function_parameters
    """

    parameters = tuple(function_parameters.values())

    # Validate once, before sampling: unsupported parameters are reported even when
    # zero samples are requested, and the checks are not repeated for every sample
    for parameter in parameters:
        _check_random_generation_support(parameter)

    return [
        tuple(_generate_random_value(parameter) for parameter in parameters)
        for _ in range(compilation_configuration.random_inputset_samples)
    ]


def _check_random_generation_support(parameter: BaseValue):
    """Raise ValueError if a random value cannot be generated for the given parameter."""

    if not isinstance(parameter, TensorValue):
        raise ValueError(f"Random inputset cannot be generated for {parameter} parameters")

    if not isinstance(parameter.dtype, (Integer, Float)):
        raise ValueError(
            f"Random inputset cannot be generated "
            f"for parameters of type {parameter.dtype}"
        )


def _generate_random_value(parameter: TensorValue) -> Any:
    """Generate one random value for a parameter accepted by the support check."""

    if isinstance(parameter.dtype, Integer):
        if parameter.is_scalar:
            return _generate_random_integer_scalar(parameter.dtype)
        return _generate_random_integer_tensor(parameter.dtype, parameter.shape)

    # Only Integer and Float data types pass the support check, so this is a Float
    if parameter.is_scalar:
        return _generate_random_float_scalar()
    return _generate_random_float_tensor(parameter.dtype, parameter.shape)
def _check_special_inputset_availability(
    inputset: str,
    compilation_configuration: CompilationConfiguration,
):
    """Validate a special (string) inputset against the compilation configuration.

    The special inputset has to be a known one and the compilation configuration has to
    allow its usage.

    Currently, the only special inputset is "random" but this can be extended in the future.

    Args:
        inputset (str): the special inputset to check
        compilation_configuration (CompilationConfiguration): the compilation configuration
            to check the availability of the provided special inputset

    Raises:
        ValueError: if the provided special inputset is not valid
        RuntimeError: if the provided special inputset is not available

    Returns:
        None
    """

    is_known_special_inputset = inputset == "random"
    if not is_known_special_inputset:
        raise ValueError(
            f"inputset can only be an iterable of tuples or the string 'random' "
            f"but you specified '{inputset}' for it"
        )

    # Validity is checked first, availability second
    if compilation_configuration.enable_unsafe_features:
        return

    raise RuntimeError(
        "Random inputset generation is an unsafe feature and should not be used "
        "if you don't know what you are doing"
    )

View File

@@ -1,12 +1,13 @@
"""Test file for numpy compilation functions"""
import itertools
import random
from copy import deepcopy
import numpy
import pytest
from concrete.common.compilation import CompilationConfiguration
from concrete.common.data_types.integers import Integer
from concrete.common.data_types.integers import Integer, UnsignedInteger
from concrete.common.debugging import draw_graph, get_printable_graph
from concrete.common.extensions.table import LookupTable
from concrete.common.values import ClearTensor, EncryptedScalar, EncryptedTensor
@@ -1131,3 +1132,59 @@ def test_failure_for_signed_output(default_compilation_configuration):
"return(%2)\n"
)
# pylint: enable=line-too-long
def test_compile_with_random_inputset(default_compilation_configuration):
    """Check that both compilation entry points accept the "random" inputset"""

    # Work on a copy so the shared fixture is left untouched
    unsafe_configuration = deepcopy(default_compilation_configuration)
    # The "random" inputset is rejected unless unsafe features are enabled
    unsafe_configuration.enable_unsafe_features = True

    op_graph_parameters = {"x": EncryptedScalar(UnsignedInteger(6))}
    compile_numpy_function_into_op_graph(
        lambda x: x + 1,
        op_graph_parameters,
        inputset="random",
        compilation_configuration=unsafe_configuration,
    )

    full_compilation_parameters = {"x": EncryptedScalar(UnsignedInteger(6))}
    compile_numpy_function(
        lambda x: x + 32,
        full_compilation_parameters,
        inputset="random",
        compilation_configuration=unsafe_configuration,
    )
def test_fail_compile_with_random_inputset(default_compilation_configuration):
    """Test function for failed compile with random input set"""

    # An unknown special inputset string must be rejected with a ValueError
    with pytest.raises(ValueError) as excinfo:
        compile_numpy_function_into_op_graph(
            lambda x: x + 1,
            {"x": EncryptedScalar(UnsignedInteger(3))},
            inputset="unsupported",
            compilation_configuration=default_compilation_configuration,
        )

    # The message is checked through excinfo rather than a broad `except Exception`
    # so that an unexpected exception type surfaces as itself
    assert str(excinfo.value) == (
        "inputset can only be an iterable of tuples or the string 'random' "
        "but you specified 'unsupported' for it"
    )

    # The "random" inputset must be rejected with a RuntimeError when the configuration
    # does not have unsafe features enabled
    with pytest.raises(RuntimeError) as excinfo:
        compile_numpy_function(
            lambda x: x + 1,
            {"x": EncryptedScalar(UnsignedInteger(3))},
            inputset="random",
            compilation_configuration=default_compilation_configuration,
        )

    assert str(excinfo.value) == (
        "Random inputset generation is an unsafe feature "
        "and should not be used if you don't know what you are doing"
    )

View File

@@ -0,0 +1,96 @@
"""Test file for numpy inputset helpers"""
import numpy as np
import pytest
from concrete.common.compilation import CompilationConfiguration
from concrete.common.data_types import Float, UnsignedInteger
from concrete.common.data_types.base import BaseDataType
from concrete.common.values import BaseValue, EncryptedScalar, EncryptedTensor
from concrete.numpy.np_inputset_helpers import _generate_random_inputset
def test_generate_random_inputset():
    """Check the size, types, shapes and value ranges of a generated random inputset"""

    sample_count = 15
    parameters = {
        "x1": EncryptedScalar(UnsignedInteger(4)),
        "x2": EncryptedTensor(UnsignedInteger(4), shape=(2, 3)),
        "x3": EncryptedScalar(Float(64)),
        "x4": EncryptedTensor(Float(64), shape=(3, 2)),
    }

    inputset = _generate_random_inputset(
        parameters,
        CompilationConfiguration(random_inputset_samples=sample_count),
    )

    assert isinstance(inputset, list)
    assert len(inputset) == sample_count

    for sample in inputset:
        assert isinstance(sample, tuple)
        assert len(sample) == 4

        # One value per parameter, in the order the parameters were declared
        integer_scalar, integer_tensor, float_scalar, float_tensor = sample

        assert isinstance(integer_scalar, int)
        assert 0 <= integer_scalar < 2 ** 4

        assert isinstance(integer_tensor, np.ndarray)
        assert integer_tensor.dtype == np.uint64
        assert integer_tensor.shape == (2, 3)
        assert (integer_tensor >= 0).all()
        assert (integer_tensor < 2 ** 4).all()

        assert isinstance(float_scalar, float)
        assert 0 <= float_scalar < 1

        assert isinstance(float_tensor, np.ndarray)
        assert float_tensor.dtype == np.float64
        assert float_tensor.shape == (3, 2)
        assert (float_tensor >= 0).all()
        assert (float_tensor < 1).all()
def test_fail_generate_random_inputset():
    """Test function for failed generate_random_inputset"""

    class MockDtype(BaseDataType):
        """Unsupported dtype to check error messages"""

        def __eq__(self, o: object) -> bool:
            return False

        def __str__(self):
            return "MockDtype"

    class MockValue(BaseValue):
        """Unsupported value to check error messages"""

        def __init__(self):
            super().__init__(MockDtype(), is_encrypted=True)

        def __eq__(self, other: object) -> bool:
            return False

        def __str__(self):
            return "MockValue"

    # A parameter that is not a tensor value is rejected, and the message embeds its str()
    with pytest.raises(ValueError) as excinfo:
        _generate_random_inputset(
            {"x": MockValue()},
            CompilationConfiguration(random_inputset_samples=15),
        )

    # The message is checked through excinfo rather than a broad `except Exception`
    # so that an unexpected exception type surfaces as itself
    assert str(excinfo.value) == "Random inputset cannot be generated for MockValue parameters"

    # A supported value kind holding an unsupported data type is rejected as well
    with pytest.raises(ValueError) as excinfo:
        _generate_random_inputset(
            {"x": EncryptedScalar(MockDtype())},
            CompilationConfiguration(random_inputset_samples=15),
        )

    assert (
        str(excinfo.value)
        == "Random inputset cannot be generated for parameters of type MockDtype"
    )