feat(compilation): provide a way to automatically generate a random inputset

2026-02-08 19:44:57 -05:00 · 2021-10-22 16:17:15 +03:00
parent 9459675cfb
commit 70fbac7188
5 changed files with 341 additions and 10 deletions
--- a/concrete/common/compilation/configuration.py
+++ b/concrete/common/compilation/configuration.py
@@ -8,6 +8,8 @@ class CompilationConfiguration:
    enable_topological_optimizations: bool
    check_every_input_in_inputset: bool
    treat_warnings_as_errors: bool
+    enable_unsafe_features: bool
+    random_inputset_samples: int

    def __init__(
        self,
@@ -15,8 +17,12 @@ class CompilationConfiguration:
        enable_topological_optimizations: bool = True,
        check_every_input_in_inputset: bool = False,
        treat_warnings_as_errors: bool = False,
+        enable_unsafe_features: bool = False,
+        random_inputset_samples: int = 30,
    ):
        self.dump_artifacts_on_unexpected_failures = dump_artifacts_on_unexpected_failures
        self.enable_topological_optimizations = enable_topological_optimizations
        self.check_every_input_in_inputset = check_every_input_in_inputset
        self.treat_warnings_as_errors = treat_warnings_as_errors
+        self.enable_unsafe_features = enable_unsafe_features
+        self.random_inputset_samples = random_inputset_samples
--- a/concrete/numpy/compile.py
+++ b/concrete/numpy/compile.py
@@ -2,7 +2,7 @@

 import sys
 import traceback
-from typing import Any, Callable, Dict, Iterable, Optional, Tuple
+from typing import Any, Callable, Dict, Iterable, Optional, Tuple, Union

 import numpy
 from zamalang import CompilerEngine
@@ -28,6 +28,7 @@ from .np_dtypes_helpers import (
    get_base_value_for_numpy_or_python_constant_data,
    get_constructor_for_numpy_or_python_constant_data,
 )
+from .np_inputset_helpers import _check_special_inputset_availability, _generate_random_inputset
 from .np_mlir_converter import NPMLIRConverter


@@ -158,7 +159,7 @@ def _compile_numpy_function_into_op_graph_internal(
 def compile_numpy_function_into_op_graph(
    function_to_compile: Callable,
    function_parameters: Dict[str, BaseValue],
-    inputset: Iterable[Tuple[Any, ...]],
+    inputset: Union[Iterable[Tuple[Any, ...]], str],
    compilation_configuration: Optional[CompilationConfiguration] = None,
    compilation_artifacts: Optional[CompilationArtifacts] = None,
 ) -> OPGraph:
@@ -168,9 +169,11 @@ def compile_numpy_function_into_op_graph(
        function_to_compile (Callable): The function to compile
        function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the
            function is e.g. an EncryptedScalar holding a 7bits unsigned Integer
-        inputset (Iterable[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. It
-            needs to be an iterable on tuples which are of the same length than the number of
-            parameters in the function, and in the same order than these same parameters
+        inputset (Union[Iterable[Tuple[Any, ...]], str]): The inputset over which op_graph
+            is evaluated. It needs to be an iterable on tuples which are of the same length than
+            the number of parameters in the function, and in the same order than these same
+            parameters. Alternatively, it can be "random" but that's an unstable feature and should
+            not be used in production.
        compilation_configuration (Optional[CompilationConfiguration]): Configuration object to use
            during compilation
        compilation_artifacts (Optional[CompilationArtifacts]): Artifacts object to fill
@@ -191,6 +194,11 @@ def compile_numpy_function_into_op_graph(
    if compilation_artifacts is None:
        compilation_artifacts = CompilationArtifacts()

+    # Generate random inputset if it is requested and available
+    if isinstance(inputset, str):
+        _check_special_inputset_availability(inputset, compilation_configuration)
+        inputset = _generate_random_inputset(function_parameters, compilation_configuration)
+
    # Try to compile the function and save partial artifacts on failure
    try:
        # Use context manager to restore numpy error handling
@@ -306,7 +314,7 @@ def _compile_numpy_function_internal(
 def compile_numpy_function(
    function_to_compile: Callable,
    function_parameters: Dict[str, BaseValue],
-    inputset: Iterable[Tuple[Any, ...]],
+    inputset: Union[Iterable[Tuple[Any, ...]], str],
    compilation_configuration: Optional[CompilationConfiguration] = None,
    compilation_artifacts: Optional[CompilationArtifacts] = None,
    show_mlir: bool = False,
@@ -317,9 +325,11 @@ def compile_numpy_function(
        function_to_compile (Callable): The function to compile
        function_parameters (Dict[str, BaseValue]): A dictionary indicating what each input of the
            function is e.g. an EncryptedScalar holding a 7bits unsigned Integer
-        inputset (Iterable[Tuple[Any, ...]]): The inputset over which op_graph is evaluated. It
-            needs to be an iterable on tuples which are of the same length than the number of
-            parameters in the function, and in the same order than these same parameters
+        inputset (Union[Iterable[Tuple[Any, ...]], str]): The inputset over which op_graph
+            is evaluated. It needs to be an iterable on tuples which are of the same length than
+            the number of parameters in the function, and in the same order than these same
+            parameters. Alternatively, it can be "random" but that's an unstable feature and should
+            not be used in production.
        compilation_configuration (Optional[CompilationConfiguration]): Configuration object to use
            during compilation
        compilation_artifacts (Optional[CompilationArtifacts]): Artifacts object to fill
@@ -342,6 +352,11 @@ def compile_numpy_function(
    if compilation_artifacts is None:
        compilation_artifacts = CompilationArtifacts()

+    # Generate random inputset if it is requested and available
+    if isinstance(inputset, str):
+        _check_special_inputset_availability(inputset, compilation_configuration)
+        inputset = _generate_random_inputset(function_parameters, compilation_configuration)
+
    # Try to compile the function and save partial artifacts on failure
    try:
        # Use context manager to restore numpy error handling
--- a/concrete/numpy/np_inputset_helpers.py
+++ b/concrete/numpy/np_inputset_helpers.py
@@ -0,0 +1,157 @@
+"""Helpers for numpy inputset related functionality."""
+
+import random
+from typing import Any, Dict, Iterable, Tuple
+
+import numpy
+
+from ..common.compilation import CompilationConfiguration
+from ..common.data_types import Float, Integer
+from ..common.values import BaseValue, TensorValue
+
+
+def _generate_random_integer_scalar(dtype: Integer) -> int:
+    """Draw one random integer that fits in the given integer data type.
+
+    Args:
+        dtype (Integer): the data type whose min_value() and max_value() bound the draw
+
+    Returns:
+        int: a random value within the range [dtype.min_value(), dtype.max_value()]
+    """
+
+    lower_bound = dtype.min_value()
+    upper_bound = dtype.max_value()
+    # random.randint includes both endpoints, so upper_bound itself can be drawn
+    return random.randint(lower_bound, upper_bound)
+
+
+def _generate_random_integer_tensor(dtype: Integer, shape: Tuple[int, ...]) -> numpy.ndarray:
+    """Draw a random integer tensor whose elements fit in the given integer data type.
+
+    Args:
+        dtype (Integer): the data type to extract bounds and signedness
+        shape (Tuple[int, ...]): the shape of the generated tensor
+
+    Returns:
+        numpy.ndarray: a random array of the specified shape where each value of it
+            is within the range [dtype.min_value(), dtype.max_value()], stored as
+            numpy.int64 when dtype.is_signed is set and as numpy.uint64 otherwise
+    """
+
+    lowest = dtype.min_value()
+    # numpy.random.randint excludes its upper bound, hence the + 1 to keep max_value() reachable
+    exclusive_highest = dtype.max_value() + 1
+    storage_dtype = numpy.int64 if dtype.is_signed else numpy.uint64
+
+    return numpy.random.randint(
+        lowest,
+        exclusive_highest,
+        size=shape,
+        dtype=storage_dtype,  # type: ignore
+    )
+
+
+def _generate_random_float_scalar() -> float:
+    """Draw one random float.
+
+    Returns:
+        float: a random value within the range [0, 1)
+    """
+
+    drawn_value = random.random()
+    return drawn_value
+
+
+def _generate_random_float_tensor(dtype: Float, shape: Tuple[int, ...]) -> numpy.ndarray:
+    """Draw a random float tensor.
+
+    Args:
+        dtype (Float): the data type to extract resulting numpy data type
+        shape (Tuple[int, ...]): the shape of the generated tensor
+
+    Returns:
+        numpy.ndarray: a random array of the specified shape, drawn from the range [0, 1)
+            and stored as numpy.float32 when dtype.bit_width is 32, numpy.float64 otherwise
+            (NOTE: the cast to numpy.float32 can round values very close to 1 up to 1.0)
+    """
+
+    uniform_values = numpy.random.rand(*shape)
+    target_dtype = numpy.float32 if dtype.bit_width == 32 else numpy.float64
+    return uniform_values.astype(target_dtype)
+
+
+def _select_random_generator(parameter: BaseValue) -> Tuple[Any, Tuple[Any, ...]]:
+    """Pick the random generator (and its arguments) matching a function parameter.
+
+    Args:
+        parameter (BaseValue): the function parameter to generate random values for
+
+    Raises:
+        ValueError: if the parameter is not a TensorValue or its dtype is neither Integer nor Float
+
+    Returns:
+        Tuple[Any, Tuple[Any, ...]]: the generator function and the positional arguments
+            to call it with
+    """
+
+    if not isinstance(parameter, TensorValue):
+        raise ValueError(f"Random inputset cannot be generated for {parameter} parameters")
+
+    dtype = parameter.dtype
+
+    if isinstance(dtype, Integer):
+        if parameter.is_scalar:
+            return _generate_random_integer_scalar, (dtype,)
+        return _generate_random_integer_tensor, (dtype, parameter.shape)
+
+    if isinstance(dtype, Float):
+        if parameter.is_scalar:
+            return _generate_random_float_scalar, ()
+        return _generate_random_float_tensor, (dtype, parameter.shape)
+
+    raise ValueError(
+        f"Random inputset cannot be generated "
+        f"for parameters of type {dtype}"
+    )
+
+
+def _generate_random_inputset(
+    function_parameters: Dict[str, BaseValue],
+    compilation_configuration: CompilationConfiguration,
+) -> Iterable[Tuple[Any, ...]]:
+    """Generate a random inputset from function parameters.
+
+    Using this function is not a good practice since the randomly generated inputset
+    might not reflect real world data. We have it to speed up our development workflow
+    and we also don't use it in any of our tests, benchmarks, or examples.
+
+    Every parameter is validated once, before any sample is drawn, so unsupported
+    parameters are rejected even when no sample is requested.
+
+    Args:
+        function_parameters (Dict[str, BaseValue]): the function parameters
+            to extract data types and shapes
+        compilation_configuration (CompilationConfiguration): the compilation configuration
+            to extract the sample size of the resulting inputset
+
+    Raises:
+        ValueError: if the provided function arguments cannot be used for random inputset generation
+
+    Returns:
+        Iterable[Tuple[Any, ...]]: a list of random_inputset_samples tuples, each holding
+            one randomly generated value per function parameter, in the iteration order
+            of function_parameters
+    """
+
+    # Which generator to use only depends on the parameter itself, so it is resolved
+    # (and validated) once per parameter instead of once per parameter per sample
+    generators = [
+        _select_random_generator(parameter) for parameter in function_parameters.values()
+    ]
+
+    return [
+        tuple(generate(*arguments) for generate, arguments in generators)
+        for _ in range(compilation_configuration.random_inputset_samples)
+    ]
+
+
+def _check_special_inputset_availability(
+    inputset: str,
+    compilation_configuration: CompilationConfiguration,
+):
+    """Ensure a special inputset is both recognised and allowed.
+
+    A special inputset is an inputset given as a string instead of an iterable of tuples.
+    The only recognised one is "random", and it can only be used when the provided
+    compilation configuration has enable_unsafe_features set.
+
+    Args:
+        inputset (str): the special inputset to check
+        compilation_configuration (CompilationConfiguration): the compilation configuration
+            to check the availability of the provided special inputset
+
+    Raises:
+        ValueError: if the provided special inputset is not "random"
+        RuntimeError: if enable_unsafe_features is not set in the compilation configuration
+
+    Returns:
+        None
+    """
+
+    is_known_special_inputset = inputset == "random"
+    if not is_known_special_inputset:
+        raise ValueError(
+            f"inputset can only be an iterable of tuples or the string 'random' "
+            f"but you specified '{inputset}' for it"
+        )
+
+    # Validity is established at this point, only availability is left to check
+    if compilation_configuration.enable_unsafe_features:
+        return
+
+    raise RuntimeError(
+        "Random inputset generation is an unsafe feature and should not be used "
+        "if you don't know what you are doing"
+    )
--- a/tests/numpy/test_compile.py
+++ b/tests/numpy/test_compile.py
@@ -1,12 +1,13 @@
 """Test file for numpy compilation functions"""
 import itertools
 import random
+from copy import deepcopy

 import numpy
 import pytest

 from concrete.common.compilation import CompilationConfiguration
-from concrete.common.data_types.integers import Integer
+from concrete.common.data_types.integers import Integer, UnsignedInteger
 from concrete.common.debugging import draw_graph, get_printable_graph
 from concrete.common.extensions.table import LookupTable
 from concrete.common.values import ClearTensor, EncryptedScalar, EncryptedTensor
@@ -1131,3 +1132,59 @@ def test_failure_for_signed_output(default_compilation_configuration):
        "return(%2)\n"
    )
    # pylint: enable=line-too-long
+
+
+def test_compile_with_random_inputset(default_compilation_configuration):
+    """Check both compilation entry points accept inputset="random" with unsafe features on"""
+
+    # The flag is flipped on a copy, the configuration received from the fixture stays untouched
+    unsafe_configuration = deepcopy(default_compilation_configuration)
+    unsafe_configuration.enable_unsafe_features = True
+
+    cases = (
+        (compile_numpy_function_into_op_graph, lambda x: x + 1),
+        (compile_numpy_function, lambda x: x + 32),
+    )
+    for compile_function, function_to_compile in cases:
+        compile_function(
+            function_to_compile,
+            {"x": EncryptedScalar(UnsignedInteger(6))},
+            inputset="random",
+            compilation_configuration=unsafe_configuration,
+        )
+
+
+def test_fail_compile_with_random_inputset(default_compilation_configuration):
+    """Test function for failed compile with random input set"""
+
+    # A string other than "random" is expected to be rejected as an invalid inputset
+    with pytest.raises(ValueError) as excinfo:
+        compile_numpy_function_into_op_graph(
+            lambda x: x + 1,
+            {"x": EncryptedScalar(UnsignedInteger(3))},
+            inputset="unsupported",
+            compilation_configuration=default_compilation_configuration,
+        )
+
+    assert str(excinfo.value) == (
+        "inputset can only be an iterable of tuples or the string 'random' "
+        "but you specified 'unsupported' for it"
+    )
+
+    # "random" is valid, but this test expects the fixture configuration to leave
+    # enable_unsafe_features off, which makes the random inputset unavailable
+    with pytest.raises(RuntimeError) as excinfo:
+        compile_numpy_function(
+            lambda x: x + 1,
+            {"x": EncryptedScalar(UnsignedInteger(3))},
+            inputset="random",
+            compilation_configuration=default_compilation_configuration,
+        )
+
+    assert str(excinfo.value) == (
+        "Random inputset generation is an unsafe feature "
+        "and should not be used if you don't know what you are doing"
+    )
--- a/tests/numpy/test_np_inputset_helpers.py
+++ b/tests/numpy/test_np_inputset_helpers.py
@@ -0,0 +1,96 @@
+"""Test file for numpy inputset helpers"""
+
+import numpy as np
+import pytest
+
+from concrete.common.compilation import CompilationConfiguration
+from concrete.common.data_types import Float, UnsignedInteger
+from concrete.common.data_types.base import BaseDataType
+from concrete.common.values import BaseValue, EncryptedScalar, EncryptedTensor
+from concrete.numpy.np_inputset_helpers import _generate_random_inputset
+
+
+def test_generate_random_inputset():
+    """Check the size, types, shapes and value ranges of a randomly generated inputset"""
+
+    sample_count = 15
+    function_parameters = {
+        "x1": EncryptedScalar(UnsignedInteger(4)),
+        "x2": EncryptedTensor(UnsignedInteger(4), shape=(2, 3)),
+        "x3": EncryptedScalar(Float(64)),
+        "x4": EncryptedTensor(Float(64), shape=(3, 2)),
+    }
+
+    inputset = _generate_random_inputset(
+        function_parameters,
+        CompilationConfiguration(random_inputset_samples=sample_count),
+    )
+
+    assert isinstance(inputset, list)
+    assert len(inputset) == sample_count
+
+    for sample in inputset:
+        assert isinstance(sample, tuple)
+        assert len(sample) == 4
+
+        # One value per parameter, in the order the parameters were declared above
+        integer_scalar, integer_tensor, float_scalar, float_tensor = sample
+
+        assert isinstance(integer_scalar, int)
+        assert 0 <= integer_scalar < 2 ** 4
+
+        assert isinstance(integer_tensor, np.ndarray)
+        assert integer_tensor.dtype == np.uint64
+        assert integer_tensor.shape == (2, 3)
+        assert (integer_tensor >= 0).all()
+        assert (integer_tensor < 2 ** 4).all()
+
+        assert isinstance(float_scalar, float)
+        assert 0 <= float_scalar < 1
+
+        assert isinstance(float_tensor, np.ndarray)
+        assert float_tensor.dtype == np.float64
+        assert float_tensor.shape == (3, 2)
+        assert (float_tensor >= 0).all()
+        assert (float_tensor < 1).all()
+
+
+def test_fail_generate_random_inputset():
+    """Test function for failed generate_random_inputset"""
+
+    class MockDtype(BaseDataType):
+        """Unsupported dtype to check error messages"""
+
+        def __eq__(self, o: object) -> bool:
+            return False
+
+        def __str__(self):
+            return "MockDtype"
+
+    class MockValue(BaseValue):
+        """Unsupported value to check error messages"""
+
+        def __init__(self):
+            super().__init__(MockDtype(), is_encrypted=True)
+
+        def __eq__(self, other: object) -> bool:
+            return False
+
+        def __str__(self):
+            return "MockValue"
+
+    configuration = CompilationConfiguration(random_inputset_samples=15)
+
+    # MockValue derives from BaseValue directly, so it is expected to fail the value check
+    with pytest.raises(ValueError) as excinfo:
+        _generate_random_inputset({"x": MockValue()}, configuration)
+
+    assert str(excinfo.value) == (
+        "Random inputset cannot be generated for MockValue parameters"
+    )
+
+    # Per the expected message, EncryptedScalar gets past the value check and it is
+    # MockDtype, being neither Integer nor Float, that gets rejected
+    with pytest.raises(ValueError) as excinfo:
+        _generate_random_inputset({"x": EncryptedScalar(MockDtype())}, configuration)
+
+    assert str(excinfo.value) == (
+        "Random inputset cannot be generated for parameters of type MockDtype"
+    )