From a0c26315eac37b0157dcbf7c802236c53abeaccb Mon Sep 17 00:00:00 2001
From: Arthur Meyre
Date: Fri, 3 Dec 2021 12:22:30 +0100
Subject: [PATCH] chore: make check_is_good_execution a fixture and fix flaky
 tests using it

closes #1061
---
 tests/conftest.py                      | 44 +++++++++++++-
 tests/numpy/test_compile.py            | 84 ++++++++++++++++----------
 tests/quantization/test_compilation.py | 37 +++++-------
 tests/torch/test_compile_torch.py      | 44 ++++++++------
 4 files changed, 137 insertions(+), 72 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index b9d7ae27d..ad4e6c360 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -4,14 +4,16 @@ import operator
 import random
 import re
 from pathlib import Path
-from typing import Callable, Dict, Type
+from typing import Any, Callable, Dict, Iterable, Type

 import networkx as nx
 import networkx.algorithms.isomorphism as iso
+import numpy
 import pytest
 import torch

 from concrete.common.compilation import CompilationConfiguration
+from concrete.common.fhe_circuit import FHECircuit
 from concrete.common.representation.intermediate import (
     ALL_IR_NODES,
     Add,
@@ -293,3 +295,43 @@ def seed_torch():
     """Fixture to seed torch"""

     return function_to_seed_torch
+
+
+def check_is_good_execution_impl(
+    fhe_circuit: FHECircuit,
+    function: Callable,
+    args: Iterable[Any],
+    preprocess_input_func: Callable[[Any], Any] = lambda x: x,
+    postprocess_output_func: Callable[[Any], Any] = lambda x: x,
+    check_function: Callable[[Any, Any], bool] = numpy.array_equal,
+    verbose: bool = True,
+):
+    """Run the check fhe_circuit.run(*args) == function(*args) several times. Raise an
+    AssertionError only if every attempt gives a wrong result: a single FHE execution has a
+    small probability of error, so retrying makes the probability of bad luck, i.e. of several
+    consecutive wrong results, negligible."""
+    nb_tries = 5
+
+    for i in range(1, nb_tries + 1):
+        preprocessed_args = tuple(preprocess_input_func(val) for val in args)
+        if check_function(
+            last_engine_result := postprocess_output_func(fhe_circuit.run(*preprocessed_args)),
+            last_function_result := postprocess_output_func(function(*preprocessed_args)),
+        ):
+            # Good computation after i tries
+            if verbose:
+                print(f"Good computation after {i} tries")
+            return
+
+    # Bad computation after nb_tries
+    raise AssertionError(
+        f"bad computation after {nb_tries} tries.\nLast engine result:\n{last_engine_result}\n"
+        f"Last function result:\n{last_function_result}"
+    )
+
+
+@pytest.fixture
+def check_is_good_execution():
+    """Fixture returning check_is_good_execution_impl"""
+
+    return check_is_good_execution_impl
diff --git a/tests/numpy/test_compile.py b/tests/numpy/test_compile.py
index e7b048a4e..b88eb47d5 100644
--- a/tests/numpy/test_compile.py
+++ b/tests/numpy/test_compile.py
@@ -305,39 +305,13 @@ def negative_unary_f(func, x, y):
     return z


-def check_is_good_execution(compiler_engine, function, args, verbose=True):
-    """Run several times the check compiler_engine.run(*args) == function(*args). If always wrong,
-    return an error. One can set the expected probability of success of one execution and the
-    number of tests, to finetune the probability of bad luck, ie that we run several times the
-    check and always have a wrong result."""
-    expected_probability_of_success = 0.95
-    nb_tries = 5
-    expected_bad_luck = (1 - expected_probability_of_success) ** nb_tries
-
-    for i in range(1, nb_tries + 1):
-        if numpy.array_equal(
-            last_engine_result := compiler_engine.run(*args),
-            last_function_result := function(*args),
-        ):
-            # Good computation after i tries
-            if verbose:
-                print(f"Good computation after {i} tries")
-            return
-
-    # Bad computation after nb_tries
-    raise AssertionError(
-        f"bad computation after {nb_tries} tries, which was supposed to happen with a "
-        f"probability of {expected_bad_luck}.\nLast engine result:\n{last_engine_result}\n"
-        f"Last function result:\n{last_function_result}"
-    )
-
-
 def subtest_compile_and_run_unary_ufunc_correctness(
     ufunc,
     upper_function,
     input_ranges,
     tensor_shape,
     default_compilation_configuration,
+    check_is_good_execution,
 ):
     """Test correctness of results when running a compiled function"""

@@ -378,6 +352,7 @@ def subtest_compile_and_run_binary_ufunc_correctness(
     input_ranges,
     tensor_shape,
     default_compilation_configuration,
+    check_is_good_execution,
 ):
     """Test correctness of results when running a compiled function"""

@@ -418,7 +393,12 @@ def subtest_compile_and_run_binary_ufunc_correctness(
 @pytest.mark.parametrize(
     "tensor_shape", [pytest.param((), id="scalar"), pytest.param((3, 1, 2), id="tensor")]
 )
-def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tensor_shape):
+def test_binary_ufunc_operations(
+    ufunc,
+    tensor_shape,
+    default_compilation_configuration,
+    check_is_good_execution,
+):
     """Test biary functions which are in tracing.NPTracer.LIST_OF_SUPPORTED_UFUNC."""

     run_multi_tlu_test = False
@@ -436,6 +416,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
             ((0, 4), (0, 5)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )
         subtest_compile_and_run_binary_ufunc_correctness(
             ufunc,
@@ -444,6 +425,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
             ((0, 4), (0, 5)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )
         if run_multi_tlu_test:
             subtest_compile_and_run_binary_ufunc_correctness(
@@ -453,6 +435,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
                 ((0, 4), (0, 5)),
                 tensor_shape,
                 default_compilation_configuration,
+                check_is_good_execution,
             )
             subtest_compile_and_run_binary_ufunc_correctness(
                 ufunc,
@@ -461,6 +444,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
                 ((0, 4), (0, 5)),
                 tensor_shape,
                 default_compilation_configuration,
+                check_is_good_execution,
             )
     elif ufunc in [numpy.floor_divide, numpy.fmod, numpy.remainder, numpy.true_divide]:
         subtest_compile_and_run_binary_ufunc_correctness(
@@ -470,6 +454,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
             ((1, 5), (1, 5)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )
         if run_multi_tlu_test:
             subtest_compile_and_run_binary_ufunc_correctness(
@@ -479,6 +464,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
                 ((1, 5), (1, 5)),
                 tensor_shape,
                 default_compilation_configuration,
+                check_is_good_execution,
             )
     elif ufunc in [numpy.lcm, numpy.left_shift]:
         # Need small constants to keep results sufficiently small
@@ -489,6 +475,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
             ((0, 5), (0, 5)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )
         subtest_compile_and_run_binary_ufunc_correctness(
             ufunc,
@@ -497,6 +484,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
             ((0, 5), (0, 5)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )
         if run_multi_tlu_test:
             subtest_compile_and_run_binary_ufunc_correctness(
@@ -508,6 +496,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
                 ((0, 5), (0, 5)),
                 tensor_shape,
                 default_compilation_configuration,
+                check_is_good_execution,
             )
             subtest_compile_and_run_binary_ufunc_correctness(
                 ufunc,
@@ -518,6 +507,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
                 ((0, 5), (0, 5)),
                 tensor_shape,
                 default_compilation_configuration,
+                check_is_good_execution,
             )
     elif ufunc in [numpy.ldexp]:
         # Need small constants to keep results sufficiently small
@@ -528,6 +518,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
             ((0, 5), (0, 5)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )
         if run_multi_tlu_test:
             subtest_compile_and_run_binary_ufunc_correctness(
@@ -537,6 +528,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
                 ((0, 5), (0, 5)),
                 tensor_shape,
                 default_compilation_configuration,
+                check_is_good_execution,
             )
     else:
         # General case
@@ -547,6 +539,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
             ((0, 5), (0, 5)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )
         subtest_compile_and_run_binary_ufunc_correctness(
             ufunc,
@@ -555,6 +548,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
             ((0, 5), (0, 5)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )
         if run_multi_tlu_test:
             subtest_compile_and_run_binary_ufunc_correctness(
@@ -564,6 +558,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
                 ((0, 5), (0, 5)),
                 tensor_shape,
                 default_compilation_configuration,
+                check_is_good_execution,
             )
             subtest_compile_and_run_binary_ufunc_correctness(
                 ufunc,
@@ -572,6 +567,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
                 ((0, 5), (0, 5)),
                 tensor_shape,
                 default_compilation_configuration,
+                check_is_good_execution,
             )

     # Negative inputs tests on compatible functions
@@ -590,6 +586,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
             ((0, 7), (0, 3)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )
         subtest_compile_and_run_binary_ufunc_correctness(
             ufunc,
@@ -598,6 +595,7 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
             ((0, 7), (0, 3)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )


@@ -607,7 +605,9 @@ def test_binary_ufunc_operations(ufunc, default_compilation_configuration, tenso
 @pytest.mark.parametrize(
     "tensor_shape", [pytest.param((), id="scalar"), pytest.param((3, 1, 2), id="tensor")]
 )
-def test_unary_ufunc_operations(ufunc, default_compilation_configuration, tensor_shape):
+def test_unary_ufunc_operations(
+    ufunc, tensor_shape, default_compilation_configuration, check_is_good_execution
+):
     """Test unary functions which are in tracing.NPTracer.LIST_OF_SUPPORTED_UFUNC."""

     if ufunc in [
@@ -621,6 +621,7 @@ def test_unary_ufunc_operations(ufunc, default_compilation_configuration, tensor
             ((0, 5), (0, 5)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )
     elif ufunc in [
         numpy.negative,
@@ -632,6 +633,7 @@ def test_unary_ufunc_operations(ufunc, default_compilation_configuration, tensor
             ((0, 5), (0, 5)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )
     elif ufunc in [
         numpy.arccosh,
@@ -647,6 +649,7 @@ def test_unary_ufunc_operations(ufunc, default_compilation_configuration, tensor
             ((1, 5), (1, 5)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )
     elif ufunc in [
         numpy.cosh,
@@ -666,6 +669,7 @@ def test_unary_ufunc_operations(ufunc, default_compilation_configuration, tensor
             ((0, 5), (0, 5)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )
     else:
         # Regular case for univariate functions
@@ -675,6 +679,7 @@ def test_unary_ufunc_operations(ufunc, default_compilation_configuration, tensor
             ((0, 5), (0, 5)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )

     # Negative inputs tests on compatible functions
@@ -696,6 +701,7 @@ def test_unary_ufunc_operations(ufunc, default_compilation_configuration, tensor
             ((0, 7), (0, 3)),
             tensor_shape,
             default_compilation_configuration,
+            check_is_good_execution,
         )


@@ -811,7 +817,11 @@ def test_compile_and_run_correctness(
     ],
 )
 def test_compile_and_run_correctness__for_prog_with_tlu(
-    function, input_ranges, list_of_arg_names, default_compilation_configuration
+    function,
+    input_ranges,
+    list_of_arg_names,
+    default_compilation_configuration,
+    check_is_good_execution,
 ):
     """Test correctness of results when running a compiled function which uses a TLU"""

@@ -1085,6 +1095,7 @@ def test_compile_and_run_tensor_correctness(
     test_input,
     use_check_good_exec,
     default_compilation_configuration,
+    check_is_good_execution,
 ):
     """Test correctness of results when running a compiled function with tensor operators"""
     circuit = compile_numpy_function(
@@ -1389,6 +1400,7 @@ def test_compile_and_run_lut_correctness(
     input_bits,
     list_of_arg_names,
     default_compilation_configuration,
+    check_is_good_execution,
 ):
     """Test correctness of results when running a compiled function with LUT"""

@@ -1443,6 +1455,7 @@ def test_compile_and_run_negative_lut_correctness(
     table,
     bit_width,
     default_compilation_configuration,
+    check_is_good_execution,
 ):
     """Test correctness when running a compiled function with LUT using negative values"""

@@ -1459,7 +1472,10 @@ def test_compile_and_run_negative_lut_correctness(
     check_is_good_execution(circuit, function, [value + offset])


-def test_compile_and_run_multi_lut_correctness(default_compilation_configuration):
+def test_compile_and_run_multi_lut_correctness(
+    default_compilation_configuration,
+    check_is_good_execution,
+):
     """Test correctness of results when running a compiled function with Multi LUT"""

     def function_to_compile(x):
@@ -2000,7 +2016,11 @@ def test_compile_and_run_correctness_with_negative_values(
     ],
 )
 def test_compile_and_run_correctness_with_negative_values_and_pbs(
-    function, input_ranges, list_of_arg_names, default_compilation_configuration
+    function,
+    input_ranges,
+    list_of_arg_names,
+    default_compilation_configuration,
+    check_is_good_execution,
 ):
     """Test correctness of results when running a compiled function, which has some negative
     intermediate values."""
diff --git a/tests/quantization/test_compilation.py b/tests/quantization/test_compilation.py
index 6394d425a..6c06f728d 100644
--- a/tests/quantization/test_compilation.py
+++ b/tests/quantization/test_compilation.py
@@ -39,7 +39,11 @@ class FC(nn.Module):
     [pytest.param(input_output_feature) for input_output_feature in INPUT_OUTPUT_FEATURE],
 )
 def test_quantized_module_compilation(
-    input_output_feature, model, seed_torch, default_compilation_configuration
+    input_output_feature,
+    model,
+    seed_torch,
+    default_compilation_configuration,
+    check_is_good_execution,
 ):
     """Test a neural network compilation for FHE inference."""
     # Seed torch
@@ -68,25 +72,16 @@ def test_quantized_module_compilation(
     # Compile
     quantized_model.compile(q_input, default_compilation_configuration)

-    dequant_predictions = quantized_model.forward_and_dequant(q_input)
-
-    nb_tries = 5
     # Compare predictions between FHE and QuantizedModule
-    for _ in range(nb_tries):
-        homomorphic_predictions = []
-        for x_q in q_input.qvalues:
-            homomorphic_predictions.append(
-                quantized_model.forward_fhe.run(numpy.array([x_q]).astype(numpy.uint8))
-            )
-        homomorphic_predictions = quantized_model.dequantize_output(
-            numpy.array(homomorphic_predictions, dtype=numpy.float32)
+    for x_q in q_input.qvalues:
+        x_q = numpy.expand_dims(x_q, 0)
+        check_is_good_execution(
+            fhe_circuit=quantized_model.forward_fhe,
+            function=quantized_model.forward,
+            args=[x_q.astype(numpy.uint8)],
+            postprocess_output_func=lambda x: quantized_model.dequantize_output(
+                x.astype(numpy.float32)
+            ),
+            check_function=lambda lhs, rhs: numpy.isclose(lhs, rhs).all(),
+            verbose=False,
         )
-
-        homomorphic_predictions = homomorphic_predictions.reshape(dequant_predictions.shape)
-
-        # Make sure homomorphic_predictions are the same as dequant_predictions
-        if numpy.isclose(homomorphic_predictions, dequant_predictions).all():
-            return
-
-    # Bad computation after nb_tries
-    raise AssertionError(f"bad computation after {nb_tries} tries")
diff --git a/tests/torch/test_compile_torch.py b/tests/torch/test_compile_torch.py
index ed917f626..a92ca091b 100644
--- a/tests/torch/test_compile_torch.py
+++ b/tests/torch/test_compile_torch.py
@@ -31,13 +31,19 @@ class FC(nn.Module):

 @pytest.mark.parametrize(
     "model",
-    [pytest.param(FC, marks=pytest.mark.xfail)],
+    [pytest.param(FC)],
 )
 @pytest.mark.parametrize(
     "input_output_feature",
     [pytest.param(input_output_feature) for input_output_feature in INPUT_OUTPUT_FEATURE],
 )
-def test_compile_torch(input_output_feature, model, seed_torch, default_compilation_configuration):
+def test_compile_torch(
+    input_output_feature,
+    model,
+    seed_torch,
+    default_compilation_configuration,
+    check_is_good_execution,
+):
     """Test the different model architecture from torch numpy."""

     # Seed torch
@@ -46,12 +52,14 @@ def test_compile_torch(input_output_feature, model, seed_torch, default_compilat
     n_bits = 2

     # Define an input shape (n_examples, n_features)
-    n_examples = 10
+    n_examples = 50

     # Define the torch model
     torch_fc_model = model(input_output_feature)
     # Create random input
-    inputset = [numpy.random.uniform(-1, 1, size=input_output_feature) for _ in range(n_examples)]
+    inputset = [
+        numpy.random.uniform(-100, 100, size=input_output_feature) for _ in range(n_examples)
+    ]

     # Compile
     quantized_numpy_module = compile_torch_model(
@@ -61,19 +69,19 @@ def test_compile_torch(input_output_feature, model, seed_torch, default_compilat
         n_bits=n_bits,
     )

+    # Quantize inputs all at once to have meaningful scale and zero point
+    q_input = QuantizedArray(n_bits, numpy.array(inputset))
+
     # Compare predictions between FHE and QuantizedModule
-    clear_predictions = []
-    homomorphic_predictions = []
-    for numpy_input in inputset:
-        q_input = QuantizedArray(n_bits, numpy_input)
-        x_q = q_input.qvalues
-        clear_predictions.append(quantized_numpy_module.forward(x_q))
-        homomorphic_predictions.append(
-            quantized_numpy_module.forward_fhe.run(numpy.array([x_q]).astype(numpy.uint8))
+    for x_q in q_input.qvalues:
+        x_q = numpy.expand_dims(x_q, 0)
+        check_is_good_execution(
+            fhe_circuit=quantized_numpy_module.forward_fhe,
+            function=quantized_numpy_module.forward,
+            args=[x_q.astype(numpy.uint8)],
+            postprocess_output_func=lambda x: quantized_numpy_module.dequantize_output(
+                x.astype(numpy.float32)
+            ),
+            check_function=lambda lhs, rhs: numpy.isclose(lhs, rhs).all(),
+            verbose=False,
         )
-
-    clear_predictions = numpy.array(clear_predictions)
-    homomorphic_predictions = numpy.array(homomorphic_predictions)
-
-    # Make sure homomorphic_predictions are the same as dequant_predictions
-    assert numpy.array_equal(homomorphic_predictions, clear_predictions)
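
Usage note (editorial sketch, not part of the patch): tests now receive the helper through
pytest fixture injection, so a test declares check_is_good_execution as a parameter and calls it
exactly as before. The snippet below illustrates the retry behaviour the helper provides.
FlakyCircuit is a hypothetical stub standing in for a compiled FHECircuit (the helper only uses
its run method), and the import path assumes tests/conftest.py is importable as a module:

    from tests.conftest import check_is_good_execution_impl  # hypothetical import path

    class FlakyCircuit:
        """Stub circuit: wrong result on the first two runs, correct afterwards."""

        def __init__(self):
            self.calls = 0

        def run(self, x):
            self.calls += 1
            # Simulate FHE noise: the correct answer only comes out from the 3rd run on
            return x + 1 if self.calls >= 3 else x

    def clear_function(x):
        return x + 1

    # Succeeds and prints "Good computation after 3 tries", where a single-shot
    # comparison would have failed the test on the first unlucky execution.
    check_is_good_execution_impl(FlakyCircuit(), clear_function, args=[3])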