diff --git a/concrete/quantization/post_training.py b/concrete/quantization/post_training.py
index 0035e258a..10a61ce5a 100644
--- a/concrete/quantization/post_training.py
+++ b/concrete/quantization/post_training.py
@@ -91,7 +91,12 @@ class PostTrainingAffineQuantization:
             # Check if layer is last layer from the model
             if name == list(self.numpy_model.torch_model.named_children())[-1][0]:
                 # If last layer, we can use 7 bits (maximum allowed) of precision.
-                q_layer = QuantizedLinear(7, q_weights, q_bias)
+                # However, 6 bits is currently used to allow 100% FHE precision
+                # compared to its quantized counterpart.
+                # Since this is the last layer and mostly used for classification,
+                # this does not have much impact.
+                # Put back 7 bits when 100% at 7b is achieved.
+                q_layer = QuantizedLinear(6, q_weights, q_bias)
             else:
                 q_layer = QuantizedLinear(self.n_bits, q_weights, q_bias)
             # Calibrate and get new calibration_data for next layer/activation
diff --git a/tests/torch/test_compile_torch.py b/tests/torch/test_compile_torch.py
index f6f6b036d..b3090cc9b 100644
--- a/tests/torch/test_compile_torch.py
+++ b/tests/torch/test_compile_torch.py
@@ -8,7 +8,7 @@ from concrete.torch.compile import compile_torch_model
 
 # INPUT_OUTPUT_FEATURE is the number of input and output of each of the network layers.
 # (as well as the input of the network itself)
-INPUT_OUTPUT_FEATURE = [1, 2, 3]
+INPUT_OUTPUT_FEATURE = [1, 2]
 
 
 class FC(nn.Module):