From a5b1d6232ebfd87e807a9b7ae4d0a413a682343d Mon Sep 17 00:00:00 2001
From: jfrery
Date: Wed, 17 Nov 2021 12:19:40 +0100
Subject: [PATCH] feat: add signed integers quantization

---
 concrete/quantization/post_training.py      |  8 ++++--
 concrete/quantization/quantized_array.py    | 26 ++++++++++++++-----
 .../test_quantized_activations.py           |  5 ++--
 tests/quantization/test_quantized_array.py  | 11 ++++++-----
 tests/quantization/test_quantized_layers.py | 11 +++++++----
 5 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/concrete/quantization/post_training.py b/concrete/quantization/post_training.py
index f5c70c558..972af4df9 100644
--- a/concrete/quantization/post_training.py
+++ b/concrete/quantization/post_training.py
@@ -20,14 +20,17 @@ class PostTrainingAffineQuantization:
     n_bits: int
     quant_params: dict
     numpy_model: NumpyModule
+    is_signed: bool
 
-    def __init__(self, n_bits: int, numpy_model: NumpyModule):
+    def __init__(self, n_bits: int, numpy_model: NumpyModule, is_signed: bool = False):
         """Create the quantized version of numpy module.
 
         Args:
             n_bits (int): Number of bits to quantize the model. Currently this n_bits will be used
                 for all activation/inputs/weights
             numpy_model (NumpyModule): Model in numpy.
+            is_signed (bool): Whether the weights of the layers can be signed.
+                Currently, only the weights can be signed.
 
         Returns:
             QuantizedModule: A quantized version of the numpy model.
@@ -36,6 +39,7 @@ class PostTrainingAffineQuantization:
         self.n_bits = n_bits
         self.quant_params = {}
         self.numpy_model = numpy_model
+        self.is_signed = is_signed
 
     def quantize_module(self, calibration_data: numpy.ndarray) -> QuantizedModule:
         """Quantize numpy module.
@@ -61,7 +65,7 @@ class PostTrainingAffineQuantization:
         """Transform all floating points parameters to integers."""
 
         for name, params in self.numpy_model.numpy_module_dict.items():
-            self.quant_params[name] = QuantizedArray(self.n_bits, params)
+            self.quant_params[name] = QuantizedArray(self.n_bits, params, self.is_signed)
 
     def _calibrate_layers_activation(self, name, q_function, calibration_data):
         # Calibrate the output of the layer
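Note on the change above: with is_signed=True, the new offset of 2 ** (n_bits - 1) shifts the representable integer range from [0, 2**n_bits - 1] to [-2**(n_bits - 1), 2**(n_bits - 1) - 1], which suits zero-centered weights. A minimal standalone sketch of that range selection (plain Python for illustration, not code from this patch):

# Quantized range selected by is_signed, mirroring the offset logic of this patch.
def quantized_range(n_bits, is_signed):
    offset = 2 ** (n_bits - 1) if is_signed else 0
    return -offset, 2 ** n_bits - 1 - offset

assert quantized_range(8, False) == (0, 255)
assert quantized_range(8, True) == (-128, 127)

For n_bits=7, the signed range is (-64, 63).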
""" + self.offset = 0 + if is_signed: + self.offset = 2 ** (n_bits - 1) self.values = values self.n_bits = n_bits + self.is_signed = is_signed self.scale, self.zero_point, self.qvalues = self.compute_quantization_parameters() def __call__(self) -> Optional[numpy.ndarray]: @@ -32,17 +37,25 @@ class QuantizedArray: # Small constant needed for stability rmax = numpy.max(self.values) + STABILITY_CONST rmin = numpy.min(self.values) - scale = (rmax - rmin) / (2 ** self.n_bits - 1) if rmax != rmin else 1.0 + scale = ( + (rmax - rmin) / ((2 ** self.n_bits - 1 - self.offset) - (-self.offset)) + if rmax != rmin + else 1.0 + ) - zero_point = numpy.round(-(rmin / scale)).astype(int) + zero_point = numpy.round( + (rmax * (-self.offset) - (rmin * (2 ** self.n_bits - 1 - self.offset))) / (rmax - rmin) + ) # Compute quantized values and store qvalues = self.values / scale + zero_point + qvalues = ( qvalues.round() - .clip(0, 2 ** self.n_bits - 1) + .clip(-self.offset, 2 ** (self.n_bits) - 1 - self.offset) .astype(int) # Careful this can be very large with high number of bits ) + return scale, zero_point, qvalues def update_values(self, values: numpy.ndarray) -> Optional[numpy.ndarray]: @@ -77,10 +90,11 @@ class QuantizedArray: Returns: numpy.ndarray: Quantized values. """ + self.qvalues = ( (self.values / self.scale + self.zero_point) .round() - .clip(0, 2 ** self.n_bits - 1) + .clip(-self.offset, 2 ** (self.n_bits) - 1 - self.offset) .astype(int) ) return self.qvalues diff --git a/tests/quantization/test_quantized_activations.py b/tests/quantization/test_quantized_activations.py index 1826ecc72..e6b25c02d 100644 --- a/tests/quantization/test_quantized_activations.py +++ b/tests/quantization/test_quantized_activations.py @@ -22,9 +22,10 @@ N_BITS_ATOL_TUPLE_LIST = [ "quant_activation, values", [pytest.param(QuantizedSigmoid, numpy.random.uniform(size=(10, 40, 20)))], ) -def test_activations(quant_activation, values, n_bits, atol): +@pytest.mark.parametrize("is_signed", [pytest.param(True), pytest.param(False)]) +def test_activations(quant_activation, values, n_bits, atol, is_signed): """Test activation functions.""" - q_inputs = QuantizedArray(n_bits, values) + q_inputs = QuantizedArray(n_bits, values, is_signed) quant_sigmoid = quant_activation(n_bits) quant_sigmoid.calibrate(values) expected_output = quant_sigmoid.q_out.values diff --git a/tests/quantization/test_quantized_array.py b/tests/quantization/test_quantized_array.py index 688268116..be324a5b8 100644 --- a/tests/quantization/test_quantized_array.py +++ b/tests/quantization/test_quantized_array.py @@ -18,16 +18,17 @@ N_BITS_ATOL_TUPLE_LIST = [ "n_bits, atol", [pytest.param(n_bits, atol) for n_bits, atol in N_BITS_ATOL_TUPLE_LIST], ) +@pytest.mark.parametrize("is_signed", [pytest.param(True), pytest.param(False)]) @pytest.mark.parametrize("values", [pytest.param(numpy.random.randn(2000))]) -def test_quant_dequant_update(values, n_bits, atol): +def test_quant_dequant_update(values, n_bits, atol, is_signed): """Test the quant and dequant function.""" - quant_array = QuantizedArray(n_bits, values) + quant_array = QuantizedArray(n_bits, values, is_signed) qvalues = quant_array.quant() # Quantized values must be contained between 0 and 2**n_bits - assert numpy.max(qvalues) <= 2 ** n_bits - 1 - assert numpy.min(qvalues) >= 0 + assert numpy.max(qvalues) <= 2 ** (n_bits) - 1 - quant_array.offset + assert numpy.min(qvalues) >= -quant_array.offset # Dequantized values must be close to original values dequant_values = quant_array.dequant() diff --git 
diff --git a/tests/quantization/test_quantized_layers.py b/tests/quantization/test_quantized_layers.py
index fd38a64eb..c45519812 100644
--- a/tests/quantization/test_quantized_layers.py
+++ b/tests/quantization/test_quantized_layers.py
@@ -15,13 +15,15 @@ N_BITS_LIST = [20, 16, 8, 4]
 @pytest.mark.parametrize(
     "n_examples, n_features, n_neurons",
     [
+        pytest.param(2, 3, 4),
         pytest.param(20, 500, 30),
         pytest.param(200, 300, 50),
         pytest.param(10000, 100, 1),
         pytest.param(10, 20, 1),
     ],
 )
-def test_quantized_linear(n_examples, n_features, n_neurons, n_bits):
+@pytest.mark.parametrize("is_signed", [pytest.param(True), pytest.param(False)])
+def test_quantized_linear(n_examples, n_features, n_neurons, n_bits, is_signed):
     """Test the quantization linear layer of numpy.array.
 
     With n_bits>>0 we expect the results of the quantized linear
@@ -32,16 +34,17 @@ def test_quantized_linear(n_examples, n_features, n_neurons, n_bits):
 
-    # shape of weights: (n_neurons, n_features)
+    # shape of weights: (n_features, n_neurons)
     weights = numpy.random.uniform(size=(n_features, n_neurons))
-    q_weights = QuantizedArray(n_bits, weights)
+    q_weights = QuantizedArray(n_bits, weights, is_signed)
 
     bias = numpy.random.uniform(size=(1, n_neurons))
-    q_bias = QuantizedArray(n_bits, bias)
+    q_bias = QuantizedArray(n_bits, bias, is_signed)
 
     # Define our QuantizedLinear layer
     q_linear = QuantizedLinear(n_bits, q_weights, q_bias)
 
     # Calibrate the Quantized layer
     q_linear.calibrate(inputs)
+
     expected_outputs = q_linear.q_out.values
     actual_output = q_linear(q_inputs).dequant()
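If more coverage is wanted later, a hypothetical follow-up test in the suite's style could pin down the signed bounds explicitly. The import path and the QuantizedArray attributes (qvalues, offset) are assumed from the modules changed above; this test is a sketch, not part of the patch:

import numpy
import pytest

from concrete.quantization import QuantizedArray  # assumed import path, as in the test modules

@pytest.mark.parametrize("n_bits", [2, 4, 8])
def test_signed_bounds_zero_centered(n_bits):
    """Zero-centered values must quantize into the symmetric signed range."""
    values = numpy.random.uniform(-1, 1, size=(100,))
    q_arr = QuantizedArray(n_bits, values, is_signed=True)
    # offset == 2 ** (n_bits - 1), so qvalues live in [-offset, 2**n_bits - 1 - offset]
    assert numpy.min(q_arr.qvalues) >= -q_arr.offset
    assert numpy.max(q_arr.qvalues) <= 2 ** n_bits - 1 - q_arr.offset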