feat: add signed integer quantization
@@ -20,14 +20,17 @@ class PostTrainingAffineQuantization:
     n_bits: int
     quant_params: dict
     numpy_model: NumpyModule
+    is_signed: bool

-    def __init__(self, n_bits: int, numpy_model: NumpyModule):
+    def __init__(self, n_bits: int, numpy_model: NumpyModule, is_signed: bool = False):
         """Create the quantized version of numpy module.

         Args:
             n_bits (int): Number of bits to quantize the model. Currently this
                 n_bits will be used for all activation/inputs/weights
             numpy_model (NumpyModule): Model in numpy.
+            is_signed: Whether the weights of the layers can be signed.
+                Currently, only the weights can be signed.

         Returns:
             QuantizedModule: A quantized version of the numpy model.
@@ -36,6 +39,7 @@ class PostTrainingAffineQuantization:
         self.n_bits = n_bits
         self.quant_params = {}
         self.numpy_model = numpy_model
+        self.is_signed = is_signed

     def quantize_module(self, calibration_data: numpy.ndarray) -> QuantizedModule:
         """Quantize numpy module.
@@ -61,7 +65,7 @@ class PostTrainingAffineQuantization:
         """Transform all floating points parameters to integers."""

         for name, params in self.numpy_model.numpy_module_dict.items():
-            self.quant_params[name] = QuantizedArray(self.n_bits, params)
+            self.quant_params[name] = QuantizedArray(self.n_bits, params, self.is_signed)

     def _calibrate_layers_activation(self, name, q_function, calibration_data):
         # Calibrate the output of the layer
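
Note: for context, a minimal sketch of how the new flag is meant to be passed in (hypothetical snippet, not part of the commit; `numpy_model` and `calibration_data` are assumed to already exist):

```python
# Hypothetical usage, assuming a NumpyModule instance `numpy_model`
# and a float numpy array `calibration_data` are available.
post_training_quant = PostTrainingAffineQuantization(
    n_bits=7, numpy_model=numpy_model, is_signed=True
)
# Weights are quantized on signed integers; activations stay unsigned.
quantized_module = post_training_quant.quantize_module(calibration_data)
```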

@@ -10,18 +10,23 @@ STABILITY_CONST = 10 ** -12
 class QuantizedArray:
     """Abstraction of quantized array."""

-    def __init__(self, n_bits: int, values: numpy.ndarray):
+    def __init__(self, n_bits: int, values: numpy.ndarray, is_signed=False):
         """Quantize an array.

         See https://arxiv.org/abs/1712.05877.

         Args:
             values (numpy.ndarray): Values to be quantized.
-            n_bits (int): The number of bits to use for quantization. Defaults to 7.
+            n_bits (int): The number of bits to use for quantization.
+            is_signed (bool): Whether the quantization can be on signed integers.
         """

+        self.offset = 0
+        if is_signed:
+            self.offset = 2 ** (n_bits - 1)
         self.values = values
         self.n_bits = n_bits
+        self.is_signed = is_signed
         self.scale, self.zero_point, self.qvalues = self.compute_quantization_parameters()

     def __call__(self) -> Optional[numpy.ndarray]:
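
Note: the `offset` is what makes the representation signed. With `is_signed=True` it equals `2 ** (n_bits - 1)`, which shifts the representable integer range without changing its width (an illustration, values not from the commit):

```python
n_bits = 7
offset = 2 ** (n_bits - 1)                          # 64 if is_signed else 0
unsigned_range = (0, 2 ** n_bits - 1)               # (0, 127)
signed_range = (-offset, 2 ** n_bits - 1 - offset)  # (-64, 63)
```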

@@ -32,17 +37,25 @@ class QuantizedArray:
         # Small constant needed for stability
         rmax = numpy.max(self.values) + STABILITY_CONST
         rmin = numpy.min(self.values)
-        scale = (rmax - rmin) / (2 ** self.n_bits - 1) if rmax != rmin else 1.0
+        scale = (
+            (rmax - rmin) / ((2 ** self.n_bits - 1 - self.offset) - (-self.offset))
+            if rmax != rmin
+            else 1.0
+        )

-        zero_point = numpy.round(-(rmin / scale)).astype(int)
+        zero_point = numpy.round(
+            (rmax * (-self.offset) - (rmin * (2 ** self.n_bits - 1 - self.offset))) / (rmax - rmin)
+        )

         # Compute quantized values and store
         qvalues = self.values / scale + zero_point

         qvalues = (
             qvalues.round()
-            .clip(0, 2 ** self.n_bits - 1)
+            .clip(-self.offset, 2 ** (self.n_bits) - 1 - self.offset)
             .astype(int)  # Careful this can be very large with high number of bits
         )

         return scale, zero_point, qvalues

     def update_values(self, values: numpy.ndarray) -> Optional[numpy.ndarray]:
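
Note: the offset cancels in the new scale denominator, since `(2 ** n_bits - 1 - offset) - (-offset) == 2 ** n_bits - 1`, so `scale` is unchanged from the unsigned case; only `zero_point` and the clipping bounds move. A worked example with illustrative values (`STABILITY_CONST` omitted for readability):

```python
import numpy

n_bits = 7
offset = 2 ** (n_bits - 1)  # signed case
rmin, rmax = -1.0, 1.0      # calibration range of the values

scale = (rmax - rmin) / ((2 ** n_bits - 1 - offset) - (-offset))  # 2 / 127

zero_point = numpy.round(
    (rmax * (-offset) - rmin * (2 ** n_bits - 1 - offset)) / (rmax - rmin)
)  # round((-64 + 63) / 2) = 0 for this symmetric range

qvalues = (numpy.array([-1.0, 0.0, 1.0]) / scale + zero_point).round()
qvalues = qvalues.clip(-offset, 2 ** n_bits - 1 - offset).astype(int)
# array([-64, 0, 63]): 1.0 rounds to 64 and is clipped down to 63
```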

@@ -77,10 +90,11 @@ class QuantizedArray:
         Returns:
             numpy.ndarray: Quantized values.
         """

         self.qvalues = (
             (self.values / self.scale + self.zero_point)
             .round()
-            .clip(0, 2 ** self.n_bits - 1)
+            .clip(-self.offset, 2 ** (self.n_bits) - 1 - self.offset)
             .astype(int)
         )
         return self.qvalues
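
Note: `quant` is the forward affine map; its inverse (`dequant`, exercised in the tests below) recovers roughly `scale * (qvalues - zero_point)` under the referenced affine scheme, so the round-trip error per element is on the order of `scale / 2`. A hedged sketch:

```python
import numpy

# Assumes QuantizedArray from this commit is importable.
q_arr = QuantizedArray(7, numpy.random.randn(100), is_signed=True)
reconstructed = q_arr.dequant()  # approx. scale * (qvalues - zero_point)
max_err = numpy.max(numpy.abs(reconstructed - q_arr.values))  # ~ scale / 2
```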

@@ -22,9 +22,10 @@ N_BITS_ATOL_TUPLE_LIST = [
     "quant_activation, values",
     [pytest.param(QuantizedSigmoid, numpy.random.uniform(size=(10, 40, 20)))],
 )
-def test_activations(quant_activation, values, n_bits, atol):
+@pytest.mark.parametrize("is_signed", [pytest.param(True), pytest.param(False)])
+def test_activations(quant_activation, values, n_bits, atol, is_signed):
     """Test activation functions."""
-    q_inputs = QuantizedArray(n_bits, values)
+    q_inputs = QuantizedArray(n_bits, values, is_signed)
     quant_sigmoid = quant_activation(n_bits)
     quant_sigmoid.calibrate(values)
     expected_output = quant_sigmoid.q_out.values

@@ -18,16 +18,17 @@ N_BITS_ATOL_TUPLE_LIST = [
     "n_bits, atol",
     [pytest.param(n_bits, atol) for n_bits, atol in N_BITS_ATOL_TUPLE_LIST],
 )
+@pytest.mark.parametrize("is_signed", [pytest.param(True), pytest.param(False)])
 @pytest.mark.parametrize("values", [pytest.param(numpy.random.randn(2000))])
-def test_quant_dequant_update(values, n_bits, atol):
+def test_quant_dequant_update(values, n_bits, atol, is_signed):
     """Test the quant and dequant function."""

-    quant_array = QuantizedArray(n_bits, values)
+    quant_array = QuantizedArray(n_bits, values, is_signed)
     qvalues = quant_array.quant()

-    # Quantized values must be contained between 0 and 2**n_bits
-    assert numpy.max(qvalues) <= 2 ** n_bits - 1
-    assert numpy.min(qvalues) >= 0
+    # Quantized values must be contained between -offset and 2**n_bits - 1 - offset
+    assert numpy.max(qvalues) <= 2 ** (n_bits) - 1 - quant_array.offset
+    assert numpy.min(qvalues) >= -quant_array.offset

     # Dequantized values must be close to original values
     dequant_values = quant_array.dequant()

@@ -15,13 +15,15 @@ N_BITS_LIST = [20, 16, 8, 4]
 @pytest.mark.parametrize(
     "n_examples, n_features, n_neurons",
     [
         pytest.param(2, 3, 4),
         pytest.param(20, 500, 30),
         pytest.param(200, 300, 50),
         pytest.param(10000, 100, 1),
         pytest.param(10, 20, 1),
     ],
 )
-def test_quantized_linear(n_examples, n_features, n_neurons, n_bits):
+@pytest.mark.parametrize("is_signed", [pytest.param(True), pytest.param(False)])
+def test_quantized_linear(n_examples, n_features, n_neurons, n_bits, is_signed):
     """Test the quantization linear layer of numpy.array.

     With n_bits>>0 we expect the results of the quantized linear
@@ -32,16 +34,17 @@ def test_quantized_linear(n_examples, n_features, n_neurons, n_bits):

-    # shape of weights: (n_neurons, n_features)
+    # shape of weights: (n_features, n_neurons)
     weights = numpy.random.uniform(size=(n_features, n_neurons))
-    q_weights = QuantizedArray(n_bits, weights)
+    q_weights = QuantizedArray(n_bits, weights, is_signed)

     bias = numpy.random.uniform(size=(1, n_neurons))
-    q_bias = QuantizedArray(n_bits, bias)
+    q_bias = QuantizedArray(n_bits, bias, is_signed)

     # Define our QuantizedLinear layer
     q_linear = QuantizedLinear(n_bits, q_weights, q_bias)

     # Calibrate the Quantized layer
     q_linear.calibrate(inputs)

     expected_outputs = q_linear.q_out.values
     actual_output = q_linear(q_inputs).dequant()
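
Note: end to end, the flow this test exercises looks roughly like the sketch below (small fixed shapes; `inputs` and `q_inputs` are prepared outside the lines shown in the hunk):

```python
import numpy

inputs = numpy.random.uniform(size=(2, 3))           # (n_examples, n_features)
q_inputs = QuantizedArray(7, inputs)                 # activations stay unsigned
q_weights = QuantizedArray(7, numpy.random.uniform(size=(3, 4)), is_signed=True)
q_bias = QuantizedArray(7, numpy.random.uniform(size=(1, 4)), is_signed=True)

q_linear = QuantizedLinear(7, q_weights, q_bias)
q_linear.calibrate(inputs)                           # populates q_linear.q_out
dequantized = q_linear(q_inputs).dequant()           # close to the float matmul + bias
```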