mirror of
https://github.com/zama-ai/concrete.git
synced 2026-02-09 03:55:04 -05:00
484 lines
100 KiB
Plaintext
484 lines
100 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "b760a0f6",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Linear Regression\n",
|
|
"\n",
|
|
"Currently, **Concrete** only supports unsigned integers of up to 7 bits. Nevertheless, we want to evaluate a linear regression model with it. Luckily, we can make use of **quantization** to overcome this limitation."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "253288cf",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Let's start by importing some libraries to develop our linear regression model."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "6200ab62",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from copy import deepcopy\n",
|
|
"from typing import Any, Dict\n",
|
|
"\n",
|
|
"import numpy as np\n",
|
|
"from matplotlib import pyplot as plt\n",
|
|
"from sklearn.datasets import make_regression\n",
|
|
"from sklearn.linear_model import LinearRegression\n",
|
|
"from sklearn.metrics import r2_score\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from tqdm import tqdm\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c8160548",
|
|
"metadata": {},
|
|
"source": [
|
|
"\n",
|
|
"\n",
|
|
"### Now, import Concrete quantization tools. "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "9dc823e0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from concrete.quantization import QuantizedArray, QuantizedLinear, QuantizedModule"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "f43e2387",
|
|
"metadata": {},
|
|
"source": [
|
|
"### And some helpers for visualization."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "d104c8df",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%matplotlib inline\n",
|
|
"\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"from IPython.display import display"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "4a5ae7af",
|
|
"metadata": {},
|
|
"source": [
|
|
"### And, finally, the FHE compiler."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "05cda814",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import concrete.numpy as hnp"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "53e676b8",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Let's define our Quantized Linear Regression module that quantizes a sklearn linear regression."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "d451e829",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"class QuantizedLinearRegression(QuantizedModule):\n",
|
|
" \"\"\"\n",
|
|
" Quantized Linear Regression\n",
|
|
" Building on top of QuantizedModule, this class wraps a single quantized linear\n",
|
|
" transformation (w . x + b)\n",
|
|
" \"\"\"\n",
|
|
"\n",
|
|
" @staticmethod\n",
|
|
" def from_sklearn(sklearn_model, calibration_data):\n",
|
|
" \"\"\"Create a Quantized Linear Regression initialized from a sklearn trained model\"\"\"\n",
|
|
" weights = np.expand_dims(sklearn_model.coef_, 1)\n",
|
|
" bias = sklearn_model.intercept_\n",
|
|
" #Quantize with 6 bits for input data, 1 for weights, 1 for the bias and 6 for the output\n",
|
|
" return QuantizedLinearRegression(6, 1, 1, 6, weights, bias, calibration_data)\n",
|
|
"\n",
|
|
" def __init__(self, q_bits, w_bits, b_bits, out_bits, weights, bias, calibration_data) -> None:\n",
|
|
" \"\"\"\n",
|
|
" Create the Linear regression with different quantization bit precisions:\n",
|
|
"\n",
|
|
" Quantization Parameters - Number of bits:\n",
|
|
" q_bits (int): bits for input data, ensuring that the number of bits of \n",
|
|
" the w . x + b operation does not exceed 7 for the calibration data\n",
|
|
" w_bits (int): bits for weights: in the case of a univariate regression this \n",
|
|
" can be 1 \n",
|
|
" b_bits (int): bits for bias (this is a single value so a single bit is enough)\n",
|
|
" out_bits (int): bits for the result of the linear transformation (w.x + b). \n",
|
|
" In our case since the result of the linear transformation is \n",
|
|
" directly decrypted we can use the maximum of 7 bits\n",
|
|
"\n",
|
|
" Other parameters:\n",
|
|
" weights: a numpy nd-array of weights (Nxd) where d is the data dimensionality\n",
|
|
" bias: a numpy scalar\n",
|
|
" calibration_data: a numpy nd-array of data (Nxd)\n",
|
|
" \"\"\"\n",
|
|
" self.n_bits = out_bits\n",
|
|
"\n",
|
|
" # We need to calibrate to a sufficiently low number of bits\n",
|
|
" # so that the output of the Linear layer (w . x + b)\n",
|
|
" # does not exceed 7 bits\n",
|
|
" self.q_calibration_data = QuantizedArray(q_bits, calibration_data)\n",
|
|
"\n",
|
|
" # Quantize the weights and create the quantized linear layer\n",
|
|
" q_weights = QuantizedArray(w_bits, weights)\n",
|
|
" q_bias = QuantizedArray(b_bits, bias)\n",
|
|
" q_layer = QuantizedLinear(out_bits, q_weights, q_bias)\n",
|
|
"\n",
|
|
" # Store quantized layers\n",
|
|
" quant_layers_dict: Dict[str, Any] = {}\n",
|
|
"\n",
|
|
" # Calibrate the linear layer and obtain calibration_data for the next layers\n",
|
|
" calibration_data = self._calibrate_and_store_layers_activation(\n",
|
|
" \"linear\", q_layer, calibration_data, quant_layers_dict\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Finally construct our Module using the quantized layers\n",
|
|
" super().__init__(quant_layers_dict)\n",
|
|
"\n",
|
|
" def _calibrate_and_store_layers_activation(\n",
|
|
" self, name, q_function, calibration_data, quant_layers_dict\n",
|
|
" ):\n",
|
|
" \"\"\"\n",
|
|
" This function calibrates a layer of a quantized module (e.g. linear, inverse-link,\n",
|
|
" activation, etc) by looking at the input data, then computes the output of the quantized\n",
|
|
" version of the layer to be used as input to the following layers\n",
|
|
" \"\"\"\n",
|
|
"\n",
|
|
" # Calibrate the output of the layer\n",
|
|
" q_function.calibrate(calibration_data)\n",
|
|
" # Store the learned quantized layer\n",
|
|
" quant_layers_dict[name] = q_function\n",
|
|
" # Create new calibration data (output of the previous layer)\n",
|
|
" q_calibration_data = QuantizedArray(self.n_bits, calibration_data)\n",
|
|
" # Dequantize to have the value in clear and ready for next calibration\n",
|
|
" return q_function(q_calibration_data).dequant()\n",
|
|
"\n",
|
|
" def quantize_input(self, x):\n",
|
|
" \"\"\"Quantize an input set with the quantization parameters determined from calibration\"\"\"\n",
|
|
" q_input_arr = deepcopy(self.q_calibration_data)\n",
|
|
" q_input_arr.update_values(x)\n",
|
|
" return q_input_arr"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "7945595f",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Create a synthetic dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "410b90de",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"X, y = make_regression(\n",
|
|
" n_samples=200, n_features=1, n_targets=1, bias=5.0, noise=30.0, random_state=42\n",
|
|
")\n",
|
|
"\n",
|
|
"# Split it into train/test and sort the sets for nicer visualization\n",
|
|
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)\n",
|
|
"\n",
|
|
"sidx = np.argsort(np.squeeze(x_train))\n",
|
|
"x_train = x_train[sidx, :]\n",
|
|
"y_train = y_train[sidx]\n",
|
|
"\n",
|
|
"sidx = np.argsort(np.squeeze(x_test))\n",
|
|
"x_test = x_test[sidx, :]\n",
|
|
"y_test = y_test[sidx]\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "75f4fdb7",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Train a linear regression on the training set and compute predictions on the test set."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "2a124a62",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"linreg = LinearRegression()\n",
|
|
"linreg.fit(x_train, y_train)\n",
|
|
"\n",
|
|
"y_pred = linreg.predict(x_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "a0ba5509",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Visualize the regression line and the data set."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "edcd361b",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 864x576 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"plt.ioff()\n",
|
|
"\n",
|
|
"plt.clf()\n",
|
|
"fig, ax = plt.subplots(1, figsize=(12,8))\n",
|
|
"fig.patch.set_facecolor(\"white\")\n",
|
|
"ax.scatter(x_train, y_train, c=\"blue\", marker=\"D\", label=\"Train data\")\n",
|
|
"ax.scatter(x_test, y_test, c=\"orange\", marker=\"x\", label=\"Test data\")\n",
|
|
"ax.plot(x_test, y_pred, c=\"blue\", marker=None, linestyle=\"dashed\", label=\"Sklearn Regression\")\n",
|
|
"ax.legend()\n",
|
|
"display(fig)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "996fbe05",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Calibrate the model for quantization using both training and test data\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "06ed91dd",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"calib_data = X \n",
|
|
"q_linreg = QuantizedLinearRegression.from_sklearn(linreg, calib_data)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "cd74c5e7",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Now, we can compile our model to FHE, taking as the possible input set all of our dataset."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "b8f8f95b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"X_q = q_linreg.quantize_input(X)\n",
|
|
"\n",
|
|
"engine = q_linreg.compile(X_q)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "084fb296",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Time to make some predictions, first in the clear."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "e781279a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Now that the model is quantized, predict on the test set\n",
|
|
"x_test_q = q_linreg.quantize_input(x_test)\n",
|
|
"q_y_pred = q_linreg.forward_and_dequant(x_test_q)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "f28155cf",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Now let's predict using the quantized FHE regression model."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "2b6da1f6",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"100%|██████████| 80/80 [00:14<00:00, 5.57it/s]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Now predict using the FHE quantized model on the testing set\n",
|
|
"y_test_pred_fhe = np.zeros_like(x_test)\n",
|
|
"\n",
|
|
"for i, x_i in enumerate(tqdm(x_test_q.qvalues)):\n",
|
|
" q_sample = np.expand_dims(x_i, 1).transpose([1, 0]).astype(np.uint8)\n",
|
|
" # bench: Measure: Evaluation Time (ms)\n",
|
|
" q_pred_fhe = engine.run(q_sample)\n",
|
|
" y_test_pred_fhe[i] = q_linreg.dequantize_output(q_pred_fhe)\n",
|
|
" # bench: Measure: End\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "23852861",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Evaluate all versions of the regression model."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "7b0f541f",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Sklearn R^2: 0.8758\n",
|
|
"Non Homomorphic R^2: 0.8735\n",
|
|
"Homomorphic R^2: 0.8735\n",
|
|
"Relative Difference Percentage: 0.00%\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Measure the R^2 score for the three versions of the regression model\n",
|
|
"sklearn_r2 = r2_score(y_pred, y_test)\n",
|
|
"non_homomorphic_test_error = r2_score(q_y_pred, y_test)\n",
|
|
"homomorphic_test_error = r2_score(y_test_pred_fhe, y_test)\n",
|
|
"\n",
|
|
"# Measure the error of the FHE quantized model w.r.t the clear quantized model\n",
|
|
"difference = (\n",
|
|
" abs(homomorphic_test_error - non_homomorphic_test_error) * 100 / non_homomorphic_test_error\n",
|
|
")\n",
|
|
"\n",
|
|
"\n",
|
|
"print(f\"Sklearn R^2: {sklearn_r2:.4f}\")\n",
|
|
"print(f\"Non Homomorphic R^2: {non_homomorphic_test_error:.4f}\")\n",
|
|
"print(f\"Homomorphic R^2: {homomorphic_test_error:.4f}\")\n",
|
|
"print(f\"Relative Difference Percentage: {difference:.2f}%\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "704b2f63",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Plot the results of both the original and FHE versions of the regression model."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"id": "aae3f6da",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 864x576 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"plt.clf()\n",
|
|
"fig, ax = plt.subplots(1, figsize=(12,8))\n",
|
|
"fig.patch.set_facecolor(\"white\")\n",
|
|
"s1 = ax.scatter(x_train, y_train, c=\"blue\", marker=\"D\")\n",
|
|
"s2 = ax.scatter(x_test, y_test, c=\"orange\", marker=\"x\")\n",
|
|
"p1 = ax.plot(x_test, y_pred, c=\"blue\", marker=None, linestyle=\"dashed\")\n",
|
|
"p2 = ax.plot(x_test, y_test_pred_fhe, c=\"red\", marker=None, linewidth=2)\n",
|
|
"ax.legend([s1, s2, p1[0], p2[0]],\n",
|
|
" [\n",
|
|
" \"Train Data\",\n",
|
|
" \"Test Data\",\n",
|
|
" f\"Clear Reg, R^2={sklearn_r2:.4f}\",\n",
|
|
" f\"Quant. FHE Reg, R^2={homomorphic_test_error:.4f}\"\n",
|
|
" ]\n",
|
|
")\n",
|
|
"display(fig)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c18dbdd1",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Enjoy!"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"execution": {
|
|
"timeout": 10800
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|