From 07e8411b50d9d59f5936623e8e5d3edced5eb4ab Mon Sep 17 00:00:00 2001 From: jfrery Date: Tue, 21 Dec 2021 22:26:29 +0100 Subject: [PATCH] feat: update IrisFHE.ipynb with signed weights + add visualization --- docs/user/advanced_examples/IrisFHE.ipynb | 168 +++++++++++++++------- 1 file changed, 118 insertions(+), 50 deletions(-) diff --git a/docs/user/advanced_examples/IrisFHE.ipynb b/docs/user/advanced_examples/IrisFHE.ipynb index 12ec2db58..bbdb2ac46 100644 --- a/docs/user/advanced_examples/IrisFHE.ipynb +++ b/docs/user/advanced_examples/IrisFHE.ipynb @@ -13,25 +13,11 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import torch\n", "from torch import nn\n", - "import numpy as np\n", - "\n", - "# Set the random seed for reproducibility\n", - "torch.manual_seed(1)" + "import numpy as np" ] }, { @@ -50,9 +36,9 @@ "class FCIris(torch.nn.Module):\n", " \"\"\"Neural network for Iris classification\n", " \n", - " We define a fully connected network with five (5) fully connected (fc) layers that \n", + " We define a fully connected network with three (3) fully connected (fc) layers: two that \n", " perform feature extraction and one (fc) layer to produce the final classification. \n", - " We will use 15 neurons on all the feature extractor layers to ensure that the FHE accumulators\n", + " We will use 3 neurons on all layers to ensure that the FHE accumulators\n", " do not overflow (we are currently only allowed a maximum of 7 bits-width).\n", "\n", " Due to accumulator limits, we have to design a deep network with only a few neurons on each layer. 
\n", @@ -64,19 +50,13 @@ " super().__init__()\n", "\n", " # The first layer processes the input data, in our case 4 dimensional vectors \n", - " self.linear1 = nn.Linear(input_size, 15)\n", + " self.linear1 = nn.Linear(input_size, 3)\n", " self.sigmoid1 = nn.Sigmoid()\n", - " # Next, we add four intermediate layers to perform features extraction\n", - " self.linear2 = nn.Linear(15, 15)\n", + " # Next, we add one intermediate layer\n", + " self.linear2 = nn.Linear(3, 3)\n", " self.sigmoid2 = nn.Sigmoid()\n", - " self.linear3 = nn.Linear(15, 15)\n", - " self.sigmoid3 = nn.Sigmoid()\n", - " self.linear4 = nn.Linear(15, 15)\n", - " self.sigmoid4 = nn.Sigmoid()\n", - " self.linear5 = nn.Linear(15, 15)\n", - " self.sigmoid5 = nn.Sigmoid()\n", " # Finally, we add the decision layer for 3 output classes encoded as one-hot vectors\n", - " self.decision = nn.Linear(15, 3)\n", + " self.decision = nn.Linear(3, 3)\n", "\n", " def forward(self, x):\n", "\n", @@ -84,12 +64,6 @@ " x = self.sigmoid1(x)\n", " x = self.linear2(x)\n", " x = self.sigmoid2(x)\n", - " x = self.linear3(x)\n", - " x = self.sigmoid3(x)\n", - " x = self.linear4(x)\n", - " x = self.sigmoid4(x)\n", - " x = self.linear5(x)\n", - " x = self.sigmoid5(x)\n", " x = self.decision(x)\n", "\n", " return x\n" @@ -114,7 +88,7 @@ "\n", "# Split into train and test\n", "from sklearn.model_selection import train_test_split\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)\n", "\n", "# Convert to tensors\n", "X_train = torch.tensor(X_train).float()\n", @@ -129,10 +103,10 @@ "criterion = nn.CrossEntropyLoss()\n", "\n", "# Define our optimizer\n", - "optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n", + "optimizer = torch.optim.SGD(model.parameters(), lr=0.1)\n", "\n", "# Define the number of iterations\n", - "n_iters = 5000\n", + "n_iters = 5001\n", "\n", "# Define the 
batch size\n", "batch_size = 16\n" @@ -154,11 +128,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "Iterations: 00 | Loss: 1.1348 | Accuracy: 31.25%\n", - "Iterations: 1000 | Loss: 0.4863 | Accuracy: 68.75%\n", - "Iterations: 2000 | Loss: 0.0661 | Accuracy: 100.00%\n", - "Iterations: 3000 | Loss: 0.0185 | Accuracy: 100.00%\n", - "Iterations: 4000 | Loss: 0.0477 | Accuracy: 100.00%\n" + "Iterations: 00 | Loss: 1.1663 | Accuracy: 37.50%\n", + "Iterations: 1000 | Loss: 0.5603 | Accuracy: 87.50%\n", + "Iterations: 2000 | Loss: 0.3205 | Accuracy: 93.75%\n", + "Iterations: 3000 | Loss: 0.1029 | Accuracy: 100.00%\n", + "Iterations: 4000 | Loss: 0.0304 | Accuracy: 100.00%\n", + "Iterations: 5000 | Loss: 0.0261 | Accuracy: 100.00%\n" ] } ], @@ -208,7 +183,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Compile the model" + "## Compile the model\n", + "\n", + "The `compile_torch_model` function first quantizes `model` with `n_bits` of precision, using `X_train` as the calibration dataset, and then compiles the model to its FHE counterpart. Here we use 3 bits of precision so that the accumulator does not overflow our 7-bit limit." 
] }, { @@ -221,7 +198,7 @@ "quantized_compiled_module = compile_torch_model(\n", " model,\n", " X_train,\n", - " n_bits=2,\n", + " n_bits=3,\n", ")" ] }, @@ -264,7 +241,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 23/23 [04:14<00:00, 11.07s/it]\n" + "100%|██████████| 38/38 [00:54<00:00, 1.43s/it]\n" ] } ], @@ -297,15 +274,104 @@ "output_type": "stream", "text": [ "Test Accuracy: 100.00%\n", - "Test Accuracy Quantized Inference: 73.91%\n", - "Test Accuracy Homomorphic Inference: 73.91%\n" + "Test Accuracy Quantized Inference: 97.37%\n", + "Test Accuracy Homomorphic Inference: 97.37%\n" ] } ], "source": [ "print(f'Test Accuracy: {100*(y_pred.argmax(1) == y_test).float().mean():.2f}%')\n", "print(f'Test Accuracy Quantized Inference: {100*(quant_model_predictions.argmax(1) == y_test_numpy).mean():.2f}%')\n", - "print(f'Test Accuracy Homomorphic Inference: {100*(homomorphic_predictions.argmax(1) == y_test_numpy).mean():.2f}%') \n" + "print(f'Test Accuracy Homomorphic Inference: {100*(homomorphic_predictions.argmax(1) == y_test_numpy).mean():.2f}%') " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.decomposition import PCA\n", + "pca = PCA(n_components=2)\n", + "X_train_2d = pca.fit_transform(X_train_numpy)\n", + "\n", + "b_min = np.min(X_train_2d, axis=0)\n", + "b_max = np.max(X_train_2d, axis=0)\n", + "\n", + "grid_dims = tuple([np.linspace(b_min[i], b_max[i], 128) for i in range(X_train_2d.shape[1])])\n", + "ndgrid_tuple = np.meshgrid(*grid_dims)\n", + "grid_2d = np.vstack([g.ravel() for g in ndgrid_tuple]).transpose()\n", + "\n", + "grid_test = pca.inverse_transform(grid_2d)\n", + "\n", + "grid_pred_all = quantized_compiled_module(grid_test)\n", + "grid_pred_all_original = model(torch.tensor(grid_test).float()).detach().numpy()\n", + "\n", + "pred_classes = np.argmax(grid_pred_all, axis=1).astype(np.int32)\n", + "pred_classes_original = np.argmax(grid_pred_all_original, axis=1).astype(np.int32)\n", + "\n", + "from matplotlib import pyplot as plt\n", + "\n", + "cmap = 'autumn'\n", + "# Create two subplots and set their locations\n", + "plt.clf()\n", + "fig, axs = plt.subplots(1, 2, figsize=(12, 6))\n", + "\n", + "# Plot original model contour plot\n", + "axs[0].contourf(ndgrid_tuple[0], ndgrid_tuple[1], pred_classes_original.reshape(ndgrid_tuple[0].shape), cmap=cmap)\n", + "\n", + "# Plot the scatter with marker borders\n", + "axs[0].scatter(X_train_2d[:, 0], X_train_2d[:, 1], c=y_train_numpy, s=50, edgecolors='k', cmap=cmap)\n", + "\n", + "# Add title and axis labels\n", + "axs[0].set_title('Original Inference')\n", + "\n", + "\n", + "\n", + "\n", + "# Plot quantized model contour plot\n", + "axs[1].contourf(ndgrid_tuple[0], ndgrid_tuple[1], pred_classes.reshape(ndgrid_tuple[0].shape), cmap=cmap)\n", + "\n", + "# Plot the scatter with marker borders\n", + "axs[1].scatter(X_train_2d[:, 0], X_train_2d[:, 1], c=y_train_numpy, s=50, edgecolors='k', cmap=cmap)\n", + "\n", + "# Add title and axis labels\n", + 
"axs[1].set_title('Quantized Inference')\n", + "\n", + "\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the above plot, we show the decision boundaries for both the original and quantized model. The quantized model has its decision boundaries (colored regions) slightly shifted compared to the original model. This is due to the low-bit quantization applied to the model post-training.\n", + "\n", + "Here we do not compute the contour plot for the FHE inference, as this would be really costly, but it should be pretty close to the quantized model. " ] }, { @@ -316,10 +382,12 @@ "\n", "In this notebook, we presented a few steps to have a model (torch neural network) inference in over homomorphically encrypted data: \n", "- We first trained a fully connected neural network yielding ~100% accuracy\n", - "- Then, we quantized it using Concrete Framework. As we can see, the extreme post training quantization (only 2 bits of precision for weights, inputs and activations) made the neural network accruracy slighlty drop (~73%).\n", + "- Then, we quantized it using Concrete Framework. As we can see, the extreme post training quantization (only 3 bits of precision for weights, inputs and activations) made the neural network accuracy slightly drop (~97%).\n", "- We then used the compiled inference into its FHE equivalent to get our FHE predictions over the test set\n", "\n", - "The Homomorphic inference achieves a similar accuracy as the quantized model inference. " + "The Homomorphic inference achieves a similar accuracy as the quantized model inference.\n", + "\n", + "Disclaimer: post-training quantization with such a low bit width (<=3) can yield different results for the quantized model, which mainly depend on the range of the learned weights." ] } ],