docs: add iris tutorial and docs

This commit is contained in:
jfrery
2021-12-16 15:51:25 +01:00
committed by Jordan Fréry
parent 9b940c8a82
commit bb332861aa
2 changed files with 334 additions and 0 deletions

View File

@@ -0,0 +1,333 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Fully Connected Neural Network on Iris Dataset\n",
"\n",
"In this example, we show how one can train a neural network on a specific task (here Iris Classification) and use Concrete Framework to make the model work in FHE settings."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<torch._C.Generator at 0x7f55c9325950>"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import torch\n",
"from torch import nn\n",
"import numpy as np\n",
"\n",
"# Set the random seed for reproducibility\n",
"torch.manual_seed(1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Define our neural network"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class FCIris(torch.nn.Module):\n",
" \"\"\"Neural network for Iris classification\n",
" \n",
" We define a fully connected network with 5 fully connected (fc) layers that \n",
" perform feature extraction and one (fc) layer to produce the final classification. \n",
" We will use 15 neurons on all the feature extractor layers to ensure that the FHE accumulators\n",
" do not overflow (we are only allowed a maximum of 7 bits-width).\n",
"\n",
" Due to accumulator limits we have to design a deep network with few neurons on each layer. \n",
" This is in contrast to a traditional approach where the number of neurons increases after \n",
" each layer or block.\n",
" \"\"\"\n",
"\n",
" def __init__(self, input_size):\n",
" super().__init__()\n",
"\n",
" # The first layer processes the input data, in our case 4 dimensional vectors \n",
" self.linear1 = nn.Linear(input_size, 15)\n",
" self.sigmoid1 = nn.Sigmoid()\n",
" # Next, we add four intermediate layers to perform features extraction\n",
" self.linear2 = nn.Linear(15, 15)\n",
" self.sigmoid2 = nn.Sigmoid()\n",
" self.linear3 = nn.Linear(15, 15)\n",
" self.sigmoid3 = nn.Sigmoid()\n",
" self.linear4 = nn.Linear(15, 15)\n",
" self.sigmoid4 = nn.Sigmoid()\n",
" self.linear5 = nn.Linear(15, 15)\n",
" self.sigmoid5 = nn.Sigmoid()\n",
" # Finally, we add the decision layer for 3 output classes encoded as one-hot vectors\n",
" self.decision = nn.Linear(15, 3)\n",
"\n",
" def forward(self, x):\n",
"\n",
" x = self.linear1(x)\n",
" x = self.sigmoid1(x)\n",
" x = self.linear2(x)\n",
" x = self.sigmoid2(x)\n",
" x = self.linear3(x)\n",
" x = self.sigmoid3(x)\n",
" x = self.linear4(x)\n",
" x = self.sigmoid4(x)\n",
" x = self.linear5(x)\n",
" x = self.sigmoid5(x)\n",
" x = self.decision(x)\n",
"\n",
" return x\n"
]
},
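{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before moving on, we can sanity-check the accumulator constraint with simple interval arithmetic. The sketch below assumes unsigned values at the 2-bit quantization used later in this notebook; this is only a rough model, since the compiler measures actual value ranges on real data during compilation, and those ranges are narrower than this worst case, which is how 15-neuron layers fit under the 7-bit limit."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"\n",
"# Back-of-the-envelope worst case for a 15-input dot product with 2-bit values.\n",
"# This is plain interval arithmetic, not the compiler's exact bookkeeping.\n",
"n_inputs = 15\n",
"n_bits = 2\n",
"max_val = 2**n_bits - 1  # largest unsigned 2-bit value: 3\n",
"worst_case_sum = n_inputs * max_val * max_val  # 15 * 3 * 3 = 135\n",
"print(f'Worst-case accumulator width: {math.ceil(math.log2(worst_case_sum + 1))} bits')"
]
},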
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Define all required variables to train the model"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Get iris dataset\n",
"from sklearn.datasets import load_iris\n",
"X, y = load_iris(return_X_y=True)\n",
"\n",
"# Split into train and test\n",
"from sklearn.model_selection import train_test_split\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)\n",
"\n",
"# Convert to tensors\n",
"X_train = torch.tensor(X_train).float()\n",
"X_test = torch.tensor(X_test).float()\n",
"y_train = torch.tensor(y_train)\n",
"y_test = torch.tensor(y_test)\n",
"\n",
"# Initialize our model\n",
"model = FCIris(X.shape[1])\n",
"\n",
"# Define our loss function\n",
"criterion = nn.CrossEntropyLoss()\n",
"\n",
"# Define our optimizer\n",
"optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
"# Define the number of iterations\n",
"n_iters = 5000\n",
"\n",
"# Define the batch size\n",
"batch_size = 16\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train the model"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Iterations: 00 | Loss: 1.1348 | Accuracy: 31.25%\n",
"Iterations: 1000 | Loss: 0.4863 | Accuracy: 68.75%\n",
"Iterations: 2000 | Loss: 0.0661 | Accuracy: 100.00%\n",
"Iterations: 3000 | Loss: 0.0185 | Accuracy: 100.00%\n",
"Iterations: 4000 | Loss: 0.0477 | Accuracy: 100.00%\n"
]
}
],
"source": [
"for iter in range(n_iters):\n",
" # Get a random batch of training data\n",
" idx = torch.randperm(X_train.size()[0])\n",
" X_batch = X_train[idx][:batch_size]\n",
" y_batch = y_train[idx][:batch_size]\n",
" \n",
" # Forward pass\n",
" y_pred = model(X_batch)\n",
" \n",
" # Compute loss\n",
" loss = criterion(y_pred, y_batch)\n",
" \n",
" # Backward pass\n",
" optimizer.zero_grad()\n",
" loss.backward()\n",
" \n",
" # Update weights\n",
" optimizer.step()\n",
" \n",
" \n",
" if iter % 1000 == 0:\n",
" # Print epoch number, loss and accuracy\n",
" print(f'Iterations: {iter:02} | Loss: {loss.item():.4f} | Accuracy: {100*(y_pred.argmax(1) == y_batch).float().mean():.2f}%')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Predict with the torch model in clear"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"y_pred = model(X_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compile the model"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from concrete.torch.compile import compile_torch_model\n",
"quantized_compiled_module = compile_torch_model(\n",
" model,\n",
" X_train,\n",
" n_bits=2,\n",
")"
]
},
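{
"cell_type": "markdown",
"metadata": {},
"source": [
"To build intuition for what `n_bits=2` means, the cell below sketches a minimal uniform affine quantizer in plain numpy. This is an illustrative assumption, not necessarily the Concrete Framework's exact scheme: with 2 bits, every weight of a layer is mapped to one of only 4 levels."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal uniform affine quantization sketch (illustrative only).\n",
"def quantize(values, n_bits=2):\n",
" vmin, vmax = values.min(), values.max()\n",
" scale = (vmax - vmin) / (2**n_bits - 1)\n",
" q = np.round((values - vmin) / scale).astype(np.int64)\n",
" return q, scale, vmin\n",
"\n",
"weights = model.linear1.weight.detach().numpy()\n",
"q_weights, scale, vmin = quantize(weights)\n",
"# With 2 bits, only 4 distinct quantized levels remain\n",
"print(np.unique(q_weights), '->', np.unique(q_weights) * scale + vmin)"
]
},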
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Predict with the quantized model"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# We now have a module in full numpy.\n",
"# Convert data to a numpy array.\n",
"X_train_numpy = X_train.numpy()\n",
"X_test_numpy = X_test.numpy()\n",
"y_train_numpy = y_train.numpy()\n",
"y_test_numpy = y_test.numpy()\n",
"\n",
"quant_model_predictions = quantized_compiled_module(X_test_numpy)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Predict in FHE"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 23/23 [04:14<00:00, 11.07s/it]\n"
]
}
],
"source": [
"from tqdm import tqdm\n",
"homomorphic_quant_predictions = []\n",
"for x_q in tqdm(X_test_numpy):\n",
" homomorphic_quant_predictions.append(\n",
" quantized_compiled_module.forward_fhe.run(np.array([x_q]).astype(np.uint8))\n",
" )\n",
"homomorphic_predictions = quantized_compiled_module.dequantize_output(\n",
" np.array(homomorphic_quant_predictions, dtype=np.float32).reshape(quant_model_predictions.shape)\n",
")"
]
},
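{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check, the decrypted FHE results should agree with the clear quantized inference, since both run the same quantized circuit (FHE execution carries only a small, controlled probability of error per operation)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Compare the classes predicted in FHE with the clear quantized predictions\n",
"agreement = (homomorphic_predictions.argmax(1) == quant_model_predictions.argmax(1)).mean()\n",
"print(f'FHE vs clear quantized agreement: {100 * agreement:.2f}%')"
]
},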
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Print the accuracy of both models"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test Accuracy: 100.00%\n",
"Test Accuracy Quantized Inference: 73.91%\n",
"Test Accuracy Homomorphic Inference: 73.91%\n"
]
}
],
"source": [
"print(f'Test Accuracy: {100*(y_pred.argmax(1) == y_test).float().mean():.2f}%')\n",
"print(f'Test Accuracy Quantized Inference: {100*(quant_model_predictions.argmax(1) == y_test_numpy).mean():.2f}%')\n",
"print(f'Test Accuracy Homomorphic Inference: {100*(homomorphic_predictions.argmax(1) == y_test_numpy).mean():.2f}%') \n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Summary\n",
"\n",
"In this notebook we presented a few steps to have a model (torch neural network) inference in over homomorphically encrypted data: \n",
"- We first trained a fully connected neural network yielding ~100% accuracy\n",
"- Then quantized it using Concrete Framework. As we can see, the extreme post training quantization (only 2 bits of precision for weights, inputs and activations) made the neural network accruracy slighlty drop (~73%).\n",
"- We then use the compiled inference into its FHE equivalent to get our FHE predictions over the test set\n",
"\n",
"The Homomorphic inference achieves a similar accuracy as the quantized model inference. "
]
}
],
"metadata": {
"execution": {
"timeout": 10800
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -4,6 +4,7 @@ Advanced examples
.. toctree::
   :maxdepth: 1

   IrisFHE.ipynb
   QuantizedLinearRegression.ipynb
   QuantizedLogisticRegression.ipynb
   QuantizedGeneralizedLinearModel.ipynb