Files
circomlib-ml/models/lr_zigmoid.ipynb
2023-10-12 14:45:44 +08:00

539 lines
18 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# example from https://towardsdatascience.com/replicate-a-logistic-regression-model-as-an-artificial-neural-network-in-keras-cd6f49cf4b2c"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.datasets import load_breast_cancer\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LogisticRegression\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import InputLayer\n",
"from tensorflow.keras.layers import Dense\n",
"import tensorflow as tf"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean radius</th>\n",
" <th>mean texture</th>\n",
" <th>mean perimeter</th>\n",
" <th>mean area</th>\n",
" <th>mean smoothness</th>\n",
" <th>mean compactness</th>\n",
" <th>mean concavity</th>\n",
" <th>mean concave points</th>\n",
" <th>mean symmetry</th>\n",
" <th>mean fractal dimension</th>\n",
" <th>...</th>\n",
" <th>worst radius</th>\n",
" <th>worst texture</th>\n",
" <th>worst perimeter</th>\n",
" <th>worst area</th>\n",
" <th>worst smoothness</th>\n",
" <th>worst compactness</th>\n",
" <th>worst concavity</th>\n",
" <th>worst concave points</th>\n",
" <th>worst symmetry</th>\n",
" <th>worst fractal dimension</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>17.99</td>\n",
" <td>10.38</td>\n",
" <td>122.80</td>\n",
" <td>1001.0</td>\n",
" <td>0.11840</td>\n",
" <td>0.27760</td>\n",
" <td>0.3001</td>\n",
" <td>0.14710</td>\n",
" <td>0.2419</td>\n",
" <td>0.07871</td>\n",
" <td>...</td>\n",
" <td>25.38</td>\n",
" <td>17.33</td>\n",
" <td>184.60</td>\n",
" <td>2019.0</td>\n",
" <td>0.1622</td>\n",
" <td>0.6656</td>\n",
" <td>0.7119</td>\n",
" <td>0.2654</td>\n",
" <td>0.4601</td>\n",
" <td>0.11890</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20.57</td>\n",
" <td>17.77</td>\n",
" <td>132.90</td>\n",
" <td>1326.0</td>\n",
" <td>0.08474</td>\n",
" <td>0.07864</td>\n",
" <td>0.0869</td>\n",
" <td>0.07017</td>\n",
" <td>0.1812</td>\n",
" <td>0.05667</td>\n",
" <td>...</td>\n",
" <td>24.99</td>\n",
" <td>23.41</td>\n",
" <td>158.80</td>\n",
" <td>1956.0</td>\n",
" <td>0.1238</td>\n",
" <td>0.1866</td>\n",
" <td>0.2416</td>\n",
" <td>0.1860</td>\n",
" <td>0.2750</td>\n",
" <td>0.08902</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19.69</td>\n",
" <td>21.25</td>\n",
" <td>130.00</td>\n",
" <td>1203.0</td>\n",
" <td>0.10960</td>\n",
" <td>0.15990</td>\n",
" <td>0.1974</td>\n",
" <td>0.12790</td>\n",
" <td>0.2069</td>\n",
" <td>0.05999</td>\n",
" <td>...</td>\n",
" <td>23.57</td>\n",
" <td>25.53</td>\n",
" <td>152.50</td>\n",
" <td>1709.0</td>\n",
" <td>0.1444</td>\n",
" <td>0.4245</td>\n",
" <td>0.4504</td>\n",
" <td>0.2430</td>\n",
" <td>0.3613</td>\n",
" <td>0.08758</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.42</td>\n",
" <td>20.38</td>\n",
" <td>77.58</td>\n",
" <td>386.1</td>\n",
" <td>0.14250</td>\n",
" <td>0.28390</td>\n",
" <td>0.2414</td>\n",
" <td>0.10520</td>\n",
" <td>0.2597</td>\n",
" <td>0.09744</td>\n",
" <td>...</td>\n",
" <td>14.91</td>\n",
" <td>26.50</td>\n",
" <td>98.87</td>\n",
" <td>567.7</td>\n",
" <td>0.2098</td>\n",
" <td>0.8663</td>\n",
" <td>0.6869</td>\n",
" <td>0.2575</td>\n",
" <td>0.6638</td>\n",
" <td>0.17300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20.29</td>\n",
" <td>14.34</td>\n",
" <td>135.10</td>\n",
" <td>1297.0</td>\n",
" <td>0.10030</td>\n",
" <td>0.13280</td>\n",
" <td>0.1980</td>\n",
" <td>0.10430</td>\n",
" <td>0.1809</td>\n",
" <td>0.05883</td>\n",
" <td>...</td>\n",
" <td>22.54</td>\n",
" <td>16.67</td>\n",
" <td>152.20</td>\n",
" <td>1575.0</td>\n",
" <td>0.1374</td>\n",
" <td>0.2050</td>\n",
" <td>0.4000</td>\n",
" <td>0.1625</td>\n",
" <td>0.2364</td>\n",
" <td>0.07678</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" mean radius mean texture mean perimeter mean area mean smoothness \\\n",
"0 17.99 10.38 122.80 1001.0 0.11840 \n",
"1 20.57 17.77 132.90 1326.0 0.08474 \n",
"2 19.69 21.25 130.00 1203.0 0.10960 \n",
"3 11.42 20.38 77.58 386.1 0.14250 \n",
"4 20.29 14.34 135.10 1297.0 0.10030 \n",
"\n",
" mean compactness mean concavity mean concave points mean symmetry \\\n",
"0 0.27760 0.3001 0.14710 0.2419 \n",
"1 0.07864 0.0869 0.07017 0.1812 \n",
"2 0.15990 0.1974 0.12790 0.2069 \n",
"3 0.28390 0.2414 0.10520 0.2597 \n",
"4 0.13280 0.1980 0.10430 0.1809 \n",
"\n",
" mean fractal dimension ... worst radius worst texture worst perimeter \\\n",
"0 0.07871 ... 25.38 17.33 184.60 \n",
"1 0.05667 ... 24.99 23.41 158.80 \n",
"2 0.05999 ... 23.57 25.53 152.50 \n",
"3 0.09744 ... 14.91 26.50 98.87 \n",
"4 0.05883 ... 22.54 16.67 152.20 \n",
"\n",
" worst area worst smoothness worst compactness worst concavity \\\n",
"0 2019.0 0.1622 0.6656 0.7119 \n",
"1 1956.0 0.1238 0.1866 0.2416 \n",
"2 1709.0 0.1444 0.4245 0.4504 \n",
"3 567.7 0.2098 0.8663 0.6869 \n",
"4 1575.0 0.1374 0.2050 0.4000 \n",
"\n",
" worst concave points worst symmetry worst fractal dimension \n",
"0 0.2654 0.4601 0.11890 \n",
"1 0.1860 0.2750 0.08902 \n",
"2 0.2430 0.3613 0.08758 \n",
"3 0.2575 0.6638 0.17300 \n",
"4 0.1625 0.2364 0.07678 \n",
"\n",
"[5 rows x 30 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cancer = load_breast_cancer()\n",
"df = pd.DataFrame(cancer.data,\n",
" columns=cancer.feature_names)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"X = (df-df.mean())/df.std()\n",
"# X = 2*(df-df.min())/(df.max()-df.min()) - 1\n",
"# X = df\n",
"y = pd.Series(cancer.target)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Make train and test sets\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, \n",
" shuffle=True, random_state=2)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 0.97\n",
"Precision: 0.99\n",
"Recall: 0.97\n",
"F-Score: 0.98\n"
]
}
],
"source": [
"# Initialize a logistic regression model\n",
"log_reg_model = LogisticRegression(max_iter=2500,\n",
" random_state=42)\n",
"\n",
"# Train (fit) the model\n",
"log_reg_model.fit(X_train, y_train)\n",
"\n",
"# Make predictions\n",
"y_pred = log_reg_model.predict(X_test) # Predictions\n",
"y_true = y_test # True values\n",
"\n",
"# Model evaluation\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.metrics import precision_recall_fscore_support\n",
"import numpy as np\n",
"\n",
"print(\"Accuracy:\", np.round(accuracy_score(y_true, y_pred), 2))\n",
"precision, recall, fscore, _ = precision_recall_fscore_support(y_true, y_pred,\n",
" average='binary')\n",
"print(\"Precision:\", np.round(precision, 2))\n",
"print(\"Recall:\", np.round(recall, 2))\n",
"print(\"F-Score:\", np.round(fscore, 2))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"model = Sequential()\n",
"\n",
"model.add(InputLayer(input_shape=(30, )))\n",
"# No hidden layers\n",
"model.add(Dense(1, activation='sigmoid'))\n",
"\n",
"optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.05)\n",
"model.compile(optimizer=optimizer,\n",
" loss='binary_crossentropy',\n",
" metrics=['accuracy'])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/10\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-10-12 14:37:31.395427: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"12/12 [==============================] - 0s 15ms/step - loss: 0.3646 - accuracy: 0.8407 - val_loss: 0.1326 - val_accuracy: 0.9341\n",
"Epoch 2/10\n",
"12/12 [==============================] - 0s 3ms/step - loss: 0.0995 - accuracy: 0.9643 - val_loss: 0.0896 - val_accuracy: 0.9560\n",
"Epoch 3/10\n",
"12/12 [==============================] - 0s 3ms/step - loss: 0.0793 - accuracy: 0.9808 - val_loss: 0.0776 - val_accuracy: 0.9670\n",
"Epoch 4/10\n",
"12/12 [==============================] - 0s 3ms/step - loss: 0.0734 - accuracy: 0.9808 - val_loss: 0.0749 - val_accuracy: 0.9560\n",
"Epoch 5/10\n",
"12/12 [==============================] - 0s 3ms/step - loss: 0.0705 - accuracy: 0.9780 - val_loss: 0.0735 - val_accuracy: 0.9560\n",
"Epoch 6/10\n",
"12/12 [==============================] - 0s 2ms/step - loss: 0.0684 - accuracy: 0.9808 - val_loss: 0.0713 - val_accuracy: 0.9670\n",
"Epoch 7/10\n",
"12/12 [==============================] - 0s 3ms/step - loss: 0.0667 - accuracy: 0.9835 - val_loss: 0.0687 - val_accuracy: 0.9780\n",
"Epoch 8/10\n",
"12/12 [==============================] - 0s 3ms/step - loss: 0.0653 - accuracy: 0.9835 - val_loss: 0.0665 - val_accuracy: 0.9670\n",
"Epoch 9/10\n",
"12/12 [==============================] - 0s 3ms/step - loss: 0.0640 - accuracy: 0.9835 - val_loss: 0.0648 - val_accuracy: 0.9670\n",
"Epoch 10/10\n",
"12/12 [==============================] - 0s 3ms/step - loss: 0.0628 - accuracy: 0.9835 - val_loss: 0.0635 - val_accuracy: 0.9670\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x160461eb0>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.fit(X_train, y_train,\n",
" epochs=10, batch_size=32,\n",
" validation_split=0.2,\n",
" shuffle=False)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4/4 [==============================] - 0s 1ms/step - loss: 0.0719 - accuracy: 0.9737\n",
"Test loss: 0.07194967567920685\n",
"Test accuracy: 0.9736841917037964\n"
]
}
],
"source": [
"test_loss, test_acc = model.evaluate(X_test, y_test)\n",
"print(\"Test loss:\", test_loss)\n",
"print(\"Test accuracy:\", test_acc)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def zigmoid(x):\n",
" return 0.502073021 + 0.198695283 * x - 0.001570683 * x**2 - 0.004001354 * x**3\n",
"zigmoid_model = Sequential()\n",
"\n",
"zigmoid_model.add(InputLayer(input_shape=(30, )))\n",
"# No hidden layers\n",
"zigmoid_model.add(Dense(1, activation=zigmoid))\n",
"\n",
"optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.05)\n",
"zigmoid_model.compile(optimizer=optimizer,\n",
" loss='binary_crossentropy',\n",
" metrics=['accuracy'])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/10\n",
"12/12 [==============================] - 0s 11ms/step - loss: 1.1248 - accuracy: 0.7720 - val_loss: 0.5485 - val_accuracy: 0.9011\n",
"Epoch 2/10\n",
"12/12 [==============================] - 0s 3ms/step - loss: 0.5224 - accuracy: 0.9121 - val_loss: 0.2269 - val_accuracy: 0.9780\n",
"Epoch 3/10\n",
"12/12 [==============================] - 0s 2ms/step - loss: 0.2841 - accuracy: 0.9423 - val_loss: 0.1875 - val_accuracy: 0.9560\n",
"Epoch 4/10\n",
"12/12 [==============================] - 0s 3ms/step - loss: 0.2715 - accuracy: 0.9588 - val_loss: 0.1772 - val_accuracy: 0.9560\n",
"Epoch 5/10\n",
"12/12 [==============================] - 0s 2ms/step - loss: 0.2591 - accuracy: 0.9560 - val_loss: 0.1714 - val_accuracy: 0.9670\n",
"Epoch 6/10\n",
"12/12 [==============================] - 0s 2ms/step - loss: 0.2549 - accuracy: 0.9560 - val_loss: 0.1657 - val_accuracy: 0.9670\n",
"Epoch 7/10\n",
"12/12 [==============================] - 0s 2ms/step - loss: 0.2559 - accuracy: 0.9533 - val_loss: 0.1639 - val_accuracy: 0.9670\n",
"Epoch 8/10\n",
"12/12 [==============================] - 0s 2ms/step - loss: 0.2508 - accuracy: 0.9533 - val_loss: 0.1605 - val_accuracy: 0.9670\n",
"Epoch 9/10\n",
"12/12 [==============================] - 0s 2ms/step - loss: 0.2475 - accuracy: 0.9560 - val_loss: 0.1590 - val_accuracy: 0.9670\n",
"Epoch 10/10\n",
"12/12 [==============================] - 0s 2ms/step - loss: 0.2490 - accuracy: 0.9588 - val_loss: 0.1591 - val_accuracy: 0.9560\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x1604f03a0>"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"zigmoid_model.fit(X_train, y_train,\n",
" epochs=10, batch_size=32,\n",
" validation_split=0.2,\n",
" shuffle=False)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4/4 [==============================] - 0s 1ms/step - loss: 0.1795 - accuracy: 0.9561\n",
"Test loss: 0.1795477569103241\n",
"Test accuracy: 0.9561403393745422\n"
]
}
],
"source": [
"test_loss, test_acc = zigmoid_model.evaluate(X_test, y_test)\n",
"print(\"Test loss:\", test_loss)\n",
"print(\"Test accuracy:\", test_acc)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "sklearn",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 2
}