From 4c58d8dc6c926a5b1888d1e682301c5e47fb0f42 Mon Sep 17 00:00:00 2001 From: leriomaggio Date: Thu, 9 Sep 2021 19:11:35 +0100 Subject: [PATCH] third module: Federated Learning & HE --- .../1 Intro to Federated Learning.ipynb | 3191 +++++++++++++++ .../2 Homomorphic Encryption.ipynb | 3499 ++++++++++++++++ .../3 Federated Learning and HE.ipynb | 3624 +++++++++++++++++ .../Duet/duet_fl/Duet_FL_1_Data_Owner.ipynb | 120 + .../Duet/duet_fl/Duet_FL_2_Data_Owner.ipynb | 120 + .../Duet/duet_fl/Duet_FL_Data_Scientist.ipynb | 695 ++++ .../Duet_Iris_Data_Owner.ipynb | 225 + .../Duet_Iris_Data_Scientist.ipynb | 428 ++ .../Duet_Iris_Data_Scientist.ipynb.json | 8 + 9 files changed, 11910 insertions(+) create mode 100644 3-federated-learning-he/1 Intro to Federated Learning.ipynb create mode 100644 3-federated-learning-he/2 Homomorphic Encryption.ipynb create mode 100644 3-federated-learning-he/3 Federated Learning and HE.ipynb create mode 100644 3-federated-learning-he/Duet/duet_fl/Duet_FL_1_Data_Owner.ipynb create mode 100644 3-federated-learning-he/Duet/duet_fl/Duet_FL_2_Data_Owner.ipynb create mode 100644 3-federated-learning-he/Duet/duet_fl/Duet_FL_Data_Scientist.ipynb create mode 100644 3-federated-learning-he/Duet/duet_iris_classifier/Duet_Iris_Data_Owner.ipynb create mode 100644 3-federated-learning-he/Duet/duet_iris_classifier/Duet_Iris_Data_Scientist.ipynb create mode 100644 3-federated-learning-he/Duet/duet_iris_classifier/Duet_Iris_Data_Scientist.ipynb.json diff --git a/3-federated-learning-he/1 Intro to Federated Learning.ipynb b/3-federated-learning-he/1 Intro to Federated Learning.ipynb new file mode 100644 index 0000000..0ba6e20 --- /dev/null +++ b/3-federated-learning-he/1 Intro to Federated Learning.ipynb @@ -0,0 +1,3191 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "46b16a89", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "
\n", + " The notebook is using\n", + " \n", + " no$\\TeX$book Jupyter Theme (release 2.0.1).\n", + "\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%load_ext notexbook\n", + "\n", + "%texify -fs 18" + ] + }, + { + "cell_type": "markdown", + "id": "a7ccc63d", + "metadata": {}, + "source": [ + "# Federated Learning" + ] + }, + { + "cell_type": "markdown", + "id": "39b0ea14", + "metadata": {}, + "source": [ + "![](https://blog.openmined.org/content/images/2020/02/0.PNG)\n", + "\n", + "**Source**: [Open Mined Blog](https://blog.openmined.org/content/images/2020/02/0.PNG)\n" + ] + }, + { + "cell_type": "markdown", + "id": "68b9fcac", + "metadata": {}, + "source": [ + "## Is Federated Learning similar to Distributed Computation?\n", + "\n", + "**tldr;** **NO**\n", + "\n", + "- Client devices (such as smartphones) have limited network bandwidth. They cannot transfer large amounts of data and the upload speed is usually lower than the download speed.\n", + "- The client devices are not always available to take part in a training session. Optimal conditions such as charging state, connection to an unmetered Wi-Fi network, idleness, etc. are not always achievable.\n", + "- The data present on the device get updated quickly and is not always the same. [Data is not always available.]\n", + "- The client devices can choose not to participate in the training.\n", + "- The number of client devices available is very large but inconsistent.\n", + "- Federated learning incorporates privacy preservation with distributed training and aggregation across a large population.\n", + "- The data is usually unbalanced as the data is user-specific and is self-correlated.\n", + "- Federated Learning is one instance of the more general approach of “bringing the code to the data, instead of the data to the code” and addresses the fundamental problems of privacy, ownership, and locality of data." 
+ ] + }, + { + "cell_type": "markdown", + "id": "110d61fd", + "metadata": {}, + "source": [ + "### In Federated Learning:\n", + "\n", + "- Certain techniques are used to compress the model updates.\n", + "- Quality updates are performed rather than simple gradient steps.\n", + "- Noise is added by the server before performing aggregation to obscure the impact of an individual on the learned model. [Global Differential Privacy]\n", + "- The gradient updates are clipped if they are too large." + ] + }, + { + "cell_type": "markdown", + "id": "6245cbbf", + "metadata": {}, + "source": [ + "![](https://camo.githubusercontent.com/3c74785be74fa72f34b059b5b7333845ed1dfebc/68747470733a2f2f626c6f672e6f70656e6d696e65642e6f72672f636f6e74656e742f696d616765732f323032302f30342f4f4d2d2d2d434b4b532d477261706869632d762e30314032782e706e67)\n", + "\n", + "**Source**: [PySyft](https://github.com/OpenMined/PySyft/blob/syft_0.5.0/packages/syft/examples/homomorphic-encryption/Tutorial_0_TenSEAL_Syft_Data_Scientist.ipynb)" + ] + }, + { + "cell_type": "markdown", + "id": "d717eee2", + "metadata": { + "tags": [] + }, + "source": [ + "**What About Encryption?**\n", + "\n", + "### Introducing `HE`: Homomorphic Encryption\n", + "\n", + "> **Definition**:\n", + "> \n", + "> Homomorphic encryption (`HE`) is a technique that allows computations to be made on ciphertexts and generates results that, when decrypted, correspond to the results of the same computations made on plaintexts.\n", + "\n", + "![](https://camo.githubusercontent.com/04164e0d8d9ca2bbe728db6345a79217983d6d67/68747470733a2f2f6769746875622e636f6d2f4f70656e4d696e65642f54656e5345414c2f7261772f6d61737465722f7475746f7269616c732f6173736574732f68652d626c61636b2d626f782e706e67)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/3-federated-learning-he/2 Homomorphic Encryption.ipynb b/3-federated-learning-he/2 Homomorphic Encryption.ipynb new file mode 100644 index 0000000..9f078e3 --- /dev/null +++ b/3-federated-learning-he/2 Homomorphic Encryption.ipynb @@ -0,0 +1,3499 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "096270b1-3196-4633-bdb4-89cbe034bd34", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "
\n", + " The notebook is using\n", + " \n", + " no$\\TeX$book Jupyter Theme (release 2.0.1).\n", + "\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%load_ext notexbook\n", + "\n", + "%texify" + ] + }, + { + "cell_type": "markdown", + "id": "6c08e316-aadf-42ec-ba28-2274dfb7c145", + "metadata": { + "tags": [] + }, + "source": [ + "#### Logistic Regression HE" + ] + }, + { + "cell_type": "markdown", + "id": "bda589f0-047a-430f-86cc-7830ac2fe21f", + "metadata": {}, + "source": [ + "An Example of Logistic Regression Model using **P**artially **H**omomorphic **E**ncryption (`phe`) Python Libray. \n", + "\n", + "Note: This example has been adapted from the original example on `phe` [repo](https://github.com/data61/python-paillier/blob/master/examples/logistic_regression_encrypted_model.py)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "bdcbbd53-0049-4b0c-9642-ae21f20d7a1e", + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "from contextlib import contextmanager\n", + "\n", + "import numpy as np\n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "import phe as paillier" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f1a0cbf9-b445-4117-b2fa-537402b081f9", + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(123456) # Initialise Random Seed for reproducibility" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "25b32d56-f468-4b79-b351-4dd19bd765bb", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import load_breast_cancer\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2fcefbf6-42a7-403a-a03c-884afe922a50", + "metadata": {}, + "outputs": [], + "source": [ + "def get_winsconsin_bc_dataset():\n", + " \"\"\"\n", + " Get the Breast Cancer Winsconsin Dataset, \n", + " as split in Training and Test partitions\n", + " \"\"\"\n", + " X, y = 
@contextmanager
def timer():
    """Context manager that prints the wall-clock time spent in its ``with`` body.

    The elapsed time is printed when the body finishes, including when the
    body exits by raising: the exception still propagates, but the timing
    line is emitted first. Nothing is yielded, so ``with timer() as t``
    binds ``t`` to ``None``.
    """
    started = time.perf_counter()
    try:
        yield
    finally:
        # `finally` guarantees the report even if the timed code fails.
        print('[elapsed time: %.2f s]' % (time.perf_counter() - started))


class Alice:
    """
    Trains a Logistic Regression model on plaintext data,
    encrypts the model for remote use,
    decrypts encrypted scores using the paillier private key.
    """

    def __init__(self):
        self.model = LogisticRegression()
        # Key material is only created by generate_paillier_keypair(); the
        # attributes are declared here so every instance always has them.
        self.pubkey = None
        self.privkey = None

    def generate_paillier_keypair(self, n_length):
        """Generate a Paillier key pair of `n_length` bits and store both keys."""
        self.pubkey, self.privkey = \
            paillier.generate_paillier_keypair(n_length=n_length)

    def fit(self, X, y):
        """Fit the underlying logistic regression on plaintext `X`, `y`."""
        self.model = self.model.fit(X, y)

    def predict(self, X):
        """Return the plaintext model's predictions for `X`."""
        return self.model.predict(X)

    def encrypt_weights(self):
        """Encrypt the fitted model with the public key.

        Returns:
            ``(encrypted_weights, encrypted_intercept)``: a list with one
            encrypted value per coefficient of the first row of
            ``model.coef_``, and the encrypted first entry of
            ``model.intercept_``.
        """
        coef = self.model.coef_[0, :]
        encrypted_weights = [self.pubkey.encrypt(w) for w in coef]
        encrypted_intercept = self.pubkey.encrypt(self.model.intercept_[0])
        return encrypted_weights, encrypted_intercept

    def decrypt_scores(self, encrypted_scores):
        """Decrypt every score in `encrypted_scores` with the private key."""
        return [self.privkey.decrypt(s) for s in encrypted_scores]


class Bob:
    """
    Is given the encrypted model and the public key.
    Scores local plaintext data with the encrypted model, but cannot decrypt
    the scores without the private key held by Alice.
    """

    def __init__(self, pubkey):
        self.pubkey = pubkey

    def set_weights(self, weights, intercept):
        """Store the (encrypted) weights and intercept received from Alice."""
        self.weights = weights
        self.intercept = intercept

    def encrypted_score(self, x):
        """Compute the score of `x` by multiplying with the encrypted model,
        which is a vector of `paillier.EncryptedNumber`.

        Only the non-zero entries of `x` contribute, so zero features cost
        no encrypted operation.
        """
        score = self.intercept
        nonzero_idx = x.nonzero()[0]
        for i in nonzero_idx:
            # Rebind instead of `+=` so the stored intercept can never be
            # modified in place by a type that implements `__iadd__`.
            score = score + x[i] * self.weights[i]
        return score

    def encrypted_evaluate(self, X):
        """Return the list of encrypted scores, one per row of `X`."""
        return [self.encrypted_score(X[i, :]) for i in range(X.shape[0])]
"stream", + "text": [ + "Alice: Generating paillier keypair\n" + ] + } + ], + "source": [ + "print(\"Alice: Generating paillier keypair\")\n", + "alice = Alice()\n", + "# NOTE: using smaller keys sizes wouldn't be cryptographically safe\n", + "alice.generate_paillier_keypair(n_length=1024)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "55f86ad0-5fcf-4874-8d58-07c1da3a28a2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Alice: Training BC Classifier\n", + "[elapsed time: 0.01 s]\n" + ] + } + ], + "source": [ + "print(\"Alice: Training BC Classifier\")\n", + "with timer() as t:\n", + " alice.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "id": "2b00c776-be78-46b4-934d-588ccd97c73f", + "metadata": {}, + "source": [ + "**Just test Model performance on Test** as Alice would have access to Bob's (test) data" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "be931707-a0b6-4a0e-bab8-75d64c8e8239", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classify with model in the clear -- what Alice would get having Bob's data locally\n", + "[elapsed time: 0.00 s]\n", + "Error 0.014\n" + ] + } + ], + "source": [ + "print(\"Classify with model in the clear -- \"\n", + " \"what Alice would get having Bob's data locally\")\n", + "with timer() as t:\n", + " error = np.mean(alice.predict(X_test) != y_test)\n", + "print(\"Error {:.3f}\".format(error))" + ] + }, + { + "cell_type": "markdown", + "id": "02576aed-e415-45fd-b858-98426a97f11d", + "metadata": {}, + "source": [ + "Now Alice encrypts her (trained) model Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d97d605a-0746-47be-acc6-416f3ffd8295", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Alice: Encrypting classifier\n", + "[elapsed time: 0.44 s]\n" + ] + } + ], + "source": [ + 
"print(\"Alice: Encrypting classifier\")\n", + "with timer() as t:\n", + " encrypted_weights, encrypted_intercept = alice.encrypt_weights()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "c78f1493-5423-4b9d-89cd-60e6186ba483", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bob: Scoring with encrypted classifier\n" + ] + } + ], + "source": [ + "print(\"Bob: Scoring with encrypted classifier\")\n", + "bob = Bob(alice.pubkey)\n", + "bob.set_weights(encrypted_weights, encrypted_intercept)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "385c53c9-f399-4d8a-bcd9-2aefe0b22985", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[elapsed time: 6.28 s]\n" + ] + } + ], + "source": [ + "with timer() as t:\n", + " encrypted_scores = bob.encrypted_evaluate(X_test)" + ] + }, + { + "cell_type": "markdown", + "id": "77279853-4eac-43b6-881e-08e725dd864c", + "metadata": {}, + "source": [ + "**Finally** Alice needs to _descrypt_ Bob's scores on test data" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "5e44043a-08f3-4e95-9d6d-0ebc06c8c98b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Alice: Decrypting Bob's scores\n", + "[elapsed time: 0.59 s]\n" + ] + } + ], + "source": [ + "print(\"Alice: Decrypting Bob's scores\")\n", + "with timer() as t:\n", + " scores = alice.decrypt_scores(encrypted_scores)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "ed338750-e553-4b87-81d3-72338dd9116f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Error 0.014 -- this is not known to Alice, who does not possess the ground truth labels\n" + ] + } + ], + "source": [ + "error = np.mean(np.sign(scores) != y_test)\n", + "print(\"Error {:.3f} -- this is not known to Alice, who does not possess \"\n", + " \"the ground 
truth labels\".format(error))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/3-federated-learning-he/3 Federated Learning and HE.ipynb b/3-federated-learning-he/3 Federated Learning and HE.ipynb new file mode 100644 index 0000000..864642a --- /dev/null +++ b/3-federated-learning-he/3 Federated Learning and HE.ipynb @@ -0,0 +1,3624 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "dad63aed-1d74-40c0-86cb-93eaebd0c1d7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "
\n", + " The notebook is using\n", + " \n", + " no$\\TeX$book Jupyter Theme (release 2.0.1).\n", + "\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%load_ext notexbook\n", + "\n", + "%texify" + ] + }, + { + "cell_type": "markdown", + "id": "cf51c11c-e09f-4a12-b61e-50e0f73ac102", + "metadata": {}, + "source": [ + "### Federated Learning Encrypted" + ] + }, + { + "cell_type": "markdown", + "id": "e2a732e3-2834-4178-8671-ea9b39856abf", + "metadata": {}, + "source": [ + "This example involves learning using **sensitive medical data** from multiple hospitals\n", + "to predict diabetes progression in patients. \n", + "\n", + "The data is a standard dataset from `sklearn`" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4ec8e51b-327c-4ab9-ab90-94a8ef272281", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from sklearn.datasets import load_diabetes" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "258fd33d-1f93-4e22-9e3c-3d14c407b2f3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".. 
_diabetes_dataset:\n", + "\n", + "Diabetes dataset\n", + "----------------\n", + "\n", + "Ten baseline variables, age, sex, body mass index, average blood\n", + "pressure, and six blood serum measurements were obtained for each of n =\n", + "442 diabetes patients, as well as the response of interest, a\n", + "quantitative measure of disease progression one year after baseline.\n", + "\n", + "**Data Set Characteristics:**\n", + "\n", + " :Number of Instances: 442\n", + "\n", + " :Number of Attributes: First 10 columns are numeric predictive values\n", + "\n", + " :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n", + "\n", + " :Attribute Information:\n", + " - age age in years\n", + " - sex\n", + " - bmi body mass index\n", + " - bp average blood pressure\n", + " - s1 tc, total serum cholesterol\n", + " - s2 ldl, low-density lipoproteins\n", + " - s3 hdl, high-density lipoproteins\n", + " - s4 tch, total cholesterol / HDL\n", + " - s5 ltg, possibly log of serum triglycerides level\n", + " - s6 glu, blood sugar level\n", + "\n", + "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. 
the sum of squares of each column totals 1).\n", + "\n", + "Source URL:\n", + "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n", + "\n", + "For more information see:\n", + "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n", + "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n" + ] + } + ], + "source": [ + "diabetes = load_diabetes()\n", + "print(diabetes.DESCR)" + ] + }, + { + "cell_type": "markdown", + "id": "af0f518b-7bda-4147-8fea-ff61948dd356", + "metadata": {}, + "source": [ + "Since this is a **Regression Problem**, we will be using **Linear Regression**.\n", + "\n", + "The patients' data is split between `5` hospitals (see `n_clients` in the configuration below), all sharing the same features\n", + "but **different samples** (as in having different patients). \n", + "\n", + "We refer to this scenario as _horizontally partitioned_.\n", + "\n", + "The objective is to make use of the whole (virtual) training set to improve\n", + "upon the model that can be trained locally at each hospital.\n", + "\n", + "`50` patients will be kept as a `test set` and not used for training.\n", + "\n", + "An additional agent is the **AggregationServer** who facilitates the information exchange\n", + "among the hospitals under the following privacy constraints:\n", + "\n", + "1) The individual patient's record at each hospital cannot leave the premises,\n", + " not even in encrypted form.\n", + "\n", + "2) Model parameters (i.e. gradients) from any hospital's dataset\n", + " cannot be shared, unless they are first encrypted!\n", + "\n", + "3) None of the parties (hospitals AND server) should be able to infer WHERE\n", + " (in which hospital) a patient in the training set has been treated.\n", + " \n", + "**Note**: No particular mechanism of protection against **membership attacks** is considered here! 
np.random.seed(123456)  # seed NumPy's global RNG, which get_data() draws from


def get_data(n_clients):
    """Load the diabetes dataset and split it into per-client training sets
    plus a hold-out test set.

    A constant column of ones is appended to the features to emulate an
    intercept. Rows are shuffled with NumPy's global RNG, 50 of them are
    drawn (without replacement) as the test set, and the remaining rows are
    cut into `n_clients` consecutive, equally sized slices. Rows left over
    when the training size is not a multiple of `n_clients` are not assigned
    to any client.

    Args:
        n_clients: number of clients to split the training rows among.

    Returns:
        ``(X_h, y_h, X_test, y_test)`` where ``X_h`` and ``y_h`` are lists
        holding one feature matrix / target vector per client.
    """
    print("Loading data")
    diabetes = load_diabetes()
    y = diabetes.target
    X = diabetes.data
    # Add constant to emulate intercept
    X = np.c_[X, np.ones(X.shape[0])]

    # The features are already preprocessed

    # Shuffle
    perm = np.random.permutation(X.shape[0])
    X, y = X[perm, :], y[perm]

    # Select test at random; everything not picked stays in the training mask
    test_size = 50
    test_idx = np.random.choice(X.shape[0], size=test_size, replace=False)
    train_mask = np.ones(X.shape[0], dtype=bool)
    train_mask[test_idx] = False

    X_test, y_test = X[test_idx, :], y[test_idx]
    X_train, y_train = X[train_mask, :], y[train_mask]

    # Split train among multiple clients.
    # The selection is not at random. We simulate the fact that each client
    # sees a potentially very different sample of patients.
    step = X_train.shape[0] // n_clients  # integer division, no float round-trip
    X_h = [X_train[step * c: step * (c + 1), :] for c in range(n_clients)]
    y_h = [y_train[step * c: step * (c + 1)] for c in range(n_clients)]

    return X_h, y_h, X_test, y_test


def mean_square_error(y_pred, y):
    r""" 1/m * \sum_{i=1..m} (y_pred_i - y_i)^2 """
    return np.mean((y - y_pred) ** 2)


def encrypt_vector(public_key, x):
    """Encrypt every element of `x` with `public_key`; returns a list."""
    return [public_key.encrypt(value) for value in x]


def decrypt_vector(private_key, x):
    """Decrypt every element of `x` with `private_key`; returns a NumPy array."""
    return np.array([private_key.decrypt(value) for value in x])


def sum_encrypted_vectors(x, y):
    """Element-wise sum of two equally long vectors.

    Raises:
        ValueError: if `x` and `y` differ in length.
    """
    if len(x) != len(y):
        raise ValueError('Encrypted vectors must have the same size')
    return [a + b for a, b in zip(x, y)]


class Server:
    """Private key holder. Decrypts the average gradient"""

    def __init__(self, key_length):
        keypair = paillier.generate_paillier_keypair(n_length=key_length)
        self.pubkey, self.privkey = keypair

    def decrypt_aggregate(self, input_model, n_clients):
        """Decrypt the summed vector `input_model` and divide it by `n_clients`."""
        return decrypt_vector(self.privkey, input_model) / n_clients


class Hospital:
    """Runs linear regression with local data or by gradient steps,
    where gradient can be passed in.
    Using public key can encrypt locally computed gradients.
    """

    def __init__(self, name, X, y, pubkey):
        self.name = name
        self.pubkey = pubkey
        self.X, self.y = X, y
        # One weight per feature column, starting from the zero model.
        self.weights = np.zeros(X.shape[1])

    def fit(self, n_iter, eta=0.01):
        """Linear regression for n_iter"""
        for _ in range(n_iter):
            gradient = self.compute_gradient()
            self.gradient_step(gradient, eta)

    def gradient_step(self, gradient, eta=0.01):
        """Update the model with the given gradient"""
        self.weights -= eta * gradient

    def compute_gradient(self):
        """Compute the gradient of the current model using the training set
        """
        delta = self.predict(self.X) - self.y
        return delta.dot(self.X) / len(self.X)

    def predict(self, X):
        """Score test data"""
        return X.dot(self.weights)

    def encrypted_gradient(self, sum_to=None):
        """Compute and encrypt gradient.
        When `sum_to` is given, sum the encrypted gradient to it, assumed
        to be another vector of the same size
        """
        gradient = self.compute_gradient()
        encrypted_gradient = encrypt_vector(self.pubkey, gradient)

        if sum_to is None:
            return encrypted_gradient
        return sum_encrypted_vectors(sum_to, encrypted_gradient)


config = {
    'n_clients': 5,
    'key_length': 1024,
    'n_iter': 50,
    'eta': 1.5,
}


def local_learning(X_h, y_h, X_test, y_test, config):
    """Baseline: every client trains only on its own data, without encryption.

    Prints the test-set MSE of each client's locally trained model.
    Reads ``n_clients``, ``n_iter`` and ``eta`` from `config`.
    """
    n_clients = config['n_clients']

    # Instantiate the clients, each with its own local dataset.
    # Nothing is encrypted in this baseline, so no public key is passed.
    hospitals = [
        Hospital('Hospital {}'.format(i + 1), X_h[i], y_h[i], None)
        for i in range(n_clients)
    ]

    # Each client trains a linear regressor on its own data
    print('Error (MSE) that each client gets on test set by '
          'training only on own local data:')
    for h in hospitals:
        h.fit(config['n_iter'], config['eta'])
        y_pred = h.predict(X_test)
        mse = mean_square_error(y_pred, y_test)
        print('{:s}:\t{:.2f}'.format(h.name, mse))


def federated_learning(X_h, y_h, X_test, y_test, config):
    """Train all clients jointly by aggregating encrypted gradients.

    At every iteration each client encrypts its local gradient and adds it
    to the running encrypted sum; the server decrypts the sum and returns the
    average, which every client applies as a gradient step. Prints the
    test-set MSE of each client's model at the end.
    Reads ``n_clients``, ``n_iter``, ``key_length`` and ``eta`` from `config`.
    """
    n_clients = config['n_clients']
    n_iter = config['n_iter']

    # Instantiate the server and generate private and public keys
    # NOTE: using smaller keys sizes wouldn't be cryptographically safe
    server = Server(key_length=config['key_length'])

    # Instantiate the clients.
    # Each client gets the public key at creation and its own local dataset
    hospitals = [
        Hospital('Hospital {}'.format(i + 1), X_h[i], y_h[i], server.pubkey)
        for i in range(n_clients)
    ]

    # The federated learning with gradient descent
    print(f"Running distributed gradient aggregation for {n_iter} iterations")
    for _ in range(n_iter):
        # Compute gradients, encrypt and aggregate
        encrypt_aggr = hospitals[0].encrypted_gradient(sum_to=None)
        for h in hospitals[1:]:
            encrypt_aggr = h.encrypted_gradient(sum_to=encrypt_aggr)

        # Send aggregate to server and decrypt it
        aggr = server.decrypt_aggregate(encrypt_aggr, n_clients)

        # Take gradient steps
        for h in hospitals:
            h.gradient_step(aggr, config['eta'])

    print('Error (MSE) that each client gets after running the protocol:')
    for h in hospitals:
        y_pred = h.predict(X_test)
        mse = mean_square_error(y_pred, y_test)
        print('{:s}:\t{:.2f}'.format(h.name, mse))
\n", + "\n", + "In particular, the `Duet` folder contains **two** examples extracted from the [`private_ai_series`](https://github.com/OpenMined/PySyft/tree/dev/packages/syft/examples/private-ai-series) tutorials included in `PySyft`.\n", + "\n", + "[**Duet**](https://github.com/OpenMined/PySyft/tree/dev/packages/syft/examples/duet), [PySyft](https://github.com/OpenMined/PySyft) in a Notebook, is the first-ever project that enables _computation on sensitive data with privacy guarantee_ (and it runs directly in two notebooks!!)\n", + "\n", + "I'd really recommend giving it a go!\n", + "\n", + "Finally, in case you're interested in more complex scenarios for _Privacy Preserving Machine Learning_ (`PPML`) and its technologies, the following (_amazing_, ed.) decision-tree map will help you get an idea of _what_ should be chosen, and _when_.\n", + "\n", + "\n", + "\n", + "**Reference**: https://www.private-ai.ca/PETs_Decision_Tree.svg\n" + ] + }, + { + "cell_type": "markdown", + "id": "610e4e65-a9fa-4b23-a48d-339a68852303", + "metadata": {}, + "source": [ + "## Further Readings" + ] + }, + { + "cell_type": "markdown", + "id": "06987e93-6e23-4368-9783-32e661fa5aaf", + "metadata": {}, + "source": [ + "- [DP-SGD](https://medium.com/pytorch/differential-privacy-series-part-1-dp-sgd-algorithm-explained-12512c3959a3)\n", + "- [Privacy and Machine Learning](http://www.cleverhans.io/privacy/2018/04/29/privacy-and-machine-learning.html)\n", + "- [Introducing OPACUS](https://ai.facebook.com/blog/introducing-opacus-a-high-speed-library-for-training-pytorch-models-with-differential-privacy/)\n", + "- [PATE](https://blog.openmined.org/build-pate-differential-privacy-in-pytorch/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/3-federated-learning-he/Duet/duet_fl/Duet_FL_1_Data_Owner.ipynb b/3-federated-learning-he/Duet/duet_fl/Duet_FL_1_Data_Owner.ipynb new file mode 100644 index 0000000..d25e730 --- /dev/null +++ b/3-federated-learning-he/Duet/duet_fl/Duet_FL_1_Data_Owner.ipynb @@ -0,0 +1,120 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "composed-receipt", + "metadata": {}, + "outputs": [], + "source": [ + "import syft as sy" + ] + }, + { + "cell_type": "markdown", + "id": "continent-climb", + "metadata": {}, + "source": [ + "# Part 1: Launch a Duet Server and upload data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "parental-classics", + "metadata": {}, + "outputs": [], + "source": [ + "duet = sy.launch_duet(loopback=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dying-switch", + "metadata": {}, + "outputs": [], + "source": [ + "import torch as th\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "focused-error", + "metadata": {}, + "outputs": [], + "source": [ + "data = th.FloatTensor(np.array([5, 15, 25, 35, 45, 55]).reshape(-1, 1))\n", + "\n", + "data = data.tag(\"DO1 data\")\n", + "data = data.describe(\"Dataset of 6 samples, 1 feature\")\n", + "\n", + "data_ptr = data.send(duet, pointable=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "circular-commodity", + "metadata": {}, + "outputs": [], + "source": [ + "duet.store.pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "undefined-keeping", + "metadata": {}, + "outputs": [], + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "thousand-borough", + "metadata": {}, + "outputs": [], + "source": [ + "duet.requests.add_handler(\n", + " 
action=\"accept\",\n", + " print_local=True, # print the result in your notebook\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "assigned-tomato", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/3-federated-learning-he/Duet/duet_fl/Duet_FL_2_Data_Owner.ipynb b/3-federated-learning-he/Duet/duet_fl/Duet_FL_2_Data_Owner.ipynb new file mode 100644 index 0000000..22d2fdc --- /dev/null +++ b/3-federated-learning-he/Duet/duet_fl/Duet_FL_2_Data_Owner.ipynb @@ -0,0 +1,120 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "electric-requirement", + "metadata": {}, + "outputs": [], + "source": [ + "import syft as sy" + ] + }, + { + "cell_type": "markdown", + "id": "dependent-pharmacy", + "metadata": {}, + "source": [ + "# Part 1: Launch a Duet Server and upload data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dirty-subject", + "metadata": {}, + "outputs": [], + "source": [ + "duet = sy.duet(loopback=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "different-setup", + "metadata": {}, + "outputs": [], + "source": [ + "import torch as th\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "continent-sixth", + "metadata": {}, + "outputs": [], + "source": [ + "data = th.FloatTensor(np.array([60, 65, 75, 85, 95]).reshape(-1, 1))\n", + "\n", + "data = data.tag(\"DO2 data\")\n", + "data = data.describe(\"Dataset of 5 samples, 1 feature\")\n", + "\n", + "data_ptr = 
data.send(duet, pointable=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "tropical-addition", + "metadata": {}, + "outputs": [], + "source": [ + "duet.store.pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "nearby-usage", + "metadata": {}, + "outputs": [], + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "under-revolution", + "metadata": {}, + "outputs": [], + "source": [ + "duet.requests.add_handler(\n", + " action=\"accept\",\n", + " print_local=True, # print the result in your notebook\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "every-banana", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/3-federated-learning-he/Duet/duet_fl/Duet_FL_Data_Scientist.ipynb b/3-federated-learning-he/Duet/duet_fl/Duet_FL_Data_Scientist.ipynb new file mode 100644 index 0000000..882b582 --- /dev/null +++ b/3-federated-learning-he/Duet/duet_fl/Duet_FL_Data_Scientist.ipynb @@ -0,0 +1,695 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "enclosed-accuracy", + "metadata": {}, + "outputs": [], + "source": [ + "import syft as sy" + ] + }, + { + "cell_type": "markdown", + "id": "earlier-investing", + "metadata": {}, + "source": [ + "## Join the Duet Server the Data Owner 1 connected to" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sized-session", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "duet1 = 
class SyNet(sy.Module):
    """Single linear layer mapping ``in_dim`` inputs to ``out_dim`` outputs.

    ``in_dim`` and ``out_dim`` are read from the notebook's global namespace
    at construction time. The layer is built from ``torch_ref.nn``.
    """

    def __init__(self, torch_ref):
        super().__init__(torch_ref=torch_ref)
        self.linear = self.torch_ref.nn.Linear(in_dim, out_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)
def train(iterations, model, torch_ref, optim, data_ptr, target_ptr):
    """Run ``iterations`` optimisation steps and return the list of loss values.

    Each step, in order: zero the optimiser's gradients, run ``model`` on
    ``data_ptr``, compute ``torch_ref.nn.functional.mse_loss`` against
    ``target_ptr``, fetch the scalar loss through ``loss.item().get(...)``,
    back-propagate, and step the optimiser. The fetched loss is printed on
    every 10th step (starting with step 0).

    Returns
    -------
    list
        One fetched loss value per step, in step order.
    """
    history = []

    for step in range(iterations):
        optim.zero_grad()

        prediction = model(data_ptr)
        loss = torch_ref.nn.functional.mse_loss(prediction, target_ptr)

        # ``item()`` does not yield the number directly here; the value is
        # obtained through an explicit ``get`` request.
        fetched = loss.item().get(
            reason="To evaluate training progress",
            request_block=True,
            timeout_secs=5,
        )

        if not step % 10:
            print("Epoch", step, "loss", fetched)
        history.append(fetched)

        loss.backward()
        optim.step()

    return history
"execution_count": null, + "id": "convinced-rogers", + "metadata": {}, + "outputs": [], + "source": [ + "remote_model1 = local_model1.send(duet1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cordless-singles", + "metadata": {}, + "outputs": [], + "source": [ + "remote_torch1 = duet1.torch\n", + "params = remote_model1.parameters()\n", + "optim1 = remote_torch1.optim.Adam(params=params, lr=0.1)" + ] + }, + { + "cell_type": "markdown", + "id": "average-spirit", + "metadata": {}, + "source": [ + "Dummy target data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "weird-carrier", + "metadata": {}, + "outputs": [], + "source": [ + "target1_ptr = th.FloatTensor(np.array([5, 10, 15, 22, 30, 38]).reshape(-1, 1))\n", + "target1_ptr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "clinical-cement", + "metadata": {}, + "outputs": [], + "source": [ + "iteration = 100\n", + "losses = train(iteration, remote_model1, remote_torch1, optim1, data1_ptr, target1_ptr)" + ] + }, + { + "cell_type": "markdown", + "id": "moral-election", + "metadata": {}, + "source": [ + "Train on Data Owner 2 data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "heavy-spice", + "metadata": {}, + "outputs": [], + "source": [ + "local_model2 = SyNet(torch)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "proprietary-vision", + "metadata": {}, + "outputs": [], + "source": [ + "print(local_model2.parameters())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "continental-carter", + "metadata": {}, + "outputs": [], + "source": [ + "remote_model2 = local_model2.send(duet2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cellular-century", + "metadata": {}, + "outputs": [], + "source": [ + "remote_torch2 = duet2.torch\n", + "params = remote_model2.parameters()\n", + "optim2 = remote_torch2.optim.Adam(params=params, lr=0.1)" + ] + }, + { + 
"cell_type": "markdown", + "id": "corporate-material", + "metadata": {}, + "source": [ + "Dummy Target data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "unlikely-digest", + "metadata": {}, + "outputs": [], + "source": [ + "target2_ptr = th.FloatTensor(np.array([35, 40, 45, 55, 60]).reshape(-1, 1))\n", + "target2_ptr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "loose-aging", + "metadata": {}, + "outputs": [], + "source": [ + "iteration = 100\n", + "losses = train(iteration, remote_model2, remote_torch2, optim2, data2_ptr, target2_ptr)" + ] + }, + { + "cell_type": "markdown", + "id": "grave-gravity", + "metadata": {}, + "source": [ + "### Averaging Model Updates" + ] + }, + { + "cell_type": "markdown", + "id": "closed-bangladesh", + "metadata": {}, + "source": [ + "Ideally, there will be a coordinator server who will get the model updates from different clients and make an aggregation. For the sake of simplicity, in this example we will make THIS server the coordinator." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aggregate-desperate", + "metadata": {}, + "outputs": [], + "source": [ + "from collections import OrderedDict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eastern-silicon", + "metadata": {}, + "outputs": [], + "source": [ + "## Little sanity check!\n", + "\n", + "param1 = remote_model1.parameters().get(request_block=True)\n", + "param2 = remote_model2.parameters().get(request_block=True)\n", + "\n", + "print(\"Local model1 parameters:\")\n", + "print(local_model1.parameters())\n", + "print(\"Remote model1 parameters:\")\n", + "print(param1)\n", + "print()\n", + "\n", + "print(\"Local model2 parameters:\")\n", + "print(local_model2.parameters())\n", + "print(\"Remote model2 parameters:\")\n", + "print(param2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "afraid-bishop", + "metadata": {}, + "outputs": [], + "source": [ + "remote_model1_updates = remote_model1.get(\n", + " request_block=True\n", + ").state_dict()\n", + "\n", + "print(remote_model1_updates)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "limiting-slope", + "metadata": {}, + "outputs": [], + "source": [ + "remote_model2_updates = remote_model2.get(\n", + " request_block=True\n", + ").state_dict()\n", + "\n", + "print(remote_model2_updates)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "experimental-pulse", + "metadata": {}, + "outputs": [], + "source": [ + "avg_updates = OrderedDict()\n", + "avg_updates[\"linear.weight\"] = (\n", + " remote_model1_updates[\"linear.weight\"] + remote_model2_updates[\"linear.weight\"]\n", + ") / 2\n", + "avg_updates[\"linear.bias\"] = (\n", + " remote_model1_updates[\"linear.bias\"] + remote_model2_updates[\"linear.bias\"]\n", + ") / 2\n", + "\n", + "print(avg_updates)" + ] + }, + { + "cell_type": "markdown", + "id": "approximate-scotland", + "metadata": {}, + "source": [ + "### Load aggregated 
class ClassicalLR(torch.nn.Module):
    """Plain PyTorch linear regression: one ``Linear(in_dim, out_dim)`` layer.

    ``in_dim`` and ``out_dim`` are read from the notebook's global namespace
    at construction time. The ``torch`` constructor argument is the module
    used to build the layer (it shadows the global ``torch`` inside
    ``__init__``).
    """

    def __init__(self, torch):
        super().__init__()
        self.linear = torch.nn.Linear(in_dim, out_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)
def classic_train(iterations, model, torch, optim, data, target, criterion):
    """Train ``model`` locally for ``iterations`` steps and return the loss history.

    Each step zeroes the optimiser's gradients, runs ``model`` on ``data``,
    evaluates ``criterion(output, target)``, records ``loss.item()``,
    back-propagates and steps the optimiser. The loss is printed on every
    10th step (starting with step 0). The ``torch`` argument is accepted for
    signature parity with the remote training helper but is not used.

    Returns
    -------
    list
        ``loss.item()`` for every step, in step order.
    """
    history = []

    for step in range(iterations):
        optim.zero_grad()

        loss = criterion(model(data), target)
        value = loss.item()

        if not step % 10:
            print("Epoch", step, "loss", value)
        history.append(value)

        loss.backward()
        optim.step()

    return history
classical_model(sample)\n", + "\n", + " print(f\"Prediction: {y_hat.item()} Ground Truth: {test_target[i].item()}\")\n", + " preds.append(y_hat)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "veterinary-makeup", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/3-federated-learning-he/Duet/duet_iris_classifier/Duet_Iris_Data_Owner.ipynb b/3-federated-learning-he/Duet/duet_iris_classifier/Duet_Iris_Data_Owner.ipynb new file mode 100644 index 0000000..607ffa1 --- /dev/null +++ b/3-federated-learning-he/Duet/duet_iris_classifier/Duet_Iris_Data_Owner.ipynb @@ -0,0 +1,225 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "unsigned-palace", + "metadata": {}, + "outputs": [], + "source": [ + "import syft as sy" + ] + }, + { + "cell_type": "markdown", + "id": "korean-bunch", + "metadata": {}, + "source": [ + "# Part 1: Launch a Duet Server" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "plastic-ridge", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "duet = sy.launch_duet(loopback=True)" + ] + }, + { + "cell_type": "markdown", + "id": "wrapped-eligibility", + "metadata": {}, + "source": [ + "# Part 2: Upload data to Duet Server" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "understood-aging", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import datasets\n", + "import torch as th" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "employed-momentum", + "metadata": 
{}, + "outputs": [], + "source": [ + "iris = datasets.load_iris()\n", + "X, y = iris.data, iris.target\n", + "\n", + "print(X[0:5])" + ] + }, + { + "cell_type": "markdown", + "id": "grand-differential", + "metadata": {}, + "source": [ + "Flower species mappings:\n", + "1. \"Iris-setosa\": 0,\n", + "2. \"Iris-versicolor\": 1,\n", + "3. \"Iris-virginica\": 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "theoretical-ownership", + "metadata": {}, + "outputs": [], + "source": [ + "print(y)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "occasional-secretary", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Length of dataset:\", len(X))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "numerical-carry", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(X))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "direct-sussex", + "metadata": {}, + "outputs": [], + "source": [ + "X = th.FloatTensor(X)\n", + "y = th.FloatTensor(y)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "flexible-canyon", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(X))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "assured-clinton", + "metadata": {}, + "outputs": [], + "source": [ + "X = X.tag(\"iris-data\")\n", + "y = y.tag(\"iris-target\")\n", + "\n", + "X = X.describe(\n", + " \"This is a dataset for flower classification of 150 samples. 
4 Features are sepal length (cm), sepal width (cm),\"\n", + " \"petal length (cm), petal width (cm)\"\n", + ")\n", + "y = y.describe(\"Labels for flowers: Iris-setosa, Iris-versicolour, Iris-virginica\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "floating-kingston", + "metadata": {}, + "outputs": [], + "source": [ + "data_pointer = X.send(duet, pointable=True)\n", + "target_pointer = y.send(duet, pointable=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "italic-zoning", + "metadata": {}, + "outputs": [], + "source": [ + "# Once uploaded, the data owner can see the object stored in the tensor\n", + "duet.store" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dynamic-harrison", + "metadata": {}, + "outputs": [], + "source": [ + "# To see it in a human-readable format, data owner can also pretty-print the tensor information\n", + "duet.store.pandas" + ] + }, + { + "cell_type": "markdown", + "id": "extreme-specific", + "metadata": {}, + "source": [ + "# Part 3: Response to requests coming from Data Scientist" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "operational-exercise", + "metadata": {}, + "outputs": [], + "source": [ + "duet.requests.add_handler(action=\"accept\")" + ] + }, + { + "cell_type": "markdown", + "id": "incident-intervention", + "metadata": {}, + "source": [ + "### \"he-black-box\" Checkpoint 1 : Well done!" 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/3-federated-learning-he/Duet/duet_iris_classifier/Duet_Iris_Data_Scientist.ipynb b/3-federated-learning-he/Duet/duet_iris_classifier/Duet_Iris_Data_Scientist.ipynb new file mode 100644 index 0000000..1b9263a --- /dev/null +++ b/3-federated-learning-he/Duet/duet_iris_classifier/Duet_Iris_Data_Scientist.ipynb @@ -0,0 +1,428 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "relevant-thong", + "metadata": {}, + "outputs": [], + "source": [ + "import syft as sy" + ] + }, + { + "cell_type": "markdown", + "id": "express-crisis", + "metadata": {}, + "source": [ + "# Part 1: Join the Duet Server the Data Owner connected to" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "reserved-chain", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "duet = sy.join_duet(loopback=True)" + ] + }, + { + "cell_type": "markdown", + "id": "impaired-beast", + "metadata": {}, + "source": [ + "### \"he-black-box\" Checkpoint 0 : Now STOP and run the Data Owner notebook until Checkpoint 1." 
+ ] + }, + { + "cell_type": "markdown", + "id": "intellectual-suffering", + "metadata": {}, + "source": [ + "# Part 2: Search for Available Data\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "virtual-small", + "metadata": {}, + "outputs": [], + "source": [ + "# The data scientist can check the list of pointable data in Data Owner's duet store\n", + "duet.store.pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "statewide-sydney", + "metadata": {}, + "outputs": [], + "source": [ + "# Data Scientist wants to get the iris dataset. (S)He needs a pointer to the data and\n", + "# a pointer to the target for prediction.\n", + "data_ptr = duet.store[0]\n", + "target_ptr = duet.store[1]\n", + "\n", + "# data_ptr.requires_grad = True\n", + "# target_ptr.requires_grad = True\n", + "\n", + "# data_ptr is a reference to the iris dataset remotely available on data owner's server\n", + "# target_ptr is a reference to the iris dataset LABELS\n", + "# remotely available on data owner's server\n", + "print(data_ptr)\n", + "print(target_ptr)" + ] + }, + { + "cell_type": "markdown", + "id": "satellite-english", + "metadata": {}, + "source": [ + "# Part 3: Perform Logistic Regression on Iris dataset\n", + "Now the data scientist can perform machine learning on the data that is in the Data Owner's duet server, without the owner having to share his/her data." + ] + }, + { + "cell_type": "markdown", + "id": "introductory-dominican", + "metadata": {}, + "source": [ + "### Basic analysis" + ] + }, + { + "cell_type": "markdown", + "id": "auburn-symphony", + "metadata": {}, + "source": [ + "First the data scientist needs to know some basic information about the dataset.\n", + "1. The length of the dataset\n", + "2. The input dimension\n", + "3. The output dimension\n", + "\n", + "This information has to be explicitly shared by the Data Owner. Let's try to find it in the data description." 
class SyNet(sy.Module):
    """Fully connected classifier: ``in_dim -> 20 -> 30 -> out_dim``.

    ``in_dim`` and ``out_dim`` are read from the notebook's global namespace
    at construction time. The two hidden layers use ReLU; the output is a
    log-softmax taken over dimension 1.
    """

    def __init__(self, torch_ref):
        super().__init__(torch_ref=torch_ref)
        nn = self.torch_ref.nn
        self.layer1 = nn.Linear(in_dim, 20)
        self.layer2 = nn.Linear(20, 30)
        self.out = nn.Linear(30, out_dim)

    def forward(self, x):
        """Return log-probabilities for ``x`` (softmax over ``dim=1``)."""
        functional = self.torch_ref.nn.functional
        hidden = functional.relu(self.layer1(x))
        hidden = functional.relu(self.layer2(hidden))
        return functional.log_softmax(self.out(hidden), dim=1)
def train(iterations, model, torch_ref, optim, data_ptr, target_ptr):
    """Run ``iterations`` optimisation steps and return the list of loss values.

    Each step, in order: zero the optimiser's gradients, run ``model`` on
    ``data_ptr``, compute ``torch_ref.nn.functional.nll_loss`` against
    ``target_ptr.long()``, fetch the scalar loss through
    ``loss.item().get(...)``, back-propagate, and step the optimiser. The
    fetched loss is printed on every 10th step (starting with step 0).

    Returns
    -------
    list
        One fetched loss value per step, in step order.
    """
    history = []

    for step in range(iterations):
        optim.zero_grad()

        log_probs = model(data_ptr)
        loss = torch_ref.nn.functional.nll_loss(log_probs, target_ptr.long())

        # ``item()`` does not yield the number directly here; the value is
        # obtained through an explicit ``get`` request.
        fetched = loss.item().get(
            reason="To evaluate training progress", request_block=True, timeout_secs=5
        )

        if not step % 10:
            print("Epoch", step, "loss", fetched)
        history.append(fetched)

        loss.backward()
        optim.step()

    return history
def get_local_model(model):
    """Return a locally usable version of ``model``.

    Args:
        model: Object exposing an ``is_local`` flag and a ``get`` method.

    Returns:
        ``model`` itself when ``model.is_local`` is truthy; otherwise the
        result of a blocking ``model.get(...)`` request issued with a
        5-second timeout.
    """
    # Already local: nothing to fetch.
    if model.is_local:
        return model

    return model.get(
        request_block=True,
        reason="To run test and inference locally",
        timeout_secs=5,
    )
# Score the downloaded model one test row at a time with gradient tracking
# disabled, printing each prediction next to its ground-truth label.
preds = []
with torch.no_grad():
    for i, features in enumerate(X_test):
        # unsqueeze(0) adds a leading dimension so the single row is passed
        # to the model as a batch of one.
        scores = local_model(features.unsqueeze(0))
        predicted_class = scores.argmax().item()
        print(f"Prediction: {predicted_class} Ground Truth: {y_test[i]}")
        preds.append(predicted_class)

# Overall accuracy across the test rows, reported as a percentage.
acc = accuracy_score(y_test, preds)
print("Overall test accuracy", acc * 100)