From 6face6b91196d1c392d90d1ef7908f716625a831 Mon Sep 17 00:00:00 2001
From: Lucas Camillo <lucascamillo@Lucass-MacBook-Pro-4.local>
Date: Thu, 8 Feb 2024 20:04:52 +0000
Subject: [PATCH] added tutorials to docs again

---
 docs/source/tutorials/tutorial_atacseq.ipynb  |  772 ++++++++
 .../tutorials/tutorial_bloodchemistry.ipynb   |  633 +++++++
 .../tutorial_dnam_illumina_human_array.ipynb  | 1186 ++++++++++++
 ...torial_dnam_illumina_mammalian_array.ipynb | 1616 ++++++++++++++++
 .../source/tutorials/tutorial_dnam_rrbs.ipynb | 1656 +++++++++++++++++
 .../tutorial_histonemarkchipseq.ipynb         |  298 +++
 docs/source/tutorials/tutorial_rnaseq.ipynb   |  683 +++++++
 docs/source/tutorials/tutorial_utils.ipynb    |  522 ++++++
 tutorials/tutorial_histonemarkchipseq.ipynb   |    2 +-
 9 files changed, 7367 insertions(+), 1 deletion(-)
 create mode 100644 docs/source/tutorials/tutorial_atacseq.ipynb
 create mode 100644 docs/source/tutorials/tutorial_bloodchemistry.ipynb
 create mode 100644 docs/source/tutorials/tutorial_dnam_illumina_human_array.ipynb
 create mode 100644 docs/source/tutorials/tutorial_dnam_illumina_mammalian_array.ipynb
 create mode 100644 docs/source/tutorials/tutorial_dnam_rrbs.ipynb
 create mode 100644 docs/source/tutorials/tutorial_histonemarkchipseq.ipynb
 create mode 100644 docs/source/tutorials/tutorial_rnaseq.ipynb
 create mode 100644 docs/source/tutorials/tutorial_utils.ipynb

diff --git a/docs/source/tutorials/tutorial_atacseq.ipynb b/docs/source/tutorials/tutorial_atacseq.ipynb
new file mode 100644
index 0000000..b99a0bb
--- /dev/null
+++ b/docs/source/tutorials/tutorial_atacseq.ipynb
@@ -0,0 +1,772 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "82321cbb-e1b9-49f3-b826-32c4fafd96f4",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_atacseq.ipynb) [![Open In nbviewer](https://img.shields.io/badge/View%20in-nbviewer-orange)](https://nbviewer.jupyter.org/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_atacseq.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cf837dcf-de19-46f7-9c81-c063a45b14b6",
+   "metadata": {},
+   "source": [
+    "# Bulk ATAC-Seq"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "133e64f6-a0d2-4a70-84a0-33a4c2bea725",
+   "metadata": {},
+   "source": [
+    "This tutorial is a brief guide for the implementation of the two ATAC clocks developed by Morandini et al. Link to [paper](https://link.springer.com/article/10.1007/s11357-023-00986-0)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a0469770-c9af-4d69-a055-bf3c312286db",
+   "metadata": {},
+   "source": [
+    "We just need two packages for this tutorial."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "e64f6bb6-7c95-4b9e-b37a-4ae811dc088d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import pyaging as pya "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5c367e47-ceca-4c73-ac14-b6a9a781c66a",
+   "metadata": {},
+   "source": [
+    "## Download and load example data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "52b6e9c2-b7b7-4655-ab9c-84afe3185b78",
+   "metadata": {},
+   "source": [
+    "If you have your own ATAC-Seq data, please follow the recommendations in the Morandini et al. paper. Specifically, one needs to count the number of reads for each of the peak regions from the paper (file [here](https://static-content.springer.com/esm/art%3A10.1007%2Fs11357-023-00986-0/MediaObjects/11357_2023_986_MOESM9_ESM.tsv)). This can be done through the code found on their [GitHub](https://github.com/SunScript0/ATAC-clock/blob/main/pipeline_atac/02_peakset_and_counts.sh) using featureCounts."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "76f0a1a6-ad70-4a40-8fd0-63de208c7ad5",
+   "metadata": {},
+   "source": [
+    "For testing purposes, let's download an example of input for the ATAC clocks. For instructions on how to go from raw sequencing reads to the data table, please refer to the paper. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "992db2fb-2b58-4f8b-92ac-f760df3758dc",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting download_example_data function\n",
+      "|-----------> Data found in pyaging_data/GSE193140.pkl\n",
+      "|-----> 🎉 Done! [0.4942s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.data.download_example_data('GSE193140')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "078b2b96-7317-4f84-a8c4-16276fb76137",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_pickle('pyaging_data/GSE193140.pkl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "cd1a8e9b-53a5-4cfc-ac6d-22d9ed09784a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>chr1:817100-817691</th>\n",
+       "      <th>chr1:826742-828191</th>\n",
+       "      <th>chr1:841908-843021</th>\n",
+       "      <th>chr1:844055-844921</th>\n",
+       "      <th>chr1:857908-859108</th>\n",
+       "      <th>chr1:869571-870271</th>\n",
+       "      <th>chr1:898378-899076</th>\n",
+       "      <th>chr1:904303-905702</th>\n",
+       "      <th>chr1:906675-907111</th>\n",
+       "      <th>chr1:912617-913368</th>\n",
+       "      <th>...</th>\n",
+       "      <th>chrY:21073148-21074236</th>\n",
+       "      <th>chrY:21174455-21175401</th>\n",
+       "      <th>chrY:21177324-21177828</th>\n",
+       "      <th>chrY:21180682-21181317</th>\n",
+       "      <th>chrY:21239902-21241040</th>\n",
+       "      <th>chrY:21248553-21249961</th>\n",
+       "      <th>chrY:21256824-21257260</th>\n",
+       "      <th>chrY:21259823-21260874</th>\n",
+       "      <th>chrY:22086084-22086722</th>\n",
+       "      <th>chrY:22499696-22500344</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>CR_124</th>\n",
+       "      <td>182</td>\n",
+       "      <td>2652</td>\n",
+       "      <td>15</td>\n",
+       "      <td>11</td>\n",
+       "      <td>9</td>\n",
+       "      <td>843</td>\n",
+       "      <td>2</td>\n",
+       "      <td>714</td>\n",
+       "      <td>556</td>\n",
+       "      <td>37</td>\n",
+       "      <td>...</td>\n",
+       "      <td>62</td>\n",
+       "      <td>104</td>\n",
+       "      <td>65</td>\n",
+       "      <td>31</td>\n",
+       "      <td>90</td>\n",
+       "      <td>20</td>\n",
+       "      <td>50</td>\n",
+       "      <td>21</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CR_122</th>\n",
+       "      <td>96</td>\n",
+       "      <td>2688</td>\n",
+       "      <td>27</td>\n",
+       "      <td>25</td>\n",
+       "      <td>40</td>\n",
+       "      <td>1097</td>\n",
+       "      <td>13</td>\n",
+       "      <td>786</td>\n",
+       "      <td>167</td>\n",
+       "      <td>12</td>\n",
+       "      <td>...</td>\n",
+       "      <td>11</td>\n",
+       "      <td>13</td>\n",
+       "      <td>31</td>\n",
+       "      <td>25</td>\n",
+       "      <td>270</td>\n",
+       "      <td>37</td>\n",
+       "      <td>29</td>\n",
+       "      <td>18</td>\n",
+       "      <td>9</td>\n",
+       "      <td>12</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CR_121</th>\n",
+       "      <td>137</td>\n",
+       "      <td>2785</td>\n",
+       "      <td>42</td>\n",
+       "      <td>46</td>\n",
+       "      <td>69</td>\n",
+       "      <td>1297</td>\n",
+       "      <td>8</td>\n",
+       "      <td>638</td>\n",
+       "      <td>351</td>\n",
+       "      <td>24</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CR_120</th>\n",
+       "      <td>169</td>\n",
+       "      <td>2819</td>\n",
+       "      <td>29</td>\n",
+       "      <td>35</td>\n",
+       "      <td>46</td>\n",
+       "      <td>1373</td>\n",
+       "      <td>20</td>\n",
+       "      <td>931</td>\n",
+       "      <td>301</td>\n",
+       "      <td>10</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>8</td>\n",
+       "      <td>33</td>\n",
+       "      <td>151</td>\n",
+       "      <td>47</td>\n",
+       "      <td>50</td>\n",
+       "      <td>18</td>\n",
+       "      <td>32</td>\n",
+       "      <td>14</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CR_119</th>\n",
+       "      <td>205</td>\n",
+       "      <td>3005</td>\n",
+       "      <td>18</td>\n",
+       "      <td>45</td>\n",
+       "      <td>37</td>\n",
+       "      <td>1025</td>\n",
+       "      <td>33</td>\n",
+       "      <td>1138</td>\n",
+       "      <td>241</td>\n",
+       "      <td>36</td>\n",
+       "      <td>...</td>\n",
+       "      <td>15</td>\n",
+       "      <td>18</td>\n",
+       "      <td>17</td>\n",
+       "      <td>12</td>\n",
+       "      <td>57</td>\n",
+       "      <td>25</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 80400 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        chr1:817100-817691  chr1:826742-828191  chr1:841908-843021  \\\n",
+       "CR_124                 182                2652                  15   \n",
+       "CR_122                  96                2688                  27   \n",
+       "CR_121                 137                2785                  42   \n",
+       "CR_120                 169                2819                  29   \n",
+       "CR_119                 205                3005                  18   \n",
+       "\n",
+       "        chr1:844055-844921  chr1:857908-859108  chr1:869571-870271  \\\n",
+       "CR_124                  11                   9                 843   \n",
+       "CR_122                  25                  40                1097   \n",
+       "CR_121                  46                  69                1297   \n",
+       "CR_120                  35                  46                1373   \n",
+       "CR_119                  45                  37                1025   \n",
+       "\n",
+       "        chr1:898378-899076  chr1:904303-905702  chr1:906675-907111  \\\n",
+       "CR_124                   2                 714                 556   \n",
+       "CR_122                  13                 786                 167   \n",
+       "CR_121                   8                 638                 351   \n",
+       "CR_120                  20                 931                 301   \n",
+       "CR_119                  33                1138                 241   \n",
+       "\n",
+       "        chr1:912617-913368  ...  chrY:21073148-21074236  \\\n",
+       "CR_124                  37  ...                      62   \n",
+       "CR_122                  12  ...                      11   \n",
+       "CR_121                  24  ...                       0   \n",
+       "CR_120                  10  ...                       7   \n",
+       "CR_119                  36  ...                      15   \n",
+       "\n",
+       "        chrY:21174455-21175401  chrY:21177324-21177828  \\\n",
+       "CR_124                     104                      65   \n",
+       "CR_122                      13                      31   \n",
+       "CR_121                       0                       0   \n",
+       "CR_120                       9                       8   \n",
+       "CR_119                      18                      17   \n",
+       "\n",
+       "        chrY:21180682-21181317  chrY:21239902-21241040  \\\n",
+       "CR_124                      31                      90   \n",
+       "CR_122                      25                     270   \n",
+       "CR_121                       0                       0   \n",
+       "CR_120                      33                     151   \n",
+       "CR_119                      12                      57   \n",
+       "\n",
+       "        chrY:21248553-21249961  chrY:21256824-21257260  \\\n",
+       "CR_124                      20                      50   \n",
+       "CR_122                      37                      29   \n",
+       "CR_121                       0                       0   \n",
+       "CR_120                      47                      50   \n",
+       "CR_119                      25                       7   \n",
+       "\n",
+       "        chrY:21259823-21260874  chrY:22086084-22086722  chrY:22499696-22500344  \n",
+       "CR_124                      21                       2                       2  \n",
+       "CR_122                      18                       9                      12  \n",
+       "CR_121                       0                       0                       0  \n",
+       "CR_120                      18                      32                      14  \n",
+       "CR_119                       8                       7                       7  \n",
+       "\n",
+       "[5 rows x 80400 columns]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "251495e7-082f-45ae-841c-a2dd86a3cb15",
+   "metadata": {},
+   "source": [
+    "## Convert data to AnnData object"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "73ba54e0-4292-4d85-b208-e56e267d6797",
+   "metadata": {},
+   "source": [
+    "AnnData objects are highly flexible and are thus our preferred method of organizing data for age prediction."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "4dab5019-9f54-4e32-be19-abbb1c71a2d6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting df_to_adata function\n",
+      "|-----> ⚙️ Create anndata object started\n",
+      "|-----> ✅ Create anndata object finished [0.0289s]\n",
+      "|-----> ⚙️ Add metadata to anndata started\n",
+      "|-----------? No metadata provided. Leaving adata.obs empty\n",
+      "|-----> ⚠️ Add metadata to anndata finished [0.0004s]\n",
+      "|-----> ⚙️ Log data statistics started\n",
+      "|-----------> There are 157 observations\n",
+      "|-----------> There are 80400 features\n",
+      "|-----------> Total missing values: 0\n",
+      "|-----------> Percentage of missing values: 0.00%\n",
+      "|-----> ✅ Log data statistics finished [0.0049s]\n",
+      "|-----> ⚙️ Impute missing values started\n",
+      "|-----------> No missing values found. No imputation necessary\n",
+      "|-----> ✅ Impute missing values finished [0.0053s]\n",
+      "|-----> 🎉 Done! [0.0419s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "adata = pya.preprocess.df_to_adata(df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5042e04f-17c0-4eb2-8c5d-2c2fc5d6d2d6",
+   "metadata": {},
+   "source": [
+    "Note that the original DataFrame is stored in `X_original` under layers. This is what the `adata` object looks like:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "503da312-2256-4e67-9747-107f5c4587ec",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AnnData object with n_obs × n_vars = 157 × 80400\n",
+       "    var: 'percent_na'\n",
+       "    layers: 'X_original'"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c072990d-0f54-49b3-bb7a-7bbd13301e2a",
+   "metadata": {},
+   "source": [
+    "## Predict age"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5fe08978-f1ba-49b3-a0d8-52df4b6efb4e",
+   "metadata": {},
+   "source": [
+    "We can either predict one clock at a time or all at the same time. For convenience, let's simply input both clocks of interest at once. The function is invariant to the capitalization of the clock name. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "96e008fe-9f8c-45fb-8dc6-6a39f1ecb7ac",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting predict_age function\n",
+      "|-----> ⚙️ Set PyTorch device started\n",
+      "|-----------> Using device: cpu\n",
+      "|-----> ✅ Set PyTorch device finished [0.0006s]\n",
+      "|-----> 🕒 Processing clock: ocampoatac1\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/ocampoatac1.pt\n",
+      "|-----------> ✅ Load clock finished [0.5113s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------> All features are present in adata.var_names.\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_ocampoatac1]\n",
+      "|-----------> ✅ Check features in adata finished [3.8480s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> The preprocessing method is tpm_norm_log1p\n",
+      "|-----------------> There is no postprocessing necessary\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.1635s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0007s]\n",
+      "|-----> 🕒 Processing clock: ocampoatac2\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/ocampoatac2.pt\n",
+      "|-----------> ✅ Load clock finished [0.4514s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------> All features are present in adata.var_names.\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_ocampoatac2]\n",
+      "|-----------> ✅ Check features in adata finished [4.9598s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> The preprocessing method is tpm_norm_log1p\n",
+      "|-----------------> There is no postprocessing necessary\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0690s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0007s]\n",
+      "|-----> 🎉 Done! [10.1175s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.pred.predict_age(adata, ['OcampoATAC1', 'OcampoATAC2'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "b83a10e2-7984-4427-9e85-05329a16feb3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ocampoatac1</th>\n",
+       "      <th>ocampoatac2</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>CR_124</th>\n",
+       "      <td>29.527124</td>\n",
+       "      <td>28.114206</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CR_122</th>\n",
+       "      <td>39.003097</td>\n",
+       "      <td>40.061162</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CR_121</th>\n",
+       "      <td>40.716008</td>\n",
+       "      <td>43.095199</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CR_120</th>\n",
+       "      <td>32.380372</td>\n",
+       "      <td>33.033456</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CR_119</th>\n",
+       "      <td>36.440711</td>\n",
+       "      <td>38.301516</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        ocampoatac1  ocampoatac2\n",
+       "CR_124    29.527124    28.114206\n",
+       "CR_122    39.003097    40.061162\n",
+       "CR_121    40.716008    43.095199\n",
+       "CR_120    32.380372    33.033456\n",
+       "CR_119    36.440711    38.301516"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a2cfa1e5-a7f4-4157-8c66-4afcc2323ef7",
+   "metadata": {},
+   "source": [
+    "Having so much information printed can be overwhelming, particularly when running several clocks at once. In such cases, just set verbose to False."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "055761d9-7e22-49f3-a1db-31c3ed3749ba",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pya.data.download_example_data('GSE193140', verbose=False)\n",
+    "df = pd.read_pickle('pyaging_data/GSE193140.pkl')\n",
+    "adata = pya.preprocess.df_to_adata(df, verbose=False)\n",
+    "pya.pred.predict_age(adata, ['OcampoATAC1', 'OcampoATAC2'], verbose=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "fdd9d6c2-7f0a-4f96-a095-4a492ed73f8d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ocampoatac1</th>\n",
+       "      <th>ocampoatac2</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>CR_124</th>\n",
+       "      <td>29.527124</td>\n",
+       "      <td>28.114206</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CR_122</th>\n",
+       "      <td>39.003097</td>\n",
+       "      <td>40.061162</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CR_121</th>\n",
+       "      <td>40.716008</td>\n",
+       "      <td>43.095199</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CR_120</th>\n",
+       "      <td>32.380372</td>\n",
+       "      <td>33.033456</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CR_119</th>\n",
+       "      <td>36.440711</td>\n",
+       "      <td>38.301516</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        ocampoatac1  ocampoatac2\n",
+       "CR_124    29.527124    28.114206\n",
+       "CR_122    39.003097    40.061162\n",
+       "CR_121    40.716008    43.095199\n",
+       "CR_120    32.380372    33.033456\n",
+       "CR_119    36.440711    38.301516"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4bad3df8-f868-4cf5-be74-00ffd02c18f5",
+   "metadata": {},
+   "source": [
+    "After age prediction, the clocks are added to `adata.obs`. Moreover, the percent of missing values for each clock and other metadata are included in `adata.uns`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "0d13fb55-8a12-4d28-83e9-ec7c9fbbe30c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AnnData object with n_obs × n_vars = 157 × 80400\n",
+       "    obs: 'ocampoatac1', 'ocampoatac2'\n",
+       "    var: 'percent_na'\n",
+       "    uns: 'ocampoatac1_percent_na', 'ocampoatac1_missing_features', 'ocampoatac1_metadata', 'ocampoatac2_percent_na', 'ocampoatac2_missing_features', 'ocampoatac2_metadata'\n",
+       "    layers: 'X_original'"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a4e7ad8d-44ae-4ced-a626-f9e3b2d04114",
+   "metadata": {},
+   "source": [
+    "## Get citation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d7d089b0-5433-47a9-b031-bc4504c6b55d",
+   "metadata": {},
+   "source": [
+    "The doi, citation, and some metadata are automatically added to the AnnData object under `adata.uns[CLOCKNAME_metadata]`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "6b368506-55d1-4b74-be61-817bcf575ade",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'ocampoatac1',\n",
+       " 'data_type': 'atac',\n",
+       " 'species': 'Homo sapiens',\n",
+       " 'year': 2023,\n",
+       " 'approved_by_author': '⌛',\n",
+       " 'citation': 'Morandini, Francesco, et al. \"ATAC-clock: An aging clock based on chromatin accessibility.\" GeroScience (2023): 1-18.',\n",
+       " 'doi': 'https://doi.org/10.1007/s11357-023-00986-0',\n",
+       " 'notes': None,\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.uns['ocampoatac1_metadata']"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.17"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/source/tutorials/tutorial_bloodchemistry.ipynb b/docs/source/tutorials/tutorial_bloodchemistry.ipynb
new file mode 100644
index 0000000..3c25b1c
--- /dev/null
+++ b/docs/source/tutorials/tutorial_bloodchemistry.ipynb
@@ -0,0 +1,633 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "2089cc5b-a025-4928-a331-ad33fd1b6a85",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_bloodchemistry.ipynb) [![Open In nbviewer](https://img.shields.io/badge/View%20in-nbviewer-orange)](https://nbviewer.jupyter.org/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_bloodchemistry.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "31cf37ce-09ee-49d7-a411-719bf65e186e",
+   "metadata": {},
+   "source": [
+    "# Blood chemistry"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3ea2b570-56af-4e4f-9606-d4c6d071554c",
+   "metadata": {},
+   "source": [
+    "This tutorial is a brief guide for the implementation of PhenoAge. Link to [paper](https://www.aging-us.com/article/101414/text)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0a093c7d-dea7-4b34-91bf-08cde6c98011",
+   "metadata": {},
+   "source": [
+    "We just need two packages for this tutorial."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "ad192191-e44f-4994-80ad-ab16cdb7c7e8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd \n",
+    "import pyaging as pya"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d87488d5-731c-469e-ad6f-79c4c9662371",
+   "metadata": {},
+   "source": [
+    "## Download and load example data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4c30471f-89e7-4e92-a176-aa3af14a5274",
+   "metadata": {},
+   "source": [
+    "Let's download some example human blood data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "a0692cf7-e979-4f27-bc14-e1013057c16d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting download_example_data function\n",
+      "|-----------> Data found in pyaging_data/blood_chemistry_example.pkl\n",
+      "|-----> 🎉 Done! [0.5248s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.data.download_example_data('blood_chemistry_example')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "13aeb69a-4b0e-40f2-8094-194c9a6b42a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_pickle('pyaging_data/blood_chemistry_example.pkl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "0106112d-21ad-4991-af9f-74b92f46c55b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>albumin</th>\n",
+       "      <th>creatinine</th>\n",
+       "      <th>glucose</th>\n",
+       "      <th>log_crp</th>\n",
+       "      <th>lymphocyte_percent</th>\n",
+       "      <th>mean_cell_volume</th>\n",
+       "      <th>red_cell_distribution_width</th>\n",
+       "      <th>alkaline_phosphatase</th>\n",
+       "      <th>white_blood_cell_count</th>\n",
+       "      <th>age</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>patient1</th>\n",
+       "      <td>51.8</td>\n",
+       "      <td>87.2</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>-0.2</td>\n",
+       "      <td>27.9</td>\n",
+       "      <td>92.4</td>\n",
+       "      <td>13.9</td>\n",
+       "      <td>123.5</td>\n",
+       "      <td>0.006037</td>\n",
+       "      <td>70.2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>patient2</th>\n",
+       "      <td>53.1</td>\n",
+       "      <td>57.3</td>\n",
+       "      <td>6.1</td>\n",
+       "      <td>-0.2</td>\n",
+       "      <td>27.8</td>\n",
+       "      <td>80.9</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>81.5</td>\n",
+       "      <td>0.004135</td>\n",
+       "      <td>76.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>patient3</th>\n",
+       "      <td>37.4</td>\n",
+       "      <td>114.7</td>\n",
+       "      <td>5.6</td>\n",
+       "      <td>-0.2</td>\n",
+       "      <td>23.6</td>\n",
+       "      <td>83.2</td>\n",
+       "      <td>12.4</td>\n",
+       "      <td>124.4</td>\n",
+       "      <td>0.007382</td>\n",
+       "      <td>66.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>patient4</th>\n",
+       "      <td>45.9</td>\n",
+       "      <td>88.1</td>\n",
+       "      <td>5.4</td>\n",
+       "      <td>-0.2</td>\n",
+       "      <td>38.6</td>\n",
+       "      <td>92.5</td>\n",
+       "      <td>11.4</td>\n",
+       "      <td>113.4</td>\n",
+       "      <td>0.006537</td>\n",
+       "      <td>46.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>patient5</th>\n",
+       "      <td>40.7</td>\n",
+       "      <td>45.4</td>\n",
+       "      <td>4.7</td>\n",
+       "      <td>-0.2</td>\n",
+       "      <td>38.3</td>\n",
+       "      <td>88.8</td>\n",
+       "      <td>13.5</td>\n",
+       "      <td>107.8</td>\n",
+       "      <td>0.004695</td>\n",
+       "      <td>42.3</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "          albumin  creatinine  glucose  log_crp  lymphocyte_percent  \\\n",
+       "patient1     51.8        87.2      4.5     -0.2                27.9   \n",
+       "patient2     53.1        57.3      6.1     -0.2                27.8   \n",
+       "patient3     37.4       114.7      5.6     -0.2                23.6   \n",
+       "patient4     45.9        88.1      5.4     -0.2                38.6   \n",
+       "patient5     40.7        45.4      4.7     -0.2                38.3   \n",
+       "\n",
+       "          mean_cell_volume  red_cell_distribution_width  alkaline_phosphatase  \\\n",
+       "patient1              92.4                         13.9                 123.5   \n",
+       "patient2              80.9                         12.0                  81.5   \n",
+       "patient3              83.2                         12.4                 124.4   \n",
+       "patient4              92.5                         11.4                 113.4   \n",
+       "patient5              88.8                         13.5                 107.8   \n",
+       "\n",
+       "          white_blood_cell_count   age  \n",
+       "patient1                0.006037  70.2  \n",
+       "patient2                0.004135  76.5  \n",
+       "patient3                0.007382  66.4  \n",
+       "patient4                0.006537  46.5  \n",
+       "patient5                0.004695  42.3  "
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "45cbc6e1-9cf7-46a8-ac92-18924a7a5cf8",
+   "metadata": {},
+   "source": [
+    "## Convert data to AnnData object"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ae486006-b533-411b-b449-ff6d2261345a",
+   "metadata": {},
+   "source": [
+    "AnnData objects are highly flexible and are thus our preferred method of organizing data for age prediction."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "acf93ebe-0440-4b1f-9040-05260df459f9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting df_to_adata function\n",
+      "|-----> ⚙️ Create anndata object started\n",
+      "|-----> ✅ Create anndata object finished [0.0029s]\n",
+      "|-----> ⚙️ Add metadata to anndata started\n",
+      "|-----------? No metadata provided. Leaving adata.obs empty\n",
+      "|-----> ⚠️ Add metadata to anndata finished [0.0006s]\n",
+      "|-----> ⚙️ Log data statistics started\n",
+      "|-----------> There are 30 observations\n",
+      "|-----------> There are 10 features\n",
+      "|-----------> Total missing values: 0\n",
+      "|-----------> Percentage of missing values: 0.00%\n",
+      "|-----> ✅ Log data statistics finished [0.0018s]\n",
+      "|-----> ⚙️ Impute missing values started\n",
+      "|-----------> No missing values found. No imputation necessary\n",
+      "|-----> ✅ Impute missing values finished [0.0012s]\n",
+      "|-----> 🎉 Done! [0.0098s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "adata = pya.preprocess.df_to_adata(df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "54dcb802-6dd7-40cc-ab61-073f70778754",
+   "metadata": {},
+   "source": [
+    "Note that the original DataFrame is stored in `X_original` under layers. This is what the `adata` object looks like:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "3cfcf1f4-01d8-4da2-81e9-fee50e051ffe",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AnnData object with n_obs × n_vars = 30 × 10\n",
+       "    var: 'percent_na'\n",
+       "    layers: 'X_original'"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2277ede6-ab9e-487b-a58d-c01cb21b6b68",
+   "metadata": {},
+   "source": [
+    "## Predict age"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "889d2d5f-a596-41d0-b849-560b6bc856a1",
+   "metadata": {},
+   "source": [
+    "We can either predict with one clock at a time or with all of them at once. Given we only have one clock of interest for this tutorial, let's go with one. The function is invariant to the capitalization of the clock name."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "2dbc7beb-79b8-4e99-b36f-36bcd693c864",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting predict_age function\n",
+      "|-----> ⚙️ Set PyTorch device started\n",
+      "|-----------> Using device: cpu\n",
+      "|-----> ✅ Set PyTorch device finished [0.0011s]\n",
+      "|-----> 🕒 Processing clock: phenoage\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/phenoage.pt\n",
+      "|-----------> ✅ Load clock finished [0.4217s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------> All features are present in adata.var_names.\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_phenoage]\n",
+      "|-----------> ✅ Check features in adata finished [0.0050s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is mortality_to_phenoage\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0037s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0020s]\n",
+      "|-----> 🎉 Done! [0.5048s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.pred.predict_age(adata, 'PhenoAge')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "032382f5-7d98-465e-a3cb-51165eeb7025",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>phenoage</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>patient1</th>\n",
+       "      <td>70.643137</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>patient2</th>\n",
+       "      <td>64.834061</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>patient3</th>\n",
+       "      <td>70.258559</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>patient4</th>\n",
+       "      <td>42.979385</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>patient5</th>\n",
+       "      <td>41.677749</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           phenoage\n",
+       "patient1  70.643137\n",
+       "patient2  64.834061\n",
+       "patient3  70.258559\n",
+       "patient4  42.979385\n",
+       "patient5  41.677749"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2acc80b1-f936-40e4-900a-ef4deb304558",
+   "metadata": {},
+   "source": [
+    "Having so much information printed can be overwhelming, particularly when running several clocks at once. In such cases, just set verbose to False."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "a587f129-a88b-46ec-a249-ac62737a0cb7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pya.data.download_example_data('blood_chemistry_example', verbose=False)\n",
+    "df = pd.read_pickle('pyaging_data/blood_chemistry_example.pkl')\n",
+    "adata = pya.preprocess.df_to_adata(df, verbose=False)\n",
+    "pya.pred.predict_age(adata, ['PhenoAge'], verbose=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "99fbe406-d076-4979-a2f4-70469755937f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>phenoage</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>patient1</th>\n",
+       "      <td>70.643137</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>patient2</th>\n",
+       "      <td>64.834061</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>patient3</th>\n",
+       "      <td>70.258559</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>patient4</th>\n",
+       "      <td>42.979385</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>patient5</th>\n",
+       "      <td>41.677749</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           phenoage\n",
+       "patient1  70.643137\n",
+       "patient2  64.834061\n",
+       "patient3  70.258559\n",
+       "patient4  42.979385\n",
+       "patient5  41.677749"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "72f0eb22-76f2-41b5-b20f-824548215122",
+   "metadata": {},
+   "source": [
+    "After age prediction, the clocks are added to `adata.obs`. Moreover, the percent of missing values for each clock and other metadata are included in `adata.uns`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "a778028a-7ee6-419c-9be6-e7046a9d8f9a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AnnData object with n_obs × n_vars = 30 × 10\n",
+       "    obs: 'phenoage'\n",
+       "    var: 'percent_na'\n",
+       "    uns: 'phenoage_percent_na', 'phenoage_missing_features', 'phenoage_metadata'\n",
+       "    layers: 'X_original'"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1a73e164-a610-4cb6-93f5-6f8ac7d8d56f",
+   "metadata": {},
+   "source": [
+    "## Get citation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6c7a070c-c448-4ad7-ae0b-21857dafd00e",
+   "metadata": {},
+   "source": [
+    "The doi, citation, and some metadata are automatically added to the AnnData object under `adata.uns[CLOCKNAME_metadata]`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "9908d25a-9639-4684-9da6-353c7eb4a555",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'phenoage',\n",
+       " 'data_type': 'blood chemistry',\n",
+       " 'species': 'Homo sapiens',\n",
+       " 'year': 2018,\n",
+       " 'approved_by_author': '⌛',\n",
+       " 'citation': 'Levine, Morgan E., et al. \"An epigenetic biomarker of aging for lifespan and healthspan.\" Aging (albany NY) 10.4 (2018): 573.',\n",
+       " 'doi': 'https://doi.org/10.18632%2Faging.101414',\n",
+       " 'notes': None,\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.uns['phenoage_metadata']"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.17"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/source/tutorials/tutorial_dnam_illumina_human_array.ipynb b/docs/source/tutorials/tutorial_dnam_illumina_human_array.ipynb
new file mode 100644
index 0000000..9a9d827
--- /dev/null
+++ b/docs/source/tutorials/tutorial_dnam_illumina_human_array.ipynb
@@ -0,0 +1,1186 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "4e690b3c-4dec-450e-a7f8-f63987e60cdb",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_dnam_illumina_human_array.ipynb) [![Open In nbviewer](https://img.shields.io/badge/View%20in-nbviewer-orange)](https://nbviewer.jupyter.org/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_dnam_illumina_human_array.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "62e1ac68-927d-4ca8-a2ab-bd99a7ee52ab",
+   "metadata": {},
+   "source": [
+    "# Illumina Human Methylation Arrays"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "41699e8b-e682-4617-867c-fd0a624ae0ef",
+   "metadata": {},
+   "source": [
+    "This tutorial is a brief guide for the implementation of an array of bulk DNA-methylation epigenetic clocks that predict age in humans. In this notebook, we will demonstrate the breadth of epigenetic clock models available in `pyaging` by showing:\n",
+    "\n",
+    "- Horvath's 2013 ElasticNet-based clock ([paper](https://genomebiology.biomedcentral.com/articles/10.1186/gb-2013-14-10-r115));\n",
+    "  \n",
+    "- AltumAge, a highly accurate deep-learning based clock ([paper](https://www.nature.com/articles/s41514-022-00085-y));\n",
+    "    \n",
+    "- PCGrimAge, a principal-component based version of the GrimAge clock ([paper](https://www.nature.com/articles/s43587-022-00248-2));\n",
+    "\n",
+    "- GrimAge2, the latest version of GrimAge ([paper](https://www.aging-us.com/article/204434/text));\n",
+    "\n",
+    "- DunedinPACE, a biomarker of the pace of aging ([paper](https://elifesciences.org/articles/73420))."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d8ddf439-0b19-4a1a-9491-3a0e1ee94447",
+   "metadata": {},
+   "source": [
+    "We just need two packages for this tutorial."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "dc347ae0-41ae-46ac-ba50-08cacd4c9241",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import pyaging as pya"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f3eebcc0-d3a2-42a4-b87f-1637d0db2588",
+   "metadata": {},
+   "source": [
+    "## Download and load example data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "096cb9c0-67d1-4a37-958e-13da15b15435",
+   "metadata": {},
+   "source": [
+    "Let's download the publicly available dataset GSE139307 with Illumina's 450k array. The CpG coverage of the 450k array should be good enough for most clocks."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "4b2680c6-0c91-4d6a-bd12-6f9cc06932f7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting download_example_data function\n",
+      "|-----------> Data found in pyaging_data/GSE139307.pkl\n",
+      "|-----> 🎉 Done! [0.0006s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.data.download_example_data('GSE139307')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "3192d1bd-f5d6-426c-8c65-29971e46c4b4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_pickle('pyaging_data/GSE139307.pkl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "ddf9e1d5-ddb1-42e2-9dfc-9cf4441b326d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>dataset</th>\n",
+       "      <th>tissue_type</th>\n",
+       "      <th>age</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>cg00000029</th>\n",
+       "      <th>cg00000108</th>\n",
+       "      <th>cg00000109</th>\n",
+       "      <th>cg00000165</th>\n",
+       "      <th>cg00000236</th>\n",
+       "      <th>cg00000289</th>\n",
+       "      <th>...</th>\n",
+       "      <th>ch.X.93511680F</th>\n",
+       "      <th>ch.X.938089F</th>\n",
+       "      <th>ch.X.94051109R</th>\n",
+       "      <th>ch.X.94260649R</th>\n",
+       "      <th>ch.X.967194F</th>\n",
+       "      <th>ch.X.97129969R</th>\n",
+       "      <th>ch.X.97133160R</th>\n",
+       "      <th>ch.X.97651759F</th>\n",
+       "      <th>ch.X.97737721F</th>\n",
+       "      <th>ch.X.98007042R</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>GSM4137709</th>\n",
+       "      <td>GSE139307</td>\n",
+       "      <td>sperm</td>\n",
+       "      <td>84.0</td>\n",
+       "      <td>M</td>\n",
+       "      <td>0.084811</td>\n",
+       "      <td>0.920696</td>\n",
+       "      <td>0.856851</td>\n",
+       "      <td>0.084567</td>\n",
+       "      <td>0.838699</td>\n",
+       "      <td>0.247273</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.061751</td>\n",
+       "      <td>0.045942</td>\n",
+       "      <td>0.037631</td>\n",
+       "      <td>0.056455</td>\n",
+       "      <td>0.249872</td>\n",
+       "      <td>0.049022</td>\n",
+       "      <td>0.085691</td>\n",
+       "      <td>0.037435</td>\n",
+       "      <td>0.077820</td>\n",
+       "      <td>0.106234</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4137710</th>\n",
+       "      <td>GSE139307</td>\n",
+       "      <td>sperm</td>\n",
+       "      <td>69.0</td>\n",
+       "      <td>M</td>\n",
+       "      <td>0.099626</td>\n",
+       "      <td>0.919073</td>\n",
+       "      <td>0.890024</td>\n",
+       "      <td>0.115541</td>\n",
+       "      <td>0.852584</td>\n",
+       "      <td>0.198103</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.075077</td>\n",
+       "      <td>0.041849</td>\n",
+       "      <td>0.032573</td>\n",
+       "      <td>0.089790</td>\n",
+       "      <td>0.250245</td>\n",
+       "      <td>0.079095</td>\n",
+       "      <td>0.079756</td>\n",
+       "      <td>0.046229</td>\n",
+       "      <td>0.091256</td>\n",
+       "      <td>0.120241</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4137711</th>\n",
+       "      <td>GSE139307</td>\n",
+       "      <td>sperm</td>\n",
+       "      <td>69.0</td>\n",
+       "      <td>M</td>\n",
+       "      <td>0.117228</td>\n",
+       "      <td>0.920276</td>\n",
+       "      <td>0.894317</td>\n",
+       "      <td>0.117127</td>\n",
+       "      <td>0.839258</td>\n",
+       "      <td>0.213410</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.068679</td>\n",
+       "      <td>0.049515</td>\n",
+       "      <td>0.058097</td>\n",
+       "      <td>0.079919</td>\n",
+       "      <td>0.299758</td>\n",
+       "      <td>0.079305</td>\n",
+       "      <td>0.089815</td>\n",
+       "      <td>0.065364</td>\n",
+       "      <td>0.086864</td>\n",
+       "      <td>0.156005</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4137712</th>\n",
+       "      <td>GSE139307</td>\n",
+       "      <td>sperm</td>\n",
+       "      <td>69.0</td>\n",
+       "      <td>M</td>\n",
+       "      <td>0.077096</td>\n",
+       "      <td>0.910204</td>\n",
+       "      <td>0.908400</td>\n",
+       "      <td>0.073885</td>\n",
+       "      <td>0.861615</td>\n",
+       "      <td>0.163276</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.070091</td>\n",
+       "      <td>0.033289</td>\n",
+       "      <td>0.038836</td>\n",
+       "      <td>0.108213</td>\n",
+       "      <td>0.295428</td>\n",
+       "      <td>0.050731</td>\n",
+       "      <td>0.099943</td>\n",
+       "      <td>0.047597</td>\n",
+       "      <td>0.078480</td>\n",
+       "      <td>0.107480</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4137713</th>\n",
+       "      <td>GSE139307</td>\n",
+       "      <td>sperm</td>\n",
+       "      <td>67.0</td>\n",
+       "      <td>M</td>\n",
+       "      <td>0.063524</td>\n",
+       "      <td>0.911608</td>\n",
+       "      <td>0.884643</td>\n",
+       "      <td>0.079877</td>\n",
+       "      <td>0.864654</td>\n",
+       "      <td>0.176169</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.082368</td>\n",
+       "      <td>0.038411</td>\n",
+       "      <td>0.048787</td>\n",
+       "      <td>0.088631</td>\n",
+       "      <td>0.316694</td>\n",
+       "      <td>0.041873</td>\n",
+       "      <td>0.079303</td>\n",
+       "      <td>0.048823</td>\n",
+       "      <td>0.089010</td>\n",
+       "      <td>0.117903</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 485516 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "              dataset tissue_type   age gender  cg00000029  cg00000108  \\\n",
+       "GSM4137709  GSE139307       sperm  84.0      M    0.084811    0.920696   \n",
+       "GSM4137710  GSE139307       sperm  69.0      M    0.099626    0.919073   \n",
+       "GSM4137711  GSE139307       sperm  69.0      M    0.117228    0.920276   \n",
+       "GSM4137712  GSE139307       sperm  69.0      M    0.077096    0.910204   \n",
+       "GSM4137713  GSE139307       sperm  67.0      M    0.063524    0.911608   \n",
+       "\n",
+       "            cg00000109  cg00000165  cg00000236  cg00000289  ...  \\\n",
+       "GSM4137709    0.856851    0.084567    0.838699    0.247273  ...   \n",
+       "GSM4137710    0.890024    0.115541    0.852584    0.198103  ...   \n",
+       "GSM4137711    0.894317    0.117127    0.839258    0.213410  ...   \n",
+       "GSM4137712    0.908400    0.073885    0.861615    0.163276  ...   \n",
+       "GSM4137713    0.884643    0.079877    0.864654    0.176169  ...   \n",
+       "\n",
+       "            ch.X.93511680F  ch.X.938089F  ch.X.94051109R  ch.X.94260649R  \\\n",
+       "GSM4137709        0.061751      0.045942        0.037631        0.056455   \n",
+       "GSM4137710        0.075077      0.041849        0.032573        0.089790   \n",
+       "GSM4137711        0.068679      0.049515        0.058097        0.079919   \n",
+       "GSM4137712        0.070091      0.033289        0.038836        0.108213   \n",
+       "GSM4137713        0.082368      0.038411        0.048787        0.088631   \n",
+       "\n",
+       "            ch.X.967194F  ch.X.97129969R  ch.X.97133160R  ch.X.97651759F  \\\n",
+       "GSM4137709      0.249872        0.049022        0.085691        0.037435   \n",
+       "GSM4137710      0.250245        0.079095        0.079756        0.046229   \n",
+       "GSM4137711      0.299758        0.079305        0.089815        0.065364   \n",
+       "GSM4137712      0.295428        0.050731        0.099943        0.047597   \n",
+       "GSM4137713      0.316694        0.041873        0.079303        0.048823   \n",
+       "\n",
+       "            ch.X.97737721F  ch.X.98007042R  \n",
+       "GSM4137709        0.077820        0.106234  \n",
+       "GSM4137710        0.091256        0.120241  \n",
+       "GSM4137711        0.086864        0.156005  \n",
+       "GSM4137712        0.078480        0.107480  \n",
+       "GSM4137713        0.089010        0.117903  \n",
+       "\n",
+       "[5 rows x 485516 columns]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cfbebcff-687e-4199-8b8a-6712577cebc0",
+   "metadata": {},
+   "source": [
+    "For PCGrimAge and GrimAge2, both age and sex are features. Therefore, to get the full prediction, let's convert the column `gender` into a column called `female`, with 1 being female and 0 being male."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "671906c4-ae7e-4808-859a-470e19757477",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# needs only numerical data (doesn't work with strings)\n",
+    "df['female'] = (df['gender'] == 'F').astype(int)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "125e979c-3149-4e00-9a78-820b9c89c0ef",
+   "metadata": {},
+   "source": [
+    "Moreover, it is important to note that some probes are duplicated in the EPICv2 array, following the format cg#########_BC11 and cg#########_TC11 for the opposite strands. Given that at this moment most clocks have not been trained with EPICv2 data directly, it is recommended to average these probes. This is particularly the case for DunedinPACE, from which some clock probes were duplicated in the update from EPICv1. To remedy this issue, simply use the following function to aggregate any duplicated probes that may be present."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "cdb19129-49b3-4b81-b264-73ccbc7943c3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting epicv2_probe_aggregation function\n",
+      "|-----> ⚙️ Looking for duplicated probes started\n",
+      "|-----------> in progress: 100.0000%\n",
+      "|-----------> There are no duplicated probes. Returning original data\n",
+      "|-----> 🎉 Done! [7.6244s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "df = pya.pp.epicv2_probe_aggregation(df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "798b8998-dc50-44ac-bc4e-c97c4f1ec183",
+   "metadata": {},
+   "source": [
+    "## Convert data to AnnData object"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f8269bff-fadf-4524-801d-6294655c005b",
+   "metadata": {},
+   "source": [
+    "AnnData objects are highly flexible and are thus our preferred method of organizing data for age prediction."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "037192c8-ef2e-440a-88ae-06bddbab80a8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting df_to_adata function\n",
+      "|-----> ⚙️ Create anndata object started\n",
+      "|-----------? Dropping 1 columns with only NAs: ['cg01550828'], etc.\n",
+      "|-----> ⚠️ Create anndata object finished [0.3588s]\n",
+      "|-----> ⚙️ Add metadata to anndata started\n",
+      "|-----------> Adding provided metadata to adata.obs\n",
+      "|-----> ✅ Add metadata to anndata finished [0.0007s]\n",
+      "|-----> ⚙️ Log data statistics started\n",
+      "|-----------> There are 37 observations\n",
+      "|-----------> There are 485513 features\n",
+      "|-----------> Total missing values: 489\n",
+      "|-----------> Percentage of missing values: 0.00%\n",
+      "|-----> ✅ Log data statistics finished [0.0203s]\n",
+      "|-----> ⚙️ Impute missing values started\n",
+      "|-----------> Imputing missing values using knn strategy\n",
+      "|-----> ✅ Impute missing values finished [5.1869s]\n",
+      "|-----> ⚙️ Add imputer strategy to adata.uns started\n",
+      "|-----> ✅ Add imputer strategy to adata.uns finished [0.0002s]\n",
+      "|-----> 🎉 Done! [5.6547s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "adata = pya.pp.df_to_adata(df, metadata_cols=['gender', 'tissue_type', 'dataset'], imputer_strategy='knn')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3f4e479d-0b80-4079-9cd5-79f7d5130ba4",
+   "metadata": {},
+   "source": [
+    "Note that the original DataFrame is stored in `X_original` under layers. This is what the `adata` object looks like:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "3259b596-5679-42c1-967b-5297f1612cf4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AnnData object with n_obs × n_vars = 37 × 485513\n",
+       "    obs: 'gender', 'tissue_type', 'dataset'\n",
+       "    var: 'percent_na'\n",
+       "    uns: 'imputer_strategy'\n",
+       "    layers: 'X_original', 'X_imputed'"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bff7c621-f7b7-4fc0-9c91-6f82a9211e8b",
+   "metadata": {},
+   "source": [
+    "## Predict age"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "07f5a0ae-1901-4f49-af64-320974584231",
+   "metadata": {},
+   "source": [
+    "We can either predict one clock at a time or all at the same time. For convenience, let's simply input all five clocks of interest at once. The function is invariant to the capitalization of the clock name. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "5383ef0f-ff8b-4e41-bbb6-7fd7ff6cc6be",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting predict_age function\n",
+      "|-----> ⚙️ Set PyTorch device started\n",
+      "|-----------> Using device: cpu\n",
+      "|-----> ✅ Set PyTorch device finished [0.0008s]\n",
+      "|-----> 🕒 Processing clock: horvath2013\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Downloading data to pyaging_data/horvath2013.pt\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Load clock finished [0.5065s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------> All features are present in adata.var_names.\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_horvath2013]\n",
+      "|-----------> ✅ Check features in adata finished [0.0667s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is anti_log_linear\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0108s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0007s]\n",
+      "|-----> 🕒 Processing clock: altumage\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Downloading data to pyaging_data/altumage.pt\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Load clock finished [6.2463s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------> All features are present in adata.var_names.\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_altumage]\n",
+      "|-----------> ✅ Check features in adata finished [1.8184s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> The preprocessing method is scale\n",
+      "|-----------------> There is no postprocessing necessary\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0105s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0006s]\n",
+      "|-----> 🕒 Processing clock: pcgrimage\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Downloading data to pyaging_data/pcgrimage.pt\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Load clock finished [173.4044s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------> All features are present in adata.var_names.\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_pcgrimage]\n",
+      "|-----------> ✅ Check features in adata finished [7.7016s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> There is no postprocessing necessary\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.1926s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0007s]\n",
+      "|-----> 🕒 Processing clock: grimage2\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Downloading data to pyaging_data/grimage2.pt\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Load clock finished [0.6879s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------> All features are present in adata.var_names.\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_grimage2]\n",
+      "|-----------> ✅ Check features in adata finished [0.1193s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is cox_to_years\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0028s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0006s]\n",
+      "|-----> 🕒 Processing clock: dunedinpace\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Downloading data to pyaging_data/dunedinpace.pt\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Load clock finished [0.8928s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------> All features are present in adata.var_names.\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_dunedinpace]\n",
+      "|-----------> ✅ Check features in adata finished [2.4793s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> The preprocessing method is quantile_normalization_with_gold_standard\n",
+      "|-----------------> There is no postprocessing necessary\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0608s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0006s]\n",
+      "|-----> 🎉 Done! [197.5751s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.pred.predict_age(adata, ['Horvath2013', 'AltumAge', 'PCGrimAge', 'GrimAge2', 'DunedinPACE'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "9c0c6926-2944-4274-aefa-eb099b8e6737",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>gender</th>\n",
+       "      <th>tissue_type</th>\n",
+       "      <th>dataset</th>\n",
+       "      <th>horvath2013</th>\n",
+       "      <th>altumage</th>\n",
+       "      <th>pcgrimage</th>\n",
+       "      <th>grimage2</th>\n",
+       "      <th>dunedinpace</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>GSM4137709</th>\n",
+       "      <td>M</td>\n",
+       "      <td>sperm</td>\n",
+       "      <td>GSE139307</td>\n",
+       "      <td>33.624776</td>\n",
+       "      <td>37.007213</td>\n",
+       "      <td>95.506114</td>\n",
+       "      <td>77.581057</td>\n",
+       "      <td>1.326327</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4137710</th>\n",
+       "      <td>M</td>\n",
+       "      <td>sperm</td>\n",
+       "      <td>GSE139307</td>\n",
+       "      <td>28.829344</td>\n",
+       "      <td>29.426899</td>\n",
+       "      <td>83.934244</td>\n",
+       "      <td>65.926346</td>\n",
+       "      <td>1.215611</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4137711</th>\n",
+       "      <td>M</td>\n",
+       "      <td>sperm</td>\n",
+       "      <td>GSE139307</td>\n",
+       "      <td>28.316545</td>\n",
+       "      <td>22.798928</td>\n",
+       "      <td>82.709334</td>\n",
+       "      <td>63.358341</td>\n",
+       "      <td>1.271091</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4137712</th>\n",
+       "      <td>M</td>\n",
+       "      <td>sperm</td>\n",
+       "      <td>GSE139307</td>\n",
+       "      <td>24.850630</td>\n",
+       "      <td>18.079173</td>\n",
+       "      <td>84.269462</td>\n",
+       "      <td>60.218880</td>\n",
+       "      <td>1.276866</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4137713</th>\n",
+       "      <td>M</td>\n",
+       "      <td>sperm</td>\n",
+       "      <td>GSE139307</td>\n",
+       "      <td>25.942111</td>\n",
+       "      <td>20.071985</td>\n",
+       "      <td>84.356985</td>\n",
+       "      <td>61.235919</td>\n",
+       "      <td>1.262023</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           gender tissue_type    dataset  horvath2013   altumage  pcgrimage  \\\n",
+       "GSM4137709      M       sperm  GSE139307    33.624776  37.007213  95.506114   \n",
+       "GSM4137710      M       sperm  GSE139307    28.829344  29.426899  83.934244   \n",
+       "GSM4137711      M       sperm  GSE139307    28.316545  22.798928  82.709334   \n",
+       "GSM4137712      M       sperm  GSE139307    24.850630  18.079173  84.269462   \n",
+       "GSM4137713      M       sperm  GSE139307    25.942111  20.071985  84.356985   \n",
+       "\n",
+       "             grimage2  dunedinpace  \n",
+       "GSM4137709  77.581057     1.326327  \n",
+       "GSM4137710  65.926346     1.215611  \n",
+       "GSM4137711  63.358341     1.271091  \n",
+       "GSM4137712  60.218880     1.276866  \n",
+       "GSM4137713  61.235919     1.262023  "
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0274db5f-c66d-4b74-a4f2-d5e40ed2824e",
+   "metadata": {},
+   "source": [
+    "Out of curiosity, we can also check whether the predictions of these clocks are correlated with one another."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "bbe696da-df04-4e28-b72a-b70bf6a9bffc",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>horvath2013</th>\n",
+       "      <th>altumage</th>\n",
+       "      <th>pcgrimage</th>\n",
+       "      <th>grimage2</th>\n",
+       "      <th>dunedinpace</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>horvath2013</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.676242</td>\n",
+       "      <td>0.211881</td>\n",
+       "      <td>0.459193</td>\n",
+       "      <td>0.354771</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>altumage</th>\n",
+       "      <td>0.676242</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.156456</td>\n",
+       "      <td>0.440044</td>\n",
+       "      <td>0.164101</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>pcgrimage</th>\n",
+       "      <td>0.211881</td>\n",
+       "      <td>0.156456</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.859490</td>\n",
+       "      <td>0.061491</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>grimage2</th>\n",
+       "      <td>0.459193</td>\n",
+       "      <td>0.440044</td>\n",
+       "      <td>0.859490</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.183725</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>dunedinpace</th>\n",
+       "      <td>0.354771</td>\n",
+       "      <td>0.164101</td>\n",
+       "      <td>0.061491</td>\n",
+       "      <td>0.183725</td>\n",
+       "      <td>1.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             horvath2013  altumage  pcgrimage  grimage2  dunedinpace\n",
+       "horvath2013     1.000000  0.676242   0.211881  0.459193     0.354771\n",
+       "altumage        0.676242  1.000000   0.156456  0.440044     0.164101\n",
+       "pcgrimage       0.211881  0.156456   1.000000  0.859490     0.061491\n",
+       "grimage2        0.459193  0.440044   0.859490  1.000000     0.183725\n",
+       "dunedinpace     0.354771  0.164101   0.061491  0.183725     1.000000"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.obs.iloc[:, 3:].corr('pearson')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "069520e6-0ffe-43d5-a7c1-c3f726b0a1ac",
+   "metadata": {},
+   "source": [
+    "Having so much information printed can be overwhelming, particularly when running several clocks at once. In such cases, just set `verbose` to `False`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "7009e4a4-90c6-4c4f-9bbd-bcb3e3836eee",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>gender</th>\n",
+       "      <th>tissue_type</th>\n",
+       "      <th>dataset</th>\n",
+       "      <th>horvath2013</th>\n",
+       "      <th>altumage</th>\n",
+       "      <th>pcgrimage</th>\n",
+       "      <th>grimage2</th>\n",
+       "      <th>dunedinpace</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>GSM4137709</th>\n",
+       "      <td>M</td>\n",
+       "      <td>sperm</td>\n",
+       "      <td>GSE139307</td>\n",
+       "      <td>33.624776</td>\n",
+       "      <td>37.007213</td>\n",
+       "      <td>95.505780</td>\n",
+       "      <td>77.581057</td>\n",
+       "      <td>1.326308</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4137710</th>\n",
+       "      <td>M</td>\n",
+       "      <td>sperm</td>\n",
+       "      <td>GSE139307</td>\n",
+       "      <td>28.829344</td>\n",
+       "      <td>29.426899</td>\n",
+       "      <td>83.934244</td>\n",
+       "      <td>65.926346</td>\n",
+       "      <td>1.215614</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4137711</th>\n",
+       "      <td>M</td>\n",
+       "      <td>sperm</td>\n",
+       "      <td>GSE139307</td>\n",
+       "      <td>28.316545</td>\n",
+       "      <td>22.805551</td>\n",
+       "      <td>82.709334</td>\n",
+       "      <td>63.358341</td>\n",
+       "      <td>1.271033</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4137712</th>\n",
+       "      <td>M</td>\n",
+       "      <td>sperm</td>\n",
+       "      <td>GSE139307</td>\n",
+       "      <td>24.850630</td>\n",
+       "      <td>18.060107</td>\n",
+       "      <td>84.269462</td>\n",
+       "      <td>60.218880</td>\n",
+       "      <td>1.276866</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4137713</th>\n",
+       "      <td>M</td>\n",
+       "      <td>sperm</td>\n",
+       "      <td>GSE139307</td>\n",
+       "      <td>25.942111</td>\n",
+       "      <td>20.071985</td>\n",
+       "      <td>84.356985</td>\n",
+       "      <td>61.235919</td>\n",
+       "      <td>1.262023</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           gender tissue_type    dataset  horvath2013   altumage  pcgrimage  \\\n",
+       "GSM4137709      M       sperm  GSE139307    33.624776  37.007213  95.505780   \n",
+       "GSM4137710      M       sperm  GSE139307    28.829344  29.426899  83.934244   \n",
+       "GSM4137711      M       sperm  GSE139307    28.316545  22.805551  82.709334   \n",
+       "GSM4137712      M       sperm  GSE139307    24.850630  18.060107  84.269462   \n",
+       "GSM4137713      M       sperm  GSE139307    25.942111  20.071985  84.356985   \n",
+       "\n",
+       "             grimage2  dunedinpace  \n",
+       "GSM4137709  77.581057     1.326308  \n",
+       "GSM4137710  65.926346     1.215614  \n",
+       "GSM4137711  63.358341     1.271033  \n",
+       "GSM4137712  60.218880     1.276866  \n",
+       "GSM4137713  61.235919     1.262023  "
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pya.data.download_example_data('GSE139307', verbose=False)\n",
+    "df = pd.read_pickle('pyaging_data/GSE139307.pkl')\n",
+    "df['female'] = (df['gender'] == 'F').astype(int)\n",
+    "df = pya.pp.epicv2_probe_aggregation(df, verbose=False)\n",
+    "adata = pya.preprocess.df_to_adata(df, metadata_cols=['gender', 'tissue_type', 'dataset'], imputer_strategy='mean', verbose=False)\n",
+    "pya.pred.predict_age(adata, ['Horvath2013', 'AltumAge', 'PCGrimAge', 'GrimAge2', 'DunedinPACE'], verbose=False)\n",
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6f246126-0be1-47b2-86f4-c81dcc752da0",
+   "metadata": {},
+   "source": [
+    "After age prediction, the clocks are added to `adata.obs`. Moreover, the percent of missing values for each clock and other metadata are included in `adata.uns`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "14649c41-167e-4771-a4de-1ed2ae72eb51",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AnnData object with n_obs × n_vars = 37 × 485513\n",
+       "    obs: 'gender', 'tissue_type', 'dataset', 'horvath2013', 'altumage', 'pcgrimage', 'grimage2', 'dunedinpace'\n",
+       "    var: 'percent_na'\n",
+       "    uns: 'imputer_strategy', 'horvath2013_percent_na', 'horvath2013_missing_features', 'horvath2013_metadata', 'altumage_percent_na', 'altumage_missing_features', 'altumage_metadata', 'pcgrimage_percent_na', 'pcgrimage_missing_features', 'pcgrimage_metadata', 'grimage2_percent_na', 'grimage2_missing_features', 'grimage2_metadata', 'dunedinpace_percent_na', 'dunedinpace_missing_features', 'dunedinpace_metadata'\n",
+       "    layers: 'X_original', 'X_imputed'"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ba818dde-1561-4184-9c54-3aedc5c8de8e",
+   "metadata": {},
+   "source": [
+    "We can also look at which features seem to be missing from each clock (if there are any)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "083b2f80-c191-4f84-abcc-d18aa9659e99",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.uns['dunedinpace_missing_features']"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "123ea2ce-8db1-492d-9d13-c57447030ad8",
+   "metadata": {},
+   "source": [
+    "## Get citation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "99ad630f-a3b1-4cf7-a180-b8b56bd548e1",
+   "metadata": {},
+   "source": [
+    "The doi, citation, and some metadata are automatically added to the AnnData object under `adata.uns[CLOCKNAME_metadata]`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "7d2d10dc-4ffe-4940-a7f1-2041b933f7b6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'horvath2013',\n",
+       " 'data_type': 'methylation',\n",
+       " 'species': 'Homo sapiens',\n",
+       " 'year': 2013,\n",
+       " 'approved_by_author': '⌛',\n",
+       " 'citation': 'Horvath, Steve. \"DNA methylation age of human tissues and cell types.\" Genome biology 14.10 (2013): 1-20.',\n",
+       " 'doi': 'https://doi.org/10.1186/gb-2013-14-10-r115',\n",
+       " 'notes': None,\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.uns['horvath2013_metadata']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "e2a5311a-ed7d-4e1b-9fbb-b4ad676ce9da",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'altumage',\n",
+       " 'data_type': 'methylation',\n",
+       " 'species': 'Homo sapiens',\n",
+       " 'year': 2022,\n",
+       " 'approved_by_author': '✅',\n",
+       " 'citation': 'de Lima Camillo, Lucas Paulo, Louis R. Lapierre, and Ritambhara Singh. \"A pan-tissue DNA-methylation epigenetic clock based on deep learning.\" npj Aging 8.1 (2022): 4.',\n",
+       " 'doi': 'https://doi.org/10.1038/s41514-022-00085-y',\n",
+       " 'notes': None,\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.uns['altumage_metadata']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "0bbefc0c-acc8-47db-84dc-5ebe80d08500",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'pcgrimage',\n",
+       " 'data_type': 'methylation',\n",
+       " 'species': 'Homo sapiens',\n",
+       " 'year': 2022,\n",
+       " 'approved_by_author': '⌛',\n",
+       " 'citation': 'Higgins-Chen, Albert T., et al. \"A computational solution for bolstering reliability of epigenetic clocks: Implications for clinical trials and longitudinal tracking.\" Nature aging 2.7 (2022): 644-661.',\n",
+       " 'doi': 'https://doi.org/10.1038/s43587-022-00248-2',\n",
+       " 'notes': None,\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.uns['pcgrimage_metadata']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "9ddfb7c3-83ef-44a6-ace3-ffd5553a5770",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'grimage2',\n",
+       " 'data_type': 'methylation',\n",
+       " 'species': 'Homo sapiens',\n",
+       " 'year': 2022,\n",
+       " 'approved_by_author': '⌛',\n",
+       " 'citation': 'Lu, Ake T., et al. \"DNA methylation GrimAge version 2.\" Aging (Albany NY) 14.23 (2022): 9484.',\n",
+       " 'doi': 'https://doi.org/10.18632/aging.204434',\n",
+       " 'notes': None,\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.uns['grimage2_metadata']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "54497c7f-e1cb-4dd4-815a-182af52155b4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'dunedinpace',\n",
+       " 'data_type': 'methylation',\n",
+       " 'species': 'Homo sapiens',\n",
+       " 'year': 2022,\n",
+       " 'approved_by_author': '⌛',\n",
+       " 'citation': 'Belsky, Daniel W., et al. \"DunedinPACE, a DNA methylation biomarker of the pace of aging.\" Elife 11 (2022): e73420.',\n",
+       " 'doi': 'https://doi.org/10.7554/eLife.73420',\n",
+       " 'notes': \"The automatic failure if fewer than 80% of the CpG probes are available is not implemented and left to the user's discretion.\",\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.uns['dunedinpace_metadata']"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.17"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/source/tutorials/tutorial_dnam_illumina_mammalian_array.ipynb b/docs/source/tutorials/tutorial_dnam_illumina_mammalian_array.ipynb
new file mode 100644
index 0000000..777e70d
--- /dev/null
+++ b/docs/source/tutorials/tutorial_dnam_illumina_mammalian_array.ipynb
@@ -0,0 +1,1616 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "4e690b3c-4dec-450e-a7f8-f63987e60cdb",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_dnam_illumina_mammalian_array.ipynb) [![Open In nbviewer](https://img.shields.io/badge/View%20in-nbviewer-orange)](https://nbviewer.jupyter.org/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_dnam_illumina_mammalian_array.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "62e1ac68-927d-4ca8-a2ab-bd99a7ee52ab",
+   "metadata": {},
+   "source": [
+    "# Illumina Mammalian Methylation Arrays"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d8ddf439-0b19-4a1a-9491-3a0e1ee94447",
+   "metadata": {},
+   "source": [
+    "We just need two packages for this tutorial."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "dc347ae0-41ae-46ac-ba50-08cacd4c9241",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import pyaging as pya"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d5e6b7de-7f34-4ee5-935a-00a0e2fb8945",
+   "metadata": {},
+   "source": [
+    "## Download and load example data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8c6ba751-380e-4b91-b4a2-c20e05711380",
+   "metadata": {},
+   "source": [
+    "Let's use the publicly available dataset GSE223748, which was generated with Illumina's Mammalian Methylation array. The CpG coverage of this array (~37k) spans highly conserved CpG sequences. Let's download a subset of that data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "7a34a4cc-1756-485d-bec6-305693eea35b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting download_example_data function\n",
+      "|-----------> Data found in pyaging_data/GSE223748_subset.pkl\n",
+      "|-----> 🎉 Done! [0.5310s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.data.download_example_data('GSE223748')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "6e929219-e691-4171-911e-46143ae94898",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_pickle('pyaging_data/GSE223748_subset.pkl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "03141fc7-f175-4ad3-86ed-22d51db5cadd",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>cg00000165</th>\n",
+       "      <th>cg00001209</th>\n",
+       "      <th>cg00001364</th>\n",
+       "      <th>cg00001582</th>\n",
+       "      <th>cg00002920</th>\n",
+       "      <th>cg00003994</th>\n",
+       "      <th>cg00004555</th>\n",
+       "      <th>cg00005112</th>\n",
+       "      <th>cg00005271</th>\n",
+       "      <th>cg00006213</th>\n",
+       "      <th>...</th>\n",
+       "      <th>rs7746156_II_F_C_37550</th>\n",
+       "      <th>rs798149_II_F_C_37528</th>\n",
+       "      <th>rs845016_II_F_C_37529</th>\n",
+       "      <th>rs877309_II_F_C_37552</th>\n",
+       "      <th>rs9292570_I_F_C_37499</th>\n",
+       "      <th>rs9363764_II_F_C_37541</th>\n",
+       "      <th>rs939290_II_F_C_37535</th>\n",
+       "      <th>rs951295_I_F_C_37507</th>\n",
+       "      <th>rs966367_II_F_C_37551</th>\n",
+       "      <th>rs9839873_II_F_C_37532</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>204509080002_R01C02</th>\n",
+       "      <td>0.094879</td>\n",
+       "      <td>0.916154</td>\n",
+       "      <td>0.890314</td>\n",
+       "      <td>0.053583</td>\n",
+       "      <td>0.490381</td>\n",
+       "      <td>0.034852</td>\n",
+       "      <td>0.159705</td>\n",
+       "      <td>0.763959</td>\n",
+       "      <td>0.973245</td>\n",
+       "      <td>0.928975</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.488592</td>\n",
+       "      <td>0.491361</td>\n",
+       "      <td>0.480024</td>\n",
+       "      <td>0.500000</td>\n",
+       "      <td>0.484252</td>\n",
+       "      <td>0.489448</td>\n",
+       "      <td>0.505585</td>\n",
+       "      <td>0.505335</td>\n",
+       "      <td>0.485003</td>\n",
+       "      <td>0.510081</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>202897220142_R04C02</th>\n",
+       "      <td>0.497077</td>\n",
+       "      <td>0.441263</td>\n",
+       "      <td>0.915314</td>\n",
+       "      <td>0.047339</td>\n",
+       "      <td>0.651029</td>\n",
+       "      <td>0.037774</td>\n",
+       "      <td>0.082634</td>\n",
+       "      <td>0.415800</td>\n",
+       "      <td>0.702857</td>\n",
+       "      <td>0.821715</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.508102</td>\n",
+       "      <td>0.500299</td>\n",
+       "      <td>0.507261</td>\n",
+       "      <td>0.490684</td>\n",
+       "      <td>0.499673</td>\n",
+       "      <td>0.497256</td>\n",
+       "      <td>0.564106</td>\n",
+       "      <td>0.482151</td>\n",
+       "      <td>0.486667</td>\n",
+       "      <td>0.505236</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>204529320092_R01C02</th>\n",
+       "      <td>0.321141</td>\n",
+       "      <td>0.834158</td>\n",
+       "      <td>0.881194</td>\n",
+       "      <td>0.056124</td>\n",
+       "      <td>0.688350</td>\n",
+       "      <td>0.030225</td>\n",
+       "      <td>0.086776</td>\n",
+       "      <td>0.777588</td>\n",
+       "      <td>0.974587</td>\n",
+       "      <td>0.923934</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.520404</td>\n",
+       "      <td>0.509568</td>\n",
+       "      <td>0.507549</td>\n",
+       "      <td>0.501659</td>\n",
+       "      <td>0.492823</td>\n",
+       "      <td>0.487243</td>\n",
+       "      <td>0.516018</td>\n",
+       "      <td>0.471244</td>\n",
+       "      <td>0.491066</td>\n",
+       "      <td>0.491759</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>202794570004_R02C01</th>\n",
+       "      <td>0.495226</td>\n",
+       "      <td>0.924121</td>\n",
+       "      <td>0.915812</td>\n",
+       "      <td>0.050866</td>\n",
+       "      <td>0.688335</td>\n",
+       "      <td>0.032344</td>\n",
+       "      <td>0.113318</td>\n",
+       "      <td>0.872094</td>\n",
+       "      <td>0.969189</td>\n",
+       "      <td>0.917076</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.499314</td>\n",
+       "      <td>0.516132</td>\n",
+       "      <td>0.487009</td>\n",
+       "      <td>0.487146</td>\n",
+       "      <td>0.469119</td>\n",
+       "      <td>0.495125</td>\n",
+       "      <td>0.548238</td>\n",
+       "      <td>0.512283</td>\n",
+       "      <td>0.514257</td>\n",
+       "      <td>0.492520</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>203531420070_R05C02</th>\n",
+       "      <td>0.183954</td>\n",
+       "      <td>0.934332</td>\n",
+       "      <td>0.924153</td>\n",
+       "      <td>0.055032</td>\n",
+       "      <td>0.717495</td>\n",
+       "      <td>0.037108</td>\n",
+       "      <td>0.098632</td>\n",
+       "      <td>0.859614</td>\n",
+       "      <td>0.973422</td>\n",
+       "      <td>0.963446</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.501432</td>\n",
+       "      <td>0.509412</td>\n",
+       "      <td>0.485055</td>\n",
+       "      <td>0.497272</td>\n",
+       "      <td>0.480637</td>\n",
+       "      <td>0.467502</td>\n",
+       "      <td>0.494246</td>\n",
+       "      <td>0.500924</td>\n",
+       "      <td>0.531334</td>\n",
+       "      <td>0.503709</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 37554 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                     cg00000165  cg00001209  cg00001364  cg00001582  \\\n",
+       "204509080002_R01C02    0.094879    0.916154    0.890314    0.053583   \n",
+       "202897220142_R04C02    0.497077    0.441263    0.915314    0.047339   \n",
+       "204529320092_R01C02    0.321141    0.834158    0.881194    0.056124   \n",
+       "202794570004_R02C01    0.495226    0.924121    0.915812    0.050866   \n",
+       "203531420070_R05C02    0.183954    0.934332    0.924153    0.055032   \n",
+       "\n",
+       "                     cg00002920  cg00003994  cg00004555  cg00005112  \\\n",
+       "204509080002_R01C02    0.490381    0.034852    0.159705    0.763959   \n",
+       "202897220142_R04C02    0.651029    0.037774    0.082634    0.415800   \n",
+       "204529320092_R01C02    0.688350    0.030225    0.086776    0.777588   \n",
+       "202794570004_R02C01    0.688335    0.032344    0.113318    0.872094   \n",
+       "203531420070_R05C02    0.717495    0.037108    0.098632    0.859614   \n",
+       "\n",
+       "                     cg00005271  cg00006213  ...  rs7746156_II_F_C_37550  \\\n",
+       "204509080002_R01C02    0.973245    0.928975  ...                0.488592   \n",
+       "202897220142_R04C02    0.702857    0.821715  ...                0.508102   \n",
+       "204529320092_R01C02    0.974587    0.923934  ...                0.520404   \n",
+       "202794570004_R02C01    0.969189    0.917076  ...                0.499314   \n",
+       "203531420070_R05C02    0.973422    0.963446  ...                0.501432   \n",
+       "\n",
+       "                     rs798149_II_F_C_37528  rs845016_II_F_C_37529  \\\n",
+       "204509080002_R01C02               0.491361               0.480024   \n",
+       "202897220142_R04C02               0.500299               0.507261   \n",
+       "204529320092_R01C02               0.509568               0.507549   \n",
+       "202794570004_R02C01               0.516132               0.487009   \n",
+       "203531420070_R05C02               0.509412               0.485055   \n",
+       "\n",
+       "                     rs877309_II_F_C_37552  rs9292570_I_F_C_37499  \\\n",
+       "204509080002_R01C02               0.500000               0.484252   \n",
+       "202897220142_R04C02               0.490684               0.499673   \n",
+       "204529320092_R01C02               0.501659               0.492823   \n",
+       "202794570004_R02C01               0.487146               0.469119   \n",
+       "203531420070_R05C02               0.497272               0.480637   \n",
+       "\n",
+       "                     rs9363764_II_F_C_37541  rs939290_II_F_C_37535  \\\n",
+       "204509080002_R01C02                0.489448               0.505585   \n",
+       "202897220142_R04C02                0.497256               0.564106   \n",
+       "204529320092_R01C02                0.487243               0.516018   \n",
+       "202794570004_R02C01                0.495125               0.548238   \n",
+       "203531420070_R05C02                0.467502               0.494246   \n",
+       "\n",
+       "                     rs951295_I_F_C_37507  rs966367_II_F_C_37551  \\\n",
+       "204509080002_R01C02              0.505335               0.485003   \n",
+       "202897220142_R04C02              0.482151               0.486667   \n",
+       "204529320092_R01C02              0.471244               0.491066   \n",
+       "202794570004_R02C01              0.512283               0.514257   \n",
+       "203531420070_R05C02              0.500924               0.531334   \n",
+       "\n",
+       "                     rs9839873_II_F_C_37532  \n",
+       "204509080002_R01C02                0.510081  \n",
+       "202897220142_R04C02                0.505236  \n",
+       "204529320092_R01C02                0.491759  \n",
+       "202794570004_R02C01                0.492520  \n",
+       "203531420070_R05C02                0.503709  \n",
+       "\n",
+       "[5 rows x 37554 columns]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "be02f82e-69b3-4534-88e2-5ee8851f9319",
+   "metadata": {},
+   "source": [
+    "## Convert data to AnnData object"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3f7ddbab-c020-4a07-a95c-450f6f591e2f",
+   "metadata": {},
+   "source": [
+    "AnnData objects are highly flexible and are thus our preferred method of organizing data for age prediction."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "cb628052-a992-4e41-8d56-eb932f1579eb",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting df_to_adata function\n",
+      "|-----> ⚙️ Create anndata object started\n",
+      "|-----> ✅ Create anndata object finished [0.0119s]\n",
+      "|-----> ⚙️ Add metadata to anndata started\n",
+      "|-----------? No metadata provided. Leaving adata.obs empty\n",
+      "|-----> ⚠️ Add metadata to anndata finished [0.0019s]\n",
+      "|-----> ⚙️ Log data statistics started\n",
+      "|-----------> There are 100 observations\n",
+      "|-----------> There are 37554 features\n",
+      "|-----------> Total missing values: 0\n",
+      "|-----------> Percentage of missing values: 0.00%\n",
+      "|-----> ✅ Log data statistics finished [0.0128s]\n",
+      "|-----> ⚙️ Impute missing values started\n",
+      "|-----------> No missing values found. No imputation necessary\n",
+      "|-----> ✅ Impute missing values finished [0.0079s]\n",
+      "|-----> 🎉 Done! [0.0404s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "adata = pya.pp.df_to_adata(df, imputer_strategy='knn')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "36b4dc99-1069-45da-a988-9493de817d99",
+   "metadata": {},
+   "source": [
+    "This is what the `adata` object looks like:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "e26459d3-276d-4cda-b3bf-a4147c397667",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AnnData object with n_obs × n_vars = 100 × 37554\n",
+       "    var: 'percent_na'\n",
+       "    layers: 'X_original'"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cc9c675d-6196-4274-b209-1487656acc9f",
+   "metadata": {},
+   "source": [
+    "## Predict age"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "30e82111-8c24-4a4d-8f28-22e0c4d35413",
+   "metadata": {},
+   "source": [
+    "### Mammalian predictors without species declaration"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "90eb1122-ce2e-49ec-919b-c47bd85643dc",
+   "metadata": {},
+   "source": [
+    "We can either predict one clock at a time or several at once. Let's start with the mammalian clocks that do not need the species Latin name for the conversion of the output into units of years."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "4a3484b8-ea3c-49dc-a2b1-4fb831906e29",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting predict_age function\n",
+      "|-----> ⚙️ Set PyTorch device started\n",
+      "|-----------> Using device: cpu\n",
+      "|-----> ✅ Set PyTorch device finished [0.0013s]\n",
+      "|-----> 🕒 Processing clock: mammalian1\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/mammalian1.pt\n",
+      "|-----------> ✅ Load clock finished [0.5443s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------> All features are present in adata.var_names.\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_mammalian1]\n",
+      "|-----------> ✅ Check features in adata finished [0.0318s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is anti_logp2\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0015s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0006s]\n",
+      "|-----> 🕒 Processing clock: mammalianlifespan\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/mammalianlifespan.pt\n",
+      "|-----------> ✅ Load clock finished [0.4468s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------> All features are present in adata.var_names.\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_mammalianlifespan]\n",
+      "|-----------> ✅ Check features in adata finished [0.0127s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> There is no postprocessing necessary\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0006s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0007s]\n",
+      "|-----> 🕒 Processing clock: mammalianfemale\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/mammalianfemale.pt\n",
+      "|-----------> ✅ Load clock finished [0.4320s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------> All features are present in adata.var_names.\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_mammalianfemale]\n",
+      "|-----------> ✅ Check features in adata finished [0.0095s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is sigmoid\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0015s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0005s]\n",
+      "|-----> 🎉 Done! [1.6454s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.pred.predict_age(adata, ['Mammalian1', 'MammalianLifespan', 'MammalianFemale'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "98fbdf4c-57c2-4885-bc4a-96b4771a638b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mammalian1</th>\n",
+       "      <th>mammalianlifespan</th>\n",
+       "      <th>mammalianfemale</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>204509080002_R01C02</th>\n",
+       "      <td>26.372437</td>\n",
+       "      <td>93.886067</td>\n",
+       "      <td>0.994351</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>202897220142_R04C02</th>\n",
+       "      <td>1.176586</td>\n",
+       "      <td>6.999176</td>\n",
+       "      <td>0.991473</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>204529320092_R01C02</th>\n",
+       "      <td>18.776438</td>\n",
+       "      <td>73.335119</td>\n",
+       "      <td>0.008419</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>202794570004_R02C01</th>\n",
+       "      <td>0.890973</td>\n",
+       "      <td>5.332615</td>\n",
+       "      <td>0.941965</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>203531420070_R05C02</th>\n",
+       "      <td>10.371315</td>\n",
+       "      <td>68.409331</td>\n",
+       "      <td>0.009133</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                     mammalian1  mammalianlifespan  mammalianfemale\n",
+       "204509080002_R01C02   26.372437          93.886067         0.994351\n",
+       "202897220142_R04C02    1.176586           6.999176         0.991473\n",
+       "204529320092_R01C02   18.776438          73.335119         0.008419\n",
+       "202794570004_R02C01    0.890973           5.332615         0.941965\n",
+       "203531420070_R05C02   10.371315          68.409331         0.009133"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9f6620b4-3a88-4cfd-9f4b-0cbc8047adf9",
+   "metadata": {},
+   "source": [
+    "Having so much information printed can be overwhelming, particularly when running several clocks at once. In such cases, just set verbose to False."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "64035819-8dd0-4917-96ed-55c36ef34a66",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mammalian1</th>\n",
+       "      <th>mammalianlifespan</th>\n",
+       "      <th>mammalianfemale</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>204509080002_R01C02</th>\n",
+       "      <td>26.372437</td>\n",
+       "      <td>93.886067</td>\n",
+       "      <td>0.994351</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>202897220142_R04C02</th>\n",
+       "      <td>1.176586</td>\n",
+       "      <td>6.999176</td>\n",
+       "      <td>0.991473</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>204529320092_R01C02</th>\n",
+       "      <td>18.776438</td>\n",
+       "      <td>73.335119</td>\n",
+       "      <td>0.008419</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>202794570004_R02C01</th>\n",
+       "      <td>0.890973</td>\n",
+       "      <td>5.332615</td>\n",
+       "      <td>0.941965</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>203531420070_R05C02</th>\n",
+       "      <td>10.371315</td>\n",
+       "      <td>68.409331</td>\n",
+       "      <td>0.009133</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                     mammalian1  mammalianlifespan  mammalianfemale\n",
+       "204509080002_R01C02   26.372437          93.886067         0.994351\n",
+       "202897220142_R04C02    1.176586           6.999176         0.991473\n",
+       "204529320092_R01C02   18.776438          73.335119         0.008419\n",
+       "202794570004_R02C01    0.890973           5.332615         0.941965\n",
+       "203531420070_R05C02   10.371315          68.409331         0.009133"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pya.data.download_example_data('GSE223748', verbose=False)\n",
+    "df = pd.read_pickle('pyaging_data/GSE223748_subset.pkl')\n",
+    "adata = pya.preprocess.df_to_adata(df, imputer_strategy='knn', verbose=False)\n",
+    "pya.pred.predict_age(adata, ['Mammalian1', 'MammalianLifespan', 'MammalianFemale'], verbose=False)\n",
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b5d3a683-2ff4-436b-9a90-34a138a98a14",
+   "metadata": {},
+   "source": [
+    "After age prediction, the clocks are added to `adata.obs`. Moreover, the percent of missing values for each clock and other metadata are included in `adata.uns`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "0d069189-1750-42f6-89d9-73039dd07a00",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AnnData object with n_obs × n_vars = 100 × 37554\n",
+       "    obs: 'mammalian1', 'mammalianlifespan', 'mammalianfemale'\n",
+       "    var: 'percent_na'\n",
+       "    uns: 'mammalian1_percent_na', 'mammalian1_missing_features', 'mammalian1_metadata', 'mammalianlifespan_percent_na', 'mammalianlifespan_missing_features', 'mammalianlifespan_metadata', 'mammalianfemale_percent_na', 'mammalianfemale_missing_features', 'mammalianfemale_metadata'\n",
+       "    layers: 'X_original'"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bd975ebd-88aa-49f5-889e-43c69eb79afc",
+   "metadata": {},
+   "source": [
+    "### Mammalian predictors with species declaration"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1ec840ce-2286-45a3-989e-d7a36469e1cd",
+   "metadata": {},
+   "source": [
+    "Mammalian2 and mammalian3 types of clocks require species declaration for the reverse transformation of the output into units of years. For the mammalian2 clocks, there are 1756 species in the dictionary with the available variables for reverse transformation; for the mammalian3, there are 1707 species. By default, Homo sapiens is the chosen species.\n",
+    "\n",
+    "Let's first have a look at the species that can be used for these clocks by loading the models themselves."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "be99c22b-dc46-419b-b415-1c024b5e35d0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> ⚙️ Load clock started\n",
+      "|-----------> Data found in pyaging_data/mammalian2.pt\n",
+      "|-----> ✅ Load clock finished [0.4540s]\n",
+      "|-----> ⚙️ Load clock started\n",
+      "|-----------> Data found in pyaging_data/mammalian3.pt\n",
+      "|-----> ✅ Load clock finished [0.4946s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "logger = pya.logger.Logger('test_logger')\n",
+    "device = 'cpu'\n",
+    "dir = 'pyaging_data'\n",
+    "indent_level = 1\n",
+    "\n",
+    "mammalian2_model = pya.pred.load_clock('Mammalian2', device, dir, logger, indent_level=indent_level)\n",
+    "mammalian3_model = pya.pred.load_clock('Mammalian3', device, dir, logger, indent_level=indent_level)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2f6a5ec7-a9b2-44fc-af5e-d14ae910b935",
+   "metadata": {},
+   "source": [
+    "We need to filter the model features, keeping only those that are not CpG sites (i.e., the species Latin names)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "a8e37387-a32f-4e2f-98b0-22000792c565",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "There are 1756 species Latin name features in mammalian2.\n",
+      "There are 1707 species Latin name features in mammalian3.\n"
+     ]
+    }
+   ],
+   "source": [
+    "mammalian2_species = [feature for feature in mammalian2_model.features if feature[0:2] != 'cg']\n",
+    "mammalian3_species = [feature for feature in mammalian3_model.features if feature[0:2] != 'cg']\n",
+    "print(f\"There are {len(mammalian2_species)} species Latin name features in mammalian2.\")\n",
+    "print(f\"There are {len(mammalian3_species)} species Latin name features in mammalian3.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "7a86f159-ccf2-4dbc-93b2-ea76558a1f81",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['Anaxyrus americanus',\n",
+       " 'Anaxyrus boreas',\n",
+       " 'Anaxyrus canorus',\n",
+       " 'Anaxyrus cognatus',\n",
+       " 'Anaxyrus retiformis',\n",
+       " 'Anaxyrus terrestris',\n",
+       " 'Rhinella marina',\n",
+       " 'Dendrobates auratus',\n",
+       " 'Dendrobates leucomelas',\n",
+       " 'Hyla chrysoscelis']"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mammalian2_species[0:10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "8e125e7a-7f8f-4360-9d69-be6815404128",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['Anaxyrus americanus',\n",
+       " 'Anaxyrus boreas',\n",
+       " 'Anaxyrus canorus',\n",
+       " 'Anaxyrus cognatus',\n",
+       " 'Rhinella marina',\n",
+       " 'Dendrobates auratus',\n",
+       " 'Dendrobates leucomelas',\n",
+       " 'Phyllobates vittatus',\n",
+       " 'Hyla chrysoscelis',\n",
+       " 'Hyla versicolor']"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mammalian3_species[0:10]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "498ca26d-ab18-48d8-96a5-4c3915433a05",
+   "metadata": {},
+   "source": [
+    "To choose a species, simply add the Latin name as a feature with value 1. In this subset version of the GSE223748 dataset, the species names are not available. Therefore, let's use the naked mole rat (Heterocephalus glaber) as our species.\n",
+    "\n",
+    "Let's first check that it is available in the clocks."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "47908607-99b8-411e-a91c-e4829cb2781c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "'Heterocephalus glaber' in  mammalian2_species"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "6bf84b35-2091-4297-892a-d7addb7badc6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "'Heterocephalus glaber' in  mammalian3_species"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4f1e4c93-397a-401e-948f-b42fbb62f222",
+   "metadata": {},
+   "source": [
+    "Then, let's add it as a feature to the pandas dataframe and create a new adata object."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "09259244-661c-475c-bc9d-2168788f6226",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting df_to_adata function\n",
+      "|-----> ⚙️ Create anndata object started\n",
+      "|-----> ✅ Create anndata object finished [0.0291s]\n",
+      "|-----> ⚙️ Add metadata to anndata started\n",
+      "|-----------? No metadata provided. Leaving adata.obs empty\n",
+      "|-----> ⚠️ Add metadata to anndata finished [0.0006s]\n",
+      "|-----> ⚙️ Log data statistics started\n",
+      "|-----------> There are 100 observations\n",
+      "|-----------> There are 37555 features\n",
+      "|-----------> Total missing values: 0\n",
+      "|-----------> Percentage of missing values: 0.00%\n",
+      "|-----> ✅ Log data statistics finished [0.0045s]\n",
+      "|-----> ⚙️ Impute missing values started\n",
+      "|-----------> No missing values found. No imputation necessary\n",
+      "|-----> ✅ Impute missing values finished [0.0040s]\n",
+      "|-----> 🎉 Done! [0.0415s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "df['Heterocephalus glaber'] = 1\n",
+    "adata = pya.pp.df_to_adata(df, imputer_strategy='knn')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bd588d20-a333-4775-9862-75c28cecff51",
+   "metadata": {},
+   "source": [
+    "Finally, let's make the predictions using the multi-tissue mammalian2 and mammalian3 clocks plus the blood-specific and skin-specific versions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "f3f2a5a2-e546-4cbb-97ad-df8714b138c4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting predict_age function\n",
+      "|-----> ⚙️ Set PyTorch device started\n",
+      "|-----------> Using device: cpu\n",
+      "|-----> ✅ Set PyTorch device finished [0.0013s]\n",
+      "|-----> 🕒 Processing clock: mammalian2\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/mammalian2.pt\n",
+      "|-----------> ✅ Load clock finished [0.4827s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------? 1755 out of 2572 features (68.23%) are missing: ['Anaxyrus americanus', 'Anaxyrus boreas', 'Anaxyrus canorus'], etc.\n",
+      "|-----------------> Using reference feature values for mammalian2\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_mammalian2]\n",
+      "|-----------> ⚠️ Check features in adata finished [0.0556s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is mammalian2\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0042s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0007s]\n",
+      "|-----> 🕒 Processing clock: mammalianskin2\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/mammalianskin2.pt\n",
+      "|-----------> ✅ Load clock finished [0.4283s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------? 1755 out of 2240 features (78.35%) are missing: ['Anaxyrus americanus', 'Anaxyrus boreas', 'Anaxyrus canorus'], etc.\n",
+      "|-----------------> Using reference feature values for mammalianskin2\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_mammalianskin2]\n",
+      "|-----------> ⚠️ Check features in adata finished [0.0447s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is mammalian2\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0022s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0006s]\n",
+      "|-----> 🕒 Processing clock: mammalianblood2\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/mammalianblood2.pt\n",
+      "|-----------> ✅ Load clock finished [0.4847s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------? 1755 out of 2257 features (77.76%) are missing: ['Anaxyrus americanus', 'Anaxyrus boreas', 'Anaxyrus canorus'], etc.\n",
+      "|-----------------> Using reference feature values for mammalianblood2\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_mammalianblood2]\n",
+      "|-----------> ⚠️ Check features in adata finished [0.0799s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is mammalian2\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0013s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0005s]\n",
+      "|-----> 🕒 Processing clock: mammalian3\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/mammalian3.pt\n",
+      "|-----------> ✅ Load clock finished [0.5006s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------? 1706 out of 2467 features (69.15%) are missing: ['Anaxyrus americanus', 'Anaxyrus boreas', 'Anaxyrus canorus'], etc.\n",
+      "|-----------------> Using reference feature values for mammalian3\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_mammalian3]\n",
+      "|-----------> ⚠️ Check features in adata finished [0.1086s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is mammalian3\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0059s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0005s]\n",
+      "|-----> 🕒 Processing clock: mammalianskin3\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/mammalianskin3.pt\n",
+      "|-----------> ✅ Load clock finished [0.5248s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------? 1706 out of 2055 features (83.02%) are missing: ['Anaxyrus americanus', 'Anaxyrus boreas', 'Anaxyrus canorus'], etc.\n",
+      "|-----------------> Using reference feature values for mammalianskin3\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_mammalianskin3]\n",
+      "|-----------> ⚠️ Check features in adata finished [0.0743s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is mammalian3\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0024s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0007s]\n",
+      "|-----> 🕒 Processing clock: mammalianblood3\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/mammalianblood3.pt\n",
+      "|-----------> ✅ Load clock finished [0.4440s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------? 1706 out of 2097 features (81.35%) are missing: ['Anaxyrus americanus', 'Anaxyrus boreas', 'Anaxyrus canorus'], etc.\n",
+      "|-----------------> Using reference feature values for mammalianblood3\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_mammalianblood3]\n",
+      "|-----------> ⚠️ Check features in adata finished [0.0856s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is mammalian3\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0024s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0006s]\n",
+      "|-----> 🎉 Done! [3.6574s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.pred.predict_age(adata, ['Mammalian2', 'MammalianSkin2', 'MammalianBlood2', 'Mammalian3', 'MammalianSkin3', 'MammalianBlood3'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2b582309-e03a-4906-a35d-7bd556f9f5e7",
+   "metadata": {},
+   "source": [
+    "During age prediction, any species that are not present in the input data will show up as missing features, and their values will be automatically replaced with 0. Therefore, the missing features reported are not necessarily CpG sites. To double-check, one can simply inspect the list of missing features stored in `adata.uns`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "48d9c49e-ca16-43c8-8919-2e37ba092a56",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['Anaxyrus americanus',\n",
+       " 'Anaxyrus boreas',\n",
+       " 'Anaxyrus canorus',\n",
+       " 'Anaxyrus cognatus',\n",
+       " 'Anaxyrus retiformis',\n",
+       " 'Anaxyrus terrestris',\n",
+       " 'Rhinella marina',\n",
+       " 'Dendrobates auratus',\n",
+       " 'Dendrobates leucomelas',\n",
+       " 'Hyla chrysoscelis']"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.uns['mammalian2_missing_features'][0:10]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d6175d3a-c2a6-449f-b1c7-60e334b3cf7f",
+   "metadata": {},
+   "source": [
+    "Finally, let's look at the predictions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "bde7b46e-98ea-479c-a08c-f91483e60371",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mammalian2</th>\n",
+       "      <th>mammalianskin2</th>\n",
+       "      <th>mammalianblood2</th>\n",
+       "      <th>mammalian3</th>\n",
+       "      <th>mammalianskin3</th>\n",
+       "      <th>mammalianblood3</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>204509080002_R01C02</th>\n",
+       "      <td>14.946335</td>\n",
+       "      <td>6.562780</td>\n",
+       "      <td>15.321810</td>\n",
+       "      <td>9.211476</td>\n",
+       "      <td>4.328208</td>\n",
+       "      <td>13.111774</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>202897220142_R04C02</th>\n",
+       "      <td>17.037182</td>\n",
+       "      <td>3.966518</td>\n",
+       "      <td>4.293048</td>\n",
+       "      <td>17.737573</td>\n",
+       "      <td>0.900470</td>\n",
+       "      <td>4.240264</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>204529320092_R01C02</th>\n",
+       "      <td>12.065347</td>\n",
+       "      <td>13.392643</td>\n",
+       "      <td>14.483315</td>\n",
+       "      <td>5.950473</td>\n",
+       "      <td>7.048246</td>\n",
+       "      <td>5.498531</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>202794570004_R02C01</th>\n",
+       "      <td>15.263569</td>\n",
+       "      <td>19.451386</td>\n",
+       "      <td>7.148743</td>\n",
+       "      <td>15.870878</td>\n",
+       "      <td>14.483290</td>\n",
+       "      <td>11.685419</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>203531420070_R05C02</th>\n",
+       "      <td>6.689490</td>\n",
+       "      <td>6.809801</td>\n",
+       "      <td>7.602141</td>\n",
+       "      <td>4.106029</td>\n",
+       "      <td>2.040104</td>\n",
+       "      <td>6.293626</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>205128010037_R03C02</th>\n",
+       "      <td>22.698138</td>\n",
+       "      <td>22.948062</td>\n",
+       "      <td>25.326321</td>\n",
+       "      <td>14.432799</td>\n",
+       "      <td>14.013267</td>\n",
+       "      <td>21.264321</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>206116820044_R06C02</th>\n",
+       "      <td>11.146012</td>\n",
+       "      <td>9.927552</td>\n",
+       "      <td>16.813175</td>\n",
+       "      <td>5.092238</td>\n",
+       "      <td>4.367878</td>\n",
+       "      <td>6.303160</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>203203210055_R03C02</th>\n",
+       "      <td>2.220405</td>\n",
+       "      <td>3.114650</td>\n",
+       "      <td>7.204062</td>\n",
+       "      <td>1.323726</td>\n",
+       "      <td>1.719778</td>\n",
+       "      <td>6.897330</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>203203210003_R06C02</th>\n",
+       "      <td>21.672136</td>\n",
+       "      <td>28.800661</td>\n",
+       "      <td>21.536502</td>\n",
+       "      <td>16.231949</td>\n",
+       "      <td>20.177062</td>\n",
+       "      <td>24.678961</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>204027420026_R03C02</th>\n",
+       "      <td>4.925257</td>\n",
+       "      <td>6.656713</td>\n",
+       "      <td>5.481918</td>\n",
+       "      <td>4.350099</td>\n",
+       "      <td>4.850269</td>\n",
+       "      <td>3.033646</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>100 rows × 6 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                     mammalian2  mammalianskin2  mammalianblood2  mammalian3  \\\n",
+       "204509080002_R01C02   14.946335        6.562780        15.321810    9.211476   \n",
+       "202897220142_R04C02   17.037182        3.966518         4.293048   17.737573   \n",
+       "204529320092_R01C02   12.065347       13.392643        14.483315    5.950473   \n",
+       "202794570004_R02C01   15.263569       19.451386         7.148743   15.870878   \n",
+       "203531420070_R05C02    6.689490        6.809801         7.602141    4.106029   \n",
+       "...                         ...             ...              ...         ...   \n",
+       "205128010037_R03C02   22.698138       22.948062        25.326321   14.432799   \n",
+       "206116820044_R06C02   11.146012        9.927552        16.813175    5.092238   \n",
+       "203203210055_R03C02    2.220405        3.114650         7.204062    1.323726   \n",
+       "203203210003_R06C02   21.672136       28.800661        21.536502   16.231949   \n",
+       "204027420026_R03C02    4.925257        6.656713         5.481918    4.350099   \n",
+       "\n",
+       "                     mammalianskin3  mammalianblood3  \n",
+       "204509080002_R01C02        4.328208        13.111774  \n",
+       "202897220142_R04C02        0.900470         4.240264  \n",
+       "204529320092_R01C02        7.048246         5.498531  \n",
+       "202794570004_R02C01       14.483290        11.685419  \n",
+       "203531420070_R05C02        2.040104         6.293626  \n",
+       "...                             ...              ...  \n",
+       "205128010037_R03C02       14.013267        21.264321  \n",
+       "206116820044_R06C02        4.367878         6.303160  \n",
+       "203203210055_R03C02        1.719778         6.897330  \n",
+       "203203210003_R06C02       20.177062        24.678961  \n",
+       "204027420026_R03C02        4.850269         3.033646  \n",
+       "\n",
+       "[100 rows x 6 columns]"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "333ff256-996c-4f41-8260-fffb5b248513",
+   "metadata": {},
+   "source": [
+    "Out of curiosity, let's check the correlation between the clocks."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "165d709e-73ce-4ccb-a0b2-25e485f16d92",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mammalian2</th>\n",
+       "      <th>mammalianskin2</th>\n",
+       "      <th>mammalianblood2</th>\n",
+       "      <th>mammalian3</th>\n",
+       "      <th>mammalianskin3</th>\n",
+       "      <th>mammalianblood3</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>mammalian2</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.658934</td>\n",
+       "      <td>0.609392</td>\n",
+       "      <td>0.910119</td>\n",
+       "      <td>0.611179</td>\n",
+       "      <td>0.717135</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mammalianskin2</th>\n",
+       "      <td>0.658934</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.418290</td>\n",
+       "      <td>0.607472</td>\n",
+       "      <td>0.900305</td>\n",
+       "      <td>0.594863</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mammalianblood2</th>\n",
+       "      <td>0.609392</td>\n",
+       "      <td>0.418290</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.463504</td>\n",
+       "      <td>0.388197</td>\n",
+       "      <td>0.754871</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mammalian3</th>\n",
+       "      <td>0.910119</td>\n",
+       "      <td>0.607472</td>\n",
+       "      <td>0.463504</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.675022</td>\n",
+       "      <td>0.729099</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mammalianskin3</th>\n",
+       "      <td>0.611179</td>\n",
+       "      <td>0.900305</td>\n",
+       "      <td>0.388197</td>\n",
+       "      <td>0.675022</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.646473</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mammalianblood3</th>\n",
+       "      <td>0.717135</td>\n",
+       "      <td>0.594863</td>\n",
+       "      <td>0.754871</td>\n",
+       "      <td>0.729099</td>\n",
+       "      <td>0.646473</td>\n",
+       "      <td>1.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                 mammalian2  mammalianskin2  mammalianblood2  mammalian3  \\\n",
+       "mammalian2         1.000000        0.658934         0.609392    0.910119   \n",
+       "mammalianskin2     0.658934        1.000000         0.418290    0.607472   \n",
+       "mammalianblood2    0.609392        0.418290         1.000000    0.463504   \n",
+       "mammalian3         0.910119        0.607472         0.463504    1.000000   \n",
+       "mammalianskin3     0.611179        0.900305         0.388197    0.675022   \n",
+       "mammalianblood3    0.717135        0.594863         0.754871    0.729099   \n",
+       "\n",
+       "                 mammalianskin3  mammalianblood3  \n",
+       "mammalian2             0.611179         0.717135  \n",
+       "mammalianskin2         0.900305         0.594863  \n",
+       "mammalianblood2        0.388197         0.754871  \n",
+       "mammalian3             0.675022         0.729099  \n",
+       "mammalianskin3         1.000000         0.646473  \n",
+       "mammalianblood3        0.646473         1.000000  "
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.obs.corr('pearson')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7931df39-6108-408a-9214-829a8da9da9a",
+   "metadata": {},
+   "source": [
+    "Again, having so much information printed can be overwhelming, particularly when running several clocks at once. In such cases, just set verbose to False."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "6f00d62f-2f2c-4e90-bd2b-bd409b0131de",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mammalian2</th>\n",
+       "      <th>mammalianskin2</th>\n",
+       "      <th>mammalianblood2</th>\n",
+       "      <th>mammalian3</th>\n",
+       "      <th>mammalianskin3</th>\n",
+       "      <th>mammalianblood3</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>204509080002_R01C02</th>\n",
+       "      <td>14.946335</td>\n",
+       "      <td>6.562780</td>\n",
+       "      <td>15.321810</td>\n",
+       "      <td>9.211476</td>\n",
+       "      <td>4.328208</td>\n",
+       "      <td>13.111774</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>202897220142_R04C02</th>\n",
+       "      <td>17.037182</td>\n",
+       "      <td>3.966518</td>\n",
+       "      <td>4.293048</td>\n",
+       "      <td>17.737573</td>\n",
+       "      <td>0.900470</td>\n",
+       "      <td>4.240264</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>204529320092_R01C02</th>\n",
+       "      <td>12.065347</td>\n",
+       "      <td>13.392643</td>\n",
+       "      <td>14.483315</td>\n",
+       "      <td>5.950473</td>\n",
+       "      <td>7.048246</td>\n",
+       "      <td>5.498531</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>202794570004_R02C01</th>\n",
+       "      <td>15.263569</td>\n",
+       "      <td>19.451386</td>\n",
+       "      <td>7.148743</td>\n",
+       "      <td>15.870878</td>\n",
+       "      <td>14.483290</td>\n",
+       "      <td>11.685419</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>203531420070_R05C02</th>\n",
+       "      <td>6.689490</td>\n",
+       "      <td>6.809801</td>\n",
+       "      <td>7.602141</td>\n",
+       "      <td>4.106029</td>\n",
+       "      <td>2.040104</td>\n",
+       "      <td>6.293626</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                     mammalian2  mammalianskin2  mammalianblood2  mammalian3  \\\n",
+       "204509080002_R01C02   14.946335        6.562780        15.321810    9.211476   \n",
+       "202897220142_R04C02   17.037182        3.966518         4.293048   17.737573   \n",
+       "204529320092_R01C02   12.065347       13.392643        14.483315    5.950473   \n",
+       "202794570004_R02C01   15.263569       19.451386         7.148743   15.870878   \n",
+       "203531420070_R05C02    6.689490        6.809801         7.602141    4.106029   \n",
+       "\n",
+       "                     mammalianskin3  mammalianblood3  \n",
+       "204509080002_R01C02        4.328208        13.111774  \n",
+       "202897220142_R04C02        0.900470         4.240264  \n",
+       "204529320092_R01C02        7.048246         5.498531  \n",
+       "202794570004_R02C01       14.483290        11.685419  \n",
+       "203531420070_R05C02        2.040104         6.293626  "
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pya.data.download_example_data('GSE223748', verbose=False)\n",
+    "df = pd.read_pickle('pyaging_data/GSE223748_subset.pkl')\n",
+    "df['Heterocephalus glaber'] = 1\n",
+    "adata = pya.preprocess.df_to_adata(df, imputer_strategy='knn', verbose=False)\n",
+    "pya.pred.predict_age(adata, ['Mammalian2', 'MammalianSkin2', 'MammalianBlood2', 'Mammalian3', 'MammalianSkin3', 'MammalianBlood3'], verbose=False)\n",
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "61ad69dd-9e91-447a-80f9-95647a19a082",
+   "metadata": {},
+   "source": [
+    "## Get citation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f2fd36d5-c059-4680-b300-f8e9344186cd",
+   "metadata": {},
+   "source": [
+    "The DOI, citation, and some metadata are automatically added to the AnnData object under `adata.uns['CLOCKNAME_metadata']`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "300b40ad-68e7-49b3-a7f8-66c64436c80f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'mammalian2',\n",
+       " 'data_type': 'methylation',\n",
+       " 'species': 'multi',\n",
+       " 'year': 2023,\n",
+       " 'approved_by_author': '⌛',\n",
+       " 'citation': 'Lu, A. T., et al. \"Universal DNA methylation age across mammalian tissues.\" Nature aging 3.9 (2023): 1144-1166.',\n",
+       " 'doi': 'https://doi.org/10.1038/s43587-023-00462-6',\n",
+       " 'notes': None,\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.uns['mammalian2_metadata']"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.17"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/source/tutorials/tutorial_dnam_rrbs.ipynb b/docs/source/tutorials/tutorial_dnam_rrbs.ipynb
new file mode 100644
index 0000000..664c0ea
--- /dev/null
+++ b/docs/source/tutorials/tutorial_dnam_rrbs.ipynb
@@ -0,0 +1,1656 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "4e690b3c-4dec-450e-a7f8-f63987e60cdb",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_dnam_rrbs.ipynb) [![Open In nbviewer](https://img.shields.io/badge/View%20in-nbviewer-orange)](https://nbviewer.jupyter.org/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_dnam_rrbs.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "62e1ac68-927d-4ca8-a2ab-bd99a7ee52ab",
+   "metadata": {},
+   "source": [
+    "# RRBS DNA methylation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9552602a-777c-42a5-900a-41c85096c3d8",
+   "metadata": {},
+   "source": [
+    "This tutorial focuses on predicting age from Mus musculus reduced-representation bisulfite sequencing (RRBS) data. There are a few clocks available that were trained on RRBS data. Moreover, it is possible to use Horvath's mammalian clocks by converting the genomic location to the probes in the Horvath methylation array."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "dc347ae0-41ae-46ac-ba50-08cacd4c9241",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import pyaging as pya\n",
+    "import os\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5e620ab9-4837-4a7a-83f1-726be9c9f7bf",
+   "metadata": {},
+   "source": [
+    "## Download and load example data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5ff79235-46fb-4c59-a629-1f479f9f13a3",
+   "metadata": {},
+   "source": [
+    "Let's download the publicly available dataset GSE130735 with RRBS samples from mouse. Given it is RRBS, there are millions of CpG sites."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "332d96c6-6b12-4cd1-b216-c32ce21673b9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting download_example_data function\n",
+      "|-----------> Data found in pyaging_data/GSE130735_subset.pkl\n",
+      "|-----> 🎉 Done! [0.5425s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.data.download_example_data('GSE130735')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2a6fcd90-8ded-40d5-a606-e32e21816ebf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_pickle('pyaging_data/GSE130735_subset.pkl')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "df8ea1a3-313f-42bc-aeef-ec5349975b80",
+   "metadata": {},
+   "source": [
+    "It is important to note that the features for RRBS clocks are the genomic coordinates in the format below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "c4d8245b-8d04-4ae0-945d-3aed4956a3bb",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>chr1:3020814</th>\n",
+       "      <th>chr1:3020842</th>\n",
+       "      <th>chr1:3020877</th>\n",
+       "      <th>chr1:3020891</th>\n",
+       "      <th>chr1:3020945</th>\n",
+       "      <th>chr1:3020971</th>\n",
+       "      <th>chr1:3020987</th>\n",
+       "      <th>chr1:3021012</th>\n",
+       "      <th>chr1:3037802</th>\n",
+       "      <th>chr1:3037820</th>\n",
+       "      <th>...</th>\n",
+       "      <th>chrY:1825397</th>\n",
+       "      <th>chrY:4682362</th>\n",
+       "      <th>chrY:32122892</th>\n",
+       "      <th>chrY:85867071</th>\n",
+       "      <th>chrY:85867083</th>\n",
+       "      <th>chrY:85867117</th>\n",
+       "      <th>chrY:85867137</th>\n",
+       "      <th>chrY:85867139</th>\n",
+       "      <th>chrY:85867178</th>\n",
+       "      <th>chrY:88224179</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>GSM3752631</th>\n",
+       "      <td>0.609</td>\n",
+       "      <td>0.25</td>\n",
+       "      <td>0.408</td>\n",
+       "      <td>0.189</td>\n",
+       "      <td>0.068</td>\n",
+       "      <td>0.373</td>\n",
+       "      <td>0.571</td>\n",
+       "      <td>0.252</td>\n",
+       "      <td>0.333</td>\n",
+       "      <td>0.158</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752625</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.973</td>\n",
+       "      <td>0.984</td>\n",
+       "      <td>0.912</td>\n",
+       "      <td>0.915</td>\n",
+       "      <td>0.987</td>\n",
+       "      <td>0.974</td>\n",
+       "      <td>0.991</td>\n",
+       "      <td>0.932</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752634</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.526</td>\n",
+       "      <td>0.131</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.038</td>\n",
+       "      <td>0.469</td>\n",
+       "      <td>0.769</td>\n",
+       "      <td>0.772</td>\n",
+       "      <td>0.146</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752620</th>\n",
+       "      <td>0.931</td>\n",
+       "      <td>0.92</td>\n",
+       "      <td>0.988</td>\n",
+       "      <td>0.949</td>\n",
+       "      <td>0.897</td>\n",
+       "      <td>0.921</td>\n",
+       "      <td>0.907</td>\n",
+       "      <td>0.958</td>\n",
+       "      <td>1.000</td>\n",
+       "      <td>0.867</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752622</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.205</td>\n",
+       "      <td>0.382</td>\n",
+       "      <td>0.091</td>\n",
+       "      <td>0.132</td>\n",
+       "      <td>0.174</td>\n",
+       "      <td>0.227</td>\n",
+       "      <td>0.108</td>\n",
+       "      <td>0.053</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 1778324 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            chr1:3020814  chr1:3020842  chr1:3020877  chr1:3020891  \\\n",
+       "GSM3752631         0.609          0.25         0.408         0.189   \n",
+       "GSM3752625           NaN           NaN         0.973         0.984   \n",
+       "GSM3752634           NaN           NaN         0.526         0.131   \n",
+       "GSM3752620         0.931          0.92         0.988         0.949   \n",
+       "GSM3752622           NaN           NaN         0.205         0.382   \n",
+       "\n",
+       "            chr1:3020945  chr1:3020971  chr1:3020987  chr1:3021012  \\\n",
+       "GSM3752631         0.068         0.373         0.571         0.252   \n",
+       "GSM3752625         0.912         0.915         0.987         0.974   \n",
+       "GSM3752634         0.000         0.038         0.469         0.769   \n",
+       "GSM3752620         0.897         0.921         0.907         0.958   \n",
+       "GSM3752622         0.091         0.132         0.174         0.227   \n",
+       "\n",
+       "            chr1:3037802  chr1:3037820  ...  chrY:1825397  chrY:4682362  \\\n",
+       "GSM3752631         0.333         0.158  ...           NaN           NaN   \n",
+       "GSM3752625         0.991         0.932  ...           NaN           NaN   \n",
+       "GSM3752634         0.772         0.146  ...           NaN           NaN   \n",
+       "GSM3752620         1.000         0.867  ...           NaN           NaN   \n",
+       "GSM3752622         0.108         0.053  ...           NaN           NaN   \n",
+       "\n",
+       "            chrY:32122892  chrY:85867071  chrY:85867083  chrY:85867117  \\\n",
+       "GSM3752631            NaN            NaN            NaN            NaN   \n",
+       "GSM3752625            NaN            NaN            NaN            NaN   \n",
+       "GSM3752634            NaN            NaN            NaN            NaN   \n",
+       "GSM3752620            NaN            NaN            NaN            NaN   \n",
+       "GSM3752622            NaN            NaN            NaN            NaN   \n",
+       "\n",
+       "            chrY:85867137  chrY:85867139  chrY:85867178  chrY:88224179  \n",
+       "GSM3752631            NaN            NaN            NaN            NaN  \n",
+       "GSM3752625            NaN            NaN            NaN            NaN  \n",
+       "GSM3752634            NaN            NaN            NaN            NaN  \n",
+       "GSM3752620            NaN            NaN            NaN            NaN  \n",
+       "GSM3752622            NaN            NaN            NaN            NaN  \n",
+       "\n",
+       "[5 rows x 1778324 columns]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ea7c44d7-73c3-4cd7-844d-bab34aa2dcee",
+   "metadata": {},
+   "source": [
+    "## Convert data to AnnData object"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "04f2758f-fb8c-4a52-983a-29ec826dba6c",
+   "metadata": {},
+   "source": [
+    "AnnData objects are highly flexible and are thus our preferred method of organizing data for age prediction."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "682f7e04-22a6-4561-b389-c8f336f19862",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting df_to_adata function\n",
+      "|-----> ⚙️ Create anndata object started\n",
+      "|-----> ✅ Create anndata object finished [0.9882s]\n",
+      "|-----> ⚙️ Add metadata to anndata started\n",
+      "|-----------? No metadata provided. Leaving adata.obs empty\n",
+      "|-----> ⚠️ Add metadata to anndata finished [0.0006s]\n",
+      "|-----> ⚙️ Log data statistics started\n",
+      "|-----------> There are 14 observations\n",
+      "|-----------> There are 1778324 features\n",
+      "|-----------> Total missing values: 6322346\n",
+      "|-----------> Percentage of missing values: 25.39%\n",
+      "|-----> ✅ Log data statistics finished [0.0205s]\n",
+      "|-----> ⚙️ Impute missing values started\n",
+      "|-----------> Imputing missing values using mean strategy\n",
+      "|-----> ✅ Impute missing values finished [0.4631s]\n",
+      "|-----> ⚙️ Add imputer strategy to adata.uns started\n",
+      "|-----> ✅ Add imputer strategy to adata.uns finished [0.0087s]\n",
+      "|-----> 🎉 Done! [1.4897s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "adata = pya.pp.df_to_adata(df, imputer_strategy='mean') # knn might be a bit slow"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7349164c-f28b-4222-bf41-6f80d8b79c3b",
+   "metadata": {},
+   "source": [
+    "This is what the `adata` object looks like:"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4b5ff1ef-e724-407a-b6d4-9907558f21ba",
+   "metadata": {},
+   "source": [
+    "## Predict age with RRBS clocks"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "eb197ded-91dd-4319-8dbb-a635d09c8367",
+   "metadata": {},
+   "source": [
+    "We can either predict with one clock at a time or with several at once. For convenience, let's simply input all four available RRBS clocks at once. The function is invariant to the capitalization of the clock name."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "cdd18ee5-9af1-404e-80f6-42a83685273e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting predict_age function\n",
+      "|-----> ⚙️ Set PyTorch device started\n",
+      "|-----------> Using device: cpu\n",
+      "|-----> ✅ Set PyTorch device finished [0.0033s]\n",
+      "|-----> 🕒 Processing clock: thompson\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/thompson.pt\n",
+      "|-----------> ✅ Load clock finished [0.5324s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------? 1 out of 582 features (0.17%) are missing: ['chr4:91376687'], etc.\n",
+      "|-----------------> Filling missing features entirely with 0\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_thompson]\n",
+      "|-----------> ⚠️ Check features in adata finished [0.0654s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> There is no postprocessing necessary\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0013s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0008s]\n",
+      "|-----> 🕒 Processing clock: meer\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/meer.pt\n",
+      "|-----------> ✅ Load clock finished [0.4402s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------? 225 out of 435 features (51.72%) are missing: ['chr10:111559529', 'chr10:115250413', 'chr10:127620127'], etc.\n",
+      "|-----------------> Filling missing features entirely with 0\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_meer]\n",
+      "|-----------> ⚠️ Check features in adata finished [0.0412s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> There is no postprocessing necessary\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0010s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0006s]\n",
+      "|-----> 🕒 Processing clock: petkovich\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/petkovich.pt\n",
+      "|-----------> ✅ Load clock finished [0.5167s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------? 58 out of 90 features (64.44%) are missing: ['chr19:23893237', 'chr18:45589182', 'chr16:10502162'], etc.\n",
+      "|-----------------> Filling missing features entirely with 0\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_petkovich]\n",
+      "|-----------> ⚠️ Check features in adata finished [0.0161s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is petkovich\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0033s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0020s]\n",
+      "|-----> 🕒 Processing clock: stubbs\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/stubbs.pt\n",
+      "|-----------> ✅ Load clock finished [0.4679s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------? 8889 out of 17992 features (49.41%) are missing: ['chr1:10038066', 'chr1:106173313', 'chr1:106759301'], etc.\n",
+      "|-----------------> Using reference feature values for stubbs\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_stubbs]\n",
+      "|-----------> ⚠️ Check features in adata finished [0.8672s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> The preprocessing method is quantile_normalization_and_scale_with_gold_standard\n",
+      "|-----------------> The postprocessing method is stubbs\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0263s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0014s]\n",
+      "|-----> 🎉 Done! [3.2757s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.pred.predict_age(adata, ['Thompson', 'Meer', 'Petkovich', 'Stubbs'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3978afec-40a0-4e1f-8ff6-1d048da8a894",
+   "metadata": {},
+   "source": [
+    "All of the age predictions are in units of months."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "b04572f9-23dd-4eb1-8e84-16a9b25c2d6a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>thompson</th>\n",
+       "      <th>meer</th>\n",
+       "      <th>petkovich</th>\n",
+       "      <th>stubbs</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>GSM3752631</th>\n",
+       "      <td>19.634113</td>\n",
+       "      <td>7.315183</td>\n",
+       "      <td>8.075177</td>\n",
+       "      <td>0.957770</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752625</th>\n",
+       "      <td>-1.410461</td>\n",
+       "      <td>0.028221</td>\n",
+       "      <td>2.953822</td>\n",
+       "      <td>-0.074265</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752634</th>\n",
+       "      <td>61.058783</td>\n",
+       "      <td>21.322178</td>\n",
+       "      <td>9.640489</td>\n",
+       "      <td>1.389193</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752620</th>\n",
+       "      <td>-2.663815</td>\n",
+       "      <td>1.611947</td>\n",
+       "      <td>3.019351</td>\n",
+       "      <td>-0.092710</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752622</th>\n",
+       "      <td>20.594114</td>\n",
+       "      <td>7.592145</td>\n",
+       "      <td>7.104766</td>\n",
+       "      <td>0.667168</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             thompson       meer  petkovich    stubbs\n",
+       "GSM3752631  19.634113   7.315183   8.075177  0.957770\n",
+       "GSM3752625  -1.410461   0.028221   2.953822 -0.074265\n",
+       "GSM3752634  61.058783  21.322178   9.640489  1.389193\n",
+       "GSM3752620  -2.663815   1.611947   3.019351 -0.092710\n",
+       "GSM3752622  20.594114   7.592145   7.104766  0.667168"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "4bb259c5-2cba-4dc1-b123-2387a5bb7749",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>thompson</th>\n",
+       "      <th>meer</th>\n",
+       "      <th>petkovich</th>\n",
+       "      <th>stubbs</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>GSM3752631</th>\n",
+       "      <td>19.634113</td>\n",
+       "      <td>7.315183</td>\n",
+       "      <td>8.075177</td>\n",
+       "      <td>0.957770</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752625</th>\n",
+       "      <td>-1.410461</td>\n",
+       "      <td>0.028221</td>\n",
+       "      <td>2.953822</td>\n",
+       "      <td>-0.074265</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752634</th>\n",
+       "      <td>61.058783</td>\n",
+       "      <td>21.322178</td>\n",
+       "      <td>9.640489</td>\n",
+       "      <td>1.389193</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752620</th>\n",
+       "      <td>-2.663815</td>\n",
+       "      <td>1.611947</td>\n",
+       "      <td>3.019351</td>\n",
+       "      <td>-0.092710</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752622</th>\n",
+       "      <td>20.594114</td>\n",
+       "      <td>7.592145</td>\n",
+       "      <td>7.104766</td>\n",
+       "      <td>0.667168</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             thompson       meer  petkovich    stubbs\n",
+       "GSM3752631  19.634113   7.315183   8.075177  0.957770\n",
+       "GSM3752625  -1.410461   0.028221   2.953822 -0.074265\n",
+       "GSM3752634  61.058783  21.322178   9.640489  1.389193\n",
+       "GSM3752620  -2.663815   1.611947   3.019351 -0.092710\n",
+       "GSM3752622  20.594114   7.592145   7.104766  0.667168"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8519affc-ffc8-4904-ad7a-bd6a6d6458cf",
+   "metadata": {},
+   "source": [
+    "Having so much information printed can be overwhelming, particularly when running several clocks at once. In such cases, just set verbose to False."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "18b44cfa-36d5-49c9-badf-7ba9e189bbc0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pya.data.download_example_data('GSE130735', verbose=False)\n",
+    "df = pd.read_pickle('pyaging_data/GSE130735_subset.pkl')\n",
+    "adata = pya.preprocess.df_to_adata(df, imputer_strategy='mean', verbose=False)\n",
+    "pya.pred.predict_age(adata, ['Thompson', 'Meer', 'Petkovich', 'Stubbs'], verbose=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "a2520978-b693-474f-88cf-91bcde1a5d95",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>thompson</th>\n",
+       "      <th>meer</th>\n",
+       "      <th>petkovich</th>\n",
+       "      <th>stubbs</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>GSM3752631</th>\n",
+       "      <td>19.634113</td>\n",
+       "      <td>7.315183</td>\n",
+       "      <td>8.075177</td>\n",
+       "      <td>0.957770</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752625</th>\n",
+       "      <td>-1.410461</td>\n",
+       "      <td>0.028221</td>\n",
+       "      <td>2.953822</td>\n",
+       "      <td>-0.074265</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752634</th>\n",
+       "      <td>61.058783</td>\n",
+       "      <td>21.322178</td>\n",
+       "      <td>9.640489</td>\n",
+       "      <td>1.389193</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752620</th>\n",
+       "      <td>-2.663815</td>\n",
+       "      <td>1.611947</td>\n",
+       "      <td>3.019351</td>\n",
+       "      <td>-0.092710</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752622</th>\n",
+       "      <td>20.594114</td>\n",
+       "      <td>7.592145</td>\n",
+       "      <td>7.104766</td>\n",
+       "      <td>0.667168</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             thompson       meer  petkovich    stubbs\n",
+       "GSM3752631  19.634113   7.315183   8.075177  0.957770\n",
+       "GSM3752625  -1.410461   0.028221   2.953822 -0.074265\n",
+       "GSM3752634  61.058783  21.322178   9.640489  1.389193\n",
+       "GSM3752620  -2.663815   1.611947   3.019351 -0.092710\n",
+       "GSM3752622  20.594114   7.592145   7.104766  0.667168"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "33119798-f1b3-4c4c-9f18-e4e4b7ca21e8",
+   "metadata": {},
+   "source": [
+    "After age prediction, the clocks are added to `adata.obs`. Moreover, the percent of missing values for each clock and other metadata are included in `adata.uns`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "92cfc16e-71ff-4767-9c75-04e52455eb6c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AnnData object with n_obs × n_vars = 14 × 1778324\n",
+       "    obs: 'thompson', 'meer', 'petkovich', 'stubbs'\n",
+       "    var: 'percent_na'\n",
+       "    uns: 'imputer_strategy', 'thompson_percent_na', 'thompson_missing_features', 'thompson_metadata', 'meer_percent_na', 'meer_missing_features', 'meer_metadata', 'petkovich_percent_na', 'petkovich_missing_features', 'petkovich_metadata', 'stubbs_percent_na', 'stubbs_missing_features', 'stubbs_metadata'\n",
+       "    layers: 'X_original', 'X_imputed'"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "569b1ef8-dd55-45e4-8792-e919dc207808",
+   "metadata": {},
+   "source": [
+    "## Predict age with mammalian clocks"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b9266699-881f-41c7-9a13-c38307527bfa",
+   "metadata": {},
+   "source": [
+    "We can predict age by converting the genomic locations directly into the probes from Horvath's methylation array. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "a5f71a02-cbc0-4cd0-a123-e7226307ae84",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "os.system('git clone https://github.com/shorvath/MammalianMethylationConsortium.git')\n",
+    "\n",
+    "# Let's read the manifest from the mammalian consortium\n",
+    "annotation_df = pd.read_csv('MammalianMethylationConsortium/Annotations, Amin Haghani/Mammals/Mus_musculus.grcm38.100.HorvathMammalMethylChip40.v1.csv', index_col=0)\n",
+    "annotation_df = annotation_df[~annotation_df.seqnames.isna()]\n",
+    "mm_genomic_locations = 'chr' + annotation_df['seqnames'].astype(str) + ':' + annotation_df['CGstart'].astype(int).astype(str)\n",
+    "mm_genomic_locations = mm_genomic_locations.tolist()\n",
+    "mammalian_probes = annotation_df['CGid'].tolist()\n",
+    "mm_loc_to_probe = dict(zip(mm_genomic_locations, mammalian_probes))\n",
+    "\n",
+    "# Let's get the previous RRBS dataset and filter only for the genomic locations in the manifest file\n",
+    "df_columns_set = set(df.columns)\n",
+    "mm_loc_to_probe_set = set(mm_loc_to_probe.keys())\n",
+    "common_columns = df_columns_set.intersection(mm_loc_to_probe_set)\n",
+    "df_converted = df[list(common_columns)].copy()\n",
+    "\n",
+    "# Then, convert the genomic location to the probe name\n",
+    "df_converted.columns = [mm_loc_to_probe[col] for col in df_converted.columns]\n",
+    "\n",
+    "# Let's clean up by removing the cloned GitHub repository\n",
+    "os.system('rm -r MammalianMethylationConsortium')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "6989844c-736d-4637-acfc-8f8f7ae58108",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>cg05347424</th>\n",
+       "      <th>cg26718996</th>\n",
+       "      <th>cg07727941</th>\n",
+       "      <th>cg16852837</th>\n",
+       "      <th>cg12870762</th>\n",
+       "      <th>cg26080798</th>\n",
+       "      <th>cg02899039</th>\n",
+       "      <th>cg12839061</th>\n",
+       "      <th>cg05267150</th>\n",
+       "      <th>cg13170453</th>\n",
+       "      <th>...</th>\n",
+       "      <th>cg02179016</th>\n",
+       "      <th>cg20836420</th>\n",
+       "      <th>cg18831685</th>\n",
+       "      <th>cg08992395</th>\n",
+       "      <th>cg13679010</th>\n",
+       "      <th>cg12982463</th>\n",
+       "      <th>cg17146242</th>\n",
+       "      <th>cg13649253</th>\n",
+       "      <th>cg07588415</th>\n",
+       "      <th>cg14814195</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>GSM3752631</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.015</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.005</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.023</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.028</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.018</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.021</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752625</th>\n",
+       "      <td>0.938</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.596</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.895</td>\n",
+       "      <td>0.227</td>\n",
+       "      <td>0.156</td>\n",
+       "      <td>0.025</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752634</th>\n",
+       "      <td>0.125</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.627</td>\n",
+       "      <td>0.017</td>\n",
+       "      <td>0.033</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.745</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.495</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.014</td>\n",
+       "      <td>0.278</td>\n",
+       "      <td>0.519</td>\n",
+       "      <td>0.786</td>\n",
+       "      <td>0.012</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752620</th>\n",
+       "      <td>0.769</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.091</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.070</td>\n",
+       "      <td>0.006</td>\n",
+       "      <td>0.012</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.607</td>\n",
+       "      <td>0.092</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.010</td>\n",
+       "      <td>0.054</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.933</td>\n",
+       "      <td>0.277</td>\n",
+       "      <td>0.148</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752622</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.052</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.064</td>\n",
+       "      <td>0.022</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 5149 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            cg05347424  cg26718996  cg07727941  cg16852837  cg12870762  \\\n",
+       "GSM3752631         NaN         NaN       0.000         0.0       0.015   \n",
+       "GSM3752625       0.938         NaN       0.000         NaN       0.000   \n",
+       "GSM3752634       0.125         NaN       0.000         NaN       0.627   \n",
+       "GSM3752620       0.769         NaN       0.091         0.0       0.070   \n",
+       "GSM3752622         NaN         NaN       0.000         NaN       0.000   \n",
+       "\n",
+       "            cg26080798  cg02899039  cg12839061  cg05267150  cg13170453  ...  \\\n",
+       "GSM3752631       0.000       0.005         NaN       0.023       0.000  ...   \n",
+       "GSM3752625       0.000       0.000         NaN       0.596         NaN  ...   \n",
+       "GSM3752634       0.017       0.033         NaN       0.745         NaN  ...   \n",
+       "GSM3752620       0.006       0.012         NaN       0.607       0.092  ...   \n",
+       "GSM3752622       0.000       0.000         NaN       0.052         NaN  ...   \n",
+       "\n",
+       "            cg02179016  cg20836420  cg18831685  cg08992395  cg13679010  \\\n",
+       "GSM3752631         NaN       0.000       0.028         NaN       0.000   \n",
+       "GSM3752625         NaN       0.000         NaN         NaN       0.000   \n",
+       "GSM3752634         NaN       0.495         NaN         NaN       0.014   \n",
+       "GSM3752620         NaN       0.010       0.054         NaN       0.000   \n",
+       "GSM3752622         NaN       0.000         NaN         NaN       0.000   \n",
+       "\n",
+       "            cg12982463  cg17146242  cg13649253  cg07588415  cg14814195  \n",
+       "GSM3752631       0.000       0.018       0.000       0.021         NaN  \n",
+       "GSM3752625       0.895       0.227       0.156       0.025         NaN  \n",
+       "GSM3752634       0.278       0.519       0.786       0.012         NaN  \n",
+       "GSM3752620       0.933       0.277       0.148       0.000         NaN  \n",
+       "GSM3752622       0.000       0.064       0.022       0.000         NaN  \n",
+       "\n",
+       "[5 rows x 5149 columns]"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_converted.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e46ca085-026e-4e02-b316-97b880125507",
+   "metadata": {},
+   "source": [
+    "Now we can finally put the dataframe into pyaging after defining the species as Mus musculus."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "10bbdf6e-a63a-4c18-bc97-e0872fb9895f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting df_to_adata function\n",
+      "|-----> ⚙️ Create anndata object started\n",
+      "|-----> ✅ Create anndata object finished [0.0057s]\n",
+      "|-----> ⚙️ Add metadata to anndata started\n",
+      "|-----------? No metadata provided. Leaving adata.obs empty\n",
+      "|-----> ⚠️ Add metadata to anndata finished [0.0006s]\n",
+      "|-----> ⚙️ Log data statistics started\n",
+      "|-----------> There are 14 observations\n",
+      "|-----------> There are 5150 features\n",
+      "|-----------> Total missing values: 17862\n",
+      "|-----------> Percentage of missing values: 24.77%\n",
+      "|-----> ✅ Log data statistics finished [0.0013s]\n",
+      "|-----> ⚙️ Impute missing values started\n",
+      "|-----------> Imputing missing values using mean strategy\n",
+      "|-----> ✅ Impute missing values finished [0.0060s]\n",
+      "|-----> ⚙️ Add imputer strategy to adata.uns started\n",
+      "|-----> ✅ Add imputer strategy to adata.uns finished [0.0004s]\n",
+      "|-----> 🎉 Done! [0.0174s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "df_converted['Mus musculus'] = 1\n",
+    "adata_mammalian = pya.pp.df_to_adata(df_converted, imputer_strategy='mean')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7edae9ce-a0bc-4b78-8575-23745714b42b",
+   "metadata": {},
+   "source": [
+    "Let's use these five mammalian predictors."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "56dc5c27-e793-4343-85e2-9ce30a365d64",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting predict_age function\n",
+      "|-----> ⚙️ Set PyTorch device started\n",
+      "|-----------> Using device: cpu\n",
+      "|-----> ✅ Set PyTorch device finished [0.0013s]\n",
+      "|-----> 🕒 Processing clock: mammalian1\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/mammalian1.pt\n",
+      "|-----------> ✅ Load clock finished [0.4780s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------? 274 out of 335 features (81.79%) are missing: ['cg00249943', 'cg00250826', 'cg00292639'], etc.\n",
+      "|-----------------> Filling missing features entirely with 0\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_mammalian1]\n",
+      "|-----------> ⚠️ Check features in adata finished [0.0173s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is anti_logp2\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0083s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0017s]\n",
+      "|-----> 🕒 Processing clock: mammalian2\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/mammalian2.pt\n",
+      "|-----------> ✅ Load clock finished [0.4544s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------? 2406 out of 2572 features (93.55%) are missing: ['cg00020468', 'cg00096922', 'cg00098422'], etc.\n",
+      "|-----------------> Using reference feature values for mammalian2\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_mammalian2]\n",
+      "|-----------> ⚠️ Check features in adata finished [0.0407s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is mammalian2\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0196s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0006s]\n",
+      "|-----> 🕒 Processing clock: mammalian3\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/mammalian3.pt\n",
+      "|-----------> ✅ Load clock finished [0.5081s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------? 2299 out of 2467 features (93.19%) are missing: ['cg00101675', 'cg06259996', 'cg15168457'], etc.\n",
+      "|-----------------> Using reference feature values for mammalian3\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_mammalian3]\n",
+      "|-----------> ⚠️ Check features in adata finished [0.0222s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is mammalian3\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0095s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0006s]\n",
+      "|-----> 🕒 Processing clock: mammalianlifespan\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/mammalianlifespan.pt\n",
+      "|-----------> ✅ Load clock finished [0.4420s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------? 133 out of 152 features (87.50%) are missing: ['cg00039845', 'cg00300233', 'cg00810217'], etc.\n",
+      "|-----------------> Using reference feature values for mammalianlifespan\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_mammalianlifespan]\n",
+      "|-----------> ⚠️ Check features in adata finished [0.0043s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> There is no postprocessing necessary\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0018s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0009s]\n",
+      "|-----> 🕒 Processing clock: mammalianfemale\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/mammalianfemale.pt\n",
+      "|-----------> ✅ Load clock finished [0.4532s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------? 73 out of 101 features (72.28%) are missing: ['cg01145947', 'cg02053792', 'cg02407848'], etc.\n",
+      "|-----------------> Filling missing features entirely with 0\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_mammalianfemale]\n",
+      "|-----------> ⚠️ Check features in adata finished [0.0135s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> There is no preprocessing necessary\n",
+      "|-----------------> The postprocessing method is sigmoid\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0051s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0024s]\n",
+      "|-----> 🎉 Done! [3.0755s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.pred.predict_age(adata_mammalian, ['Mammalian1', 'Mammalian2', 'Mammalian3', \"MammalianLifespan\", \"MammalianFemale\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "de8ed8d2-213a-4ca9-a0d0-e141ba7d1789",
+   "metadata": {},
+   "source": [
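+    "Note that the RRBS clocks are in units of months, whereas the Mammalian1, Mammalian2, and Mammalian3 clocks are in units of years. `MammalianFemale` uses a sigmoid postprocessing and therefore outputs a value between 0 and 1 rather than an age, and `MammalianLifespan` comes from a study of epigenetic predictors of species maximum lifespan, so it should not be read as the age of a sample either."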
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "32c3c7fe-acbb-4fe8-b438-5c664a533c41",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mammalian1</th>\n",
+       "      <th>mammalian2</th>\n",
+       "      <th>mammalian3</th>\n",
+       "      <th>mammalianlifespan</th>\n",
+       "      <th>mammalianfemale</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>GSM3752631</th>\n",
+       "      <td>2.537895</td>\n",
+       "      <td>-0.009800</td>\n",
+       "      <td>-0.048414</td>\n",
+       "      <td>1.202134</td>\n",
+       "      <td>0.732238</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752625</th>\n",
+       "      <td>3.353935</td>\n",
+       "      <td>0.064448</td>\n",
+       "      <td>-0.048203</td>\n",
+       "      <td>1.480080</td>\n",
+       "      <td>0.952105</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752634</th>\n",
+       "      <td>4.490610</td>\n",
+       "      <td>0.813899</td>\n",
+       "      <td>-0.035059</td>\n",
+       "      <td>1.400278</td>\n",
+       "      <td>0.978554</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752620</th>\n",
+       "      <td>3.603802</td>\n",
+       "      <td>0.122934</td>\n",
+       "      <td>-0.046958</td>\n",
+       "      <td>1.644701</td>\n",
+       "      <td>0.948646</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752622</th>\n",
+       "      <td>2.951263</td>\n",
+       "      <td>0.005617</td>\n",
+       "      <td>-0.047725</td>\n",
+       "      <td>1.385282</td>\n",
+       "      <td>0.741336</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752637</th>\n",
+       "      <td>5.718515</td>\n",
+       "      <td>0.895781</td>\n",
+       "      <td>-0.037649</td>\n",
+       "      <td>1.399769</td>\n",
+       "      <td>0.975114</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4558216</th>\n",
+       "      <td>7.456245</td>\n",
+       "      <td>0.684752</td>\n",
+       "      <td>-0.012195</td>\n",
+       "      <td>1.448119</td>\n",
+       "      <td>0.785830</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752643</th>\n",
+       "      <td>5.881943</td>\n",
+       "      <td>0.880053</td>\n",
+       "      <td>-0.037656</td>\n",
+       "      <td>1.408483</td>\n",
+       "      <td>0.969979</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4558213</th>\n",
+       "      <td>6.720080</td>\n",
+       "      <td>0.855574</td>\n",
+       "      <td>-0.026211</td>\n",
+       "      <td>1.477236</td>\n",
+       "      <td>0.821308</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752640</th>\n",
+       "      <td>6.452934</td>\n",
+       "      <td>0.766701</td>\n",
+       "      <td>-0.031763</td>\n",
+       "      <td>1.371831</td>\n",
+       "      <td>0.938162</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4558222</th>\n",
+       "      <td>5.049247</td>\n",
+       "      <td>0.120607</td>\n",
+       "      <td>-0.045109</td>\n",
+       "      <td>1.366729</td>\n",
+       "      <td>0.789795</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM4558219</th>\n",
+       "      <td>6.098710</td>\n",
+       "      <td>0.857243</td>\n",
+       "      <td>-0.029878</td>\n",
+       "      <td>1.422144</td>\n",
+       "      <td>0.796568</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752628</th>\n",
+       "      <td>2.746949</td>\n",
+       "      <td>0.098284</td>\n",
+       "      <td>-0.047989</td>\n",
+       "      <td>1.480599</td>\n",
+       "      <td>0.794259</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GSM3752617</th>\n",
+       "      <td>2.739868</td>\n",
+       "      <td>0.078172</td>\n",
+       "      <td>-0.048249</td>\n",
+       "      <td>1.484091</td>\n",
+       "      <td>0.807197</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            mammalian1  mammalian2  mammalian3  mammalianlifespan  \\\n",
+       "GSM3752631    2.537895   -0.009800   -0.048414           1.202134   \n",
+       "GSM3752625    3.353935    0.064448   -0.048203           1.480080   \n",
+       "GSM3752634    4.490610    0.813899   -0.035059           1.400278   \n",
+       "GSM3752620    3.603802    0.122934   -0.046958           1.644701   \n",
+       "GSM3752622    2.951263    0.005617   -0.047725           1.385282   \n",
+       "GSM3752637    5.718515    0.895781   -0.037649           1.399769   \n",
+       "GSM4558216    7.456245    0.684752   -0.012195           1.448119   \n",
+       "GSM3752643    5.881943    0.880053   -0.037656           1.408483   \n",
+       "GSM4558213    6.720080    0.855574   -0.026211           1.477236   \n",
+       "GSM3752640    6.452934    0.766701   -0.031763           1.371831   \n",
+       "GSM4558222    5.049247    0.120607   -0.045109           1.366729   \n",
+       "GSM4558219    6.098710    0.857243   -0.029878           1.422144   \n",
+       "GSM3752628    2.746949    0.098284   -0.047989           1.480599   \n",
+       "GSM3752617    2.739868    0.078172   -0.048249           1.484091   \n",
+       "\n",
+       "            mammalianfemale  \n",
+       "GSM3752631         0.732238  \n",
+       "GSM3752625         0.952105  \n",
+       "GSM3752634         0.978554  \n",
+       "GSM3752620         0.948646  \n",
+       "GSM3752622         0.741336  \n",
+       "GSM3752637         0.975114  \n",
+       "GSM4558216         0.785830  \n",
+       "GSM3752643         0.969979  \n",
+       "GSM4558213         0.821308  \n",
+       "GSM3752640         0.938162  \n",
+       "GSM4558222         0.789795  \n",
+       "GSM4558219         0.796568  \n",
+       "GSM3752628         0.794259  \n",
+       "GSM3752617         0.807197  "
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata_mammalian.obs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "615f8fbf-f5e1-4af9-a2a0-5f4f781001fe",
+   "metadata": {},
+   "source": [
+    "## Get citation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0fe55edd-9271-4b41-857d-ef3fceafc2a6",
+   "metadata": {},
+   "source": [
+    "The DOI, citation, and other metadata of each clock are automatically added to the AnnData object under `adata.uns['CLOCKNAME_metadata']`, where `CLOCKNAME` is the lowercase name of the clock."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "cdf4c609-7a24-4c3f-a891-647315b77d54",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'thompson',\n",
+       " 'data_type': 'methylation',\n",
+       " 'species': 'Mus musculus',\n",
+       " 'year': 2018,\n",
+       " 'approved_by_author': '✅',\n",
+       " 'citation': 'Thompson, Michael J., et al. \"A multi-tissue full lifespan epigenetic clock for mice.\" Aging (Albany NY) 10.10 (2018): 2832.',\n",
+       " 'doi': 'https://doi.org/10.18632/aging.101590',\n",
+       " 'notes': None,\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.uns['thompson_metadata']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "466b1bc5-6207-469e-b479-260bbf55f2a7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'meer',\n",
+       " 'data_type': 'methylation',\n",
+       " 'species': 'Mus musculus',\n",
+       " 'year': 2018,\n",
+       " 'approved_by_author': '⌛',\n",
+       " 'citation': 'Meer, Margarita V., et al. \"A whole lifespan mouse multi-tissue DNA methylation clock.\" Elife 7 (2018): e40675.',\n",
+       " 'doi': 'https://doi.org/10.7554/eLife.40675',\n",
+       " 'notes': None,\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.uns['meer_metadata']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "2808782d-04d3-4527-8328-b18a583cf15b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'petkovich',\n",
+       " 'data_type': 'methylation',\n",
+       " 'species': 'Mus musculus',\n",
+       " 'year': 2017,\n",
+       " 'approved_by_author': '⌛',\n",
+       " 'citation': 'Petkovich, Daniel A., et al. \"Using DNA methylation profiling to evaluate biological age and longevity interventions.\" Cell metabolism 25.4 (2017): 954-960.',\n",
+       " 'doi': 'https://doi.org/10.1016/j.cmet.2017.03.016',\n",
+       " 'notes': None,\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.uns['petkovich_metadata']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "ec210b8b-9ba1-45df-9b16-80a5f5ac86f4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'stubbs',\n",
+       " 'data_type': 'methylation',\n",
+       " 'species': 'Mus musculus',\n",
+       " 'year': 2017,\n",
+       " 'approved_by_author': '⌛',\n",
+       " 'citation': 'Stubbs, Thomas M., et al. \"Multi-tissue DNA methylation age predictor in mouse.\" Genome biology 18 (2017): 1-14.',\n",
+       " 'doi': 'https://doi.org/10.1186/s13059-017-1203-5',\n",
+       " 'notes': None,\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.uns['stubbs_metadata']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "78ed8018-bfdc-42b7-b814-d3a55184fc05",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'mammalian1',\n",
+       " 'data_type': 'methylation',\n",
+       " 'species': 'multi',\n",
+       " 'year': 2023,\n",
+       " 'approved_by_author': '⌛',\n",
+       " 'citation': 'Lu, A. T., et al. \"Universal DNA methylation age across mammalian tissues.\" Nature aging 3.9 (2023): 1144-1166.',\n",
+       " 'doi': 'https://doi.org/10.1038/s43587-023-00462-6',\n",
+       " 'notes': None,\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata_mammalian.uns['mammalian1_metadata']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "f4b9c4c7-d23b-4850-abf2-5f9a092c7bc4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'mammalianlifespan',\n",
+       " 'data_type': 'methylation',\n",
+       " 'species': 'multi',\n",
+       " 'year': 2023,\n",
+       " 'approved_by_author': '⌛',\n",
+       " 'citation': 'Li, Caesar Z., et al. \"Epigenetic predictors of species maximum lifespan and other life history traits in mammals.\" bioRxiv (2023): 2023-11.',\n",
+       " 'doi': 'https://doi.org/10.1101/2023.11.02.565286',\n",
+       " 'notes': None,\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata_mammalian.uns['mammalianlifespan_metadata']"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.17"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/source/tutorials/tutorial_histonemarkchipseq.ipynb b/docs/source/tutorials/tutorial_histonemarkchipseq.ipynb
new file mode 100644
index 0000000..39d0a1c
--- /dev/null
+++ b/docs/source/tutorials/tutorial_histonemarkchipseq.ipynb
@@ -0,0 +1,298 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "a76ae282-3b11-4246-8292-a9276267832d",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_histonemarkchipseq.ipynb) [![Open In nbviewer](https://img.shields.io/badge/View%20in-nbviewer-orange)](https://nbviewer.jupyter.org/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_histonemarkchipseq.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d444a24e-6a98-4db1-8688-7f3f80ed2876",
+   "metadata": {},
+   "source": [
+    "# Bulk histone mark ChIP-Seq"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "186154f3-1c8d-4284-a5a4-01f28d4db533",
+   "metadata": {},
+   "source": [
+    "This tutorial is a brief guide for the implementation of the seven histone-mark-specific clocks and the pan-histone-mark clock that we developed. Link to [preprint](https://www.biorxiv.org/content/10.1101/2023.08.21.554165v3)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "270379c1-9159-4677-92fa-10b08aa9f703",
+   "metadata": {},
+   "source": [
+    "We just need two packages for this tutorial."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "dd281360-7e16-45d9-ae2b-8f8f3fff809d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import pyaging as pya"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b6893601-615e-449b-829b-c144276f402f",
+   "metadata": {},
+   "source": [
+    "## Download and load example data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fd3e80a9-5361-40f0-bf3e-6f6057181594",
+   "metadata": {},
+   "source": [
+    "Let's download an example H3K4me3 ChIP-Seq bigWig file from the ENCODE project."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "85c15bf3-6cf1-4f71-abf2-d0d7ee81b86b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting download_example_data function\n",
+      "|-----------> Downloading data to pyaging_data/ENCFF386QWG.bigWig\n",
+      "|-----------> in progress: 24.0057%"
+     ]
+    }
+   ],
+   "source": [
+    "pya.data.download_example_data('ENCFF386QWG')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3880246a-471e-4f75-bd2f-ed2623458a48",
+   "metadata": {},
+   "source": [
+    "To show that multiple bigWig files can be converted into a single DataFrame at once, let's simply pass the same file path twice."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f65f5cc7-4c42-45a5-a04e-83e0520eccff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pya.pp.bigwig_to_df(['pyaging_data/ENCFF386QWG.bigWig', 'pyaging_data/ENCFF386QWG.bigWig'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1a24e0a5-f97f-4f01-95a7-dd96246d9eb2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.index = ['sample1', 'sample2'] # just to avoid an annoying anndata warning that samples have same names"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "769858ac-9d6d-43f8-9c53-0f4a88c5484c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e303dc0f-9e77-4524-9c04-90540e9ee75d",
+   "metadata": {},
+   "source": [
+    "## Convert data to AnnData object"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ae8e44bc-67fc-4508-9623-faea44301fa8",
+   "metadata": {},
+   "source": [
+    "AnnData objects are highly flexible and are thus our preferred method of organizing data for age prediction."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6c167be6-1bd3-407c-ae12-771739189c3c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adata = pya.preprocess.df_to_adata(df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3f82813b-3db2-4570-9e4c-3dce08dc5108",
+   "metadata": {},
+   "source": [
+    "Note that the original DataFrame is stored in `X_original` under layers. This is what the `adata` object looks like:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "641a61a6-46fc-4d47-b176-eb39524ce94f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c72aa719-efd3-4094-90f5-bffcaea76a34",
+   "metadata": {},
+   "source": [
+    "## Predict age"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "aff9395b-4954-4148-9cbb-6681e7217cf3",
+   "metadata": {},
+   "source": [
+    "We can either predict one clock at a time or all at the same time. For convenience, let's simply input a few clocks of interest at once. The function is invariant to the capitalization of the clock name."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c02455b4-06dd-44c2-b4b3-a2bb434eae7d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pya.pred.predict_age(adata, ['CamilloH3K4me3', 'CamilloH3K9me3', 'CamilloPanHistone'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f64fb182-937b-4f67-b58e-5fffb0e2fad0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bbaa2243-e380-4020-bf04-f7aa7da83cd4",
+   "metadata": {},
+   "source": [
+    "Having so much information printed can be overwhelming, particularly when running several clocks at once. In such cases, just set verbose to False."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e8dd3457-8983-41a4-aaab-41563b91a866",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pya.data.download_example_data('ENCFF386QWG', verbose=False)\n",
+    "df = pya.pp.bigwig_to_df(['pyaging_data/ENCFF386QWG.bigWig', 'pyaging_data/ENCFF386QWG.bigWig'], verbose=False)\n",
+    "df.index = ['sample1', 'sample2']\n",
+    "adata = pya.preprocess.df_to_adata(df, verbose=False)\n",
+    "pya.pred.predict_age(adata, ['CamilloH3K4me3', 'CamilloH3K9me3', 'CamilloPanHistone'], verbose=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8192ab67-a1cc-4728-8ca0-f81a56940fbf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9832aa0b-99a8-4938-a2a2-5e9b484a3353",
+   "metadata": {},
+   "source": [
+    "After age prediction, the clocks are added to `adata.obs`. Moreover, the percent of missing values for each clock and other metadata are included in `adata.uns`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a4b22bf1-116f-456f-82d2-58b300f863f1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c08ff758-675c-4136-9fb8-c19f0e05fefd",
+   "metadata": {},
+   "source": [
+    "## Get citation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8407c418-6251-4b08-9d29-166f9a4339d2",
+   "metadata": {},
+   "source": [
+    "The DOI, citation, and some metadata are automatically added to the AnnData object under `adata.uns['<clockname>_metadata']`, where `<clockname>` is the lowercase clock name."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2946393e-a199-46ba-a9dd-80bc8fa88787",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adata.uns['camilloh3k4me3_metadata']"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.17"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/source/tutorials/tutorial_rnaseq.ipynb b/docs/source/tutorials/tutorial_rnaseq.ipynb
new file mode 100644
index 0000000..398eeec
--- /dev/null
+++ b/docs/source/tutorials/tutorial_rnaseq.ipynb
@@ -0,0 +1,683 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "2089cc5b-a025-4928-a331-ad33fd1b6a85",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_rnaseq.ipynb) [![Open In nbviewer](https://img.shields.io/badge/View%20in-nbviewer-orange)](https://nbviewer.jupyter.org/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_rnaseq.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "31cf37ce-09ee-49d7-a411-719bf65e186e",
+   "metadata": {},
+   "source": [
+    "# Bulk RNA-Seq"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3ea2b570-56af-4e4f-9606-d4c6d071554c",
+   "metadata": {},
+   "source": [
+    "This tutorial is a brief guide for the implementation of BiT Age, a highly accurate bulk transcriptomic clock for C. elegans. Link to [paper](https://onlinelibrary.wiley.com/doi/full/10.1111/acel.13320)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0a093c7d-dea7-4b34-91bf-08cde6c98011",
+   "metadata": {},
+   "source": [
+    "We just need two packages for this tutorial."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "ad192191-e44f-4994-80ad-ab16cdb7c7e8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import pyaging as pya"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d87488d5-731c-469e-ad6f-79c4c9662371",
+   "metadata": {},
+   "source": [
+    "## Download and load example data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4c30471f-89e7-4e92-a176-aa3af14a5274",
+   "metadata": {},
+   "source": [
+    "Let's download the C. elegans RNA-seq dataset from the BiT Age paper."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "55bbd03e-3953-427e-ab7a-4d523e6bc985",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting download_example_data function\n",
+      "|-----------> Data found in pyaging_data/GSE65765_CPM.pkl\n",
+      "|-----> 🎉 Done! [0.5749s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.data.download_example_data('GSE65765')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "13aeb69a-4b0e-40f2-8094-194c9a6b42a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_pickle('pyaging_data/GSE65765_CPM.pkl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "7af12fc3-1418-49df-ba7f-e94730db706e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>WBGene00197333</th>\n",
+       "      <th>WBGene00198386</th>\n",
+       "      <th>WBGene00015153</th>\n",
+       "      <th>WBGene00002061</th>\n",
+       "      <th>WBGene00255704</th>\n",
+       "      <th>WBGene00235314</th>\n",
+       "      <th>WBGene00001177</th>\n",
+       "      <th>WBGene00169236</th>\n",
+       "      <th>WBGene00219784</th>\n",
+       "      <th>WBGene00015152</th>\n",
+       "      <th>...</th>\n",
+       "      <th>WBGene00010964</th>\n",
+       "      <th>WBGene00014467</th>\n",
+       "      <th>WBGene00014468</th>\n",
+       "      <th>WBGene00014469</th>\n",
+       "      <th>WBGene00014470</th>\n",
+       "      <th>WBGene00010965</th>\n",
+       "      <th>WBGene00014471</th>\n",
+       "      <th>WBGene00010966</th>\n",
+       "      <th>WBGene00010967</th>\n",
+       "      <th>WBGene00014473</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>SRR1793993</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3.780174</td>\n",
+       "      <td>169.240815</td>\n",
+       "      <td>1.907427</td>\n",
+       "      <td>0.277444</td>\n",
+       "      <td>59.320986</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>1.283178</td>\n",
+       "      <td>...</td>\n",
+       "      <td>858.949156</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.052021</td>\n",
+       "      <td>234.526846</td>\n",
+       "      <td>0.017340</td>\n",
+       "      <td>54.483057</td>\n",
+       "      <td>78.117815</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SRR1793991</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.510354</td>\n",
+       "      <td>412.628597</td>\n",
+       "      <td>0.061861</td>\n",
+       "      <td>0.061861</td>\n",
+       "      <td>22.239044</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.015465</td>\n",
+       "      <td>0.201048</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1049.982885</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.015465</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.015465</td>\n",
+       "      <td>372.511713</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>54.545971</td>\n",
+       "      <td>59.618577</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SRR1793994</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>4.718708</td>\n",
+       "      <td>274.733671</td>\n",
+       "      <td>1.234644</td>\n",
+       "      <td>0.118391</td>\n",
+       "      <td>42.400721</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.642691</td>\n",
+       "      <td>...</td>\n",
+       "      <td>664.255412</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.101478</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>253.220421</td>\n",
+       "      <td>0.033826</td>\n",
+       "      <td>19.483698</td>\n",
+       "      <td>86.492735</td>\n",
+       "      <td>0.016913</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SRR1793992</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2.389905</td>\n",
+       "      <td>351.612558</td>\n",
+       "      <td>0.505892</td>\n",
+       "      <td>0.069778</td>\n",
+       "      <td>20.497358</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.017445</td>\n",
+       "      <td>1.308342</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1298.799849</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.034889</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>472.206803</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>89.508039</td>\n",
+       "      <td>76.459508</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>4 rows × 46755 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            WBGene00197333  WBGene00198386  WBGene00015153  WBGene00002061  \\\n",
+       "SRR1793993             0.0             0.0        3.780174      169.240815   \n",
+       "SRR1793991             0.0             0.0        0.510354      412.628597   \n",
+       "SRR1793994             0.0             0.0        4.718708      274.733671   \n",
+       "SRR1793992             0.0             0.0        2.389905      351.612558   \n",
+       "\n",
+       "            WBGene00255704  WBGene00235314  WBGene00001177  WBGene00169236  \\\n",
+       "SRR1793993        1.907427        0.277444       59.320986             0.0   \n",
+       "SRR1793991        0.061861        0.061861       22.239044             0.0   \n",
+       "SRR1793994        1.234644        0.118391       42.400721             0.0   \n",
+       "SRR1793992        0.505892        0.069778       20.497358             0.0   \n",
+       "\n",
+       "            WBGene00219784  WBGene00015152  ...  WBGene00010964  \\\n",
+       "SRR1793993        0.000000        1.283178  ...      858.949156   \n",
+       "SRR1793991        0.015465        0.201048  ...     1049.982885   \n",
+       "SRR1793994        0.000000        0.642691  ...      664.255412   \n",
+       "SRR1793992        0.017445        1.308342  ...     1298.799849   \n",
+       "\n",
+       "            WBGene00014467  WBGene00014468  WBGene00014469  WBGene00014470  \\\n",
+       "SRR1793993             0.0        0.000000             0.0        0.052021   \n",
+       "SRR1793991             0.0        0.015465             0.0        0.015465   \n",
+       "SRR1793994             0.0        0.101478             0.0        0.000000   \n",
+       "SRR1793992             0.0        0.034889             0.0        0.000000   \n",
+       "\n",
+       "            WBGene00010965  WBGene00014471  WBGene00010966  WBGene00010967  \\\n",
+       "SRR1793993      234.526846        0.017340       54.483057       78.117815   \n",
+       "SRR1793991      372.511713        0.000000       54.545971       59.618577   \n",
+       "SRR1793994      253.220421        0.033826       19.483698       86.492735   \n",
+       "SRR1793992      472.206803        0.000000       89.508039       76.459508   \n",
+       "\n",
+       "            WBGene00014473  \n",
+       "SRR1793993        0.000000  \n",
+       "SRR1793991        0.000000  \n",
+       "SRR1793994        0.016913  \n",
+       "SRR1793992        0.000000  \n",
+       "\n",
+       "[4 rows x 46755 columns]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "45cbc6e1-9cf7-46a8-ac92-18924a7a5cf8",
+   "metadata": {},
+   "source": [
+    "## Convert data to AnnData object"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ae486006-b533-411b-b449-ff6d2261345a",
+   "metadata": {},
+   "source": [
+    "AnnData objects are highly flexible and are thus our preferred method of organizing data for age prediction."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "74a99c21-67a7-4adb-8ac9-ea404a6c1e02",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting df_to_adata function\n",
+      "|-----> ⚙️ Create anndata object started\n",
+      "|-----> ✅ Create anndata object finished [0.0190s]\n",
+      "|-----> ⚙️ Add metadata to anndata started\n",
+      "|-----------? No metadata provided. Leaving adata.obs empty\n",
+      "|-----> ⚠️ Add metadata to anndata finished [0.0005s]\n",
+      "|-----> ⚙️ Log data statistics started\n",
+      "|-----------> There are 4 observations\n",
+      "|-----------> There are 46755 features\n",
+      "|-----------> Total missing values: 0\n",
+      "|-----------> Percentage of missing values: 0.00%\n",
+      "|-----> ✅ Log data statistics finished [0.0011s]\n",
+      "|-----> ⚙️ Impute missing values started\n",
+      "|-----------> No missing values found. No imputation necessary\n",
+      "|-----> ✅ Impute missing values finished [0.0013s]\n",
+      "|-----> 🎉 Done! [0.0239s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "adata = pya.preprocess.df_to_adata(df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "94035d2e-2e6b-4927-bb2b-0ddcd1b3cd4e",
+   "metadata": {},
+   "source": [
+    "Note that the original DataFrame is stored in `X_original` under layers. This is what the `adata` object looks like:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "5d8b68ec-d3aa-4a10-b7e5-54811bddd68c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AnnData object with n_obs × n_vars = 4 × 46755\n",
+       "    var: 'percent_na'\n",
+       "    layers: 'X_original'"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2277ede6-ab9e-487b-a58d-c01cb21b6b68",
+   "metadata": {},
+   "source": [
+    "## Predict age"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "889d2d5f-a596-41d0-b849-560b6bc856a1",
+   "metadata": {},
+   "source": [
+    "We can either predict one clock at a time or all at the same time. Given we only have one clock of interest for this tutorial, let's go with one. The function is invariant to the capitalization of the clock name."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "ba48641d-ac0d-430c-9905-30a1349b7c50",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting predict_age function\n",
+      "|-----> ⚙️ Set PyTorch device started\n",
+      "|-----------> Using device: cpu\n",
+      "|-----> ✅ Set PyTorch device finished [0.0006s]\n",
+      "|-----> 🕒 Processing clock: bitage\n",
+      "|-----------> ⚙️ Load clock started\n",
+      "|-----------------> Data found in pyaging_data/bitage.pt\n",
+      "|-----------> ✅ Load clock finished [0.5446s]\n",
+      "|-----------> ⚙️ Check features in adata started\n",
+      "|-----------------> All features are present in adata.var_names.\n",
+      "|-----------------> Added prepared input matrix to adata.obsm[X_bitage]\n",
+      "|-----------> ✅ Check features in adata finished [0.0424s]\n",
+      "|-----------> ⚙️ Predict ages with model started\n",
+      "|-----------------> The preprocessing method is binarize\n",
+      "|-----------------> There is no postprocessing necessary\n",
+      "|-----------------> in progress: 100.0000%\n",
+      "|-----------> ✅ Predict ages with model finished [0.0044s]\n",
+      "|-----------> ⚙️ Add predicted ages and clock metadata to adata started\n",
+      "|-----------> ✅ Add predicted ages and clock metadata to adata finished [0.0006s]\n",
+      "|-----> 🎉 Done! [0.6613s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.pred.predict_age(adata, 'BiTAge')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "032382f5-7d98-465e-a3cb-51165eeb7025",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bitage</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>SRR1793993</th>\n",
+       "      <td>182.353658</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SRR1793991</th>\n",
+       "      <td>27.337245</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SRR1793994</th>\n",
+       "      <td>241.629584</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SRR1793992</th>\n",
+       "      <td>32.178003</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                bitage\n",
+       "SRR1793993  182.353658\n",
+       "SRR1793991   27.337245\n",
+       "SRR1793994  241.629584\n",
+       "SRR1793992   32.178003"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2acc80b1-f936-40e4-900a-ef4deb304558",
+   "metadata": {},
+   "source": [
+    "Having so much information printed can be overwhelming, particularly when running several clocks at once. In such cases, just set verbose to False."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "a587f129-a88b-46ec-a249-ac62737a0cb7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pya.data.download_example_data('GSE65765', verbose=False)\n",
+    "df = pd.read_pickle('pyaging_data/GSE65765_CPM.pkl')\n",
+    "adata = pya.preprocess.df_to_adata(df, verbose=False)\n",
+    "pya.pred.predict_age(adata, ['BiTAge'], verbose=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "99fbe406-d076-4979-a2f4-70469755937f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bitage</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>SRR1793993</th>\n",
+       "      <td>182.353658</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SRR1793991</th>\n",
+       "      <td>27.337245</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SRR1793994</th>\n",
+       "      <td>241.629584</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SRR1793992</th>\n",
+       "      <td>32.178003</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                bitage\n",
+       "SRR1793993  182.353658\n",
+       "SRR1793991   27.337245\n",
+       "SRR1793994  241.629584\n",
+       "SRR1793992   32.178003"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.obs.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "25aedb7e-5cff-42da-a0ea-cc0780395ea7",
+   "metadata": {},
+   "source": [
+    "After age prediction, the clocks are added to `adata.obs`. Moreover, the percent of missing values for each clock and other metadata are included in `adata.uns`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "61dcb82f-e7f0-4064-8e67-b47b07b48a55",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AnnData object with n_obs × n_vars = 4 × 46755\n",
+       "    obs: 'bitage'\n",
+       "    var: 'percent_na'\n",
+       "    uns: 'bitage_percent_na', 'bitage_missing_features', 'bitage_metadata'\n",
+       "    layers: 'X_original'"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1a73e164-a610-4cb6-93f5-6f8ac7d8d56f",
+   "metadata": {},
+   "source": [
+    "## Get citation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6c7a070c-c448-4ad7-ae0b-21857dafd00e",
+   "metadata": {},
+   "source": [
+    "The DOI, citation, and some metadata are automatically added to the AnnData object under `adata.uns['<clockname>_metadata']`, where `<clockname>` is the lowercase clock name."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "9908d25a-9639-4684-9da6-353c7eb4a555",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'bitage',\n",
+       " 'data_type': 'transcriptomics',\n",
+       " 'species': 'C elegans',\n",
+       " 'year': 2021,\n",
+       " 'approved_by_author': '✅',\n",
+       " 'citation': 'Meyer, David H., and Björn Schumacher. \"BiT age: A transcriptome‐based aging clock near the theoretical limit of accuracy.\" Aging cell 20.3 (2021): e13320.',\n",
+       " 'doi': 'https://doi.org/10.1111/acel.13320',\n",
+       " 'notes': None,\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata.uns['bitage_metadata']"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.17"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/source/tutorials/tutorial_utils.ipynb b/docs/source/tutorials/tutorial_utils.ipynb
new file mode 100644
index 0000000..17a6066
--- /dev/null
+++ b/docs/source/tutorials/tutorial_utils.ipynb
@@ -0,0 +1,522 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "cfeef651-0cae-4d14-b011-3b78c46fa2e4",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_utils.ipynb) [![Open In nbviewer](https://img.shields.io/badge/View%20in-nbviewer-orange)](https://nbviewer.jupyter.org/github/rsinghlab/pyaging/blob/main/tutorials/tutorial_utils.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c5aac698-f7ed-4489-8c6c-f75ec3f3df73",
+   "metadata": {},
+   "source": [
+    "# Search, cite, get metadata and clock parameters"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9d132efc-15f4-4b4d-bdd7-c1aa21f7d13e",
+   "metadata": {},
+   "source": [
+    "This tutorial shows the use of some `pyaging` helper functions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "445d6e41-c30f-4cbc-b5b3-97dc1020f09e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pyaging as pya"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "72174a28-ddef-4ab1-ad7b-7c306920df23",
+   "metadata": {},
+   "source": [
+    "## Search"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "aac87908-def9-40d9-a2cc-ffb34fae2dc5",
+   "metadata": {},
+   "source": [
+    "There are two main ways to search for a clock in `pyaging`. The first is through the DOI of the paper in which the clock was developed."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "d5281558-f1b6-4a5c-ba52-708d31265374",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting find_clock_by_doi function\n",
+      "|-----> ⚙️ Load all clock metadata started\n",
+      "|-----------> Data found in pyaging_data/all_clock_metadata.pt\n",
+      "|-----> ✅ Load all clock metadata finished [0.4988s]\n",
+      "|-----> ⚙️ Searching for clock based on DOI started\n",
+      "|-----------> in progress: 100.0000%\n",
+      "|-----------> Clocks with DOI https://doi.org/10.1038/s43587-022-00248-2: pchorvath2013, pcphenoage, pcgrimage, pchannum, pcdnamtl, hrsinchphenoage, pcskinandblood\n",
+      "|-----> ✅ Searching for clock based on DOI finished [0.0485s]\n",
+      "|-----> 🎉 Done! [0.5502s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.utils.find_clock_by_doi('https://doi.org/10.1038/s43587-022-00248-2')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "125e90ad-25f5-46e2-a2ad-0f87c6729ea9",
+   "metadata": {},
+   "source": [
+    "The second way is by simply showing the names of all the clocks that are available."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "48c37121-3b2d-40cf-80aa-c1f58fbad127",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting show_all_clocks function\n",
+      "|-----> ⚙️ Load all clock metadata started\n",
+      "|-----------> Data found in pyaging_data/all_clock_metadata.pt\n",
+      "|-----> ✅ Load all clock metadata finished [0.4589s]\n",
+      "|-----> ⚙️ Showing all available clock names started\n",
+      "|-----------> altumage\n",
+      "|-----------> bitage\n",
+      "|-----------> camilloh3k27ac\n",
+      "|-----------> camilloh3k27me3\n",
+      "|-----------> camilloh3k36me3\n",
+      "|-----------> camilloh3k4me1\n",
+      "|-----------> camilloh3k4me3\n",
+      "|-----------> camilloh3k9ac\n",
+      "|-----------> camilloh3k9me3\n",
+      "|-----------> camillopanhistone\n",
+      "|-----------> dnamphenoage\n",
+      "|-----------> dnamtl\n",
+      "|-----------> dunedinpace\n",
+      "|-----------> encen100\n",
+      "|-----------> encen40\n",
+      "|-----------> grimage\n",
+      "|-----------> grimage2\n",
+      "|-----------> han\n",
+      "|-----------> hannum\n",
+      "|-----------> horvath2013\n",
+      "|-----------> hrsinchphenoage\n",
+      "|-----------> knight\n",
+      "|-----------> leecontrol\n",
+      "|-----------> leerefinedrobust\n",
+      "|-----------> leerobust\n",
+      "|-----------> lin\n",
+      "|-----------> mammalian1\n",
+      "|-----------> mammalian2\n",
+      "|-----------> mammalian3\n",
+      "|-----------> mammalianblood2\n",
+      "|-----------> mammalianblood3\n",
+      "|-----------> mammalianfemale\n",
+      "|-----------> mammalianlifespan\n",
+      "|-----------> mammalianskin2\n",
+      "|-----------> mammalianskin3\n",
+      "|-----------> meer\n",
+      "|-----------> ocampoatac1\n",
+      "|-----------> ocampoatac2\n",
+      "|-----------> pcdnamtl\n",
+      "|-----------> pcgrimage\n",
+      "|-----------> pchannum\n",
+      "|-----------> pchorvath2013\n",
+      "|-----------> pcphenoage\n",
+      "|-----------> pcskinandblood\n",
+      "|-----------> pedbe\n",
+      "|-----------> petkovich\n",
+      "|-----------> phenoage\n",
+      "|-----------> replitali\n",
+      "|-----------> skinandblood\n",
+      "|-----------> stubbs\n",
+      "|-----------> thompson\n",
+      "|-----------> zhangblup\n",
+      "|-----------> zhangen\n",
+      "|-----------> zhangmortality\n",
+      "|-----> ✅ Showing all available clock names finished [0.0280s]\n",
+      "|-----> 🎉 Done! [0.4903s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.utils.show_all_clocks()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6e579e9f-073d-4a26-9bd6-271068ac8601",
+   "metadata": {},
+   "source": [
+    "## Cite"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fb15a3ba-4dd1-4526-ab45-732979fcd676",
+   "metadata": {},
+   "source": [
+    "`pyaging` also provides citations for all available clocks."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "61f0ac9d-b26c-493b-a576-5d7cde67775c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting cite_clock function\n",
+      "|-----> ⚙️ Load all clock metadata started\n",
+      "|-----------> Data found in pyaging_data/all_clock_metadata.pt\n",
+      "|-----> ✅ Load all clock metadata finished [0.5150s]\n",
+      "|-----> ⚙️ Searching for citation of clock altumage started\n",
+      "|-----------> Citation for altumage:\n",
+      "|-----------> de Lima Camillo, Lucas Paulo, Louis R. Lapierre, and Ritambhara Singh. \"A pan-tissue DNA-methylation epigenetic clock based on deep learning.\" npj Aging 8.1 (2022): 4.\n",
+      "|-----------> Please also consider citing pyaging :)\n",
+      "|-----------> de Lima Camillo, Lucas Paulo. \"pyaging: a Python-based compendium of GPU-optimized aging clocks.\" bioRxiv (2023): 2023-11.\n",
+      "|-----> ✅ Searching for citation of clock altumage finished [0.0024s]\n",
+      "|-----> 🎉 Done! [0.5205s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.utils.cite_clock('AltumAge')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9838c1b6-47a6-44a4-8b3e-8c5e760c6172",
+   "metadata": {},
+   "source": [
+    "## Get metadata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "99fc2253-7f33-479f-b946-97d0f77c6d19",
+   "metadata": {},
+   "source": [
+    "To get all of the metadata for a clock, including the citation and DOI, just run the following."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "2b65ef03-485a-4631-b831-8e87d3ce0f64",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> 🏗️ Starting get_clock_metadata function\n",
+      "|-----> ⚙️ Load all clock metadata started\n",
+      "|-----------> Data found in pyaging_data/all_clock_metadata.pt\n",
+      "|-----> ✅ Load all clock metadata finished [0.5505s]\n",
+      "|-----> ⚙️ Showing altumage metadata started\n",
+      "|-----------> clock_name: altumage\n",
+      "|-----------> data_type: methylation\n",
+      "|-----------> species: Homo sapiens\n",
+      "|-----------> year: 2022\n",
+      "|-----------> approved_by_author: ✅\n",
+      "|-----------> citation: de Lima Camillo, Lucas Paulo, Louis R. Lapierre, and Ritambhara Singh. \"A pan-tissue DNA-methylation epigenetic clock based on deep learning.\" npj Aging 8.1 (2022): 4.\n",
+      "|-----------> doi: https://doi.org/10.1038/s41514-022-00085-y\n",
+      "|-----------> notes: None\n",
+      "|-----------> version: None\n",
+      "|-----------> reference_values: True\n",
+      "|-----------> preprocess: scale\n",
+      "|-----> ✅ Showing altumage metadata finished [0.0062s]\n",
+      "|-----> 🎉 Done! [0.5622s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "pya.utils.get_clock_metadata('AltumAge')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b95a3ad8-efdd-4c73-8851-ec17071a6e78",
+   "metadata": {},
+   "source": [
+    "## Get clock parameters"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "02a494ca-d06b-4b0b-90b8-f4013a1c100c",
+   "metadata": {},
+   "source": [
+    "To easily analyze the weights and features of a particular clock, please use:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "7c853695-bc26-4f66-bd9d-39ec2c381f80",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "|-----> ⚙️ Load clock started\n",
+      "|-----------> Data found in pyaging_data/altumage.pt\n",
+      "|-----> ✅ Load clock finished [0.5409s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "logger = pya.logger.Logger('test_logger')\n",
+    "device = 'cpu'\n",
+    "dir = 'pyaging_data'\n",
+    "indent_level = 1\n",
+    "\n",
+    "clock = pya.pred.load_clock('AltumAge', device, dir, logger, indent_level=indent_level)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "a9b2e3bc-d8a2-4625-9d6a-77ba1dede403",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AltumAge(\n",
+       "  (base_model): AltumAgeNeuralNetwork(\n",
+       "    (linear1): Linear(in_features=20318, out_features=32, bias=True)\n",
+       "    (linear2): Linear(in_features=32, out_features=32, bias=True)\n",
+       "    (linear3): Linear(in_features=32, out_features=32, bias=True)\n",
+       "    (linear4): Linear(in_features=32, out_features=32, bias=True)\n",
+       "    (linear5): Linear(in_features=32, out_features=32, bias=True)\n",
+       "    (linear6): Linear(in_features=32, out_features=1, bias=True)\n",
+       "    (bn1): BatchNorm1d(20318, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
+       "    (bn2): BatchNorm1d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
+       "    (bn3): BatchNorm1d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
+       "    (bn4): BatchNorm1d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
+       "    (bn5): BatchNorm1d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
+       "    (bn6): BatchNorm1d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
+       "  )\n",
+       ")"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "clock"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f25f68da-0c13-4403-85ab-88f8a8d70c29",
+   "metadata": {},
+   "source": [
+    "Let's check the weights of the first linear layer for AltumAge."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "8660aec5-82a8-4b70-a71a-bd3571a81ded",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Parameter containing:\n",
+       "tensor([[ 1.2465e-05, -2.4719e-04,  5.4308e-02,  ..., -2.5304e-02,\n",
+       "          5.2822e-02,  8.9800e-02],\n",
+       "        [ 3.5401e-04, -3.0528e-03,  2.8799e-02,  ...,  6.8214e-03,\n",
+       "          6.9691e-02,  1.2179e-01],\n",
+       "        [ 1.6119e-04, -6.7272e-06, -4.6887e-02,  ...,  1.3132e-02,\n",
+       "          9.2417e-02, -4.2074e-02],\n",
+       "        ...,\n",
+       "        [ 1.9902e-04,  9.0495e-04, -8.5197e-03,  ..., -9.6892e-02,\n",
+       "          2.9396e-02,  5.9170e-02],\n",
+       "        [-1.2038e-04,  3.7530e-04,  1.7924e-01,  ..., -4.9997e-02,\n",
+       "         -1.2819e-02,  2.8045e-02],\n",
+       "        [ 1.1584e-04,  2.2752e-04, -3.0746e-02,  ...,  1.7930e-02,\n",
+       "          8.3116e-03, -2.0979e-02]], dtype=torch.float64, requires_grad=True)"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "clock.base_model.linear1.weight"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a508d495-e2dd-4757-ad4d-732836f08c64",
+   "metadata": {},
+   "source": [
+    "A quick look at the features:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "7e5ec51f-146a-45a6-ba7f-5a825b8315fd",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['cg00000292',\n",
+       " 'cg00002426',\n",
+       " 'cg00003994',\n",
+       " 'cg00007981',\n",
+       " 'cg00008493',\n",
+       " 'cg00008713',\n",
+       " 'cg00009407',\n",
+       " 'cg00011459',\n",
+       " 'cg00012199',\n",
+       " 'cg00012386']"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "list(clock.features[0:10])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8a6b9591-5c1e-4665-88fd-ee633cd92798",
+   "metadata": {},
+   "source": [
+    "And the reference values used:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "2f73647b-277f-4b93-b49d-a4907f49a892",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[0.7598633952352156,\n",
+       " 0.7863788078967272,\n",
+       " 0.06324422321924528,\n",
+       " 0.029943418029386736,\n",
+       " 0.9363471225552753,\n",
+       " 0.05054944899168823,\n",
+       " 0.0351571456459043,\n",
+       " 0.9114132733331861,\n",
+       " 0.037064057665286136,\n",
+       " 0.039170308280475935]"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "list(clock.reference_values[0:10])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6429c0d5-6737-4c76-b8c4-16b030325ad6",
+   "metadata": {},
+   "source": [
+    "We can also get the metadata directly from the clock object:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "cac05fd5-5480-460e-801f-d3504a37f8d1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'clock_name': 'altumage',\n",
+       " 'data_type': 'methylation',\n",
+       " 'species': 'Homo sapiens',\n",
+       " 'year': 2022,\n",
+       " 'approved_by_author': '✅',\n",
+       " 'citation': 'de Lima Camillo, Lucas Paulo, Louis R. Lapierre, and Ritambhara Singh. \"A pan-tissue DNA-methylation epigenetic clock based on deep learning.\" npj Aging 8.1 (2022): 4.',\n",
+       " 'doi': 'https://doi.org/10.1038/s41514-022-00085-y',\n",
+       " 'notes': None,\n",
+       " 'version': None}"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "clock.metadata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c4987574-3efc-4b63-8dcc-9938c94bc275",
+   "metadata": {},
+   "source": [
+    "For a more in-depth look at how the clock was set up, including the model type and the source of the weights, please look at our [clocks notebook folder](https://github.com/rsinghlab/pyaging/tree/main/clocks/notebooks) on GitHub."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.17"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/tutorials/tutorial_histonemarkchipseq.ipynb b/tutorials/tutorial_histonemarkchipseq.ipynb
index 4ac8d48..39d0a1c 100644
--- a/tutorials/tutorial_histonemarkchipseq.ipynb
+++ b/tutorials/tutorial_histonemarkchipseq.ipynb
@@ -71,7 +71,7 @@
      "text": [
       "|-----> 🏗️ Starting download_example_data function\n",
       "|-----------> Downloading data to pyaging_data/ENCFF386QWG.bigWig\n",
-      "|-----------> in progress: 19.0045%"
+      "|-----------> in progress: 24.0057%"
      ]
     }
    ],