rsinghlab
diff --git a/Diff for: ‎docs/source/conf.py
+1-1 b/Diff for: ‎docs/source/conf.py
+1-1
diff --git a/Diff for: ‎pyaging/preprocess/_preprocess.py
+2-1 b/Diff for: ‎pyaging/preprocess/_preprocess.py
+2-1
diff --git a/Diff for: ‎pyproject.toml
+1-1 b/Diff for: ‎pyproject.toml
+1-1
diff --git a/Diff for: ‎tutorials/tutorial_atacseq.ipynb
+111-40 b/Diff for: ‎tutorials/tutorial_atacseq.ipynb
+111-40
@@ -13,7 +13,7 @@
 project = 'pyaging'
 copyright = '2023, Lucas Paulo de Lima Camillo'
 author = 'Lucas Paulo de Lima Camillo'
-release = '0.0.1'
+release = '0.0.2'
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
 
@@ -127,7 +127,8 @@ def df_to_adata(
         The DataFrame containing biological data. Rows represent samples, and columns represent features.
 
     metadata_cols : List[str]
-        A list with the name of the columns in 'df' which are part of the metadata.
+        A list with the names of the columns in 'df' that are part of the metadata. They will be
+        added to adata.obs rather than adata.X.
 
     imputer_strategy : str, optional
         The strategy for imputing missing values in 'df'. Supported strategies include 'mean',
 
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pyaging"
-version = "0.0.1"
+version = "0.0.2"
 description = "A Python-based compendium of GPU-optimized aging clocks."
 authors = ["Lucas Paulo de Lima Camillo <[email protected]>"]
 license = "BSD"
 
@@ -72,8 +72,8 @@
       "|-----> 🏗️ Starting download_example_data function\n",
       "|-----> ⚙️ Download data started\n",
       "|-----------> Data found in pyaging_data/atac_example.pkl\n",
-      "|-----> ✅ Download data finished [0.0007s]\n",
-      "|-----> 🎉 Done! [0.0014s]\n"
+      "|-----> ✅ Download data finished [0.0008s]\n",
+      "|-----> 🎉 Done! [0.0017s]\n"
      ]
     }
    ],
@@ -308,6 +308,14 @@
     "df.head()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "4ca435ec-3480-4167-ad55-b8a28b23a843",
+   "metadata": {},
+   "source": [
+    "This is what the `adata` object looks like:"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "251495e7-082f-45ae-841c-a2dd86a3cb15",
@@ -336,29 +344,60 @@
      "text": [
       "|-----> 🏗️ Starting df_to_adata function\n",
       "|-----> ⚙️ Create anndata object started\n",
-      "|-----> ✅ Create anndata object finished [0.0018s]\n",
+      "|-----> ✅ Create anndata object finished [0.0009s]\n",
       "|-----> ⚙️ Add metadata to anndata started\n",
       "|-----------? No metadata provided. Leaving adata.obs empty\n",
-      "|-----> ⚠️ Add metadata to anndata finished [0.0082s]\n",
+      "|-----> ⚠️ Add metadata to anndata finished [0.0021s]\n",
       "|-----> ⚙️ Log data statistics started\n",
       "|-----------> There are 10 observations\n",
       "|-----------> There are 80400 features\n",
       "|-----------> Total missing values: 0\n",
       "|-----------> Percentage of missing values: 0.00%\n",
-      "|-----> ✅ Log data statistics finished [0.0026s]\n",
+      "|-----> ✅ Log data statistics finished [0.0011s]\n",
       "|-----> ⚙️ Impute missing values started\n",
       "|-----------> No missing values found. No imputation necessary\n",
-      "|-----> ✅ Impute missing values finished [0.0056s]\n",
+      "|-----> ✅ Impute missing values finished [0.0022s]\n",
       "|-----> ⚙️ Add unstructured data to anndata started\n",
-      "|-----> ✅ Add unstructured data to anndata finished [0.0074s]\n",
-      "|-----> 🎉 Done! [0.0288s]\n"
+      "|-----> ✅ Add unstructured data to anndata finished [0.0042s]\n",
+      "|-----> 🎉 Done! [0.0111s]\n"
      ]
     }
    ],
    "source": [
     "adata = pya.preprocess.df_to_adata(df)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "5042e04f-17c0-4eb2-8c5d-2c2fc5d6d2d6",
+   "metadata": {},
+   "source": [
+    "This is what the `adata` object looks like:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "503da312-2256-4e67-9747-107f5c4587ec",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AnnData object with n_obs × n_vars = 10 × 80400\n",
+       "    var: 'percent_na'\n",
+       "    uns: 'imputer_strategy', 'data_type'"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "c072990d-0f54-49b3-bb7a-7bbd13301e2a",
@@ -377,7 +416,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "id": "26398785-d1ea-4ce8-b1d9-7234f8f46ef6",
    "metadata": {},
    "outputs": [
@@ -388,70 +427,70 @@
       "|-----> 🏗️ Starting predict_age function\n",
       "|-----> ⚙️ Set PyTorch device started\n",
       "|-----------> Using device: cpu\n",
-      "|-----> ✅ Set PyTorch device finished [0.0021s]\n",
+      "|-----> ✅ Set PyTorch device finished [0.0009s]\n",
       "|-----> Processing clock: OcampoATAC1\n",
       "|-----------> ⚙️ Load clock started\n",
       "|-----------> ⚙️ Download data started\n",
       "|-----------> Data found in pyaging_data/ocampoatac1.pt\n",
-      "|-----------> ✅ Download data finished [0.0006s]\n",
-      "|-----------> ✅ Load clock finished [0.0006s]\n",
+      "|-----------> ✅ Download data finished [0.0003s]\n",
+      "|-----------> ✅ Load clock finished [0.0003s]\n",
       "|-----------> ⚙️ Check features in adata started\n",
       "|-----------> All features are present in adata.var_names.\n",
-      "|-----------> ✅ Check features in adata finished [0.0062s]\n",
+      "|-----------> ✅ Check features in adata finished [0.0030s]\n",
       "|-----------> ⚙️ Filter features and extract data matrix started\n",
-      "|-----------> ✅ Filter features and extract data matrix finished [0.0030s]\n",
+      "|-----------> ✅ Filter features and extract data matrix finished [0.0012s]\n",
       "|-----------> ⚙️ Preprocess data started\n",
       "|-----------------> Preprocessing data with function log1p\n",
-      "|-----------> ✅ Preprocess data finished [0.0054s]\n",
+      "|-----------> ✅ Preprocess data finished [0.0027s]\n",
       "|-----------> ⚙️ Convert numpy array to tensor started\n",
-      "|-----------> ✅ Convert numpy array to tensor finished [0.0028s]\n",
+      "|-----------> ✅ Convert numpy array to tensor finished [0.0015s]\n",
       "|-----------> ⚙️ Initialize model started\n",
-      "|-----------> ✅ Initialize model finished [0.0023s]\n",
+      "|-----------> ✅ Initialize model finished [0.0013s]\n",
       "|-----------> ⚙️ Predict ages with model started\n",
-      "|-----------> ✅ Predict ages with model finished [0.0036s]\n",
+      "|-----------> ✅ Predict ages with model finished [0.0015s]\n",
       "|-----------> ⚙️ Convert tensor to numpy array started\n",
-      "|-----------> ✅ Convert tensor to numpy array finished [0.0016s]\n",
+      "|-----------> ✅ Convert tensor to numpy array finished [0.0015s]\n",
       "|-----------> ⚙️ Add predicted ages to adata started\n",
       "|-----------> ✅ Add predicted ages to adata finished [0.0013s]\n",
       "|-----------> ⚙️ Load all clock metadata started\n",
       "|-----------> ⚙️ Download data started\n",
       "|-----------> Data found in pyaging_data/all_clock_metadata.pt\n",
-      "|-----------> ✅ Download data finished [0.0005s]\n",
-      "|-----------> ✅ Load all clock metadata finished [0.0005s]\n",
+      "|-----------> ✅ Download data finished [0.0003s]\n",
+      "|-----------> ✅ Load all clock metadata finished [0.0003s]\n",
       "|-----------> ⚙️ Add clock metadata to adata.uns started\n",
       "|-----------> ✅ Add clock metadata to adata.uns finished [0.0039s]\n",
       "|-----> Processing clock: OcampoATAC2\n",
       "|-----------> ⚙️ Load clock started\n",
       "|-----------> ⚙️ Download data started\n",
       "|-----------> Data found in pyaging_data/ocampoatac2.pt\n",
-      "|-----------> ✅ Download data finished [0.0004s]\n",
-      "|-----------> ✅ Load clock finished [0.0004s]\n",
+      "|-----------> ✅ Download data finished [0.0006s]\n",
+      "|-----------> ✅ Load clock finished [0.0006s]\n",
       "|-----------> ⚙️ Check features in adata started\n",
       "|-----------> All features are present in adata.var_names.\n",
-      "|-----------> ✅ Check features in adata finished [0.0034s]\n",
+      "|-----------> ✅ Check features in adata finished [0.0027s]\n",
       "|-----------> ⚙️ Filter features and extract data matrix started\n",
-      "|-----------> ✅ Filter features and extract data matrix finished [0.0022s]\n",
+      "|-----------> ✅ Filter features and extract data matrix finished [0.0013s]\n",
       "|-----------> ⚙️ Preprocess data started\n",
       "|-----------------> Preprocessing data with function log1p\n",
-      "|-----------> ✅ Preprocess data finished [0.0024s]\n",
+      "|-----------> ✅ Preprocess data finished [0.0016s]\n",
       "|-----------> ⚙️ Convert numpy array to tensor started\n",
-      "|-----------> ✅ Convert numpy array to tensor finished [0.0019s]\n",
+      "|-----------> ✅ Convert numpy array to tensor finished [0.0009s]\n",
       "|-----------> ⚙️ Initialize model started\n",
-      "|-----------> ✅ Initialize model finished [0.0021s]\n",
+      "|-----------> ✅ Initialize model finished [0.0007s]\n",
       "|-----------> ⚙️ Predict ages with model started\n",
-      "|-----------> ✅ Predict ages with model finished [0.0020s]\n",
+      "|-----------> ✅ Predict ages with model finished [0.0012s]\n",
       "|-----------> ⚙️ Convert tensor to numpy array started\n",
-      "|-----------> ✅ Convert tensor to numpy array finished [0.0010s]\n",
+      "|-----------> ✅ Convert tensor to numpy array finished [0.0013s]\n",
       "|-----------> ⚙️ Add predicted ages to adata started\n",
-      "|-----------> ✅ Add predicted ages to adata finished [0.0008s]\n",
+      "|-----------> ✅ Add predicted ages to adata finished [0.0011s]\n",
       "|-----------> ⚙️ Load all clock metadata started\n",
       "|-----------> ⚙️ Download data started\n",
       "|-----------> Data found in pyaging_data/all_clock_metadata.pt\n",
-      "|-----------> ✅ Download data finished [0.0005s]\n",
-      "|-----------> ✅ Load all clock metadata finished [0.0005s]\n",
+      "|-----------> ✅ Download data finished [0.0003s]\n",
+      "|-----------> ✅ Load all clock metadata finished [0.0003s]\n",
       "|-----------> ⚙️ Add clock metadata to adata.uns started\n",
-      "|-----------> ✅ Add clock metadata to adata.uns finished [0.0025s]\n",
-      "|-----> 🎉 Done! [0.0612s]\n"
+      "|-----------> ✅ Add clock metadata to adata.uns finished [0.0023s]\n",
+      "|-----> 🎉 Done! [0.0396s]\n"
      ]
     }
    ],
@@ -469,7 +508,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "id": "055761d9-7e22-49f3-a1db-31c3ed3749ba",
    "metadata": {},
    "outputs": [],
@@ -482,7 +521,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "id": "fdd9d6c2-7f0a-4f96-a095-4a492ed73f8d",
    "metadata": {},
    "outputs": [
@@ -550,7 +589,7 @@
        "Sample_5    38.929848    33.717129"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -559,6 +598,38 @@
     "adata.obs.head()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "4bad3df8-f868-4cf5-be74-00ffd02c18f5",
+   "metadata": {},
+   "source": [
+    "After age prediction, the predicted ages for each clock are added to `adata.obs`. Moreover, the percentage of missing values for each clock and other clock metadata are included in `adata.uns`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "0d13fb55-8a12-4d28-83e9-ec7c9fbbe30c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AnnData object with n_obs × n_vars = 10 × 80400\n",
+       "    obs: 'ocampoatac1', 'ocampoatac2'\n",
+       "    var: 'percent_na'\n",
+       "    uns: 'imputer_strategy', 'data_type', 'ocampoatac1_percent_na', 'ocampoatac1_metadata', 'ocampoatac2_percent_na', 'ocampoatac2_metadata'"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adata"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "a4e7ad8d-44ae-4ced-a626-f9e3b2d04114",
@@ -577,7 +648,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
    "id": "6b368506-55d1-4b74-be61-817bcf575ade",
    "metadata": {},
    "outputs": [
@@ -591,7 +662,7 @@
        " 'doi': 'https://doi.org/10.1007/s11357-023-00986-0'}"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }