|
76 | 76 | },
|
77 | 77 | "outputs": [],
|
78 | 78 | "source": [
|
| 79 | + "from time import time\n", |
| 80 | + "\n", |
79 | 81 | "import pandas as pd\n",
|
80 | 82 | "from tqdm import tqdm\n",
|
81 | 83 | "\n",
|
82 | 84 | "from ccc import conf\n",
|
| 85 | + "from ccc.utils import simplify_string\n", |
83 | 86 | "from ccc.corr import pearson"
|
84 | 87 | ]
|
85 | 88 | },
|
|
125 | 128 | "GENE_SELECTION_STRATEGY = \"var_pc_log2\""
|
126 | 129 | ]
|
127 | 130 | },
|
| 131 | + { |
| 132 | + "cell_type": "code", |
| 133 | + "execution_count": 3, |
| 134 | + "id": "ac2eaa49-c49e-4f3c-83c5-4f8c321d25a7", |
| 135 | + "metadata": { |
| 136 | + "execution": { |
| 137 | + "iopub.execute_input": "2022-05-24T14:46:59.207591Z", |
| 138 | + "iopub.status.busy": "2022-05-24T14:46:59.207409Z", |
| 139 | + "iopub.status.idle": "2022-05-24T14:46:59.210688Z", |
| 140 | + "shell.execute_reply": "2022-05-24T14:46:59.210092Z" |
| 141 | + }, |
| 142 | + "papermill": { |
| 143 | + "duration": 0.010126, |
| 144 | + "end_time": "2022-05-24T14:46:59.212259", |
| 145 | + "exception": false, |
| 146 | + "start_time": "2022-05-24T14:46:59.202133", |
| 147 | + "status": "completed" |
| 148 | + }, |
| 149 | + "tags": [] |
| 150 | + }, |
| 151 | + "outputs": [], |
| 152 | + "source": [ |
| 153 | + "# tissues to process; candidates are the top 5 by sample size (see nbs/05_preprocessing/00-gtex_v8-split_by_tissue.ipynb), commented-out entries are skipped\n", |
| 154 | + "TISSUES = [\n", |
| 155 | + " # \"Muscle - Skeletal\",\n", |
| 156 | + " \"Whole Blood\",\n", |
| 157 | + " # \"Skin - Sun Exposed (Lower leg)\",\n", |
| 158 | + " # \"Adipose - Subcutaneous\",\n", |
| 159 | + " # \"Artery - Tibial\",\n", |
| 160 | + "]" |
| 161 | + ] |
| 162 | + }, |
128 | 163 | {
|
129 | 164 | "cell_type": "code",
|
130 | 165 | "execution_count": 3,
|
|
341 | 376 | "output_type": "display_data"
|
342 | 377 | }
|
343 | 378 | ],
|
| 379 | + "source": [ |
| 380 | + "tissue_in_file_names = [f\"_data_{simplify_string(t.lower())}-\" for t in TISSUES]" |
| 381 | + ] |
| 382 | + }, |
| 383 | + { |
| 384 | + "cell_type": "code", |
| 385 | + "execution_count": null, |
| 386 | + "id": "cd78a8c5", |
| 387 | + "metadata": { |
| 388 | + "tags": [] |
| 389 | + }, |
| 390 | + "outputs": [], |
344 | 391 | "source": [
|
345 | 392 | "input_files = sorted(list(INPUT_DIR.glob(f\"*-{GENE_SELECTION_STRATEGY}.pkl\")))\n",
|
| 393 | + "input_files = [\n", |
| 394 | + " f for f in input_files if any(tn in f.name for tn in tissue_in_file_names)\n", |
| 395 | + "]\n", |
346 | 396 | "display(len(input_files))\n",
|
347 | 397 | "\n",
|
348 |
| - "assert len(input_files) == conf.GTEX[\"N_TISSUES\"], len(input_files)\n", |
349 |
| - "display(input_files[:5])" |
| 398 | + "assert len(input_files) == len(TISSUES), (len(input_files), len(TISSUES))\n", |
| 399 | + "display(input_files)" |
350 | 400 | ]
|
351 | 401 | },
|
352 | 402 | {
|
|
914 | 964 | }
|
915 | 965 | ],
|
916 | 966 | "source": [
|
917 |
| - "%timeit CORRELATION_METHOD(test_data)" |
| 967 | + "%timeit -r1 CORRELATION_METHOD(test_data)" |
918 | 968 | ]
|
919 | 969 | },
|
920 | 970 | {
|
|
973 | 1023 | " data = pd.read_pickle(tissue_data_file)\n",
|
974 | 1024 | "\n",
|
975 | 1025 | " # compute correlations\n",
|
| 1026 | + " start_time = time()\n", |
| 1027 | + "\n", |
976 | 1028 | " data_corrs = CORRELATION_METHOD(data)\n",
|
977 | 1029 | "\n",
|
| 1030 | + " end_time = time()\n", |
| 1031 | + " elapsed_time = end_time - start_time\n", |
| 1032 | + " display(elapsed_time)\n", |
| 1033 | + "\n", |
978 | 1034 | " # save\n",
|
979 | 1035 | " output_filename = f\"{tissue_data_file.stem}-{method_name}.pkl\"\n",
|
980 | 1036 | " data_corrs.to_pickle(path=OUTPUT_DIR / output_filename)"
|
|
1017 | 1073 | "name": "python",
|
1018 | 1074 | "nbconvert_exporter": "python",
|
1019 | 1075 | "pygments_lexer": "ipython3",
|
1020 |
| - "version": "3.9.9" |
| 1076 | + "version": "3.9.12" |
1021 | 1077 | },
|
1022 | 1078 | "papermill": {
|
1023 | 1079 | "default_parameters": {},
|
|
0 commit comments