diff --git a/nbs/25_pvalue/01-ccc_pvalue_dist-generate-gene_pairs.ipynb b/nbs/25_pvalue/01-ccc_pvalue_dist-generate-gene_pairs.ipynb index dd15e537..8ae98c1e 100644 --- a/nbs/25_pvalue/01-ccc_pvalue_dist-generate-gene_pairs.ipynb +++ b/nbs/25_pvalue/01-ccc_pvalue_dist-generate-gene_pairs.ipynb @@ -55,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "1ffa1a96-7545-40b9-ac8b-8627e13de8d4", "metadata": { "papermill": { @@ -70,8 +70,7 @@ "outputs": [], "source": [ "import numpy as np\n", - "from scipy.spatial.distance import squareform\n", - "from sklearn.metrics import pairwise_distances\n", + "from joblib import Parallel, delayed\n", "\n", "from ccc.coef import ccc\n", "from ccc import conf" @@ -96,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "a8dfa548-6ce1-4edd-bef2-a919fc6ad850", "metadata": { "papermill": { @@ -115,7 +114,21 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, + "id": "74f8f565-ed30-488f-966a-d153550931ed", + "metadata": {}, + "outputs": [], + "source": [ + "N_JOBS = conf.GENERAL[\"N_JOBS\"] // 2\n", + "display(N_JOBS)\n", + "\n", + "PVALUE_N_JOBS = 2\n", + "display(PVALUE_N_JOBS)" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "fd167aff-e768-416f-a078-f926f6023a1e", "metadata": { "papermill": { @@ -152,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "170ee0f3-a6dd-4c8b-9a99-ec6d02df8e2e", "metadata": { "papermill": { @@ -172,7 +185,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "d083d95e-247e-45cc-bc28-36cf8144383c", "metadata": { "papermill": { @@ -184,18 +197,7 @@ }, "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "PosixPath('/opt/data/results/ccc_null-pvalues')" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "OUTPUT_DIR" ] @@ -219,7 +221,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "472ff1df-b4f6-417f-b396-58a55ce0e39a", "metadata": { "papermill": { @@ -238,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "acd7a9c0-d8a8-46f5-ab60-2478347adf36", "metadata": { "papermill": { @@ -250,18 +252,7 @@ }, "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "(10, 1000)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "data.shape" ] @@ -285,35 +276,74 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "c8a85ce0-4c5a-4ed9-8ad6-24b21fb10b1e", "metadata": {}, "outputs": [], "source": [ "def ccc_single(x, y):\n", - " return ccc(x, y, n_jobs=1, pvalue_n_perms=PVALUE_N_PERMS, pvalue_n_jobs=conf.GENERAL[\"N_JOBS\"])" + " return ccc(\n", + " x, y, n_jobs=1, pvalue_n_perms=PVALUE_N_PERMS, pvalue_n_jobs=PVALUE_N_JOBS\n", + " )" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "6a5158c1-904a-42a7-9ead-4fc72ef9c720", + "execution_count": null, + "id": "6f32ad1a-3b2f-4e08-8a53-35cfb68e3970", "metadata": {}, "outputs": [], "source": [ - "cm_values = []\n", - "cm_pvalues = []\n", - "\n", - "for i in range(data.shape[0] - 1):\n", - " for j in range(i+1, data.shape[0]):\n", - " v, p = ccc_single(data[i], data[j])\n", - " cm_values.append(v)\n", - " cm_pvalues.append(p)" + "results = Parallel(n_jobs=N_JOBS)(\n", + " delayed(ccc_single)(data[i], data[j])\n", + " for i in range(data.shape[0] - 1)\n", + " for j in range(i + 1, data.shape[0])\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e68a65a5-8bba-4a79-a740-26d722dc670e", + "metadata": {}, + "outputs": [], + "source": [ + "assert len(results) == (DATA_N_OBJS * (DATA_N_OBJS - 1)) / 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "907fa03e-616a-4463-83e7-4175d714167f", + "metadata": {}, + "outputs": [], + "source": [ + "results[0]" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, + "id": "5ed53d3b-e78d-4f4b-a262-c6abe7f8840d", + "metadata": {}, + "outputs": [], + "source": [ + "cm_values = [x[0] for x in results]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2972d472-725d-4fe0-83d8-57e2f68f8ecb", + "metadata": {}, + "outputs": [], + "source": [ + "cm_pvalues = [x[1] for x in results]" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "003f5e04-5e2e-477f-b66a-ea28ac1a8abc", "metadata": {}, "outputs": [], @@ -324,7 +354,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "5525b4ef-2e2e-4338-b52a-37d8308e237d", "metadata": { "papermill": { @@ -344,7 +374,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "e08382ef-423a-4114-9a8f-f1b5abc48055", "metadata": { "papermill": { @@ -356,54 +386,24 @@ }, "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "(45,)" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cm_values.shape" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "93c4f9d7-6727-4db1-8bcc-1b618ecf41fe", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0.00254684, 0.00104179, 0.00320558, 0.00018284, 0.00186997,\n", - " 0.00147106, 0.00177705, 0.00194291, 0.00049431, 0.00425941,\n", - " 0.00148615, 0.00019465, 0.00363023, 0.0056535 , 0.00274262,\n", - " 0.00522602, 0.0022903 , 0.00320755, 0.00099358, 0.00532412,\n", - " 0.00253045, 0.00149274, 0.00629346, 0.00221865, 0.00627013,\n", - " 0.00389841, 0.00138057, 0.00221203, 0.00417506, 0.00241475,\n", - " 0.00504645, 0.00137032, 0.00529612, 0.00326284, 0.00375165,\n", - " 0.00377352, 0.00323483, 0.00277389, 0.00797598, 0.0026016 ,\n", - " 0.00238008, 0.00171082, 0.00084283, 0.0051361 , 0.00122446])" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cm_values" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "46e8560e-4c1b-4e2b-b373-f42ca0a59819", "metadata": { "papermill": { @@ -415,25 +415,14 @@ }, "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "(45,)" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cm_pvalues.shape" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "31ce94b0-ce31-4de6-9848-c1644268bd2b", "metadata": { "papermill": { @@ -445,26 +434,7 @@ }, "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0.45454545, 0.81818182, 0.36363636, 1. , 0.81818182,\n", - " 0.72727273, 0.90909091, 0.81818182, 1. , 0.18181818,\n", - " 0.90909091, 1. , 0.09090909, 0.09090909, 0.45454545,\n", - " 0.27272727, 0.54545455, 0.36363636, 0.90909091, 0.09090909,\n", - " 0.63636364, 0.90909091, 0.09090909, 0.72727273, 0.09090909,\n", - " 0.27272727, 0.90909091, 0.81818182, 0.27272727, 0.72727273,\n", - " 0.27272727, 1. , 0.09090909, 0.54545455, 0.36363636,\n", - " 0.36363636, 0.45454545, 0.36363636, 0.09090909, 0.45454545,\n", - " 0.54545455, 0.90909091, 0.90909091, 0.09090909, 0.81818182])" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cm_pvalues" ] @@ -488,7 +458,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "b11f71f7-bab8-4d83-bf49-fd9419648a3d", "metadata": { "papermill": { @@ -500,17 +470,7 @@ }, "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "PosixPath('/opt/data/results/ccc_null-pvalues/gene_pairs-cm_values.npy')" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "output_file = OUTPUT_DIR / \"gene_pairs-cm_values.npy\"\n", "display(output_file)\n", @@ -520,7 +480,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "12968ead-2e56-4214-956c-08f4f02952e9", "metadata": { "papermill": { diff --git a/nbs/25_pvalue/py/01-ccc_pvalue_dist-generate-gene_pairs.py b/nbs/25_pvalue/py/01-ccc_pvalue_dist-generate-gene_pairs.py index d3ccfe1a..45ab6792 100644 --- a/nbs/25_pvalue/py/01-ccc_pvalue_dist-generate-gene_pairs.py +++ b/nbs/25_pvalue/py/01-ccc_pvalue_dist-generate-gene_pairs.py @@ -26,8 +26,7 @@ # %% tags=[] import numpy as np -from scipy.spatial.distance import squareform -from sklearn.metrics import pairwise_distances +from joblib import Parallel, delayed from ccc.coef import ccc from ccc import conf @@ -38,6 +37,13 @@ # %% tags=[] rs = np.random.RandomState(0) +# %% +N_JOBS = conf.GENERAL["N_JOBS"] // 2 +display(N_JOBS) + +PVALUE_N_JOBS = 2 +display(PVALUE_N_JOBS) + # %% tags=[] DATA_N_OBJS, DATA_N_FEATURES = 100, 1000 PVALUE_N_PERMS = 1000 @@ -67,18 +73,29 @@ # %% def ccc_single(x, y): - return ccc(x, y, n_jobs=1, pvalue_n_perms=PVALUE_N_PERMS, pvalue_n_jobs=conf.GENERAL["N_JOBS"]) + return ccc( + x, y, n_jobs=1, pvalue_n_perms=PVALUE_N_PERMS, pvalue_n_jobs=PVALUE_N_JOBS + ) + + +# %% +results = Parallel(n_jobs=N_JOBS)( + delayed(ccc_single)(data[i], data[j]) + for i in range(data.shape[0] - 1) + for j in range(i + 1, data.shape[0]) +) +# %% +assert len(results) == (DATA_N_OBJS * (DATA_N_OBJS - 1)) / 2 + +# %% +results[0] + +# %% +cm_values = [x[0] for x in results] # %% -cm_values = [] -cm_pvalues = [] - -for i in range(data.shape[0] - 1): - for j in range(i+1, data.shape[0]): - v, p = ccc_single(data[i], data[j]) - cm_values.append(v) - cm_pvalues.append(p) +cm_pvalues = [x[1] for x in results] # %% assert len(cm_values) == len(cm_pvalues)