From c2c5c533bcc48f35ae9e86fbb4cc5bbe0d6bf102 Mon Sep 17 00:00:00 2001 From: Milton Pividori Date: Thu, 7 Sep 2023 23:11:25 -0600 Subject: [PATCH] ccc profiling: add notebook to test many samples with multiple CPUs --- .../17_00-2_CPUs-default-many_samples.ipynb | 660 ++++++++++++++++ ...01-2_CPUs-disable_numba-many_samples.ipynb | 739 ++++++++++++++++++ ...ari_numba-many_samples-use_ari_numba.ipynb | 655 ++++++++++++++++ .../py/17_00-2_CPUs-default-many_samples.py | 103 +++ ...17_01-2_CPUs-disable_numba-many_samples.py | 115 +++ ...se_ari_numba-many_samples-use_ari_numba.py | 103 +++ 6 files changed, 2375 insertions(+) create mode 100644 nbs/others/05_ccc_profiling/12_cm_optimized/17_00-2_CPUs-default-many_samples.ipynb create mode 100644 nbs/others/05_ccc_profiling/12_cm_optimized/17_01-2_CPUs-disable_numba-many_samples.ipynb create mode 100644 nbs/others/05_ccc_profiling/12_cm_optimized/17_02-2_CPUs-use_ari_numba-many_samples-use_ari_numba.ipynb create mode 100644 nbs/others/05_ccc_profiling/12_cm_optimized/py/17_00-2_CPUs-default-many_samples.py create mode 100644 nbs/others/05_ccc_profiling/12_cm_optimized/py/17_01-2_CPUs-disable_numba-many_samples.py create mode 100644 nbs/others/05_ccc_profiling/12_cm_optimized/py/17_02-2_CPUs-use_ari_numba-many_samples-use_ari_numba.py diff --git a/nbs/others/05_ccc_profiling/12_cm_optimized/17_00-2_CPUs-default-many_samples.ipynb b/nbs/others/05_ccc_profiling/12_cm_optimized/17_00-2_CPUs-default-many_samples.ipynb new file mode 100644 index 00000000..0b318692 --- /dev/null +++ b/nbs/others/05_ccc_profiling/12_cm_optimized/17_00-2_CPUs-default-many_samples.ipynb @@ -0,0 +1,660 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "88f4752f-914d-427a-9865-ff645ae4a61e", + "metadata": { + "papermill": { + "duration": 0.001561, + "end_time": "2023-09-07T21:50:24.251694", + "exception": false, + "start_time": "2023-09-07T21:50:24.250133", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Description" + ] + 
}, + { + "cell_type": "markdown", + "id": "97b23c3d-292e-47ec-b76d-4b70f5653f4e", + "metadata": { + "papermill": { + "duration": 0.004786, + "end_time": "2023-09-07T21:50:24.266016", + "exception": false, + "start_time": "2023-09-07T21:50:24.261230", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Like `07_00` but using more CPU cores." + ] + }, + { + "cell_type": "markdown", + "id": "274fca68-c617-481c-9615-dc6920ea24dc", + "metadata": { + "papermill": { + "duration": 0.002767, + "end_time": "2023-09-07T21:50:24.271162", + "exception": false, + "start_time": "2023-09-07T21:50:24.268395", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Use multiple CPU cores" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "48959a89-b227-4487-9b0e-d1499064d13b", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-07T21:50:24.277398Z", + "iopub.status.busy": "2023-09-07T21:50:24.277273Z", + "iopub.status.idle": "2023-09-07T21:50:24.282384Z", + "shell.execute_reply": "2023-09-07T21:50:24.282138Z" + }, + "papermill": { + "duration": 0.009092, + "end_time": "2023-09-07T21:50:24.282970", + "exception": false, + "start_time": "2023-09-07T21:50:24.273878", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "env: CM_N_JOBS=2\n", + "env: NUMBA_NUM_THREADS=2\n", + "env: MKL_NUM_THREADS=2\n", + "env: OPEN_BLAS_NUM_THREADS=2\n", + "env: NUMEXPR_NUM_THREADS=2\n", + "env: OMP_NUM_THREADS=2\n" + ] + } + ], + "source": [ + "%env CM_N_JOBS=2\n", + "%env NUMBA_NUM_THREADS=2\n", + "%env MKL_NUM_THREADS=2\n", + "%env OPEN_BLAS_NUM_THREADS=2\n", + "%env NUMEXPR_NUM_THREADS=2\n", + "%env OMP_NUM_THREADS=2" + ] + }, + { + "cell_type": "markdown", + "id": "325cb06a-df7f-43e8-be3e-f704aaf015b0", + "metadata": { + "papermill": { + "duration": 0.001104, + "end_time": "2023-09-07T21:50:24.285235", + "exception": false, + "start_time": "2023-09-07T21:50:24.284131", +
"status": "completed" + }, + "tags": [] + }, + "source": [ + "# Remove pycache dir" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "73f954a6-1776-4b92-bd0e-fc3caf5df081", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-07T21:50:24.288343Z", + "iopub.status.busy": "2023-09-07T21:50:24.288276Z", + "iopub.status.idle": "2023-09-07T21:50:24.471706Z", + "shell.execute_reply": "2023-09-07T21:50:24.471438Z" + }, + "papermill": { + "duration": 0.185755, + "end_time": "2023-09-07T21:50:24.472387", + "exception": false, + "start_time": "2023-09-07T21:50:24.286632", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/opt/code\n" + ] + } + ], + "source": [ + "!echo ${CODE_DIR}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d17492bb-34fe-4c34-a693-419180ba068e", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-07T21:50:24.483752Z", + "iopub.status.busy": "2023-09-07T21:50:24.483680Z", + "iopub.status.idle": "2023-09-07T21:50:24.673641Z", + "shell.execute_reply": "2023-09-07T21:50:24.673440Z" + }, + "papermill": { + "duration": 0.200674, + "end_time": "2023-09-07T21:50:24.674313", + "exception": false, + "start_time": "2023-09-07T21:50:24.473639", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/opt/code/libs/ccc/coef/__pycache__\n", + "/opt/code/libs/ccc/pytorch/__pycache__\n", + "/opt/code/libs/ccc/scipy/__pycache__\n", + "/opt/code/libs/ccc/utils/__pycache__\n", + "/opt/code/libs/ccc/__pycache__\n", + "/opt/code/libs/ccc/sklearn/__pycache__\n" + ] + } + ], + "source": [ + "!find ${CODE_DIR} -regex '^.*\\(__pycache__\\)$' -print" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5683e330-1782-43b3-bb78-255198f03620", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-07T21:50:24.685783Z", + 
"iopub.status.busy": "2023-09-07T21:50:24.685672Z", + "iopub.status.idle": "2023-09-07T21:50:24.876601Z", + "shell.execute_reply": "2023-09-07T21:50:24.876228Z" + }, + "papermill": { + "duration": 0.201811, + "end_time": "2023-09-07T21:50:24.877421", + "exception": false, + "start_time": "2023-09-07T21:50:24.675610", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!find ${CODE_DIR} -regex '^.*\\(__pycache__\\)$' -prune -exec rm -rf {} \\;" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "5cf4ce29-d611-4fc8-8880-293c09e5ab9a", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-07T21:50:24.889188Z", + "iopub.status.busy": "2023-09-07T21:50:24.889097Z", + "iopub.status.idle": "2023-09-07T21:50:25.077437Z", + "shell.execute_reply": "2023-09-07T21:50:25.077119Z" + }, + "papermill": { + "duration": 0.199437, + "end_time": "2023-09-07T21:50:25.078312", + "exception": false, + "start_time": "2023-09-07T21:50:24.878875", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!find ${CODE_DIR} -regex '^.*\\(__pycache__\\)$' -print" + ] + }, + { + "cell_type": "markdown", + "id": "35a04385-a901-4726-82a6-a01f16281efe", + "metadata": { + "papermill": { + "duration": 0.009595, + "end_time": "2023-09-07T21:50:25.089323", + "exception": false, + "start_time": "2023-09-07T21:50:25.079728", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Modules" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a75c4496-d379-4668-905d-0e9136981f0c", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-07T21:50:25.097159Z", + "iopub.status.busy": "2023-09-07T21:50:25.096955Z", + "iopub.status.idle": "2023-09-07T21:50:28.783808Z", + "shell.execute_reply": "2023-09-07T21:50:28.783556Z" + }, + "papermill": { + "duration": 3.690496, + "end_time": "2023-09-07T21:50:28.784522", + "exception": false, + "start_time": "2023-09-07T21:50:25.094026", + "status": 
"completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "from ccc.coef import ccc" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1a58ccf8-1bf5-4177-9b06-944a0d57655a", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-07T21:50:28.796329Z", + "iopub.status.busy": "2023-09-07T21:50:28.796236Z", + "iopub.status.idle": "2023-09-07T21:50:28.800266Z", + "shell.execute_reply": "2023-09-07T21:50:28.800135Z" + }, + "papermill": { + "duration": 0.014803, + "end_time": "2023-09-07T21:50:28.800794", + "exception": false, + "start_time": "2023-09-07T21:50:28.785991", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.20454545454545456" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# let numba compile all the code before profiling\n", + "ccc(np.random.rand(10), np.random.rand(10))" + ] + }, + { + "cell_type": "markdown", + "id": "2c92a1ad-2fc9-4a16-a5f8-fce685246996", + "metadata": { + "papermill": { + "duration": 0.001156, + "end_time": "2023-09-07T21:50:28.803229", + "exception": false, + "start_time": "2023-09-07T21:50:28.802073", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Data" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "2316ffcd-a6e4-453f-bb52-779685c5c5bf", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-07T21:50:28.808468Z", + "iopub.status.busy": "2023-09-07T21:50:28.808398Z", + "iopub.status.idle": "2023-09-07T21:50:28.809674Z", + "shell.execute_reply": "2023-09-07T21:50:28.809542Z" + }, + "papermill": { + "duration": 0.004923, + "end_time": "2023-09-07T21:50:28.810233", + "exception": false, + "start_time": "2023-09-07T21:50:28.805310", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "n_genes, n_samples = 10, 30000" + ] + }, + { + "cell_type": "code", + "execution_count": 9, 
+ "id": "b2f92fb1-113d-479b-8bbf-2be229e26e8f", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-07T21:50:28.813938Z", + "iopub.status.busy": "2023-09-07T21:50:28.813857Z", + "iopub.status.idle": "2023-09-07T21:50:28.815118Z", + "shell.execute_reply": "2023-09-07T21:50:28.814988Z" + }, + "papermill": { + "duration": 0.004026, + "end_time": "2023-09-07T21:50:28.815525", + "exception": false, + "start_time": "2023-09-07T21:50:28.811499", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "np.random.seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "63638c0b-b436-48a9-93e0-db2adb939a61", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-07T21:50:28.821382Z", + "iopub.status.busy": "2023-09-07T21:50:28.821248Z", + "iopub.status.idle": "2023-09-07T21:50:28.823688Z", + "shell.execute_reply": "2023-09-07T21:50:28.823563Z" + }, + "papermill": { + "duration": 0.004618, + "end_time": "2023-09-07T21:50:28.824154", + "exception": false, + "start_time": "2023-09-07T21:50:28.819536", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "data = np.random.rand(n_genes, n_samples)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "808017ed-9a8a-4bf7-a3dd-42317a39ce8f", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-07T21:50:28.827384Z", + "iopub.status.busy": "2023-09-07T21:50:28.827128Z", + "iopub.status.idle": "2023-09-07T21:50:28.829441Z", + "shell.execute_reply": "2023-09-07T21:50:28.829254Z" + }, + "papermill": { + "duration": 0.004407, + "end_time": "2023-09-07T21:50:28.829888", + "exception": false, + "start_time": "2023-09-07T21:50:28.825481", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(10, 30000)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.shape" + ] + }, + { + "cell_type": "markdown", + 
"id": "716e4219-cad5-453b-8331-47d310689e03", + "metadata": { + "papermill": { + "duration": 0.001246, + "end_time": "2023-09-07T21:50:28.832473", + "exception": false, + "start_time": "2023-09-07T21:50:28.831227", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# With default `internal_n_clusters`" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "67807856-f337-4c6e-ae31-cd306577a314", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-07T21:50:28.835528Z", + "iopub.status.busy": "2023-09-07T21:50:28.835388Z", + "iopub.status.idle": "2023-09-07T21:50:28.837187Z", + "shell.execute_reply": "2023-09-07T21:50:28.837009Z" + }, + "papermill": { + "duration": 0.003781, + "end_time": "2023-09-07T21:50:28.837599", + "exception": false, + "start_time": "2023-09-07T21:50:28.833818", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def func():\n", + " n_clust = list(range(2, 10 + 1))\n", + " return ccc(data, internal_n_clusters=n_clust, n_jobs=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "2965a695-5c0c-4e9e-8435-dcbfa610eb81", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-07T21:50:28.840637Z", + "iopub.status.busy": "2023-09-07T21:50:28.840500Z", + "iopub.status.idle": "2023-09-07T21:51:18.270578Z", + "shell.execute_reply": "2023-09-07T21:51:18.270078Z" + }, + "papermill": { + "duration": 49.433011, + "end_time": "2023-09-07T21:51:18.271967", + "exception": false, + "start_time": "2023-09-07T21:50:28.838956", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.07 s ± 1.62 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" + ] + } + ], + "source": [ + "%%timeit func()\n", + "func()" + ] + }, + { + "cell_type": "markdown", + "id": "025974ff-06d4-4d82-9b05-03f7a84d7211", + "metadata": { + "papermill": { + "duration": 0.00249, + "end_time": "2023-09-07T21:51:21.366393", + "exception": false, + "start_time": "2023-09-07T21:51:21.363903", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# With reduced `internal_n_clusters`" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "c20c4ecf-8060-495e-92b0-eb8e0d0dfbf3", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-07T21:51:21.371305Z", + "iopub.status.busy": "2023-09-07T21:51:21.371197Z", + "iopub.status.idle": "2023-09-07T21:51:21.373116Z", + "shell.execute_reply": "2023-09-07T21:51:21.372833Z" + }, + "papermill": { + "duration": 0.004744, + "end_time": "2023-09-07T21:51:21.373590", + "exception": false, + "start_time": "2023-09-07T21:51:21.368846", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def func():\n", + " n_clust = list(range(2, 5 + 1))\n", + " return ccc(data, internal_n_clusters=n_clust, n_jobs=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "11259d8c-3bf3-4299-b47b-211556c3bc08", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-07T21:51:21.376796Z", + "iopub.status.busy": "2023-09-07T21:51:21.376723Z", + "iopub.status.idle": "2023-09-07T21:51:28.388036Z", + "shell.execute_reply": "2023-09-07T21:51:28.387633Z" + }, + "papermill": { + "duration": 7.013668, + "end_time": "2023-09-07T21:51:28.388694", + "exception": false, + "start_time": "2023-09-07T21:51:21.375026", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "438 ms ± 219 µs per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" + ] + } + ], + "source": [ + "%%timeit func()\n", + "func()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2556204-1c10-4e01-8c6c-ea63ddb37530", + "metadata": { + "papermill": { + "duration": 0.005575, + "end_time": "2023-09-07T21:51:28.851104", + "exception": false, + "start_time": "2023-09-07T21:51:28.845529", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "all,-execution,-papermill,-trusted", + "notebook_metadata_filter": "-jupytext.text_representation.jupytext_version", + "text_representation": { + "extension": ".py", + "format_name": "percent", + "format_version": "1.3" + } + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 65.47885, + "end_time": "2023-09-07T21:51:29.069018", + "environment_variables": {}, + "exception": null, + "input_path": "nbs/others/05_ccc_profiling/12_cm_optimized/17_00-2_CPUs-default-many_samples.ipynb", + "output_path": "nbs/others/05_ccc_profiling/12_cm_optimized/17_00-2_CPUs-default-many_samples.run.ipynb", + "parameters": {}, + "start_time": "2023-09-07T21:50:23.590168", + "version": "2.3.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/nbs/others/05_ccc_profiling/12_cm_optimized/17_01-2_CPUs-disable_numba-many_samples.ipynb b/nbs/others/05_ccc_profiling/12_cm_optimized/17_01-2_CPUs-disable_numba-many_samples.ipynb new file mode 100644 index 00000000..a832caae --- /dev/null +++ b/nbs/others/05_ccc_profiling/12_cm_optimized/17_01-2_CPUs-disable_numba-many_samples.ipynb @@ -0,0 +1,739 @@ +{ + 
"cells": [ + { + "cell_type": "markdown", + "id": "4ac1bef9-a7b7-468d-9b3f-24a591c70af5", + "metadata": { + "papermill": { + "duration": 0.005746, + "end_time": "2023-09-08T05:00:53.948601", + "exception": false, + "start_time": "2023-09-08T05:00:53.942855", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Description" + ] + }, + { + "cell_type": "markdown", + "id": "d5dbb153-96e6-45d9-b6b9-e5d15ee1ec7f", + "metadata": { + "papermill": { + "duration": 0.003163, + "end_time": "2023-09-08T05:00:53.954950", + "exception": false, + "start_time": "2023-09-08T05:00:53.951787", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Like `07_01` but using more CPU cores." + ] + }, + { + "cell_type": "markdown", + "id": "a2ba0f9f-aa06-40d0-becc-b9f347f44336", + "metadata": { + "papermill": { + "duration": 0.002838, + "end_time": "2023-09-08T05:00:53.960306", + "exception": false, + "start_time": "2023-09-08T05:00:53.957468", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Use multiple CPU cores" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "70532b5b-b477-41f9-b29e-899ed234b1ce", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:00:53.966295Z", + "iopub.status.busy": "2023-09-08T05:00:53.966162Z", + "iopub.status.idle": "2023-09-08T05:00:53.971655Z", + "shell.execute_reply": "2023-09-08T05:00:53.971399Z" + }, + "papermill": { + "duration": 0.009689, + "end_time": "2023-09-08T05:00:53.972568", + "exception": false, + "start_time": "2023-09-08T05:00:53.962879", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "env: CM_N_JOBS=2\n", + "env: NUMBA_NUM_THREADS=2\n", + "env: MKL_NUM_THREADS=2\n", + "env: OPEN_BLAS_NUM_THREADS=2\n", + "env: NUMEXPR_NUM_THREADS=2\n", + "env: OMP_NUM_THREADS=2\n" + ] + } + ], + "source": [ + "%env CM_N_JOBS=2\n", + "%env NUMBA_NUM_THREADS=2\n", + "%env MKL_NUM_THREADS=2\n", + "%env
OPEN_BLAS_NUM_THREADS=2\n", + "%env NUMEXPR_NUM_THREADS=2\n", + "%env OMP_NUM_THREADS=2" + ] + }, + { + "cell_type": "markdown", + "id": "874ffd00-afe1-4b51-bb22-350cbd9479f1", + "metadata": { + "papermill": { + "duration": 0.001613, + "end_time": "2023-09-08T05:00:53.976349", + "exception": false, + "start_time": "2023-09-08T05:00:53.974736", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Disable numba" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "502fe9ff-d27d-43bd-aa37-73edf7ba4f24", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:00:53.980252Z", + "iopub.status.busy": "2023-09-08T05:00:53.980203Z", + "iopub.status.idle": "2023-09-08T05:00:53.981834Z", + "shell.execute_reply": "2023-09-08T05:00:53.981690Z" + }, + "papermill": { + "duration": 0.004234, + "end_time": "2023-09-08T05:00:53.982334", + "exception": false, + "start_time": "2023-09-08T05:00:53.978100", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "env: NUMBA_DISABLE_JIT=1\n" + ] + } + ], + "source": [ + "%env NUMBA_DISABLE_JIT=1" + ] + }, + { + "cell_type": "markdown", + "id": "325cb06a-df7f-43e8-be3e-f704aaf015b0", + "metadata": { + "papermill": { + "duration": 0.001163, + "end_time": "2023-09-08T05:00:53.984723", + "exception": false, + "start_time": "2023-09-08T05:00:53.983560", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Remove pycache dir" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "73f954a6-1776-4b92-bd0e-fc3caf5df081", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:00:53.987333Z", + "iopub.status.busy": "2023-09-08T05:00:53.987241Z", + "iopub.status.idle": "2023-09-08T05:00:54.175946Z", + "shell.execute_reply": "2023-09-08T05:00:54.175076Z" + }, + "papermill": { + "duration": 0.192744, + "end_time": "2023-09-08T05:00:54.178662", + "exception": false, + "start_time": 
"2023-09-08T05:00:53.985918", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/opt/code\r\n" + ] + } + ], + "source": [ + "!echo ${CODE_DIR}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d17492bb-34fe-4c34-a693-419180ba068e", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:00:54.196166Z", + "iopub.status.busy": "2023-09-08T05:00:54.195873Z", + "iopub.status.idle": "2023-09-08T05:00:54.404869Z", + "shell.execute_reply": "2023-09-08T05:00:54.403939Z" + }, + "papermill": { + "duration": 0.223311, + "end_time": "2023-09-08T05:00:54.407528", + "exception": false, + "start_time": "2023-09-08T05:00:54.184217", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/opt/code/libs/ccc/coef/__pycache__\r\n", + "/opt/code/libs/ccc/pytorch/__pycache__\r\n", + "/opt/code/libs/ccc/scipy/__pycache__\r\n", + "/opt/code/libs/ccc/utils/__pycache__\r\n", + "/opt/code/libs/ccc/__pycache__\r\n", + "/opt/code/libs/ccc/sklearn/__pycache__\r\n" + ] + } + ], + "source": [ + "!find ${CODE_DIR} -regex '^.*\\(__pycache__\\)$' -print" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "5683e330-1782-43b3-bb78-255198f03620", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:00:54.424215Z", + "iopub.status.busy": "2023-09-08T05:00:54.423812Z", + "iopub.status.idle": "2023-09-08T05:00:54.632666Z", + "shell.execute_reply": "2023-09-08T05:00:54.631336Z" + }, + "papermill": { + "duration": 0.221592, + "end_time": "2023-09-08T05:00:54.635797", + "exception": false, + "start_time": "2023-09-08T05:00:54.414205", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!find ${CODE_DIR} -regex '^.*\\(__pycache__\\)$' -prune -exec rm -rf {} \\;" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": 
"5cf4ce29-d611-4fc8-8880-293c09e5ab9a", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:00:54.654315Z", + "iopub.status.busy": "2023-09-08T05:00:54.653925Z", + "iopub.status.idle": "2023-09-08T05:00:54.859654Z", + "shell.execute_reply": "2023-09-08T05:00:54.859102Z" + }, + "papermill": { + "duration": 0.218446, + "end_time": "2023-09-08T05:00:54.861009", + "exception": false, + "start_time": "2023-09-08T05:00:54.642563", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!find ${CODE_DIR} -regex '^.*\\(__pycache__\\)$' -print" + ] + }, + { + "cell_type": "markdown", + "id": "35a04385-a901-4726-82a6-a01f16281efe", + "metadata": { + "papermill": { + "duration": 0.009754, + "end_time": "2023-09-08T05:00:54.874289", + "exception": false, + "start_time": "2023-09-08T05:00:54.864535", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Modules" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a75c4496-d379-4668-905d-0e9136981f0c", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:00:54.881414Z", + "iopub.status.busy": "2023-09-08T05:00:54.881172Z", + "iopub.status.idle": "2023-09-08T05:00:54.995178Z", + "shell.execute_reply": "2023-09-08T05:00:54.994989Z" + }, + "papermill": { + "duration": 0.11861, + "end_time": "2023-09-08T05:00:54.995954", + "exception": false, + "start_time": "2023-09-08T05:00:54.877344", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "from ccc.coef import ccc" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1a58ccf8-1bf5-4177-9b06-944a0d57655a", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:00:54.999234Z", + "iopub.status.busy": "2023-09-08T05:00:54.999177Z", + "iopub.status.idle": "2023-09-08T05:00:55.003427Z", + "shell.execute_reply": "2023-09-08T05:00:55.003294Z" + }, + "papermill": { + "duration": 0.006394, + "end_time": 
"2023-09-08T05:00:55.003896", + "exception": false, + "start_time": "2023-09-08T05:00:54.997502", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.20454545454545456" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# let numba compile all the code before profiling\n", + "ccc(np.random.rand(10), np.random.rand(10))" + ] + }, + { + "cell_type": "markdown", + "id": "2c92a1ad-2fc9-4a16-a5f8-fce685246996", + "metadata": { + "papermill": { + "duration": 0.00129, + "end_time": "2023-09-08T05:00:55.006717", + "exception": false, + "start_time": "2023-09-08T05:00:55.005427", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Data" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "2316ffcd-a6e4-453f-bb52-779685c5c5bf", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:00:55.009652Z", + "iopub.status.busy": "2023-09-08T05:00:55.009583Z", + "iopub.status.idle": "2023-09-08T05:00:55.010906Z", + "shell.execute_reply": "2023-09-08T05:00:55.010771Z" + }, + "papermill": { + "duration": 0.003378, + "end_time": "2023-09-08T05:00:55.011447", + "exception": false, + "start_time": "2023-09-08T05:00:55.008069", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "n_genes, n_samples = 10, 30000" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b2f92fb1-113d-479b-8bbf-2be229e26e8f", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:00:55.014434Z", + "iopub.status.busy": "2023-09-08T05:00:55.014326Z", + "iopub.status.idle": "2023-09-08T05:00:55.015556Z", + "shell.execute_reply": "2023-09-08T05:00:55.015414Z" + }, + "papermill": { + "duration": 0.003101, + "end_time": "2023-09-08T05:00:55.015931", + "exception": false, + "start_time": "2023-09-08T05:00:55.012830", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + 
"np.random.seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "63638c0b-b436-48a9-93e0-db2adb939a61", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:00:55.018963Z", + "iopub.status.busy": "2023-09-08T05:00:55.018855Z", + "iopub.status.idle": "2023-09-08T05:00:55.021241Z", + "shell.execute_reply": "2023-09-08T05:00:55.021109Z" + }, + "papermill": { + "duration": 0.004286, + "end_time": "2023-09-08T05:00:55.021621", + "exception": false, + "start_time": "2023-09-08T05:00:55.017335", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "data = np.random.rand(n_genes, n_samples)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "808017ed-9a8a-4bf7-a3dd-42317a39ce8f", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:00:55.024584Z", + "iopub.status.busy": "2023-09-08T05:00:55.024472Z", + "iopub.status.idle": "2023-09-08T05:00:55.026043Z", + "shell.execute_reply": "2023-09-08T05:00:55.025903Z" + }, + "papermill": { + "duration": 0.003521, + "end_time": "2023-09-08T05:00:55.026504", + "exception": false, + "start_time": "2023-09-08T05:00:55.022983", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(10, 30000)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.shape" + ] + }, + { + "cell_type": "markdown", + "id": "716e4219-cad5-453b-8331-47d310689e03", + "metadata": { + "papermill": { + "duration": 0.001292, + "end_time": "2023-09-08T05:00:55.029223", + "exception": false, + "start_time": "2023-09-08T05:00:55.027931", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# With default `internal_n_clusters`" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "67807856-f337-4c6e-ae31-cd306577a314", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:00:55.032221Z", + "iopub.status.busy": 
"2023-09-08T05:00:55.032157Z", + "iopub.status.idle": "2023-09-08T05:00:55.033514Z", + "shell.execute_reply": "2023-09-08T05:00:55.033383Z" + }, + "papermill": { + "duration": 0.003351, + "end_time": "2023-09-08T05:00:55.033936", + "exception": false, + "start_time": "2023-09-08T05:00:55.030585", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def func():\n", + " n_clust = list(range(2, 10 + 1))\n", + " return ccc(data, internal_n_clusters=n_clust, n_jobs=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "2965a695-5c0c-4e9e-8435-dcbfa610eb81", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:00:55.037170Z", + "iopub.status.busy": "2023-09-08T05:00:55.037067Z", + "iopub.status.idle": "2023-09-08T05:01:33.383384Z", + "shell.execute_reply": "2023-09-08T05:01:33.383211Z" + }, + "papermill": { + "duration": 38.350271, + "end_time": "2023-09-08T05:01:33.385683", + "exception": false, + "start_time": "2023-09-08T05:00:55.035412", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.39 s ± 17.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + ] + } + ], + "source": [ + "%%timeit func()\n", + "func()" + ] + }, + { + "cell_type": "markdown", + "id": "cd74d8b8-517c-42cf-9dbf-27474b2c3822", + "metadata": { + "papermill": { + "duration": 0.001501, + "end_time": "2023-09-08T05:01:33.397834", + "exception": false, + "start_time": "2023-09-08T05:01:33.396333", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "These results are just slightly worse than the numba-compiled version (notebook `07`)." 
+ ] + }, + { + "cell_type": "markdown", + "id": "025974ff-06d4-4d82-9b05-03f7a84d7211", + "metadata": { + "papermill": { + "duration": 0.0014, + "end_time": "2023-09-08T05:01:33.400682", + "exception": false, + "start_time": "2023-09-08T05:01:33.399282", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# With reduced `internal_n_clusters`" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "c20c4ecf-8060-495e-92b0-eb8e0d0dfbf3", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:01:33.404036Z", + "iopub.status.busy": "2023-09-08T05:01:33.403948Z", + "iopub.status.idle": "2023-09-08T05:01:33.405983Z", + "shell.execute_reply": "2023-09-08T05:01:33.405660Z" + }, + "papermill": { + "duration": 0.004442, + "end_time": "2023-09-08T05:01:33.406560", + "exception": false, + "start_time": "2023-09-08T05:01:33.402118", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def func():\n", + " n_clust = list(range(2, 5 + 1))\n", + " return ccc(data, internal_n_clusters=n_clust, n_jobs=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "11259d8c-3bf3-4299-b47b-211556c3bc08", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:01:33.409870Z", + "iopub.status.busy": "2023-09-08T05:01:33.409812Z", + "iopub.status.idle": "2023-09-08T05:01:38.276121Z", + "shell.execute_reply": "2023-09-08T05:01:38.275871Z" + }, + "papermill": { + "duration": 4.868725, + "end_time": "2023-09-08T05:01:38.276808", + "exception": false, + "start_time": "2023-09-08T05:01:33.408083", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "303 ms ± 3.08 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" + ] + } + ], + "source": [ + "%%timeit func()\n", + "func()" + ] + }, + { + "cell_type": "markdown", + "id": "ba154ea5-5301-4fd4-8fc7-71534435a2a5", + "metadata": { + "papermill": { + "duration": 0.009806, + "end_time": "2023-09-08T05:01:38.288275", + "exception": false, + "start_time": "2023-09-08T05:01:38.278469", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "These results are slightly better than the numba-compiled version (notebook `07`), which is surprising. In the future, it would be interesting to disable threading here to get accurate profiling results to debug this issue." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2556204-1c10-4e01-8c6c-ea63ddb37530", + "metadata": { + "papermill": { + "duration": 0.001449, + "end_time": "2023-09-08T05:01:38.291365", + "exception": false, + "start_time": "2023-09-08T05:01:38.289916", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "all,-execution,-papermill,-trusted", + "notebook_metadata_filter": "-jupytext.text_representation.jupytext_version" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 45.129102, + "end_time": "2023-09-08T05:01:38.408234", + "environment_variables": {}, + "exception": null, + "input_path": "nbs/others/05_ccc_profiling/12_cm_optimized/17_01-2_CPUs-disable_numba-many_samples.ipynb", + "output_path": "nbs/others/05_ccc_profiling/12_cm_optimized/17_01-2_CPUs-disable_numba-many_samples.run.ipynb", + "parameters": {}, + "start_time": 
"2023-09-08T05:00:53.279132", + "version": "2.3.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/nbs/others/05_ccc_profiling/12_cm_optimized/17_02-2_CPUs-use_ari_numba-many_samples-use_ari_numba.ipynb b/nbs/others/05_ccc_profiling/12_cm_optimized/17_02-2_CPUs-use_ari_numba-many_samples-use_ari_numba.ipynb new file mode 100644 index 00000000..edb7f9c9 --- /dev/null +++ b/nbs/others/05_ccc_profiling/12_cm_optimized/17_02-2_CPUs-use_ari_numba-many_samples-use_ari_numba.ipynb @@ -0,0 +1,655 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d08e8972-7897-4568-a2cc-a88e60f35329", + "metadata": { + "papermill": { + "duration": 0.005433, + "end_time": "2023-09-08T05:03:10.035747", + "exception": false, + "start_time": "2023-09-08T05:03:10.030314", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Description" + ] + }, + { + "cell_type": "markdown", + "id": "1c0e6677-6256-4ba6-ba2c-ddca92a31e5c", + "metadata": { + "papermill": { + "duration": 0.005768, + "end_time": "2023-09-08T05:03:10.051408", + "exception": false, + "start_time": "2023-09-08T05:03:10.045640", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Like `07_02` but using more CPU cores." 
+ ] + }, + { + "cell_type": "markdown", + "id": "fb686b34-1c32-4b3e-bf15-4d95303369d6", + "metadata": { + "papermill": { + "duration": 0.001439, + "end_time": "2023-09-08T05:03:10.059273", + "exception": false, + "start_time": "2023-09-08T05:03:10.057834", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Use multiple CPU core" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "bda7d08f-862b-4843-97e5-a5c648c05480", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:03:10.064850Z", + "iopub.status.busy": "2023-09-08T05:03:10.064568Z", + "iopub.status.idle": "2023-09-08T05:03:10.071664Z", + "shell.execute_reply": "2023-09-08T05:03:10.071151Z" + }, + "papermill": { + "duration": 0.011119, + "end_time": "2023-09-08T05:03:10.072288", + "exception": false, + "start_time": "2023-09-08T05:03:10.061169", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "env: CM_N_JOBS=2\n", + "env: NUMBA_NUM_THREADS=2\n", + "env: MKL_NUM_THREADS=2\n", + "env: OPEN_BLAS_NUM_THREADS=2\n", + "env: NUMEXPR_NUM_THREADS=2\n", + "env: OMP_NUM_THREADS=2\n" + ] + } + ], + "source": [ + "%env CM_N_JOBS=2\n", + "%env NUMBA_NUM_THREADS=2\n", + "%env MKL_NUM_THREADS=2\n", + "%env OPEN_BLAS_NUM_THREADS=2\n", + "%env NUMEXPR_NUM_THREADS=2\n", + "%env OMP_NUM_THREADS=2" + ] + }, + { + "cell_type": "markdown", + "id": "325cb06a-df7f-43e8-be3e-f704aaf015b0", + "metadata": { + "papermill": { + "duration": 0.001666, + "end_time": "2023-09-08T05:03:10.075218", + "exception": false, + "start_time": "2023-09-08T05:03:10.073552", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Remove pycache dir" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "73f954a6-1776-4b92-bd0e-fc3caf5df081", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:03:10.080278Z", + "iopub.status.busy": "2023-09-08T05:03:10.080067Z", + 
"iopub.status.idle": "2023-09-08T05:03:10.272483Z", + "shell.execute_reply": "2023-09-08T05:03:10.271593Z" + }, + "papermill": { + "duration": 0.197838, + "end_time": "2023-09-08T05:03:10.275353", + "exception": false, + "start_time": "2023-09-08T05:03:10.077515", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/opt/code\r\n" + ] + } + ], + "source": [ + "!echo ${CODE_DIR}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d17492bb-34fe-4c34-a693-419180ba068e", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:03:10.293997Z", + "iopub.status.busy": "2023-09-08T05:03:10.293500Z", + "iopub.status.idle": "2023-09-08T05:03:10.497334Z", + "shell.execute_reply": "2023-09-08T05:03:10.496046Z" + }, + "papermill": { + "duration": 0.218401, + "end_time": "2023-09-08T05:03:10.500077", + "exception": false, + "start_time": "2023-09-08T05:03:10.281676", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/opt/code/libs/ccc/coef/__pycache__\r\n", + "/opt/code/libs/ccc/pytorch/__pycache__\r\n", + "/opt/code/libs/ccc/scipy/__pycache__\r\n", + "/opt/code/libs/ccc/utils/__pycache__\r\n", + "/opt/code/libs/ccc/__pycache__\r\n", + "/opt/code/libs/ccc/sklearn/__pycache__\r\n" + ] + } + ], + "source": [ + "!find ${CODE_DIR} -regex '^.*\\(__pycache__\\)$' -print" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5683e330-1782-43b3-bb78-255198f03620", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:03:10.519179Z", + "iopub.status.busy": "2023-09-08T05:03:10.518731Z", + "iopub.status.idle": "2023-09-08T05:03:10.726062Z", + "shell.execute_reply": "2023-09-08T05:03:10.724984Z" + }, + "papermill": { + "duration": 0.222731, + "end_time": "2023-09-08T05:03:10.729457", + "exception": false, + "start_time": "2023-09-08T05:03:10.506726", + "status": 
"completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!find ${CODE_DIR} -regex '^.*\\(__pycache__\\)$' -prune -exec rm -rf {} \\;" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "5cf4ce29-d611-4fc8-8880-293c09e5ab9a", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:03:10.746661Z", + "iopub.status.busy": "2023-09-08T05:03:10.746461Z", + "iopub.status.idle": "2023-09-08T05:03:10.947894Z", + "shell.execute_reply": "2023-09-08T05:03:10.946701Z" + }, + "papermill": { + "duration": 0.215285, + "end_time": "2023-09-08T05:03:10.950772", + "exception": false, + "start_time": "2023-09-08T05:03:10.735487", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!find ${CODE_DIR} -regex '^.*\\(__pycache__\\)$' -print" + ] + }, + { + "cell_type": "markdown", + "id": "35a04385-a901-4726-82a6-a01f16281efe", + "metadata": { + "papermill": { + "duration": 0.009972, + "end_time": "2023-09-08T05:03:10.967145", + "exception": false, + "start_time": "2023-09-08T05:03:10.957173", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Modules" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a75c4496-d379-4668-905d-0e9136981f0c", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:03:10.979172Z", + "iopub.status.busy": "2023-09-08T05:03:10.979005Z", + "iopub.status.idle": "2023-09-08T05:03:14.684587Z", + "shell.execute_reply": "2023-09-08T05:03:14.684201Z" + }, + "papermill": { + "duration": 3.711968, + "end_time": "2023-09-08T05:03:14.685357", + "exception": false, + "start_time": "2023-09-08T05:03:10.973389", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "from ccc.coef import ccc" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1a58ccf8-1bf5-4177-9b06-944a0d57655a", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:03:14.688456Z", + 
"iopub.status.busy": "2023-09-08T05:03:14.688396Z", + "iopub.status.idle": "2023-09-08T05:03:14.692601Z", + "shell.execute_reply": "2023-09-08T05:03:14.692408Z" + }, + "papermill": { + "duration": 0.006224, + "end_time": "2023-09-08T05:03:14.693029", + "exception": false, + "start_time": "2023-09-08T05:03:14.686805", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# let numba compile all the code before profiling\n", + "ccc(np.random.rand(10), np.random.rand(10))" + ] + }, + { + "cell_type": "markdown", + "id": "2c92a1ad-2fc9-4a16-a5f8-fce685246996", + "metadata": { + "papermill": { + "duration": 0.00134, + "end_time": "2023-09-08T05:03:14.695691", + "exception": false, + "start_time": "2023-09-08T05:03:14.694351", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Data" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "2316ffcd-a6e4-453f-bb52-779685c5c5bf", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:03:14.698676Z", + "iopub.status.busy": "2023-09-08T05:03:14.698544Z", + "iopub.status.idle": "2023-09-08T05:03:14.699967Z", + "shell.execute_reply": "2023-09-08T05:03:14.699784Z" + }, + "papermill": { + "duration": 0.003378, + "end_time": "2023-09-08T05:03:14.700373", + "exception": false, + "start_time": "2023-09-08T05:03:14.696995", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "n_genes, n_samples = 10, 30000" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b2f92fb1-113d-479b-8bbf-2be229e26e8f", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:03:14.703201Z", + "iopub.status.busy": "2023-09-08T05:03:14.703150Z", + "iopub.status.idle": "2023-09-08T05:03:14.704507Z", + "shell.execute_reply": "2023-09-08T05:03:14.704332Z" + }, + "papermill": { + "duration": 
0.003208, + "end_time": "2023-09-08T05:03:14.704908", + "exception": false, + "start_time": "2023-09-08T05:03:14.701700", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "np.random.seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "63638c0b-b436-48a9-93e0-db2adb939a61", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:03:14.707778Z", + "iopub.status.busy": "2023-09-08T05:03:14.707680Z", + "iopub.status.idle": "2023-09-08T05:03:14.710246Z", + "shell.execute_reply": "2023-09-08T05:03:14.710059Z" + }, + "papermill": { + "duration": 0.004443, + "end_time": "2023-09-08T05:03:14.710664", + "exception": false, + "start_time": "2023-09-08T05:03:14.706221", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "data = np.random.rand(n_genes, n_samples)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "808017ed-9a8a-4bf7-a3dd-42317a39ce8f", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:03:14.713555Z", + "iopub.status.busy": "2023-09-08T05:03:14.713440Z", + "iopub.status.idle": "2023-09-08T05:03:14.715163Z", + "shell.execute_reply": "2023-09-08T05:03:14.714996Z" + }, + "papermill": { + "duration": 0.003608, + "end_time": "2023-09-08T05:03:14.715576", + "exception": false, + "start_time": "2023-09-08T05:03:14.711968", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(10, 30000)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.shape" + ] + }, + { + "cell_type": "markdown", + "id": "716e4219-cad5-453b-8331-47d310689e03", + "metadata": { + "papermill": { + "duration": 0.001264, + "end_time": "2023-09-08T05:03:14.718203", + "exception": false, + "start_time": "2023-09-08T05:03:14.716939", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# With default `internal_n_clusters`" + ] + }, + { + 
"cell_type": "code", + "execution_count": 12, + "id": "67807856-f337-4c6e-ae31-cd306577a314", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:03:14.721070Z", + "iopub.status.busy": "2023-09-08T05:03:14.720978Z", + "iopub.status.idle": "2023-09-08T05:03:14.722492Z", + "shell.execute_reply": "2023-09-08T05:03:14.722312Z" + }, + "papermill": { + "duration": 0.003352, + "end_time": "2023-09-08T05:03:14.722854", + "exception": false, + "start_time": "2023-09-08T05:03:14.719502", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def func():\n", + " n_clust = list(range(2, 10 + 1))\n", + " return ccc(data, internal_n_clusters=n_clust, use_ari_numba=True, n_jobs=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "2965a695-5c0c-4e9e-8435-dcbfa610eb81", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:03:14.725798Z", + "iopub.status.busy": "2023-09-08T05:03:14.725724Z", + "iopub.status.idle": "2023-09-08T05:04:04.880078Z", + "shell.execute_reply": "2023-09-08T05:04:04.879625Z" + }, + "papermill": { + "duration": 50.157331, + "end_time": "2023-09-08T05:04:04.881548", + "exception": false, + "start_time": "2023-09-08T05:03:14.724217", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.07 s ± 1.43 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" + ] + } + ], + "source": [ + "%%timeit func()\n", + "func()" + ] + }, + { + "cell_type": "markdown", + "id": "025974ff-06d4-4d82-9b05-03f7a84d7211", + "metadata": { + "papermill": { + "duration": 0.002933, + "end_time": "2023-09-08T05:04:04.893577", + "exception": false, + "start_time": "2023-09-08T05:04:04.890644", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# With reduced `internal_n_clusters`" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "c20c4ecf-8060-495e-92b0-eb8e0d0dfbf3", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:04:04.897237Z", + "iopub.status.busy": "2023-09-08T05:04:04.897120Z", + "iopub.status.idle": "2023-09-08T05:04:04.899551Z", + "shell.execute_reply": "2023-09-08T05:04:04.899153Z" + }, + "papermill": { + "duration": 0.005369, + "end_time": "2023-09-08T05:04:04.900359", + "exception": false, + "start_time": "2023-09-08T05:04:04.894990", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def func():\n", + " n_clust = list(range(2, 5 + 1))\n", + " return ccc(data, internal_n_clusters=n_clust, use_ari_numba=True, n_jobs=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "11259d8c-3bf3-4299-b47b-211556c3bc08", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-08T05:04:04.905493Z", + "iopub.status.busy": "2023-09-08T05:04:04.905347Z", + "iopub.status.idle": "2023-09-08T05:04:11.910573Z", + "shell.execute_reply": "2023-09-08T05:04:11.910156Z" + }, + "papermill": { + "duration": 7.008557, + "end_time": "2023-09-08T05:04:11.911267", + "exception": false, + "start_time": "2023-09-08T05:04:04.902710", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "437 ms ± 631 µs per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" + ] + } + ], + "source": [ + "%%timeit func()\n", + "func()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2556204-1c10-4e01-8c6c-ea63ddb37530", + "metadata": { + "papermill": { + "duration": 0.009712, + "end_time": "2023-09-08T05:04:11.922464", + "exception": false, + "start_time": "2023-09-08T05:04:11.912752", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "all,-execution,-papermill,-trusted", + "notebook_metadata_filter": "-jupytext.text_representation.jupytext_version" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 62.77804, + "end_time": "2023-09-08T05:04:12.142929", + "environment_variables": {}, + "exception": null, + "input_path": "nbs/others/05_ccc_profiling/12_cm_optimized/17_02-2_CPUs-use_ari_numba-many_samples-use_ari_numba.ipynb", + "output_path": "nbs/others/05_ccc_profiling/12_cm_optimized/17_02-2_CPUs-use_ari_numba-many_samples-use_ari_numba.run.ipynb", + "parameters": {}, + "start_time": "2023-09-08T05:03:09.364889", + "version": "2.3.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/nbs/others/05_ccc_profiling/12_cm_optimized/py/17_00-2_CPUs-default-many_samples.py b/nbs/others/05_ccc_profiling/12_cm_optimized/py/17_00-2_CPUs-default-many_samples.py new file mode 100644 index 00000000..d3af0d04 --- /dev/null +++ b/nbs/others/05_ccc_profiling/12_cm_optimized/py/17_00-2_CPUs-default-many_samples.py @@ -0,0 +1,103 @@ +# --- +# jupyter: +# jupytext: +# cell_metadata_filter: all,-execution,-papermill,-trusted 
+# notebook_metadata_filter: -jupytext.text_representation.jupytext_version +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] tags=[] +# # Description + +# %% [markdown] tags=[] +# Like `07_00` but using more CPU cores. + +# %% [markdown] tags=[] +# # Use multiple CPU core + +# %% tags=[] +# %env CM_N_JOBS=2 +# %env NUMBA_NUM_THREADS=2 +# %env MKL_NUM_THREADS=2 +# %env OPEN_BLAS_NUM_THREADS=2 +# %env NUMEXPR_NUM_THREADS=2 +# %env OMP_NUM_THREADS=2 + +# %% [markdown] tags=[] +# # Remove pycache dir + +# %% tags=[] +# !echo ${CODE_DIR} + +# %% tags=[] +# !find ${CODE_DIR} -regex '^.*\(__pycache__\)$' -print + +# %% tags=[] +# !find ${CODE_DIR} -regex '^.*\(__pycache__\)$' -prune -exec rm -rf {} \; + +# %% tags=[] +# !find ${CODE_DIR} -regex '^.*\(__pycache__\)$' -print + +# %% [markdown] tags=[] +# # Modules + +# %% tags=[] +import numpy as np + +from ccc.coef import ccc + +# %% tags=[] +# let numba compile all the code before profiling +ccc(np.random.rand(10), np.random.rand(10)) + +# %% [markdown] tags=[] +# # Data + +# %% tags=[] +n_genes, n_samples = 10, 30000 + +# %% tags=[] +np.random.seed(0) + +# %% tags=[] +data = np.random.rand(n_genes, n_samples) + +# %% tags=[] +data.shape + + +# %% [markdown] tags=[] +# # With default `internal_n_clusters` + +# %% tags=[] +def func(): + n_clust = list(range(2, 10 + 1)) + return ccc(data, internal_n_clusters=n_clust, n_jobs=2) + + +# %% tags=[] +# %%timeit func() +func() + + +# %% [markdown] tags=[] +# # With reduced `internal_n_clusters` + +# %% tags=[] +def func(): + n_clust = list(range(2, 5 + 1)) + return ccc(data, internal_n_clusters=n_clust, n_jobs=2) + + +# %% tags=[] +# %%timeit func() +func() + +# %% tags=[] diff --git a/nbs/others/05_ccc_profiling/12_cm_optimized/py/17_01-2_CPUs-disable_numba-many_samples.py 
b/nbs/others/05_ccc_profiling/12_cm_optimized/py/17_01-2_CPUs-disable_numba-many_samples.py new file mode 100644 index 00000000..7bc0a85e --- /dev/null +++ b/nbs/others/05_ccc_profiling/12_cm_optimized/py/17_01-2_CPUs-disable_numba-many_samples.py @@ -0,0 +1,115 @@ +# --- +# jupyter: +# jupytext: +# cell_metadata_filter: all,-execution,-papermill,-trusted +# notebook_metadata_filter: -jupytext.text_representation.jupytext_version +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] tags=[] +# # Description + +# %% [markdown] tags=[] +# Like `07_01` but using more CPU cores. + +# %% [markdown] tags=[] +# # Use multiple CPU core + +# %% tags=[] +# %env CM_N_JOBS=2 +# %env NUMBA_NUM_THREADS=2 +# %env MKL_NUM_THREADS=2 +# %env OPEN_BLAS_NUM_THREADS=2 +# %env NUMEXPR_NUM_THREADS=2 +# %env OMP_NUM_THREADS=2 + +# %% [markdown] tags=[] +# # Disable numba + +# %% tags=[] +# %env NUMBA_DISABLE_JIT=1 + +# %% [markdown] tags=[] +# # Remove pycache dir + +# %% tags=[] +# !echo ${CODE_DIR} + +# %% tags=[] +# !find ${CODE_DIR} -regex '^.*\(__pycache__\)$' -print + +# %% tags=[] +# !find ${CODE_DIR} -regex '^.*\(__pycache__\)$' -prune -exec rm -rf {} \; + +# %% tags=[] +# !find ${CODE_DIR} -regex '^.*\(__pycache__\)$' -print + +# %% [markdown] tags=[] +# # Modules + +# %% tags=[] +import numpy as np + +from ccc.coef import ccc + +# %% tags=[] +# let numba compile all the code before profiling +ccc(np.random.rand(10), np.random.rand(10)) + +# %% [markdown] tags=[] +# # Data + +# %% tags=[] +n_genes, n_samples = 10, 30000 + +# %% tags=[] +np.random.seed(0) + +# %% tags=[] +data = np.random.rand(n_genes, n_samples) + +# %% tags=[] +data.shape + + +# %% [markdown] tags=[] +# # With default `internal_n_clusters` + +# %% tags=[] +def func(): + n_clust = list(range(2, 10 + 1)) + return ccc(data, internal_n_clusters=n_clust, n_jobs=2) + 
+ +# %% tags=[] +# %%timeit func() +func() + + +# %% [markdown] tags=[] +# These results are just slightly worse than the numba-compiled version (notebook `07`). + +# %% [markdown] tags=[] +# # With reduced `internal_n_clusters` + +# %% tags=[] +def func(): + n_clust = list(range(2, 5 + 1)) + return ccc(data, internal_n_clusters=n_clust, n_jobs=2) + + +# %% tags=[] +# %%timeit func() +func() + +# %% [markdown] tags=[] +# These results are slightly better than the numba-compiled version (notebook `07`), which is surprising. In the future, it would be interesting to disable threading here to get accurate profiling results to debug this issue. + +# %% tags=[] diff --git a/nbs/others/05_ccc_profiling/12_cm_optimized/py/17_02-2_CPUs-use_ari_numba-many_samples-use_ari_numba.py b/nbs/others/05_ccc_profiling/12_cm_optimized/py/17_02-2_CPUs-use_ari_numba-many_samples-use_ari_numba.py new file mode 100644 index 00000000..160c33d5 --- /dev/null +++ b/nbs/others/05_ccc_profiling/12_cm_optimized/py/17_02-2_CPUs-use_ari_numba-many_samples-use_ari_numba.py @@ -0,0 +1,103 @@ +# --- +# jupyter: +# jupytext: +# cell_metadata_filter: all,-execution,-papermill,-trusted +# notebook_metadata_filter: -jupytext.text_representation.jupytext_version +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] tags=[] +# # Description + +# %% [markdown] tags=[] +# Like `07_02` but using more CPU cores. 
+ +# %% [markdown] tags=[] +# # Use multiple CPU core + +# %% tags=[] +# %env CM_N_JOBS=2 +# %env NUMBA_NUM_THREADS=2 +# %env MKL_NUM_THREADS=2 +# %env OPEN_BLAS_NUM_THREADS=2 +# %env NUMEXPR_NUM_THREADS=2 +# %env OMP_NUM_THREADS=2 + +# %% [markdown] tags=[] +# # Remove pycache dir + +# %% tags=[] +# !echo ${CODE_DIR} + +# %% tags=[] +# !find ${CODE_DIR} -regex '^.*\(__pycache__\)$' -print + +# %% tags=[] +# !find ${CODE_DIR} -regex '^.*\(__pycache__\)$' -prune -exec rm -rf {} \; + +# %% tags=[] +# !find ${CODE_DIR} -regex '^.*\(__pycache__\)$' -print + +# %% [markdown] tags=[] +# # Modules + +# %% tags=[] +import numpy as np + +from ccc.coef import ccc + +# %% tags=[] +# let numba compile all the code before profiling +ccc(np.random.rand(10), np.random.rand(10)) + +# %% [markdown] tags=[] +# # Data + +# %% tags=[] +n_genes, n_samples = 10, 30000 + +# %% tags=[] +np.random.seed(0) + +# %% tags=[] +data = np.random.rand(n_genes, n_samples) + +# %% tags=[] +data.shape + + +# %% [markdown] tags=[] +# # With default `internal_n_clusters` + +# %% tags=[] +def func(): + n_clust = list(range(2, 10 + 1)) + return ccc(data, internal_n_clusters=n_clust, use_ari_numba=True, n_jobs=2) + + +# %% tags=[] +# %%timeit func() +func() + + +# %% [markdown] tags=[] +# # With reduced `internal_n_clusters` + +# %% tags=[] +def func(): + n_clust = list(range(2, 5 + 1)) + return ccc(data, internal_n_clusters=n_clust, use_ari_numba=True, n_jobs=2) + + +# %% tags=[] +# %%timeit func() +func() + +# %% tags=[]