Skip to content

Commit

Permalink
Merge pull request #1947 from microsoft/miguel/rerun_cpu_deeps
Browse files Browse the repository at this point in the history
Rerun and clean notebooks with Python 3.9
  • Loading branch information
miguelgfierro committed Jun 30, 2023
2 parents 044832a + f59d1c5 commit d284e5e
Show file tree
Hide file tree
Showing 6 changed files with 337 additions and 216 deletions.
51 changes: 22 additions & 29 deletions examples/01_prepare_data/data_split.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,16 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"System version: 3.7.15 (default, Nov 24 2022, 21:12:53) \n",
"System version: 3.9.16 (main, May 15 2023, 23:46:34) \n",
"[GCC 11.2.0]\n",
"Pyspark version: 3.3.1\n"
"Pyspark version: 3.2.4\n"
]
}
],
Expand All @@ -69,7 +69,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -106,14 +106,14 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████████████| 1.93k/1.93k [00:00<00:00, 1.94kKB/s]\n"
"100%|████████████████████████████████████████████████████████████████████████████| 1.93k/1.93k [00:08<00:00, 217KB/s]\n"
]
}
],
Expand Down Expand Up @@ -749,17 +749,17 @@
" <td>1997-11-03 07:33:03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16314</th>\n",
" <th>202</th>\n",
" <td>1</td>\n",
" <td>230</td>\n",
" <td>61</td>\n",
" <td>4</td>\n",
" <td>1997-11-03 07:33:40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51295</th>\n",
" <td>1</td>\n",
" <td>35</td>\n",
" <th>16314</th>\n",
" <td>1</td>\n",
" <td>230</td>\n",
" <td>4</td>\n",
" <td>1997-11-03 07:33:40</td>\n",
" </tr>\n",
" <tr>\n",
Expand All @@ -770,10 +770,10 @@
" <td>1997-11-03 07:33:40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>202</th>\n",
" <th>51295</th>\n",
" <td>1</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" <td>61</td>\n",
" <td>4</td>\n",
" <td>1997-11-03 07:33:40</td>\n",
" </tr>\n",
" <tr>\n",
Expand Down Expand Up @@ -806,10 +806,10 @@
"1989 1 90 4 1997-11-03 07:31:40\n",
"11807 1 219 1 1997-11-03 07:32:07\n",
"50026 1 167 2 1997-11-03 07:33:03\n",
"202 1 61 4 1997-11-03 07:33:40\n",
"16314 1 230 4 1997-11-03 07:33:40\n",
"51295 1 35 1 1997-11-03 07:33:40\n",
"43280 1 162 4 1997-11-03 07:33:40\n",
"202 1 61 4 1997-11-03 07:33:40\n",
"51295 1 35 1 1997-11-03 07:33:40\n",
"820 1 265 4 1997-11-03 07:34:01\n",
"11154 1 112 1 1997-11-03 07:34:01\n",
"45732 1 57 5 1997-11-03 07:34:19"
Expand Down Expand Up @@ -893,16 +893,16 @@
" <td>1997-11-03 07:35:52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96699</th>\n",
" <th>4280</th>\n",
" <td>1</td>\n",
" <td>152</td>\n",
" <td>82</td>\n",
" <td>5</td>\n",
" <td>1997-11-03 07:36:29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4280</th>\n",
" <th>96699</th>\n",
" <td>1</td>\n",
" <td>82</td>\n",
" <td>152</td>\n",
" <td>5</td>\n",
" <td>1997-11-03 07:36:29</td>\n",
" </tr>\n",
Expand Down Expand Up @@ -944,8 +944,8 @@
"24493 1 30 3 1997-11-03 07:35:15\n",
"6234 1 233 2 1997-11-03 07:35:52\n",
"39865 1 131 1 1997-11-03 07:35:52\n",
"96699 1 152 5 1997-11-03 07:36:29\n",
"4280 1 82 5 1997-11-03 07:36:29\n",
"96699 1 152 5 1997-11-03 07:36:29\n",
"25721 1 141 3 1997-11-03 07:36:48\n",
"5842 1 72 4 1997-11-03 07:37:58\n",
"333 1 33 4 1997-11-03 07:38:19\n",
Expand Down Expand Up @@ -1133,13 +1133,6 @@
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"data": {
"text/plain": [
Expand Down Expand Up @@ -1197,7 +1190,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.15"
"version": "3.9.16"
}
},
"nbformat": 4,
Expand Down
8 changes: 4 additions & 4 deletions examples/01_prepare_data/data_transform.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
"System version: 3.7.15 (default, Nov 24 2022, 21:12:53) \n",
"System version: 3.9.16 (main, May 15 2023, 23:46:34) \n",
"[GCC 11.2.0]\n",
"NumPy version: 1.21.6\n",
"Pandas version: 1.3.5\n"
"NumPy version: 1.24.3\n",
"Pandas version: 1.5.3\n"
]
}
],
Expand Down Expand Up @@ -1719,7 +1719,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.15"
"version": "3.9.16"
}
},
"nbformat": 4,
Expand Down
90 changes: 67 additions & 23 deletions examples/01_prepare_data/mind_utils.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,20 @@
"name": "stdout",
"output_type": "stream",
"text": [
"System version: 3.6.11 | packaged by conda-forge | (default, Nov 27 2020, 18:51:43) \n",
"[GCC Clang 11.0.0]\n"
"System version: 3.9.16 (main, May 15 2023, 23:46:34) \n",
"[GCC 11.2.0]\n"
]
}
],
"source": [
"import sys\n",
"import os\n",
"import sys\n",
"import numpy as np\n",
"import pandas as pd\n",
"from collections import Counter\n",
"from tqdm import tqdm\n",
"import pickle\n",
"import numpy as np\n",
"import scrapbook as sb\n",
"\n",
"from collections import Counter\n",
"from tempfile import TemporaryDirectory\n",
"from recommenders.datasets.mind import (download_mind,\n",
" extract_mind,\n",
Expand Down Expand Up @@ -84,8 +83,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 17.0k/17.0k [00:42<00:00, 403KB/s]\n",
"100%|██████████| 9.84k/9.84k [00:14<00:00, 694KB/s] \n"
"100%|██████████████████████████████████████████████████████████████████████████| 17.0k/17.0k [00:05<00:00, 2.92kKB/s]\n",
"100%|██████████████████████████████████████████████████████████████████████████| 9.84k/9.84k [00:01<00:00, 6.80kKB/s]\n"
]
}
],
Expand Down Expand Up @@ -260,7 +259,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 26740/26740 [00:11<00:00, 2257.38it/s]\n"
"100%|████████████████████████████████████████████████████████████████████████| 26740/26740 [00:02<00:00, 9093.49it/s]\n"
]
}
],
Expand Down Expand Up @@ -321,7 +320,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 842k/842k [06:30<00:00, 2.15kKB/s] \n"
"100%|████████████████████████████████████████████████████████████████████████████| 842k/842k [02:45<00:00, 5.08kKB/s]\n"
]
}
],
Expand All @@ -338,8 +337,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"400000it [00:08, 47154.93it/s]\n",
"400000it [00:09, 43258.03it/s]\n"
"400000it [00:06, 60728.10it/s]\n",
"400000it [00:07, 50299.10it/s]\n"
]
}
],
Expand Down Expand Up @@ -374,7 +373,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"22034it [00:00, 370992.88it/s]\n"
"22034it [00:00, 89146.42it/s]\n"
]
}
],
Expand All @@ -400,9 +399,26 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 19,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"{'vert_num': 17,\n",
" 'subvert_num': 17,\n",
" 'word_num': 23404,\n",
" 'word_num_all': 41074,\n",
" 'embedding_exist_num': 22408,\n",
" 'embedding_exist_num_all': 37634,\n",
" 'uid2index': 5000}"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"utils_state = {\n",
" 'vert_num': len(vert_dict),\n",
Expand All @@ -412,21 +428,49 @@
" 'embedding_exist_num': len(exist_word),\n",
" 'embedding_exist_num_all': len(exist_all_word),\n",
" 'uid2index': len(uid2index)\n",
"}"
"}\n",
"utils_state"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 17,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"application/scrapbook.scrap.json+json": {
"data": {
"embedding_exist_num": 22408,
"embedding_exist_num_all": 37634,
"subvert_num": 17,
"uid2index": 5000,
"vert_num": 17,
"word_num": 23404,
"word_num_all": 41074
},
"encoder": "json",
"name": "utils_state",
"version": 1
}
},
"metadata": {
"scrapbook": {
"data": true,
"display": false,
"name": "utils_state"
}
},
"output_type": "display_data"
}
],
"source": [
"sb.glue(\"utils_state\", utils_state)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -437,9 +481,9 @@
"metadata": {
"celltoolbar": "Tags",
"kernelspec": {
"display_name": "Python (reco_base)",
"display_name": "Python (recommenders)",
"language": "python",
"name": "reco_base"
"name": "recommenders"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -451,9 +495,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.11"
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
}
93 changes: 53 additions & 40 deletions examples/01_prepare_data/wikidata_knowledge_graph.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit d284e5e

Please sign in to comment.