Skip to content

Commit

Permalink
Merge branch 'staging' into miguel/review_notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
miguelgfierro authored Jul 4, 2023
2 parents 3350e6f + d284e5e commit 6799739
Show file tree
Hide file tree
Showing 7 changed files with 335 additions and 213 deletions.
43 changes: 18 additions & 25 deletions examples/01_prepare_data/data_split.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -69,7 +69,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -106,7 +106,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -749,17 +749,17 @@
" <td>1997-11-03 07:33:03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16314</th>\n",
" <th>202</th>\n",
" <td>1</td>\n",
" <td>230</td>\n",
" <td>61</td>\n",
" <td>4</td>\n",
" <td>1997-11-03 07:33:40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51295</th>\n",
" <td>1</td>\n",
" <td>35</td>\n",
" <th>16314</th>\n",
" <td>1</td>\n",
" <td>230</td>\n",
" <td>4</td>\n",
" <td>1997-11-03 07:33:40</td>\n",
" </tr>\n",
" <tr>\n",
Expand All @@ -770,10 +770,10 @@
" <td>1997-11-03 07:33:40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>202</th>\n",
" <th>51295</th>\n",
" <td>1</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" <td>61</td>\n",
" <td>4</td>\n",
" <td>1997-11-03 07:33:40</td>\n",
" </tr>\n",
" <tr>\n",
Expand Down Expand Up @@ -806,10 +806,10 @@
"1989 1 90 4 1997-11-03 07:31:40\n",
"11807 1 219 1 1997-11-03 07:32:07\n",
"50026 1 167 2 1997-11-03 07:33:03\n",
"202 1 61 4 1997-11-03 07:33:40\n",
"16314 1 230 4 1997-11-03 07:33:40\n",
"51295 1 35 1 1997-11-03 07:33:40\n",
"43280 1 162 4 1997-11-03 07:33:40\n",
"202 1 61 4 1997-11-03 07:33:40\n",
"51295 1 35 1 1997-11-03 07:33:40\n",
"820 1 265 4 1997-11-03 07:34:01\n",
"11154 1 112 1 1997-11-03 07:34:01\n",
"45732 1 57 5 1997-11-03 07:34:19"
Expand Down Expand Up @@ -893,16 +893,16 @@
" <td>1997-11-03 07:35:52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96699</th>\n",
" <th>4280</th>\n",
" <td>1</td>\n",
" <td>152</td>\n",
" <td>82</td>\n",
" <td>5</td>\n",
" <td>1997-11-03 07:36:29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4280</th>\n",
" <th>96699</th>\n",
" <td>1</td>\n",
" <td>82</td>\n",
" <td>152</td>\n",
" <td>5</td>\n",
" <td>1997-11-03 07:36:29</td>\n",
" </tr>\n",
Expand Down Expand Up @@ -944,8 +944,8 @@
"24493 1 30 3 1997-11-03 07:35:15\n",
"6234 1 233 2 1997-11-03 07:35:52\n",
"39865 1 131 1 1997-11-03 07:35:52\n",
"96699 1 152 5 1997-11-03 07:36:29\n",
"4280 1 82 5 1997-11-03 07:36:29\n",
"96699 1 152 5 1997-11-03 07:36:29\n",
"25721 1 141 3 1997-11-03 07:36:48\n",
"5842 1 72 4 1997-11-03 07:37:58\n",
"333 1 33 4 1997-11-03 07:38:19\n",
Expand Down Expand Up @@ -1133,13 +1133,6 @@
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"data": {
"text/plain": [
Expand Down
8 changes: 4 additions & 4 deletions examples/01_prepare_data/data_transform.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
"System version: 3.7.15 (default, Nov 24 2022, 21:12:53) \n",
"System version: 3.9.16 (main, May 15 2023, 23:46:34) \n",
"[GCC 11.2.0]\n",
"NumPy version: 1.21.6\n",
"Pandas version: 1.3.5\n"
"NumPy version: 1.24.3\n",
"Pandas version: 1.5.3\n"
]
}
],
Expand Down Expand Up @@ -1719,7 +1719,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.15"
"version": "3.9.16"
}
},
"nbformat": 4,
Expand Down
90 changes: 67 additions & 23 deletions examples/01_prepare_data/mind_utils.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,20 @@
"name": "stdout",
"output_type": "stream",
"text": [
"System version: 3.6.11 | packaged by conda-forge | (default, Nov 27 2020, 18:51:43) \n",
"[GCC Clang 11.0.0]\n"
"System version: 3.9.16 (main, May 15 2023, 23:46:34) \n",
"[GCC 11.2.0]\n"
]
}
],
"source": [
"import sys\n",
"import os\n",
"import sys\n",
"import numpy as np\n",
"import pandas as pd\n",
"from collections import Counter\n",
"from tqdm import tqdm\n",
"import pickle\n",
"import numpy as np\n",
"import scrapbook as sb\n",
"\n",
"from collections import Counter\n",
"from tempfile import TemporaryDirectory\n",
"from recommenders.datasets.mind import (download_mind,\n",
" extract_mind,\n",
Expand Down Expand Up @@ -84,8 +83,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 17.0k/17.0k [00:42<00:00, 403KB/s]\n",
"100%|██████████| 9.84k/9.84k [00:14<00:00, 694KB/s] \n"
"100%|██████████████████████████████████████████████████████████████████████████| 17.0k/17.0k [00:05<00:00, 2.92kKB/s]\n",
"100%|██████████████████████████████████████████████████████████████████████████| 9.84k/9.84k [00:01<00:00, 6.80kKB/s]\n"
]
}
],
Expand Down Expand Up @@ -260,7 +259,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 26740/26740 [00:11<00:00, 2257.38it/s]\n"
"100%|████████████████████████████████████████████████████████████████████████| 26740/26740 [00:02<00:00, 9093.49it/s]\n"
]
}
],
Expand Down Expand Up @@ -321,7 +320,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 842k/842k [06:30<00:00, 2.15kKB/s] \n"
"100%|████████████████████████████████████████████████████████████████████████████| 842k/842k [02:45<00:00, 5.08kKB/s]\n"
]
}
],
Expand All @@ -338,8 +337,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"400000it [00:08, 47154.93it/s]\n",
"400000it [00:09, 43258.03it/s]\n"
"400000it [00:06, 60728.10it/s]\n",
"400000it [00:07, 50299.10it/s]\n"
]
}
],
Expand Down Expand Up @@ -374,7 +373,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"22034it [00:00, 370992.88it/s]\n"
"22034it [00:00, 89146.42it/s]\n"
]
}
],
Expand All @@ -400,9 +399,26 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 19,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"{'vert_num': 17,\n",
" 'subvert_num': 17,\n",
" 'word_num': 23404,\n",
" 'word_num_all': 41074,\n",
" 'embedding_exist_num': 22408,\n",
" 'embedding_exist_num_all': 37634,\n",
" 'uid2index': 5000}"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"utils_state = {\n",
" 'vert_num': len(vert_dict),\n",
Expand All @@ -412,21 +428,49 @@
" 'embedding_exist_num': len(exist_word),\n",
" 'embedding_exist_num_all': len(exist_all_word),\n",
" 'uid2index': len(uid2index)\n",
"}"
"}\n",
"utils_state"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 17,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"application/scrapbook.scrap.json+json": {
"data": {
"embedding_exist_num": 22408,
"embedding_exist_num_all": 37634,
"subvert_num": 17,
"uid2index": 5000,
"vert_num": 17,
"word_num": 23404,
"word_num_all": 41074
},
"encoder": "json",
"name": "utils_state",
"version": 1
}
},
"metadata": {
"scrapbook": {
"data": true,
"display": false,
"name": "utils_state"
}
},
"output_type": "display_data"
}
],
"source": [
"sb.glue(\"utils_state\", utils_state)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -437,9 +481,9 @@
"metadata": {
"celltoolbar": "Tags",
"kernelspec": {
"display_name": "Python (reco_base)",
"display_name": "Python (recommenders)",
"language": "python",
"name": "reco_base"
"name": "recommenders"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -451,9 +495,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.11"
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
}
93 changes: 53 additions & 40 deletions examples/01_prepare_data/wikidata_knowledge_graph.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit 6799739

Please sign in to comment.