diff --git a/.github/actions/azureml-test/action.yml b/.github/actions/azureml-test/action.yml
index 3770298d2..85ae9f84a 100644
--- a/.github/actions/azureml-test/action.yml
+++ b/.github/actions/azureml-test/action.yml
@@ -69,56 +69,45 @@ runs:
using: "composite"
steps:
- name: Setup python
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
python-version: "3.8"
- name: Install azureml-core and azure-cli on a GitHub hosted server
shell: bash
run: pip install --quiet "azureml-core>1,<2" "azure-cli>2,<3"
- name: Log in to Azure
- uses: azure/login@v1
+ uses: azure/login@v2
with:
creds: ${{inputs.AZUREML_TEST_CREDENTIALS}}
- name: Install wheel package
shell: bash
run: pip install --quiet wheel
- - name: Submit CPU tests to AzureML
+ - name: Submit tests to AzureML
shell: bash
- if: contains(inputs.TEST_GROUP, 'cpu')
run: >-
- python tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py --clustername ${{inputs.CPU_CLUSTER_NAME}}
- --subid ${{inputs.AZUREML_TEST_SUBID}} --reponame "recommenders" --branch ${{ github.ref }}
- --rg ${{inputs.RG}} --wsname ${{inputs.WS}} --expname ${{inputs.EXP_NAME}}_${{inputs.TEST_GROUP}}
- --testlogs ${{inputs.TEST_LOGS_PATH}} --testkind ${{inputs.TEST_KIND}}
- --conda_pkg_python ${{inputs.PYTHON_VERSION}} --testgroup ${{inputs.TEST_GROUP}}
- --disable-warnings --sha "${GITHUB_SHA}"
- - name: Submit GPU tests to AzureML
- shell: bash
- if: contains(inputs.TEST_GROUP, 'gpu')
- run: >-
- python tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py --clustername ${{inputs.GPU_CLUSTER_NAME}}
- --subid ${{inputs.AZUREML_TEST_SUBID}} --reponame "recommenders" --branch ${{ github.ref }}
- --rg ${{inputs.RG}} --wsname ${{inputs.WS}} --expname ${{inputs.EXP_NAME}}_${{inputs.TEST_GROUP}}
- --testlogs ${{inputs.TEST_LOGS_PATH}} --add_gpu_dependencies --testkind ${{inputs.TEST_KIND}}
- --conda_pkg_python ${{inputs.PYTHON_VERSION}} --testgroup ${{inputs.TEST_GROUP}}
- --disable-warnings --sha "${GITHUB_SHA}"
- - name: Submit PySpark tests to AzureML
- shell: bash
- if: contains(inputs.TEST_GROUP, 'spark')
- run: >-
- python tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py --clustername ${{inputs.CPU_CLUSTER_NAME}}
- --subid ${{inputs.AZUREML_TEST_SUBID}} --reponame "recommenders" --branch ${{ github.ref }}
- --rg ${{inputs.RG}} --wsname ${{inputs.WS}} --expname ${{inputs.EXP_NAME}}_${{inputs.TEST_GROUP}}
- --testlogs ${{inputs.TEST_LOGS_PATH}} --add_spark_dependencies --testkind ${{inputs.TEST_KIND}}
- --conda_pkg_python ${{inputs.PYTHON_VERSION}} --testgroup ${{inputs.TEST_GROUP}}
- --disable-warnings --sha "${GITHUB_SHA}"
+ python tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py \
+ --subid ${{inputs.AZUREML_TEST_SUBID}} \
+ --reponame "recommenders" \
+ --branch ${{ github.ref }} \
+ --rg ${{inputs.RG}} \
+ --wsname ${{inputs.WS}} \
+ --expname ${{inputs.EXP_NAME}}_${{inputs.TEST_GROUP}} \
+ --testlogs ${{inputs.TEST_LOGS_PATH}} \
+ --testkind ${{inputs.TEST_KIND}} \
+ --conda_pkg_python ${{inputs.PYTHON_VERSION}} \
+ --testgroup ${{inputs.TEST_GROUP}} \
+ --disable-warnings \
+ --sha "${GITHUB_SHA}" \
+ --clustername $(if [[ ${{inputs.TEST_GROUP}} =~ "gpu" ]]; then echo "${{inputs.GPU_CLUSTER_NAME}}"; else echo "${{inputs.CPU_CLUSTER_NAME}}"; fi) \
+ $(if [[ ${{inputs.TEST_GROUP}} =~ "gpu" ]]; then echo "--add_gpu_dependencies"; fi) \
+ $(if [[ ${{inputs.TEST_GROUP}} =~ "spark" ]]; then echo "--add_spark_dependencies"; fi)
- name: Get exit status
shell: bash
id: exit_status
run: echo "code=$(cat ${{inputs.PYTEST_EXIT_CODE}})" >> $GITHUB_OUTPUT
- name: Check Success/Failure
if: ${{ steps.exit_status.outputs.code != 0 }}
- uses: actions/github-script@v3
+ uses: actions/github-script@v7
with:
script: |
core.setFailed('All tests did not pass!')
diff --git a/.github/workflows/azureml-cpu-nightly.yml b/.github/workflows/azureml-cpu-nightly.yml
index e2142a231..93e414564 100644
--- a/.github/workflows/azureml-cpu-nightly.yml
+++ b/.github/workflows/azureml-cpu-nightly.yml
@@ -47,7 +47,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out repository code
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Get test group names
id: get_test_groups
uses: ./.github/actions/get-test-groups
@@ -67,11 +67,11 @@ jobs:
strategy:
max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
matrix:
- python-version: ['"python=3.8"', '"python=3.9"']
+ python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
steps:
- name: Check out repository code
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Execute tests
uses: ./.github/actions/azureml-test
id: execute_tests
diff --git a/.github/workflows/azureml-gpu-nightly.yml b/.github/workflows/azureml-gpu-nightly.yml
index dcf0bc102..3b9f6d6b4 100644
--- a/.github/workflows/azureml-gpu-nightly.yml
+++ b/.github/workflows/azureml-gpu-nightly.yml
@@ -47,7 +47,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out repository code
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Get test group names
id: get_test_groups
uses: ./.github/actions/get-test-groups
@@ -67,11 +67,11 @@ jobs:
strategy:
max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
matrix:
- python-version: ['"python=3.8"', '"python=3.9"']
+ python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
steps:
- name: Check out repository code
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Execute tests
uses: ./.github/actions/azureml-test
id: execute_tests
diff --git a/.github/workflows/azureml-release-pipeline.yml b/.github/workflows/azureml-release-pipeline.yml
index 8475a9a2f..d9899658e 100644
--- a/.github/workflows/azureml-release-pipeline.yml
+++ b/.github/workflows/azureml-release-pipeline.yml
@@ -33,9 +33,9 @@ jobs:
needs: [unit-test-workflow, cpu-nightly-workflow, gpu-nightly-workflow, spark-nightly-workflow]
steps:
- name: Check out repository code
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Setup python
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
python-version: "3.8"
- name: Install wheel package
diff --git a/.github/workflows/azureml-spark-nightly.yml b/.github/workflows/azureml-spark-nightly.yml
index 434929c2e..8f28be6f2 100644
--- a/.github/workflows/azureml-spark-nightly.yml
+++ b/.github/workflows/azureml-spark-nightly.yml
@@ -46,7 +46,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out repository code
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Get test group names
id: get_test_groups
uses: ./.github/actions/get-test-groups
@@ -66,11 +66,11 @@ jobs:
strategy:
max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
matrix:
- python-version: ['"python=3.8"', '"python=3.9"']
+ python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
steps:
- name: Check out repository code
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Execute tests
uses: ./.github/actions/azureml-test
id: execute_tests
diff --git a/.github/workflows/azureml-unit-tests.yml b/.github/workflows/azureml-unit-tests.yml
index a175b7247..b39268318 100644
--- a/.github/workflows/azureml-unit-tests.yml
+++ b/.github/workflows/azureml-unit-tests.yml
@@ -36,7 +36,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out repository code
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Get test group names
id: get_test_groups
uses: ./.github/actions/get-test-groups
@@ -56,11 +56,11 @@ jobs:
strategy:
max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
matrix:
- python-version: ['"python=3.8"', '"python=3.9"']
+ python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
steps:
- name: Check out repository code
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Execute tests
uses: ./.github/actions/azureml-test
id: execute_tests
diff --git a/.github/workflows/sarplus.yml b/.github/workflows/sarplus.yml
index 6df1c6c9b..90d03fef6 100644
--- a/.github/workflows/sarplus.yml
+++ b/.github/workflows/sarplus.yml
@@ -39,12 +39,12 @@ jobs:
runs-on: ubuntu-22.04
strategy:
matrix:
- python-version: ["3.8", "3.9"]
+ python-version: ["3.8", "3.9", "3.10", "3.11"]
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
@@ -96,7 +96,7 @@ jobs:
- name: Upload Python wheel as GitHub artifact when merged into main
# Upload the whl file of the specific python version
if: github.ref == 'refs/heads/main'
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v4
with:
name: pysarplus-${{ env.sarplus_version }}-cp${{ matrix.python-version }}-wheel
path: ${{ env.PYTHON_ROOT }}/dist/*.whl
@@ -104,7 +104,7 @@ jobs:
- name: Upload Python source as GitHub artifact when merged into main
# Only one pysarplus source tar file is needed
if: github.ref == 'refs/heads/main' && matrix.python-version == '3.10'
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v4
with:
name: pysarplus-${{ env.sarplus_version }}-source
path: ${{ env.PYTHON_ROOT }}/dist/*.tar.gz
@@ -131,7 +131,7 @@ jobs:
hadoop-version: "3.3.1"
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Test
run: |
@@ -180,7 +180,7 @@ jobs:
- name: Upload Scala bundle as GitHub artifact when merged into main
if: github.ref == 'refs/heads/main'
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v4
with:
name: sarplus-${{ env.sarplus_version }}-bundle_2.12-spark-${{ matrix.spark-version }}-jar
path: ${{ env.SCALA_ROOT }}/target/scala-2.12/*bundle*.jar
diff --git a/.github/workflows/update_documentation.yml b/.github/workflows/update_documentation.yml
index 9779e5eed..30e2eadf1 100644
--- a/.github/workflows/update_documentation.yml
+++ b/.github/workflows/update_documentation.yml
@@ -16,12 +16,12 @@ jobs:
steps:
- name: Checkout repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Set up Python
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
- python-version: 3.10
+ python-version: "3.10"
- name: Install dependencies
run: |
diff --git a/README.md b/README.md
index bdc82c96a..89ef90ecf 100644
--- a/README.md
+++ b/README.md
@@ -158,7 +158,7 @@ The nightly build tests are run daily on AzureML.
## References
-- D. Li, J. Lian, L. Zhang, K. Ren, D. Lu, T. Wu, X. Xie, "Recommender Systems: Frontiers and Practices" (in Chinese), Publishing House of Electronics Industry, Beijing 2022.
+- D. Li, J. Lian, L. Zhang, K. Ren, D. Lu, T. Wu, X. Xie, "Recommender Systems: Frontiers and Practices", Springer, Beijing, 2024. [Available on this link](https://www.amazon.com/Recommender-Systems-Frontiers-Practices-Dongsheng/dp/9819989639/).
- A. Argyriou, M. González-Fierro, and L. Zhang, "Microsoft Recommenders: Best Practices for Production-Ready Recommendation Systems", *WWW 2020: International World Wide Web Conference Taipei*, 2020. Available online: https://dl.acm.org/doi/abs/10.1145/3366424.3382692
-- L. Zhang, T. Wu, X. Xie, A. Argyriou, M. González-Fierro and J. Lian, "Building Production-Ready Recommendation System at Scale", *ACM SIGKDD Conference on Knowledge Discovery and Data Mining 2019 (KDD 2019)*, 2019.
- S. Graham, J.K. Min, T. Wu, "Microsoft recommenders: tools to accelerate developing recommender systems", *RecSys '19: Proceedings of the 13th ACM Conference on Recommender Systems*, 2019. Available online: https://dl.acm.org/doi/10.1145/3298689.3346967
+- L. Zhang, T. Wu, X. Xie, A. Argyriou, M. González-Fierro and J. Lian, "Building Production-Ready Recommendation System at Scale", *ACM SIGKDD Conference on Knowledge Discovery and Data Mining 2019 (KDD 2019)*, 2019.
diff --git a/contrib/sarplus/python/setup.py b/contrib/sarplus/python/setup.py
index 4009ec751..f755f5310 100644
--- a/contrib/sarplus/python/setup.py
+++ b/contrib/sarplus/python/setup.py
@@ -42,6 +42,7 @@ def __str__(self):
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"Topic :: Scientific/Engineering :: Mathematics",
@@ -49,7 +50,7 @@ def __str__(self):
setup_requires=["pytest-runner"],
install_requires=DEPENDENCIES,
tests_require=["pytest"],
- python_requires=">=3.6,<3.11",
+ python_requires=">=3.6,<3.12",
packages=["pysarplus"],
package_data={"": ["VERSION"]},
ext_modules=[
diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb
index 517673178..944b92623 100644
--- a/examples/00_quick_start/fastai_movielens.ipynb
+++ b/examples/00_quick_start/fastai_movielens.ipynb
@@ -27,17 +27,21 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "System version: 3.6.11 | packaged by conda-forge | (default, Aug 5 2020, 20:09:42) \n",
- "[GCC 7.5.0]\n",
- "Pandas version: 0.25.3\n",
- "Fast AI version: 1.0.46\n",
- "Torch version: 1.4.0\n",
- "Cuda Available: False\n",
+ "System version: 3.9.16 (main, May 15 2023, 23:46:34) \n",
+ "[GCC 11.2.0]\n",
+ "Pandas version: 1.5.3\n",
+ "Fast AI version: 2.7.11\n",
+ "Torch version: 1.13.1+cu117\n",
+ "CUDA Available: True\n",
"CuDNN Enabled: True\n"
]
}
],
"source": [
+ "# Suppress all warnings\n",
+ "import warnings\n",
+ "warnings.filterwarnings(\"ignore\")\n",
+ "\n",
"import os\n",
"import sys\n",
"import numpy as np\n",
@@ -46,7 +50,7 @@
"import fastai\n",
"from tempfile import TemporaryDirectory\n",
"\n",
- "from fastai.collab import collab_learner, CollabDataBunch, load_learner\n",
+ "from fastai.collab import collab_learner, CollabDataLoaders, load_learner\n",
"\n",
"from recommenders.utils.constants import (\n",
" DEFAULT_USER_COL as USER, \n",
@@ -67,7 +71,7 @@
"print(\"Pandas version: {}\".format(pd.__version__))\n",
"print(\"Fast AI version: {}\".format(fastai.__version__))\n",
"print(\"Torch version: {}\".format(torch.__version__))\n",
- "print(\"Cuda Available: {}\".format(torch.cuda.is_available()))\n",
+ "print(\"CUDA Available: {}\".format(torch.cuda.is_available()))\n",
"print(\"CuDNN Enabled: {}\".format(torch.backends.cudnn.enabled))"
]
},
@@ -80,7 +84,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 2,
"metadata": {
"tags": [
"parameters"
@@ -101,14 +105,14 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "100%|██████████| 4.81k/4.81k [00:01<00:00, 4.49kKB/s]\n"
+ "100%|██████████| 4.81k/4.81k [00:01<00:00, 3.52kKB/s]\n"
]
},
{
@@ -132,10 +136,10 @@
" \n",
" \n",
" | \n",
- " UserId | \n",
- " MovieId | \n",
- " Rating | \n",
- " Timestamp | \n",
+ " userID | \n",
+ " itemID | \n",
+ " rating | \n",
+ " timestamp | \n",
"
\n",
" \n",
"
\n",
@@ -179,15 +183,15 @@
""
],
"text/plain": [
- " UserId MovieId Rating Timestamp\n",
- "0 196 242 3.0 881250949\n",
- "1 186 302 3.0 891717742\n",
- "2 22 377 1.0 878887116\n",
- "3 244 51 2.0 880606923\n",
- "4 166 346 1.0 886397596"
+ " userID itemID rating timestamp\n",
+ "0 196 242 3.0 881250949\n",
+ "1 186 302 3.0 891717742\n",
+ "2 22 377 1.0 878887116\n",
+ "3 244 51 2.0 880606923\n",
+ "4 166 346 1.0 886397596"
]
},
- "execution_count": 4,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -207,7 +211,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -224,7 +228,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -258,11 +262,11 @@
"outputs": [],
"source": [
"with Timer() as preprocess_time:\n",
- " data = CollabDataBunch.from_df(train_valid_df, \n",
- " user_name=USER, \n",
- " item_name=ITEM, \n",
- " rating_name=RATING, \n",
- " valid_pct=0)\n"
+ " data = CollabDataLoaders.from_df(train_valid_df, \n",
+ " user_name=USER, \n",
+ " item_name=ITEM, \n",
+ " rating_name=RATING, \n",
+ " valid_pct=0)\n"
]
},
{
@@ -276,37 +280,73 @@
"\n",
" \n",
" \n",
- " UserId | \n",
- " MovieId | \n",
- " target | \n",
+ " | \n",
+ " userID | \n",
+ " itemID | \n",
+ " rating | \n",
"
\n",
" \n",
" \n",
" \n",
- " 543 | \n",
- " 1555 | \n",
- " 3.0 | \n",
+ " 0 | \n",
+ " 104 | \n",
+ " 840 | \n",
+ " 1.0 | \n",
"
\n",
" \n",
- " 90 | \n",
- " 945 | \n",
- " 5.0 | \n",
+ " 1 | \n",
+ " 881 | \n",
+ " 112 | \n",
+ " 2.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 746 | \n",
+ " 506 | \n",
+ " 3.0 | \n",
"
\n",
" \n",
- " 292 | \n",
- " 515 | \n",
+ " 3 | \n",
+ " 104 | \n",
+ " 257 | \n",
" 4.0 | \n",
"
\n",
" \n",
- " 303 | \n",
- " 1092 | \n",
- " 1.0 | \n",
+ " 4 | \n",
+ " 511 | \n",
+ " 1527 | \n",
+ " 4.0 | \n",
"
\n",
" \n",
+ " 5 | \n",
" 497 | \n",
- " 946 | \n",
+ " 763 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 407 | \n",
+ " 869 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 291 | \n",
+ " 924 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 109 | \n",
+ " 94 | \n",
" 4.0 | \n",
"
\n",
+ " \n",
+ " 9 | \n",
+ " 82 | \n",
+ " 597 | \n",
+ " 3.0 | \n",
+ "
\n",
" \n",
"
"
],
@@ -369,6 +409,33 @@
"execution_count": 10,
"metadata": {},
"outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
{
"data": {
"text/html": [
@@ -383,34 +450,34 @@
" \n",
" \n",
" \n",
+ " 0 | \n",
+ " 0.961789 | \n",
+ " None | \n",
+ " 00:09 | \n",
+ "
\n",
+ " \n",
" 1 | \n",
- " 0.985993 | \n",
- " | \n",
- " 00:05 | \n",
+ " 0.863359 | \n",
+ " None | \n",
+ " 00:08 | \n",
"
\n",
" \n",
" 2 | \n",
- " 0.885496 | \n",
- " | \n",
- " 00:05 | \n",
+ " 0.750853 | \n",
+ " None | \n",
+ " 00:07 | \n",
"
\n",
" \n",
" 3 | \n",
- " 0.777637 | \n",
- " | \n",
- " 00:05 | \n",
+ " 0.637868 | \n",
+ " None | \n",
+ " 00:08 | \n",
"
\n",
" \n",
" 4 | \n",
- " 0.628971 | \n",
- " | \n",
- " 00:05 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 0.532328 | \n",
- " | \n",
- " 00:06 | \n",
+ " 0.526907 | \n",
+ " None | \n",
+ " 00:09 | \n",
"
\n",
" \n",
""
@@ -426,13 +493,13 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Took 29.5549 seconds for training.\n"
+ "Took 51.5260 seconds for training.\n"
]
}
],
"source": [
"with Timer() as train_time:\n",
- " learn.fit_one_cycle(EPOCHS, max_lr=5e-3)\n",
+ " learn.fit_one_cycle(EPOCHS, lr_max=5e-3)\n",
"\n",
"print(\"Took {} seconds for training.\".format(train_time))"
]
@@ -446,7 +513,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@@ -456,7 +523,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -474,11 +541,11 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
- "learner = load_learner(tmp.name, \"movielens_model.pkl\")"
+ "learner = load_learner(model_path)"
]
},
{
@@ -490,11 +557,11 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
- "total_users, total_items = learner.data.train_ds.x.classes.values()\n",
+ "total_users, total_items = learner.dls.classes.values()\n",
"total_items = total_items[1:]\n",
"total_users = total_users[1:]"
]
@@ -508,7 +575,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
@@ -525,7 +592,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
@@ -545,7 +612,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 17,
"metadata": {
"scrolled": false
},
@@ -564,14 +631,14 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Took 1.9734 seconds for 1511060 predictions.\n"
+ "Took 5.1570 seconds for 1511060 predictions.\n"
]
}
],
@@ -595,7 +662,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
@@ -606,7 +673,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
@@ -617,7 +684,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
@@ -628,7 +695,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
@@ -639,27 +706,27 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Model:\tCollabLearner\n",
- "Top K:\t10\n",
- "MAP:\t0.026115\n",
- "NDCG:\t0.155065\n",
- "Precision@K:\t0.136691\n",
- "Recall@K:\t0.054940\n"
+ "Model:\t\tLearner\n",
+ "Top K:\t\t10\n",
+ "MAP:\t\t0.024119\n",
+ "NDCG:\t\t0.152808\n",
+ "Precision@K:\t0.139130\n",
+ "Recall@K:\t0.054943\n"
]
}
],
"source": [
- "print(\"Model:\\t\" + learn.__class__.__name__,\n",
- " \"Top K:\\t%d\" % TOP_K,\n",
- " \"MAP:\\t%f\" % eval_map,\n",
- " \"NDCG:\\t%f\" % eval_ndcg,\n",
+ "print(\"Model:\\t\\t\" + learn.__class__.__name__,\n",
+ " \"Top K:\\t\\t%d\" % TOP_K,\n",
+ " \"MAP:\\t\\t%f\" % eval_map,\n",
+ " \"NDCG:\\t\\t%f\" % eval_ndcg,\n",
" \"Precision@K:\\t%f\" % eval_precision,\n",
" \"Recall@K:\\t%f\" % eval_recall, sep='\\n')"
]
@@ -673,7 +740,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
@@ -693,18 +760,18 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Model:\tCollabLearner\n",
- "RMSE:\t0.902379\n",
- "MAE:\t0.712163\n",
- "Explained variance:\t0.346523\n",
- "R squared:\t0.345672\n"
+ "Model:\t\t\tLearner\n",
+ "RMSE:\t\t\t0.904589\n",
+ "MAE:\t\t\t0.715827\n",
+ "Explained variance:\t0.356082\n",
+ "R squared:\t\t0.355173\n"
]
}
],
@@ -714,36 +781,35 @@
"eval_mae = mae(test_df, scores, col_user=USER, col_item=ITEM, col_rating=RATING, col_prediction=PREDICTION)\n",
"eval_exp_var = exp_var(test_df, scores, col_user=USER, col_item=ITEM, col_rating=RATING, col_prediction=PREDICTION)\n",
"\n",
- "print(\"Model:\\t\" + learn.__class__.__name__,\n",
- " \"RMSE:\\t%f\" % eval_rmse,\n",
- " \"MAE:\\t%f\" % eval_mae,\n",
+ "print(\"Model:\\t\\t\\t\" + learn.__class__.__name__,\n",
+ " \"RMSE:\\t\\t\\t%f\" % eval_rmse,\n",
+ " \"MAE:\\t\\t\\t%f\" % eval_mae,\n",
" \"Explained variance:\\t%f\" % eval_exp_var,\n",
- " \"R squared:\\t%f\" % eval_r2, sep='\\n')"
+ " \"R squared:\\t\\t%f\" % eval_r2, sep='\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "That RMSE is actually quite good when compared to these benchmarks: https://www.librec.net/release/v1.3/example.html"
+ "That RMSE is competitive in comparison with other models."
]
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
- "application/scrapbook.scrap.json+json": {
- "data": 0.02611475567509659,
+ "application/notebook_utils.json+json": {
+ "data": 0.024118782738867094,
"encoder": "json",
- "name": "map",
- "version": 1
+ "name": "map"
}
},
"metadata": {
- "scrapbook": {
+ "notebook_utils": {
"data": true,
"display": false,
"name": "map"
@@ -753,15 +819,14 @@
},
{
"data": {
- "application/scrapbook.scrap.json+json": {
- "data": 0.15506533130248687,
+ "application/notebook_utils.json+json": {
+ "data": 0.1528081472533914,
"encoder": "json",
- "name": "ndcg",
- "version": 1
+ "name": "ndcg"
}
},
"metadata": {
- "scrapbook": {
+ "notebook_utils": {
"data": true,
"display": false,
"name": "ndcg"
@@ -771,15 +836,14 @@
},
{
"data": {
- "application/scrapbook.scrap.json+json": {
- "data": 0.13669141039236482,
+ "application/notebook_utils.json+json": {
+ "data": 0.13913043478260873,
"encoder": "json",
- "name": "precision",
- "version": 1
+ "name": "precision"
}
},
"metadata": {
- "scrapbook": {
+ "notebook_utils": {
"data": true,
"display": false,
"name": "precision"
@@ -789,15 +853,14 @@
},
{
"data": {
- "application/scrapbook.scrap.json+json": {
- "data": 0.05493986799753499,
+ "application/notebook_utils.json+json": {
+ "data": 0.05494302697544413,
"encoder": "json",
- "name": "recall",
- "version": 1
+ "name": "recall"
}
},
"metadata": {
- "scrapbook": {
+ "notebook_utils": {
"data": true,
"display": false,
"name": "recall"
@@ -807,15 +870,14 @@
},
{
"data": {
- "application/scrapbook.scrap.json+json": {
- "data": 0.9023793356156464,
+ "application/notebook_utils.json+json": {
+ "data": 0.9045892929999733,
"encoder": "json",
- "name": "rmse",
- "version": 1
+ "name": "rmse"
}
},
"metadata": {
- "scrapbook": {
+ "notebook_utils": {
"data": true,
"display": false,
"name": "rmse"
@@ -825,15 +887,14 @@
},
{
"data": {
- "application/scrapbook.scrap.json+json": {
- "data": 0.7121634655740025,
+ "application/notebook_utils.json+json": {
+ "data": 0.7158267242352735,
"encoder": "json",
- "name": "mae",
- "version": 1
+ "name": "mae"
}
},
"metadata": {
- "scrapbook": {
+ "notebook_utils": {
"data": true,
"display": false,
"name": "mae"
@@ -843,15 +904,14 @@
},
{
"data": {
- "application/scrapbook.scrap.json+json": {
- "data": 0.34652281723228295,
+ "application/notebook_utils.json+json": {
+ "data": 0.3560824305444269,
"encoder": "json",
- "name": "exp_var",
- "version": 1
+ "name": "exp_var"
}
},
"metadata": {
- "scrapbook": {
+ "notebook_utils": {
"data": true,
"display": false,
"name": "exp_var"
@@ -861,15 +921,14 @@
},
{
"data": {
- "application/scrapbook.scrap.json+json": {
- "data": 0.3456716162958503,
+ "application/notebook_utils.json+json": {
+ "data": 0.35517333876960555,
"encoder": "json",
- "name": "rsquared",
- "version": 1
+ "name": "rsquared"
}
},
"metadata": {
- "scrapbook": {
+ "notebook_utils": {
"data": true,
"display": false,
"name": "rsquared"
@@ -879,15 +938,14 @@
},
{
"data": {
- "application/scrapbook.scrap.json+json": {
- "data": 29.554921820759773,
+ "application/notebook_utils.json+json": {
+ "data": 51.52598460000445,
"encoder": "json",
- "name": "train_time",
- "version": 1
+ "name": "train_time"
}
},
"metadata": {
- "scrapbook": {
+ "notebook_utils": {
"data": true,
"display": false,
"name": "train_time"
@@ -897,15 +955,14 @@
},
{
"data": {
- "application/scrapbook.scrap.json+json": {
- "data": 1.973397959023714,
+ "application/notebook_utils.json+json": {
+ "data": 5.156951100005244,
"encoder": "json",
- "name": "test_time",
- "version": 1
+ "name": "test_time"
}
},
"metadata": {
- "scrapbook": {
+ "notebook_utils": {
"data": true,
"display": false,
"name": "test_time"
@@ -930,7 +987,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
@@ -946,9 +1003,9 @@
"metadata": {
"celltoolbar": "Tags",
"kernelspec": {
- "display_name": "Python (reco_gpu)",
+ "display_name": "recommenders",
"language": "python",
- "name": "reco_gpu"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -960,7 +1017,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.11"
+ "version": "3.9.16"
}
},
"nbformat": 4,
diff --git a/examples/06_benchmarks/benchmark_utils.py b/examples/06_benchmarks/benchmark_utils.py
index e28fa6ab7..c62518838 100644
--- a/examples/06_benchmarks/benchmark_utils.py
+++ b/examples/06_benchmarks/benchmark_utils.py
@@ -15,7 +15,7 @@
except ImportError:
pass # skip this import if we are not in a Spark environment
try:
- from fastai.collab import collab_learner, CollabDataBunch
+ from fastai.collab import collab_learner, CollabDataLoaders
except ImportError:
pass # skip this import if we are not in a GPU environment
@@ -181,7 +181,7 @@ def prepare_training_fastai(train, test):
data = train.copy()
data[DEFAULT_USER_COL] = data[DEFAULT_USER_COL].astype("str")
data[DEFAULT_ITEM_COL] = data[DEFAULT_ITEM_COL].astype("str")
- data = CollabDataBunch.from_df(
+ data = CollabDataLoaders.from_df(
data,
user_name=DEFAULT_USER_COL,
item_name=DEFAULT_ITEM_COL,
@@ -196,7 +196,7 @@ def train_fastai(params, data):
data, n_factors=params["n_factors"], y_range=params["y_range"], wd=params["wd"]
)
with Timer() as t:
- model.fit_one_cycle(cyc_len=params["epochs"], max_lr=params["max_lr"])
+ model.fit_one_cycle(params["epochs"], lr_max=params["lr_max"])
return model, t
@@ -221,9 +221,9 @@ def predict_fastai(model, test):
def recommend_k_fastai(model, test, train, top_k=DEFAULT_K, remove_seen=True):
with Timer() as t:
- total_users, total_items = model.data.train_ds.x.classes.values()
- total_items = total_items[1:]
- total_users = total_users[1:]
+ total_users, total_items = model.dls.classes.values()
+ total_items = np.array(total_items[1:])
+ total_users = np.array(total_users[1:])
test_users = test[DEFAULT_USER_COL].unique()
test_users = np.intersect1d(test_users, total_users)
users_items = cartesian_product(test_users, total_items)
diff --git a/examples/06_benchmarks/movielens.ipynb b/examples/06_benchmarks/movielens.ipynb
index 2f7a857ce..8c8ee6d2f 100644
--- a/examples/06_benchmarks/movielens.ipynb
+++ b/examples/06_benchmarks/movielens.ipynb
@@ -299,7 +299,7 @@
" \"n_factors\": 40, \n",
" \"y_range\": [0,5.5], \n",
" \"wd\": 1e-1,\n",
- " \"max_lr\": 5e-3,\n",
+ " \"lr_max\": 5e-3,\n",
" \"epochs\": 15\n",
"}\n",
"\n",
diff --git a/recommenders/models/deeprec/models/sequential/rnn_cell_implement.py b/recommenders/models/deeprec/models/sequential/rnn_cell_implement.py
index 8d8f4c782..e48aaafb0 100644
--- a/recommenders/models/deeprec/models/sequential/rnn_cell_implement.py
+++ b/recommenders/models/deeprec/models/sequential/rnn_cell_implement.py
@@ -601,9 +601,9 @@ def __init__(
):
self._build_bias = build_bias
- if args is None or (nest.is_sequence(args) and not args):
+ if args is None or (nest.is_nested(args) and not args):
raise ValueError("`args` must be specified")
- if not nest.is_sequence(args):
+ if not nest.is_nested(args):
args = [args]
self._is_sequence = False
else:
diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py
index ab756c7e8..f6b6a8986 100644
--- a/recommenders/models/fastai/fastai_utils.py
+++ b/recommenders/models/fastai/fastai_utils.py
@@ -6,6 +6,7 @@
import pandas as pd
import fastai
import fastprogress
+import torch
from fastprogress.fastprogress import force_console_behavior
from recommenders.utils import constants as cc
@@ -51,24 +52,32 @@ def score(
pandas.DataFrame: Result of recommendation
"""
# replace values not known to the model with NaN
- total_users, total_items = learner.data.train_ds.x.classes.values()
+ total_users, total_items = learner.dls.classes.values()
test_df.loc[~test_df[user_col].isin(total_users), user_col] = np.nan
test_df.loc[~test_df[item_col].isin(total_items), item_col] = np.nan
# map ids to embedding ids
- u = learner.get_idx(test_df[user_col], is_item=False)
- m = learner.get_idx(test_df[item_col], is_item=True)
+ u = learner._get_idx(test_df[user_col], is_item=False)
+ m = learner._get_idx(test_df[item_col], is_item=True)
# score the pytorch model
- pred = learner.model.forward(u, m)
+ x = torch.column_stack((u, m))
+
+ if torch.cuda.is_available():
+ x = x.to("cuda")
+ learner.model = learner.model.to("cuda")
+
+ pred = learner.model.forward(x).detach().cpu().numpy()
scores = pd.DataFrame(
{user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred}
)
scores = scores.sort_values([user_col, prediction_col], ascending=[True, False])
+
if top_k is not None:
top_scores = scores.groupby(user_col).head(top_k).reset_index(drop=True)
else:
top_scores = scores
+
return top_scores
@@ -77,7 +86,7 @@ def hide_fastai_progress_bar():
fastprogress.fastprogress.NO_BAR = True
fastprogress.fastprogress.WRITER_FN = str
master_bar, progress_bar = force_console_behavior()
- fastai.basic_train.master_bar, fastai.basic_train.progress_bar = (
+ fastai.callback.progress.master_bar, fastai.callback.progress.progress_bar = (
master_bar,
progress_bar,
)
diff --git a/recommenders/models/rlrmc/RLRMCdataset.py b/recommenders/models/rlrmc/RLRMCdataset.py
index 6b1329d1d..4627eebe1 100644
--- a/recommenders/models/rlrmc/RLRMCdataset.py
+++ b/recommenders/models/rlrmc/RLRMCdataset.py
@@ -68,8 +68,7 @@ def _data_processing(self, train, validation=None, test=None, mean_center=True):
"""
# Data processing and reindexing code is adopted from https://github.com/Microsoft/Recommenders/blob/main/recommenders/models/ncf/dataset.py
# If validation dataset is None
- df = train if validation is None else train.append(validation)
- df = df if test is None else df.append(test)
+ df = pd.concat([train, validation, test])
# Reindex user and item index
if self.user_idx is None:
diff --git a/recommenders/models/sasrec/util.py b/recommenders/models/sasrec/util.py
index 8c81b563c..b92999bd2 100644
--- a/recommenders/models/sasrec/util.py
+++ b/recommenders/models/sasrec/util.py
@@ -35,11 +35,13 @@ def __init__(self, **kwargs):
if self.filename:
with open(self.filename, "r") as fr:
sample = fr.readline()
- ncols = sample.strip().split(self.col_sep)
+ ncols = len(sample.strip().split(self.col_sep))
if ncols == 3:
self.with_time = True
- else:
+ elif ncols == 2:
self.with_time = False
+ else:
+ raise ValueError(f"Dataset must have 2 or 3 columns, but {ncols} columns were given")
def split(self, **kwargs):
self.filename = kwargs.get("filename", self.filename)
diff --git a/recommenders/models/tfidf/tfidf_utils.py b/recommenders/models/tfidf/tfidf_utils.py
index 24575121c..6a6d22389 100644
--- a/recommenders/models/tfidf/tfidf_utils.py
+++ b/recommenders/models/tfidf/tfidf_utils.py
@@ -115,7 +115,7 @@ def clean_dataframe(self, df, cols_to_clean, new_col_name="cleaned_text"):
return df
def tokenize_text(
- self, df_clean, text_col="cleaned_text", ngram_range=(1, 3), min_df=0
+ self, df_clean, text_col="cleaned_text", ngram_range=(1, 3), min_df=0.0
):
"""Tokenize the input text.
For more details on the TfidfVectorizer, see https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html
@@ -124,7 +124,7 @@ def tokenize_text(
df_clean (pandas.DataFrame): Dataframe with cleaned text in the new column.
text_col (str): Name of column containing the cleaned text.
ngram_range (tuple of int): The lower and upper boundary of the range of n-values for different n-grams to be extracted.
- min_df (int): When building the vocabulary ignore terms that have a document frequency strictly lower than the given threshold.
+ min_df (float): When building the vocabulary ignore terms that have a document frequency strictly lower than the given threshold.
Returns:
TfidfVectorizer, pandas.Series:
diff --git a/setup.py b/setup.py
index bb9db1688..c5fc49bb8 100644
--- a/setup.py
+++ b/setup.py
@@ -27,51 +27,45 @@
version += ".post" + str(int(time.time()))
install_requires = [
- "numpy>=1.19", # 1.19 required by tensorflow 2.6
- "pandas>1.0.3,<2",
- "scipy>=1.0.0,<1.11.0", # FIXME: We limit <1.11.0 until #1954 is fixed
- "tqdm>=4.31.1,<5",
- "matplotlib>=2.2.2,<4",
- "scikit-learn>=0.22.1,<1.0.3",
- "numba>=0.38.1,<1",
- "lightfm>=1.15,<2",
- "lightgbm>=2.2.1",
- "memory_profiler>=0.54.0,<1",
- "nltk>=3.4,<4",
- "seaborn>=0.8.1,<1",
- "transformers>=2.5.0,<5",
- "category_encoders>=1.3.0,<2",
- "jinja2>=2,<3.1",
- "requests>=2.31.0,<3",
- "cornac>=1.1.2,<1.15.2;python_version<='3.7'",
- "cornac>=1.15.2,<2;python_version>='3.8'", # After 1.15.2, Cornac requires python 3.8
- "retrying>=1.3.3",
- "pandera[strategies]>=0.6.5", # For generating fake datasets
- "scikit-surprise>=1.0.6",
- "hyperopt>=0.1.2,<1",
- "ipykernel>=4.6.1,<7",
- "jupyter>=1,<2",
- "locust>=1,<2",
+ "category-encoders>=2.6.0,<3", # requires packaging
+ "cornac>=1.15.2,<2", # requires packaging, tqdm
+ "hyperopt>=0.2.7,<1",
+ "lightfm>=1.17,<2", # requires requests
+ "lightgbm>=4.0.0,<5",
+ "locust>=2.12.2,<3", # requires jinja2
+ "memory-profiler>=0.61.0,<1",
+ "nltk>=3.8.1,<4", # requires tqdm
+ "notebook>=7.0.0,<8", # requires ipykernel, jinja2, jupyter, nbconvert, nbformat, packaging, requests
+ "numba>=0.57.0,<1",
+ "pandas>2.0.0,<3.0.0", # requires numpy
+ "pandera[strategies]>=0.6.5,<0.18;python_version<='3.8'", # For generating fake datasets
+ "pandera[strategies]>=0.15.0;python_version>='3.9'",
+ "retrying>=1.3.4,<2",
+ "scikit-learn>=1.2.0,<2", # requires scipy, and introduce breaking change affects feature_extraction.text.TfidfVectorizer.min_df
+ "scikit-surprise>=1.1.3",
+ "scipy>=1.10.1,<1.11.0", # FIXME: We limit <1.11.0 until #1954 is fixed
+ "seaborn>=0.13.0,<1", # requires matplotlib, packaging
+ "transformers>=4.27.0,<5", # requires packaging, pyyaml, requests, tqdm
]
# shared dependencies
extras_require = {
"gpu": [
- "nvidia-ml-py3>=7.352.0",
- "tensorflow==2.8.4", # FIXME: Temporarily pinned due to issue with TF version > 2.10.1 See #2018
- "tf-slim>=1.1.0",
- "torch>=1.13.1", # for CUDA 11 support
- "fastai>=1.0.46,<2",
+ "fastai>=2.7.11,<3",
+ "nvidia-ml-py>=11.525.84",
+ "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<2.16", # Fixed TF due to constant security problems and breaking changes #2073
+ "tf-slim>=1.1.0", # No python_requires in its setup.py
+ "torch>=2.0.1,<3",
],
"spark": [
- "pyarrow>=0.12.1,<7.0.0",
- "pyspark>=2.4.5,<3.3.0",
+ "pyarrow>=10.0.1",
+ "pyspark>=3.3.0,<=4",
],
"dev": [
- "black>=18.6b4,<21",
- "pytest>=3.6.4",
- "pytest-cov>=2.12.1",
- "pytest-mock>=3.6.1", # for access to mock fixtures in pytest
+ "black>=23.3.0",
+ "pytest>=7.2.1",
+ "pytest-cov>=4.1.0",
+ "pytest-mock>=3.10.0", # for access to mock fixtures in pytest
],
}
# For the brave of heart
@@ -100,7 +94,7 @@
long_description_content_type="text/markdown",
url="https://github.com/recommenders-team/recommenders",
project_urls={
- "Documentation": "https://microsoft-recommenders.readthedocs.io/en/stable/",
+ "Documentation": "https://recommenders-team.github.io/recommenders/intro.html",
"Wiki": "https://github.com/recommenders-team/recommenders/wiki",
},
author="Recommenders contributors",
@@ -115,6 +109,8 @@
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
"Operating System :: POSIX :: Linux",
],
extras_require=extras_require,
diff --git a/tests/README.md b/tests/README.md
index 813b433ba..a6068daec 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -213,11 +213,26 @@ Then, follow the steps below to create the AzureML infrastructure:
2. Create two new clusters: `cpu-cluster` and `gpu-cluster`. Go to compute, then compute cluster, then new.
- Select the CPU VM base. Anything above 64GB of RAM, and 8 cores should be fine.
- Select the GPU VM base. Anything above 56GB of RAM, and 6 cores, and an NVIDIA K80 should be fine.
-3. Add the subscription ID to GitHub action secrets [here](https://github.com/microsoft/recommenders/settings/secrets/actions). Create a new repository secret called `AZUREML_TEST_SUBID` and add the subscription ID as the value.
+3. Add the subscription ID to GitHub action secrets [here](https://github.com/recommenders-team/recommenders/settings/secrets/actions). Create a new repository secret called `AZUREML_TEST_SUBID` and add the subscription ID as the value.
4. Make sure you have installed [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli), and that you are logged in: `az login`.
5. Select your subscription: `az account set -s $AZURE_SUBSCRIPTION_ID`.
-5. Create a Service Principal: `az ad sp create-for-rbac --name "recommenders-cicd" --role contributor --scopes /subscriptions/$AZURE_SUBSCRIPTION_ID --sdk-auth`.
-6. Add the output from the Service Principal (should be a JSON blob) as an action secret `AZUREML_TEST_CREDENTIALS`.
+6. Create a Service Principal: `az ad sp create-for-rbac --name $SERVICE_PRINCIPAL_NAME --role contributor --scopes /subscriptions/$AZURE_SUBSCRIPTION_ID --json-auth`. This will output a JSON blob with the credentials of the Service Principal:
+ ```
+ {
+ "clientId": "XXXXXXXXXXXXXXXXXXXXX",
+ "clientSecret": "XXXXXXXXXXXXXXXXXXXXX",
+ "subscriptionId": "XXXXXXXXXXXXXXXXXXXXX",
+ "tenantId": "XXXXXXXXXXXXXXXXXXXXX",
+ "activeDirectoryEndpointUrl": "https://login.microsoftonline.com",
+ "resourceManagerEndpointUrl": "https://management.azure.com/",
+ "activeDirectoryGraphResourceId": "https://graph.windows.net/",
+ "sqlManagementEndpointUrl": "https://management.core.windows.net:8443/",
+ "galleryEndpointUrl": "https://gallery.azure.com/",
+ "managementEndpointUrl": "https://management.core.windows.net/"
+ }
+ ```
+7. Add the output as a GitHub Actions secret `AZUREML_TEST_CREDENTIALS` under the repository's **Settings > Security > Secrets and variables > Actions**.
+
## How to execute tests in your local environment
diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
index 4fe1e5f8e..adda7e172 100644
--- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
+++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
@@ -29,20 +29,20 @@
Example:
Usually, this script is run by a DevOps pipeline. It can also be
run from cmd line.
- >>> python tests/ci/refac.py --clustername 'cluster-d3-v2'
- --subid '12345678-9012-3456-abcd-123456789012'
- --pr '666'
- --reponame 'Recommenders'
- --branch 'staging'
+ >>> python tests/ci/submit_groupwise_azureml_pytest.py \
+ --clustername 'cluster-d3-v2' \
+ --subid '12345678-9012-3456-abcd-123456789012' \
+ --pr '666' \
+ --reponame 'Recommenders' \
+ --branch 'staging'
"""
import argparse
import logging
-import glob
from azureml.core.authentication import AzureCliAuthentication
from azureml.core import Workspace
from azureml.core import Experiment
-from azureml.core.runconfig import RunConfiguration
+from azureml.core.runconfig import RunConfiguration, DockerConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.script_run_config import ScriptRunConfig
from azureml.core.compute import ComputeTarget, AmlCompute
@@ -146,7 +146,6 @@ def setup_persistent_compute_target(workspace, cluster_name, vm_size, max_nodes)
def create_run_config(
cpu_cluster,
- docker_proc_type,
add_gpu_dependencies,
add_spark_dependencies,
conda_pkg_jdk,
@@ -165,7 +164,6 @@ def create_run_config(
the following:
- Reco_cpu_test
- Reco_gpu_test
- docker_proc_type (str) : processor type, cpu or gpu
add_gpu_dependencies (bool) : True if gpu packages should be
added to the conda environment, else False
add_spark_dependencies (bool) : True if PySpark packages should be
@@ -178,8 +176,41 @@ def create_run_config(
run_azuremlcompute = RunConfiguration()
run_azuremlcompute.target = cpu_cluster
- run_azuremlcompute.environment.docker.enabled = True
- run_azuremlcompute.environment.docker.base_image = docker_proc_type
+ if not add_gpu_dependencies:
+ # https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04
+ run_azuremlcompute.environment.docker.base_image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04"
+ else:
+ run_azuremlcompute.environment.docker.base_image = None
+ # Use the latest CUDA
+ # See
+ # * https://learn.microsoft.com/en-us/azure/machine-learning/how-to-train-with-custom-image?view=azureml-api-1#use-a-custom-dockerfile-optional
+ # * https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04
+ run_azuremlcompute.environment.docker.base_dockerfile = r"""
+FROM nvcr.io/nvidia/cuda:12.3.1-devel-ubuntu22.04
+USER root:root
+ENV NVIDIA_VISIBLE_DEVICES all
+ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
+ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
+ENV DEBIAN_FRONTEND noninteractive
+RUN apt-get update && \
+ apt-get install -y wget git-all && \
+ apt-get clean -y && \
+ rm -rf /var/lib/apt/lists/*
+# Conda Environment
+# Pin pip=20.1.1 due to the issue: No module named 'ruamel'
+# See https://learn.microsoft.com/en-us/python/api/overview/azure/ml/install?view=azure-ml-py#troubleshooting
+ENV MINICONDA_VERSION py38_23.3.1-0
+ENV PATH /opt/miniconda/bin:$PATH
+ENV CONDA_PACKAGE 23.5.0
+RUN wget -qO /tmp/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh && \
+ bash /tmp/miniconda.sh -bf -p /opt/miniconda && \
+ conda install -y conda=${CONDA_PACKAGE} python=3.8 pip=20.1.1 && \
+ conda update --all -c conda-forge -y && \
+ conda clean -ay && \
+ rm -rf /opt/miniconda/pkgs && \
+ rm /tmp/miniconda.sh && \
+ find / -type d -name __pycache__ | xargs rm -rf
+"""
# Use conda_dependencies.yml to create a conda environment in
# the Docker image for execution
@@ -195,6 +226,7 @@ def create_run_config(
# install recommenders
reco_extras = "dev"
+ conda_dep.add_conda_package("anaconda::git")
if add_gpu_dependencies and add_spark_dependencies:
conda_dep.add_channel("conda-forge")
conda_dep.add_conda_package(conda_pkg_jdk)
@@ -260,8 +292,10 @@ def submit_experiment_to_azureml(
source_directory=".",
script=test,
run_config=run_config,
+ docker_runtime_config=DockerConfiguration(use_docker=True),
arguments=arguments,
)
+
run = experiment.submit(script_run_config)
# waits only for configuration to complete
run.wait_for_completion(show_output=True, wait_post_processing=True)
@@ -326,13 +360,6 @@ def create_arg_parser():
default="STANDARD_D3_V2",
help="Set the size of the VM either STANDARD_D3_V2",
)
- # cpu or gpu
- parser.add_argument(
- "--dockerproc",
- action="store",
- default="cpu",
- help="Base image used in docker container",
- )
# Azure subscription id, when used in a pipeline, it is stored in keyvault
parser.add_argument(
"--subid", action="store", default="123456", help="Azure Subscription ID"
@@ -421,16 +448,6 @@ def create_arg_parser():
logger = logging.getLogger("submit_groupwise_azureml_pytest.py")
args = create_arg_parser()
-
- if args.dockerproc == "cpu":
- from azureml.core.runconfig import DEFAULT_CPU_IMAGE
-
- docker_proc_type = DEFAULT_CPU_IMAGE
- else:
- from azureml.core.runconfig import DEFAULT_GPU_IMAGE
-
- docker_proc_type = DEFAULT_GPU_IMAGE
-
cli_auth = AzureCliAuthentication()
workspace = setup_workspace(
@@ -450,7 +467,6 @@ def create_arg_parser():
run_config = create_run_config(
cpu_cluster=cpu_cluster,
- docker_proc_type=docker_proc_type,
add_gpu_dependencies=args.add_gpu_dependencies,
add_spark_dependencies=args.add_spark_dependencies,
conda_pkg_jdk=args.conda_pkg_jdk,
diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 6c44411fe..f05e27a9f 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -69,23 +69,23 @@
"tests/smoke/recommenders/recommender/test_deeprec_model.py::test_FFM_iterator", # 0.74s
"tests/smoke/recommenders/recommender/test_newsrec_utils.py::test_news_iterator", # 3.04s
#
- "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_lightgcn", # 6.03s # FIXME: Issue with TF version > 2.10.1 See #2018
- "tests/functional/examples/test_notebooks_gpu.py::test_lightgcn_deep_dive_functional", # 19.45s # FIXME: Issue with TF version > 2.10.1 See #2018
+ "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_lightgcn", # 6.03s
+ "tests/functional/examples/test_notebooks_gpu.py::test_lightgcn_deep_dive_functional", # 19.45s
#
- "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_sum", # 27.23s # FIXME: Issue with TF version > 2.10.1 See #2018
+ # "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_sum", # 27.23s # FIXME: Disabled due to the issue with TF version > 2.10.1 See #2018
#
"tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_dkn", # 187.20s
"tests/functional/examples/test_notebooks_gpu.py::test_dkn_quickstart_functional", # 1167.93s
#
- "tests/functional/examples/test_notebooks_gpu.py::test_slirec_quickstart_functional", # 175.00s # FIXME: Issue with TF version > 2.10.1 See #2018
- "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_slirec", # 346.72s # FIXME: Issue with TF version > 2.10.1 See #2018
+ "tests/functional/examples/test_notebooks_gpu.py::test_slirec_quickstart_functional", # 175.00s
+ "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_slirec", # 346.72s
],
"group_gpu_002": [ # Total group time: 1896.76s
"tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works)
"tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_xdeepfm", # 3.10s
# FIXME: https://github.com/microsoft/recommenders/issues/1883
# "tests/smoke/examples/test_notebooks_gpu.py::test_xdeepfm_smoke", # 77.93s
- "tests/functional/examples/test_notebooks_gpu.py::test_xdeepfm_functional", # FIXME: Issue with TF version > 2.10.1 See #2018
+ "tests/functional/examples/test_notebooks_gpu.py::test_xdeepfm_functional",
#
"tests/smoke/examples/test_notebooks_gpu.py::test_cornac_bivae_smoke", # 67.84s
"tests/functional/examples/test_notebooks_gpu.py::test_cornac_bivae_functional", # 453.21s
@@ -426,12 +426,12 @@
],
"group_gpu_002": [ # Total group time:
"tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works)
- "tests/unit/recommenders/models/test_deeprec_model.py::test_xdeepfm_component_definition", # FIXME: Issue with TF version > 2.10.1 See #2018
+ # "tests/unit/recommenders/models/test_deeprec_model.py::test_xdeepfm_component_definition", # FIXME: Disabled due to the issue with TF version > 2.10.1 See #2018
"tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_item2item_component_definition",
- "tests/unit/recommenders/models/test_deeprec_model.py::test_slirec_component_definition", # FIXME: Issue with TF version > 2.10.1 See #2018
+ "tests/unit/recommenders/models/test_deeprec_model.py::test_slirec_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_nextitnet_component_definition",
- "tests/unit/recommenders/models/test_deeprec_model.py::test_sum_component_definition", # FIXME: Issue with TF version > 2.10.1 See #2018
+ # "tests/unit/recommenders/models/test_deeprec_model.py::test_sum_component_definition", # FIXME: Disabled due to the issue with TF version > 2.10.1 See #2018
"tests/unit/recommenders/models/test_deeprec_model.py::test_lightgcn_component_definition",
"tests/unit/recommenders/models/test_deeprec_utils.py::test_prepare_hparams",
"tests/unit/recommenders/models/test_deeprec_utils.py::test_load_yaml_file",
@@ -449,7 +449,7 @@
"group_notebooks_gpu_002": [ # Total group time: 241.15s
"tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works)
"tests/unit/examples/test_notebooks_gpu.py::test_wide_deep",
- "tests/unit/examples/test_notebooks_gpu.py::test_xdeepfm", # FIXME: Issue with TF version > 2.10.1 See #2018
+ "tests/unit/examples/test_notebooks_gpu.py::test_xdeepfm",
"tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",
],
}
diff --git a/tests/functional/examples/test_notebooks_gpu.py b/tests/functional/examples/test_notebooks_gpu.py
index 2007cc1a7..05b53c68e 100644
--- a/tests/functional/examples/test_notebooks_gpu.py
+++ b/tests/functional/examples/test_notebooks_gpu.py
@@ -247,7 +247,9 @@ def test_wide_deep_functional(
os.path.join("tests", "resources", "deeprec", "slirec"),
10,
400,
- {"auc": 0.7183}, # Don't do logloss check as SLi-Rec uses ranking loss, not a point-wise loss
+ {
+ "auc": 0.7183
+ }, # Don't do logloss check as SLi-Rec uses ranking loss, not a point-wise loss
42,
)
],
@@ -278,7 +280,7 @@ def test_slirec_quickstart_functional(
results = read_notebook(output_notebook)
assert results["auc"] == pytest.approx(expected_values["auc"], rel=TOL, abs=ABS_TOL)
-
+
@pytest.mark.gpu
@pytest.mark.notebooks
@@ -567,7 +569,7 @@ def test_dkn_quickstart_functional(notebooks, output_notebook, kernel_name):
notebook_path,
output_notebook,
kernel_name=kernel_name,
- parameters=dict(EPOCHS=5, BATCH_SIZE=500),
+ parameters=dict(EPOCHS=5, BATCH_SIZE=200),
)
results = read_notebook(output_notebook)
diff --git a/tests/security/test_dependency_security.py b/tests/security/test_dependency_security.py
index 82a4f0596..fccb0b22b 100644
--- a/tests/security/test_dependency_security.py
+++ b/tests/security/test_dependency_security.py
@@ -7,6 +7,8 @@
import numpy as np
import pandas as pd
+from packaging.version import Version
+
try:
import tensorflow as tf
import torch
@@ -16,17 +18,17 @@
def test_requests():
# Security issue: https://github.com/psf/requests/releases/tag/v2.31.0
- assert requests.__version__ >= "2.31.0"
+ assert Version(requests.__version__) >= Version("2.31.0")
def test_numpy():
# Security issue: https://github.com/advisories/GHSA-frgw-fgh6-9g52
- assert np.__version__ >= "1.13.3"
+ assert Version(np.__version__) >= Version("1.13.3")
def test_pandas():
# Security issue: https://github.com/advisories/GHSA-cmm9-mgm5-9r42
- assert pd.__version__ >= "1.0.3"
+ assert Version(pd.__version__) >= Version("1.0.3")
@pytest.mark.gpu
@@ -34,10 +36,10 @@ def test_tensorflow():
# Security issue: https://github.com/advisories/GHSA-w5gh-2wr2-pm6g
# Security issue: https://github.com/advisories/GHSA-r6jx-9g48-2r5r
# Security issue: https://github.com/advisories/GHSA-xxcj-rhqg-m46g
- assert tf.__version__ >= "2.8.4"
+ assert Version(tf.__version__) >= Version("2.8.4")
@pytest.mark.gpu
def test_torch():
# Security issue: https://github.com/advisories/GHSA-47fc-vmwq-366v
- assert torch.__version__ >= "1.13.1"
+ assert Version(torch.__version__) >= Version("1.13.1")
diff --git a/tests/smoke/recommenders/recommender/test_deeprec_model.py b/tests/smoke/recommenders/recommender/test_deeprec_model.py
index 81e6f589c..860e45bd6 100644
--- a/tests/smoke/recommenders/recommender/test_deeprec_model.py
+++ b/tests/smoke/recommenders/recommender/test_deeprec_model.py
@@ -20,7 +20,6 @@
from recommenders.models.deeprec.io.dkn_iterator import DKNTextIterator
from recommenders.models.deeprec.io.sequential_iterator import SequentialIterator
from recommenders.models.deeprec.models.sequential.sli_rec import SLI_RECModel
- from recommenders.models.deeprec.models.sequential.sum import SUMModel
from recommenders.datasets.amazon_reviews import (
download_and_extract,
data_preprocessing,
@@ -31,6 +30,11 @@
except ImportError:
pass # disable error while collecting tests for non-gpu environments
+try:
+ from recommenders.models.deeprec.models.sequential.sum import SUMModel
+except ImportError:
+ pass # ignore ImportError so test collection works when SUMModel dependencies are missing
+
@pytest.mark.gpu
def test_FFM_iterator(deeprec_resource_path):
diff --git a/tests/unit/recommenders/evaluation/test_spark_evaluation.py b/tests/unit/recommenders/evaluation/test_spark_evaluation.py
index 278a2e287..55c064e8b 100644
--- a/tests/unit/recommenders/evaluation/test_spark_evaluation.py
+++ b/tests/unit/recommenders/evaluation/test_spark_evaluation.py
@@ -5,7 +5,7 @@
import pytest
import numpy as np
import pandas as pd
-from pandas.util.testing import assert_frame_equal
+from pandas.testing import assert_frame_equal
from recommenders.evaluation.python_evaluation import (
precision_at_k,