From ffac856156996011328841dff8fb292bc5de053f Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Tue, 6 Jun 2023 19:49:59 +0800
Subject: [PATCH 01/74] Add support for Python 3.10 and 3.11

---
 setup.py | 81 +++++++++++++++++++++++++++-----------------------------
 1 file changed, 39 insertions(+), 42 deletions(-)

diff --git a/setup.py b/setup.py
index 080dad2cb..0db28d225 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@
 import sys
 import time
 
-# workround for enabling editable user pip installs
+# workaround for enabling editable user pip installs
 site.ENABLE_USER_SITE = "--user" in sys.argv[1:]
 
 # version
@@ -27,61 +27,57 @@
 version += ".post" + str(int(time.time()))
 
 install_requires = [
-    "numpy>=1.19",  # 1.19 required by tensorflow 2.6
-    "pandas>1.0.3,<2",
-    "scipy>=1.0.0,<2",
-    "tqdm>=4.31.1,<5",
-    "matplotlib>=2.2.2,<4",
-    "scikit-learn>=0.22.1,<1.0.3",
-    "numba>=0.38.1,<1",
-    "lightfm>=1.15,<2",
-    "lightgbm>=2.2.1",
-    "memory_profiler>=0.54.0,<1",
-    "nltk>=3.4,<4",
-    "seaborn>=0.8.1,<1",
-    "transformers>=2.5.0,<5",
-    "bottleneck>=1.2.1,<2",
-    "category_encoders>=1.3.0,<2",
-    "jinja2>=2,<3.1",
-    "pyyaml>=5.4.1,<6",
-    "requests>=2.0.0,<3",
-    "cornac>=1.1.2,<2",
-    "retrying>=1.3.3",
-    "pandera[strategies]>=0.6.5",  # For generating fake datasets
-    "scikit-surprise>=1.0.6",
+    "pandas>1.5.2,<2.1",  # requires numpy
+    "tqdm>=4.65.0,<5",
+    "matplotlib>=3.6.0,<4",
+    "scikit-learn>=1.1.3,<2",  # 1.0.2 may not support Python 3.10. requires scipy
+    "numba>=0.57.0,<1",
+    "lightfm>=1.17,<2",
+    "lightgbm>=3.3.2,<4",
+    "memory_profiler>=0.61.0,<1",
+    "nltk>=3.8.1,<4",
+    "seaborn>=0.12.0,<1",
+    "transformers>=4.26.0,<5",  # requires pyyaml
+    "bottleneck>=1.3.7,<2",
+    "category_encoders>=2.6.0,<2",
+    "jinja2>=3.1.0,<3.2",
+    "cornac>=1.15.2,<2",
+    "retrying>=1.3.4",
+    "pandera[strategies]>=0.15.0",  # For generating fake datasets
+    "scikit-surprise>=1.1.3",
     "scrapbook>=0.5.0,<1.0.0",
 ]
 
 # shared dependencies
 extras_require = {
     "examples": [
-        "azure.mgmt.cosmosdb>=0.8.0,<1",
-        "hyperopt>=0.1.2,<1",
-        "ipykernel>=4.6.1,<7",
-        "jupyter>=1,<2",
-        "locust>=1,<2",
-        "papermill>=2.1.2,<3",
+        "azure-mgmt-cosmosdb>=9.0.0,<10",
+        "hyperopt>=0.2.7,<1",
+        "ipykernel>=6.20.1,<7",
+        "notebook>=6.5.4,<8",
+        "locust>=2.15.1,<3",
+        "papermill>=2.4.0,<3",
     ],
     "gpu": [
-        "nvidia-ml-py3>=7.352.0",
+        "nvidia-ml-py3>=11.510.69",
         # TensorFlow compiled with CUDA 11.2, cudnn 8.1
         "tensorflow~=2.6.1;python_version=='3.6'",
         "tensorflow~=2.7.0;python_version>='3.7'",
         "tf-slim>=1.1.0",
-        "torch>=1.8",  # for CUDA 11 support
-        "fastai>=1.0.46,<2",
+        "torch>=2.0.1",
+        "fastai>=2.7.11,<3",
     ],
     "spark": [
-        "databricks_cli>=0.8.6,<1",
-        "pyarrow>=0.12.1,<7.0.0",
-        "pyspark>=2.4.5,<3.3.0",
+        "databricks_cli>=0.17.7,<1",
+        "pyarrow>=10.0.1",
+        "pyspark>=3.0.1,<=3.4.0",
     ],
     "dev": [
-        "black>=18.6b4,<21",
-        "pytest>=3.6.4",
-        "pytest-cov>=2.12.1",
-        "pytest-mock>=3.6.1",  # for access to mock fixtures in pytest
-        "pytest-rerunfailures>=10.2",  # to mark flaky tests
+        "black>=23.3.0,<24",
+        "pytest>=7.2.1",
+        "pytest-cov>=4.1.0",
+        "pytest-mock>=3.10.0",  # for access to mock fixtures in pytest
+        "pytest-rerunfailures>=11.1.2",  # to mark flaky tests
     ],
 }
 # for the brave of heart
@@ -124,9 +120,10 @@
         "Topic :: Scientific/Engineering :: Artificial Intelligence",
         "Topic :: Software Development :: Libraries :: Python Modules",
         "License :: OSI Approved :: MIT License",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
         "Operating System :: Microsoft :: Windows",
         "Operating System :: POSIX :: Linux",
         "Operating System :: MacOS",
@@ -136,7 +133,7 @@
     "machine learning python spark gpu",
     install_requires=install_requires,
     package_dir={"recommenders": "recommenders"},
-    python_requires=">=3.6, <3.10",
+    python_requires=">=3.8, <3.12",
    packages=find_packages(where=".", exclude=["contrib", "docs", "examples", "scenarios", "tests", "tools"]),
     setup_requires=["numpy>=1.15"]
 )

From ffd8b9e13dafdb96ac52a34f0e614c2059b7dae5 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Wed, 7 Jun 2023 09:57:41 +0800
Subject: [PATCH 02/74] Correct upper bound version for category_encoders

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 0db28d225..3a49bf185 100644
--- a/setup.py
+++ b/setup.py
@@ -39,7 +39,7 @@
     "seaborn>=0.12.0,<1",
     "transformers>=4.26.0,<5",  # requires pyyaml
     "bottleneck>=1.3.7,<2",
-    "category_encoders>=2.6.0,<2",
+    "category_encoders>=2.6.0,<3",
     "jinja2>=3.1.0,<3.2",
     "cornac>=1.15.2,<2",
     "retrying>=1.3.4",
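[A minimal sketch — mine, not part of the patch series — of how pip evaluates the python_requires ranges being changed above, using the `packaging` library that pip itself vendors:]

    # SpecifierSet implements PEP 440 range checks, which is what pip
    # applies to python_requires at install time.
    from packaging.specifiers import SpecifierSet

    old = SpecifierSet(">=3.6, <3.10")  # before PATCH 01
    new = SpecifierSet(">=3.8, <3.12")  # after PATCH 01

    for interpreter in ["3.7.16", "3.10.12", "3.11.4"]:
        print(interpreter, interpreter in old, interpreter in new)
    # 3.7.16  -> True  False
    # 3.10.12 -> False True
    # 3.11.4  -> False True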
From 644aa5d70468d391bfe696293727a5ae6eb17391 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Wed, 7 Jun 2023 10:25:15 +0800
Subject: [PATCH 03/74] Add tests for Python 3.10 and 3.11

---
 .github/workflows/azureml-cpu-nightly.yml   | 2 +-
 .github/workflows/azureml-gpu-nightly.yml   | 2 +-
 .github/workflows/azureml-spark-nightly.yml | 2 +-
 .github/workflows/azureml-unit-tests.yml    | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/azureml-cpu-nightly.yml b/.github/workflows/azureml-cpu-nightly.yml
index 36e333551..e52058a2c 100644
--- a/.github/workflows/azureml-cpu-nightly.yml
+++ b/.github/workflows/azureml-cpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-gpu-nightly.yml b/.github/workflows/azureml-gpu-nightly.yml
index dc4601f10..94ac06a00 100644
--- a/.github/workflows/azureml-gpu-nightly.yml
+++ b/.github/workflows/azureml-gpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-spark-nightly.yml b/.github/workflows/azureml-spark-nightly.yml
index 74695b1ce..66f981abc 100644
--- a/.github/workflows/azureml-spark-nightly.yml
+++ b/.github/workflows/azureml-spark-nightly.yml
@@ -66,7 +66,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-unit-tests.yml b/.github/workflows/azureml-unit-tests.yml
index 64761d52c..2e28740b8 100644
--- a/.github/workflows/azureml-unit-tests.yml
+++ b/.github/workflows/azureml-unit-tests.yml
@@ -54,7 +54,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code

From 900345083e4629954b16e03849fb06bc34a2d9e8 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Wed, 7 Jun 2023 10:54:19 +0800
Subject: [PATCH 04/74] Remove dependencies that others require

---
 setup.py | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/setup.py b/setup.py
index 3a49bf185..3a5835391 100644
--- a/setup.py
+++ b/setup.py
@@ -28,24 +28,22 @@
 install_requires = [
     "pandas>1.5.2,<2.1",  # requires numpy
-    "tqdm>=4.65.0,<5",
-    "matplotlib>=3.6.0,<4",
-    "scikit-learn>=1.1.3,<2",  # 1.0.2 may not support Python 3.10. requires scipy
+    "scikit-learn>=1.1.3,<2",  # requires scipy
     "numba>=0.57.0,<1",
     "lightfm>=1.17,<2",
     "lightgbm>=3.3.2,<4",
-    "memory_profiler>=0.61.0,<1",
-    "nltk>=3.8.1,<4",
-    "seaborn>=0.12.0,<1",
-    "transformers>=4.26.0,<5",  # requires pyyaml
+    "memory-profiler>=0.61.0,<1",
+    "nltk>=3.8.1,<4",  # requires tqdm
+    "seaborn>=0.12.0,<1",  # requires matplotlib
+    "transformers>=4.26.0,<5",  # requires pyyaml, tqdm
     "bottleneck>=1.3.7,<2",
-    "category_encoders>=2.6.0,<3",
+    "category-encoders>=2.6.0,<3",
     "jinja2>=3.1.0,<3.2",
-    "cornac>=1.15.2,<2",
+    "cornac>=1.15.2,<2",  # requires tqdm
     "retrying>=1.3.4",
     "pandera[strategies]>=0.15.0",  # For generating fake datasets
     "scikit-surprise>=1.1.3",
-    "scrapbook>=0.5.0,<1.0.0",
+    "scrapbook>=0.5.0,<1.0.0",  # requires tqdm, papermill
 ]
 
 # shared dependencies
     "examples": [
         "azure-mgmt-cosmosdb>=9.0.0,<10",
         "hyperopt>=0.2.7,<1",
-        "ipykernel>=6.20.1,<7",
         "notebook>=6.5.4,<8",
         "locust>=2.15.1,<3",
-        "papermill>=2.4.0,<3",
     ],
     "gpu": [
         "nvidia-ml-py3>=11.510.69",
         # TensorFlow compiled with CUDA 11.2, cudnn 8.1
         "tensorflow~=2.6.1;python_version=='3.6'",
         "tensorflow~=2.7.0;python_version>='3.7'",
         "tf-slim>=1.1.0",
         "torch>=2.0.1",
         "fastai>=2.7.11,<3",
     ],
     "spark": [
-        "databricks_cli>=0.17.7,<1",
+        "databricks-cli>=0.17.7,<1",
         "pyarrow>=10.0.1",
         "pyspark>=3.0.1,<=3.4.0",
     ],

From 793ec8785a647bd97c03d1f979e9b831922f9a1b Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Fri, 9 Jun 2023 11:44:20 +0800
Subject: [PATCH 05/74] Update nvidia-ml-py and tensorflow version

---
 setup.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/setup.py b/setup.py
index c07825fcc..1d9228d85 100644
--- a/setup.py
+++ b/setup.py
@@ -54,10 +54,9 @@
         "locust>=2.15.1,<3",
     ],
     "gpu": [
-        "nvidia-ml-py3>=11.510.69",
-        # TensorFlow compiled with CUDA 11.2, cudnn 8.1
-        "tensorflow~=2.6.1;python_version=='3.6'",
-        "tensorflow~=2.7.0;python_version>='3.7'",
+        "nvidia-ml-py>=11.510.69",
+        # TensorFlow compiled with CUDA 11.8, cudnn 8.6.0.163
+        "tensorflow~=2.12.0",
         "tf-slim>=1.1.0",
         "torch>=2.0.1",
         "fastai>=2.7.11,<3",

From 1556eb4c4ba528014bfff200c6730cc44f10952e Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Fri, 9 Jun 2023 12:22:27 +0800
Subject: [PATCH 06/74] Install system level dependencies for scipy

---
 .github/actions/azureml-test/action.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/actions/azureml-test/action.yml b/.github/actions/azureml-test/action.yml
index 72b7e7dea..73c65a041 100644
--- a/.github/actions/azureml-test/action.yml
+++ b/.github/actions/azureml-test/action.yml
@@ -68,6 +68,11 @@ inputs:
 runs:
   using: "composite"
   steps:
+    - name: Install system-level dependencies
+      shell: bash
+      run: |
+        # for scipy, see https://docs.scipy.org/doc/scipy/dev/contributor/building.html
+        sudo apt install -y gcc g++ gfortran libopenblas-dev liblapack-dev pkg-config
    - name: Setup python
      uses: actions/setup-python@v4
      with:
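[PATCH 06 installs a compiler toolchain and BLAS/LAPACK headers because, on interpreters with no prebuilt wheel, pip must compile scipy from source. A quick check — mine, not from the patch — that a source-built scipy actually picked up OpenBLAS/LAPACK:]

    # scipy.show_config() prints the build-time library configuration;
    # OpenBLAS/LAPACK entries should appear if the apt packages were found.
    import scipy

    scipy.show_config()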
From 890a3fe14875418633d92878007e90b7cd2ab9dc Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Fri, 9 Jun 2023 14:58:32 +0800
Subject: [PATCH 07/74] Support from Python 3.8 to 3.11

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 398941ea3..3c0c40d0d 100644
--- a/setup.py
+++ b/setup.py
@@ -123,7 +123,7 @@
     "machine learning python spark gpu",
     install_requires=install_requires,
     package_dir={"recommenders": "recommenders"},
-    python_requires=">=3.6, <3.10",
+    python_requires=">=3.8, <3.12",
     packages=find_packages(
         where=".",
         exclude=["contrib", "docs", "examples", "scenarios", "tests", "tools"],

From 7f2e29896715107ce1c61701b78657944cfbe1e4 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Fri, 9 Jun 2023 14:59:55 +0800
Subject: [PATCH 08/74] Remove unused system deps

---
 .github/actions/azureml-test/action.yml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/.github/actions/azureml-test/action.yml b/.github/actions/azureml-test/action.yml
index 73c65a041..72b7e7dea 100644
--- a/.github/actions/azureml-test/action.yml
+++ b/.github/actions/azureml-test/action.yml
@@ -68,11 +68,6 @@ inputs:
 runs:
   using: "composite"
   steps:
-    - name: Install system-level dependencies
-      shell: bash
-      run: |
-        # for scipy, see https://docs.scipy.org/doc/scipy/dev/contributor/building.html
-        sudo apt install -y gcc g++ gfortran libopenblas-dev liblapack-dev pkg-config
    - name: Setup python
      uses: actions/setup-python@v4
      with:

From 1d0637722ba2b8c95907e04f41a2f05e365182d3 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Fri, 9 Jun 2023 15:43:14 +0800
Subject: [PATCH 09/74] Drop python 3.11 because some packages do not support
 3.11

---
 .github/workflows/azureml-cpu-nightly.yml   | 2 +-
 .github/workflows/azureml-gpu-nightly.yml   | 2 +-
 .github/workflows/azureml-spark-nightly.yml | 2 +-
 .github/workflows/azureml-unit-tests.yml    | 2 +-
 setup.py                                    | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/azureml-cpu-nightly.yml b/.github/workflows/azureml-cpu-nightly.yml
index e52058a2c..5010b35ad 100644
--- a/.github/workflows/azureml-cpu-nightly.yml
+++ b/.github/workflows/azureml-cpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-gpu-nightly.yml b/.github/workflows/azureml-gpu-nightly.yml
index 94ac06a00..d7b485528 100644
--- a/.github/workflows/azureml-gpu-nightly.yml
+++ b/.github/workflows/azureml-gpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-spark-nightly.yml b/.github/workflows/azureml-spark-nightly.yml
index 66f981abc..b0d085fcf 100644
--- a/.github/workflows/azureml-spark-nightly.yml
+++ b/.github/workflows/azureml-spark-nightly.yml
@@ -66,7 +66,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-unit-tests.yml b/.github/workflows/azureml-unit-tests.yml
index 2e28740b8..89854ef99 100644
--- a/.github/workflows/azureml-unit-tests.yml
+++ b/.github/workflows/azureml-unit-tests.yml
@@ -54,7 +54,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/setup.py b/setup.py
index 3c0c40d0d..1107dada1 100644
--- a/setup.py
+++ b/setup.py
@@ -123,7 +123,7 @@
     "machine learning python spark gpu",
     install_requires=install_requires,
     package_dir={"recommenders": "recommenders"},
-    python_requires=">=3.8, <3.12",
+    python_requires=">=3.8, <3.11",
     packages=find_packages(
         where=".",
         exclude=["contrib", "docs", "examples", "scenarios", "tests", "tools"],

From 09069f7ce8b23dc91d70bea577bb15603f07e823 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Sat, 10 Jun 2023 09:47:02 +0800
Subject: [PATCH 10/74] Install dependencies for scipy in docker image

---
 .../submit_groupwise_azureml_pytest.py | 36 +++++++++++--------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
index 190089054..4a34127e5 100644
--- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
+++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
@@ -37,7 +37,6 @@
 """
 import argparse
 import logging
-import glob
 
 from azureml.core.authentication import AzureCliAuthentication
 from azureml.core import Workspace
@@ -146,8 +145,7 @@ def setup_persistent_compute_target(workspace, cluster_name, vm_size, max_nodes)
 
 def create_run_config(
     cpu_cluster,
-    docker_proc_type,
-    workspace,
+    docker_image,
     add_gpu_dependencies,
     add_spark_dependencies,
     conda_pkg_jdk,
@@ -166,8 +164,7 @@
             the following:
                 - Reco_cpu_test
                 - Reco_gpu_test
-        docker_proc_type (str)      : processor type, cpu or gpu
-        workspace                   : workspace reference
+        docker_image (str)          : docker image for cpu or gpu
         add_gpu_dependencies (bool) : True if gpu packages should be
                         added to the conda environment, else False
         add_spark_dependencies (bool) : True if PySpark packages should be
@@ -181,7 +178,21 @@
     run_azuremlcompute = RunConfiguration()
     run_azuremlcompute.target = cpu_cluster
     run_azuremlcompute.environment.docker.enabled = True
-    run_azuremlcompute.environment.docker.base_image = docker_proc_type
+    # See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-train-with-custom-image?view=azureml-api-1#use-a-custom-dockerfile-optional
+    run_azuremlcompute.environment.docker.base_image = None
+    run_azuremlcompute.environment.docker.base_dockerfile = f"""
+    FROM {docker_image}
+    # Install system-level deps for scipy. See
+    # https://docs.scipy.org/doc/scipy/dev/contributor/building.html
+    RUN apt-get update && \
+        apt-get install -y \
+            gcc \
+            g++ \
+            gfortran \
+            libopenblas-dev \
+            liblapack-dev \
+            pkg-config
+    """
 
     # Use conda_dependencies.yml to create a conda environment in
     # the Docker image for execution
@@ -425,13 +436,11 @@ def create_arg_parser():
     args = create_arg_parser()
 
     if args.dockerproc == "cpu":
-        from azureml.core.runconfig import DEFAULT_CPU_IMAGE
-
-        docker_proc_type = DEFAULT_CPU_IMAGE
+        # https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04
+        docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04"
     else:
-        from azureml.core.runconfig import DEFAULT_GPU_IMAGE
-
-        docker_proc_type = DEFAULT_GPU_IMAGE
+        # https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04
+        docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04"
 
     cli_auth = AzureCliAuthentication()
 
     run_config = create_run_config(
         cpu_cluster=cpu_cluster,
-        docker_proc_type=docker_proc_type,
-        workspace=workspace,
+        docker_image=docker_image,
         add_gpu_dependencies=args.add_gpu_dependencies,
         add_spark_dependencies=args.add_spark_dependencies,
         conda_pkg_jdk=args.conda_pkg_jdk,

From 643fed667ecddd715ecaaac62a88edb0849ad87a Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Sat, 10 Jun 2023 10:23:17 +0800
Subject: [PATCH 11/74] Change docker image

---
 .../azureml_tests/submit_groupwise_azureml_pytest.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
index 4a34127e5..af7f931b8 100644
--- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
+++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
@@ -435,12 +435,15 @@ def create_arg_parser():
     logger = logging.getLogger("submit_groupwise_azureml_pytest.py")
     args = create_arg_parser()
 
+    # See Dockerfiles at https://github.com/Azure/AzureML-Containers/tree/master/base
     if args.dockerproc == "cpu":
-        # https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04
-        docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04"
+        from azureml.core.runconfig import DEFAULT_CPU_IMAGE
+
+        docker_image = DEFAULT_CPU_IMAGE
     else:
-        # https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04
-        docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04"
+        from azureml.core.runconfig import DEFAULT_GPU_IMAGE
+
+        docker_image = DEFAULT_GPU_IMAGE
 
     cli_auth = AzureCliAuthentication()

From db4c9c3729e6bdb9577647ad8bb7da31d9dfd230 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Sat, 10 Jun 2023 16:31:17 +0800
Subject: [PATCH 12/74] Add pip==20.1.1

---
 tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
index af7f931b8..194490129 100644
--- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
+++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
@@ -202,6 +202,7 @@
 
     conda_dep = CondaDependencies()
     conda_dep.add_conda_package(conda_pkg_python)
+    conda_dep.add_conda_package("pip==20.1.1")
     conda_dep.add_pip_package(
         "pymanopt@https://github.com/pymanopt/pymanopt/archive/fb36a272cdeecb21992cfd9271eb82baafeb316d.zip"
     )

From 9e05e4fa5d339f0ff04c1ad585366a5476c8a199 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Sat, 10 Jun 2023 16:39:17 +0800
Subject: [PATCH 13/74] Correct conda package format

---
 tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
index 194490129..2f407fbf8 100644
--- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
+++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
@@ -202,7 +202,7 @@
 
     conda_dep = CondaDependencies()
     conda_dep.add_conda_package(conda_pkg_python)
-    conda_dep.add_conda_package("pip==20.1.1")
+    conda_dep.add_conda_package("pip=20.1.1")
     conda_dep.add_pip_package(
         "pymanopt@https://github.com/pymanopt/pymanopt/archive/fb36a272cdeecb21992cfd9271eb82baafeb316d.zip"
     )
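[The back-and-forth in PATCHES 12-13 is a specifier-syntax mismatch: conda match specs use a single `=` (`pip=20.1.1`), while pip/PEP 440 requirements use `==`. A small illustration — mine, using the `packaging` library — of why the two forms are not interchangeable:]

    from packaging.requirements import Requirement

    print(Requirement("pip==20.1.1"))  # parses as a valid PEP 440 requirement
    try:
        Requirement("pip=20.1.1")      # conda match-spec syntax, not PEP 440
    except Exception as err:
        print("not a PEP 440 requirement:", err)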
Windows", "Operating System :: POSIX :: Linux", "Operating System :: MacOS", diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py index af7f931b8..030877356 100644 --- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py +++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py @@ -186,12 +186,11 @@ def create_run_config( # https://docs.scipy.org/doc/scipy/dev/contributor/building.html RUN apt-get update && \ apt-get install -y \ - gcc \ - g++ \ gfortran \ libopenblas-dev \ liblapack-dev \ pkg-config + RUN apt-get install -y git """ # Use conda_dependencies.yml to create a conda environment in @@ -435,15 +434,12 @@ def create_arg_parser(): logger = logging.getLogger("submit_groupwise_azureml_pytest.py") args = create_arg_parser() - # See Dockerfiles at https://github.com/Azure/AzureML-Containers/tree/master/base if args.dockerproc == "cpu": - from azureml.core.runconfig import DEFAULT_CPU_IMAGE - - docker_image = DEFAULT_CPU_IMAGE + # https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04 + docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04" else: - from azureml.core.runconfig import DEFAULT_GPU_IMAGE - - docker_image = DEFAULT_GPU_IMAGE + # https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04 + docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04" cli_auth = AzureCliAuthentication() From d80002ed494947747a104a98139e488eb7782470 Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Sat, 2 Sep 2023 09:52:19 +0800 Subject: [PATCH 16/74] Replace pandas.util.testing with pandas.testing --- tests/unit/recommenders/evaluation/test_python_evaluation.py | 2 +- tests/unit/recommenders/evaluation/test_spark_evaluation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/recommenders/evaluation/test_python_evaluation.py b/tests/unit/recommenders/evaluation/test_python_evaluation.py index e5837fc66..cd54ec36b 100644 --- a/tests/unit/recommenders/evaluation/test_python_evaluation.py +++ b/tests/unit/recommenders/evaluation/test_python_evaluation.py @@ -6,7 +6,7 @@ import pytest from unittest.mock import Mock from sklearn.preprocessing import minmax_scale -from pandas.util.testing import assert_frame_equal +from pandas.testing import assert_frame_equal from recommenders.utils.constants import ( DEFAULT_USER_COL, diff --git a/tests/unit/recommenders/evaluation/test_spark_evaluation.py b/tests/unit/recommenders/evaluation/test_spark_evaluation.py index 9cf35ee3e..c45f095ba 100644 --- a/tests/unit/recommenders/evaluation/test_spark_evaluation.py +++ b/tests/unit/recommenders/evaluation/test_spark_evaluation.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd import pytest -from pandas.util.testing import assert_frame_equal +from pandas.testing import assert_frame_equal from recommenders.evaluation.python_evaluation import ( precision_at_k, From e084412a5ec5ef3e5fab2f13832395a18c996a5c Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Sat, 2 Sep 2023 10:52:54 +0800 Subject: [PATCH 17/74] Remove nonexistent argument check_less_precise of assert_frame_equal() --- .../unit/recommenders/evaluation/test_spark_evaluation.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/unit/recommenders/evaluation/test_spark_evaluation.py b/tests/unit/recommenders/evaluation/test_spark_evaluation.py index c45f095ba..1b740a91c 100644 --- 
a/tests/unit/recommenders/evaluation/test_spark_evaluation.py +++ b/tests/unit/recommenders/evaluation/test_spark_evaluation.py @@ -441,7 +441,7 @@ def test_item_novelty(spark_diversity_data, target_metrics): ) actual = evaluator.historical_item_novelty().toPandas() assert_frame_equal( - target_metrics["item_novelty"], actual, check_exact=False, check_less_precise=4 + target_metrics["item_novelty"], actual, check_exact=False ) assert np.all(actual["item_novelty"].values >= 0) # Test that novelty is zero when data includes only one item @@ -482,7 +482,6 @@ def test_user_diversity(spark_diversity_data, target_metrics): target_metrics["user_diversity"], actual, check_exact=False, - check_less_precise=4, ) @@ -510,7 +509,6 @@ def test_user_item_serendipity(spark_diversity_data, target_metrics): target_metrics["user_item_serendipity"], actual, check_exact=False, - check_less_precise=4, ) @@ -529,7 +527,6 @@ def test_user_serendipity(spark_diversity_data, target_metrics): target_metrics["user_serendipity"], actual, check_exact=False, - check_less_precise=4, ) @@ -562,7 +559,6 @@ def test_user_diversity_item_feature_vector(spark_diversity_data, target_metrics target_metrics["user_diversity_item_feature_vector"], actual, check_exact=False, - check_less_precise=4, ) @@ -599,7 +595,6 @@ def test_user_item_serendipity_item_feature_vector( target_metrics["user_item_serendipity_item_feature_vector"], actual, check_exact=False, - check_less_precise=4, ) @@ -620,7 +615,6 @@ def test_user_serendipity_item_feature_vector(spark_diversity_data, target_metri target_metrics["user_serendipity_item_feature_vector"], actual, check_exact=False, - check_less_precise=4, ) From a6054048bd0576be0728b021c40bf672883efa07 Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Mon, 4 Sep 2023 09:12:01 +0800 Subject: [PATCH 18/74] Remove tests for sarplus for Python 3.7 --- .github/workflows/sarplus.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sarplus.yml b/.github/workflows/sarplus.yml index e5a25fa14..05c0923a2 100644 --- a/.github/workflows/sarplus.yml +++ b/.github/workflows/sarplus.yml @@ -36,10 +36,10 @@ jobs: # Test pysarplus with different versions of Python. # Package pysarplus and upload as GitHub workflow artifact when merged into # the main branch. 
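[`check_less_precise` was deprecated in pandas 1.1 and later removed; PATCH 17 simply drops it and relies on the default tolerances. If the old 4-digit behavior were ever wanted explicitly, the modern knobs are `rtol`/`atol` — the following is my approximation, not what the patch does:]

    import pandas as pd
    from pandas.testing import assert_frame_equal

    a = pd.DataFrame({"x": [1.00001]})
    b = pd.DataFrame({"x": [1.00002]})
    # roughly comparable to the removed check_less_precise=4
    assert_frame_equal(a, b, check_exact=False, rtol=1e-4, atol=1e-4)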
From a6054048bd0576be0728b021c40bf672883efa07 Mon Sep 17 00:00:00 2001
From: Simon Zhao
Date: Mon, 4 Sep 2023 09:12:01 +0800
Subject: [PATCH 18/74] Remove tests for sarplus for Python 3.7

---
 .github/workflows/sarplus.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/sarplus.yml b/.github/workflows/sarplus.yml
index e5a25fa14..05c0923a2 100644
--- a/.github/workflows/sarplus.yml
+++ b/.github/workflows/sarplus.yml
@@ -36,10 +36,10 @@ jobs:
     # Test pysarplus with different versions of Python.
     # Package pysarplus and upload as GitHub workflow artifact when merged into
     # the main branch.
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-latest
    strategy:
      matrix:
-        python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10"]
    steps:
      - uses: actions/checkout@v3

From 40361f4b007979b62e422414f48af39dfac58907 Mon Sep 17 00:00:00 2001
From: Simon Zhao
Date: Mon, 4 Sep 2023 09:13:28 +0800
Subject: [PATCH 19/74] Fixed error: 'DataFrame' object has no attribute
 'append'

---
 recommenders/models/rlrmc/RLRMCdataset.py |  4 ++--
 setup.py                                  | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/recommenders/models/rlrmc/RLRMCdataset.py b/recommenders/models/rlrmc/RLRMCdataset.py
index 6b1329d1d..7670105b3 100644
--- a/recommenders/models/rlrmc/RLRMCdataset.py
+++ b/recommenders/models/rlrmc/RLRMCdataset.py
@@ -68,8 +68,8 @@ def _data_processing(self, train, validation=None, test=None, mean_center=True):
         """
         # Data processing and reindexing code is adopted from https://github.com/Microsoft/Recommenders/blob/main/recommenders/models/ncf/dataset.py
         # If validation dataset is None
-        df = train if validation is None else train.append(validation)
-        df = df if test is None else df.append(test)
+        df = train if validation is None else pd.concat([train, validation])
+        df = df if test is None else pd.concat([df, test])
 
         # Reindex user and item index
         if self.user_idx is None:
diff --git a/setup.py b/setup.py
index 1524611dc..15829f73d 100644
--- a/setup.py
+++ b/setup.py
@@ -27,15 +27,15 @@
 version += ".post" + str(int(time.time()))
 
 install_requires = [
-    "pandas>1.5.2,<2.1",  # requires numpy
+    "pandas>1.5.2,<1.6",  # requires numpy
     "scikit-learn>=1.1.3,<2",  # requires scipy
     "numba>=0.57.0,<1",
     "lightfm>=1.17,<2",
-    "lightgbm>=3.3.2,<4",
+    "lightgbm>=3.3.2,<5",
     "memory-profiler>=0.61.0,<1",
     "nltk>=3.8.1,<4",  # requires tqdm
     "seaborn>=0.12.0,<1",  # requires matplotlib
-    "transformers>=4.26.0,<5",  # requires pyyaml, tqdm
+    "transformers>=4.27.0,<5",  # requires pyyaml, tqdm
     "category-encoders>=2.6.0,<3",
     "jinja2>=3.1.0,<3.2",
     "cornac>=1.15.2,<2",  # requires tqdm
     "retrying>=1.3.4",
     "pandera[strategies]>=0.15.0",  # For generating fake datasets
     "scikit-surprise>=1.1.3",
     "scrapbook>=0.5.0,<1.0.0",  # requires tqdm, papermill
     "hyperopt>=0.2.7,<1",
-    "notebook>=6.5.4,<8",  # requires jupyter, ipykernel
-    "locust>=2.15.1,<3",
+    "notebook>=7.0.0,<8",  # requires jupyter, ipykernel
+    "locust>=2.12.2,<3",
 ]
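[`DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0, which is what PATCH 19 works around with `pd.concat`. A toy before/after sketch (illustrative frames of mine, not the RLRMC data):]

    import pandas as pd

    train = pd.DataFrame({"user": [1], "item": [10]})
    validation = pd.DataFrame({"user": [2], "item": [20]})

    # old, removed in pandas 2.0:  df = train.append(validation)
    df = pd.concat([train, validation])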
From 9364c9b7305fa0a8073816cee3c4a157afe94b65 Mon Sep 17 00:00:00 2001
From: Simon Zhao
Date: Mon, 4 Sep 2023 14:52:43 +0800
Subject: [PATCH 20/74] Add hypothesis<6.83.1

---
 setup.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/setup.py b/setup.py
index 15829f73d..93266abcc 100644
--- a/setup.py
+++ b/setup.py
@@ -46,6 +46,9 @@
     "hyperopt>=0.2.7,<1",
     "notebook>=7.0.0,<8",  # requires jupyter, ipykernel
     "locust>=2.12.2,<3",
+    # 6.83.1 introduced a non-existent attribute '_deferred_pprinters' of IPython.lib.pretty in
+    # https://github.com/HypothesisWorks/hypothesis/commit/5ea8e0c3e6da1cd9fb3f302124dc74791c14db11
+    "hypothesis<6.83.1",
 ]

From be86a29bc2a0e67e45de9689369395965e08f5f2 Mon Sep 17 00:00:00 2001
From: Simon Zhao
Date: Mon, 4 Sep 2023 15:40:10 +0800
Subject: [PATCH 21/74] Use ubuntu-22.04 instead of latest

Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com>
---
 .github/workflows/sarplus.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/sarplus.yml b/.github/workflows/sarplus.yml
index 05c0923a2..e0e483ab5 100644
--- a/.github/workflows/sarplus.yml
+++ b/.github/workflows/sarplus.yml
@@ -36,7 +36,7 @@ jobs:
     # Test pysarplus with different versions of Python.
     # Package pysarplus and upload as GitHub workflow artifact when merged into
     # the main branch.
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10"]

From 0641d9531384f2f30b2c16cb368e6b57f32cd829 Mon Sep 17 00:00:00 2001
From: Simon Zhao
Date: Mon, 4 Sep 2023 15:42:13 +0800
Subject: [PATCH 22/74] Update comments

---
 setup.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 93266abcc..f8e0f5bef 100644
--- a/setup.py
+++ b/setup.py
@@ -27,7 +27,8 @@
 version += ".post" + str(int(time.time()))
 
 install_requires = [
-    "pandas>1.5.2,<1.6",  # requires numpy
+    # requires numpy and pandas>1.6 removes DataFrame.append() which is used in scrapbook.models
+    "pandas>1.5.2,<1.6",
     "scikit-learn>=1.1.3,<2",  # requires scipy
     "numba>=0.57.0,<1",
     "lightfm>=1.17,<2",
@@ -46,7 +47,7 @@
     "hyperopt>=0.2.7,<1",
     "notebook>=7.0.0,<8",  # requires jupyter, ipykernel
     "locust>=2.12.2,<3",
-    # 6.83.1 introduced a non-existent attribute '_deferred_pprinters' of IPython.lib.pretty in
+    # hypothesis 6.83.1 introduced a non-existent attribute '_deferred_pprinters' of IPython.lib.pretty in
     # https://github.com/HypothesisWorks/hypothesis/commit/5ea8e0c3e6da1cd9fb3f302124dc74791c14db11
     "hypothesis<6.83.1",
 ]

From 22ac9e2590b239c9f0d8a0269bb457de82a8cb25 Mon Sep 17 00:00:00 2001
From: Simon Zhao
Date: Mon, 4 Sep 2023 16:56:56 +0800
Subject: [PATCH 23/74] Add python 3.11

Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com>
---
 .github/workflows/azureml-cpu-nightly.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/azureml-cpu-nightly.yml b/.github/workflows/azureml-cpu-nightly.yml
index 14f45531c..337035242 100644
--- a/.github/workflows/azureml-cpu-nightly.yml
+++ b/.github/workflows/azureml-cpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code

From 015ce4ae90bfdbccb47566f8701b75eaf3e07918 Mon Sep 17 00:00:00 2001
From: Simon Zhao
Date: Mon, 4 Sep 2023 16:59:58 +0800
Subject: [PATCH 24/74] Add python 3.11

Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com>
---
 .github/workflows/azureml-gpu-nightly.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/azureml-gpu-nightly.yml b/.github/workflows/azureml-gpu-nightly.yml
index 1b5e7ce74..a342b45c8 100644
--- a/.github/workflows/azureml-gpu-nightly.yml
+++ b/.github/workflows/azureml-gpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code

From 91be6aec94e4edf978b60582a578b7d2e975a24c Mon Sep 17 00:00:00 2001
From: Simon Zhao
Date: Mon, 4 Sep 2023 17:00:27 +0800
Subject: [PATCH 25/74] Add python 3.11

Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com>
---
 .github/workflows/azureml-spark-nightly.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/azureml-spark-nightly.yml b/.github/workflows/azureml-spark-nightly.yml
index 6a12190c0..b784a3bea 100644
--- a/.github/workflows/azureml-spark-nightly.yml
+++ b/.github/workflows/azureml-spark-nightly.yml
@@ -66,7 +66,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code

From 76901c6e1ee74921db93e373d452a00b8acc06b3 Mon Sep 17 00:00:00 2001
From: Simon Zhao
Date: Mon, 4 Sep 2023 17:01:11 +0800
Subject: [PATCH 26/74] Add python 3.11

Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com>
---
 .github/workflows/azureml-unit-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/azureml-unit-tests.yml b/.github/workflows/azureml-unit-tests.yml
index 92c59f7ef..977961a1e 100644
--- a/.github/workflows/azureml-unit-tests.yml
+++ b/.github/workflows/azureml-unit-tests.yml
@@ -54,7 +54,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code

From 7df0c5ef4bbd1e5a5a3cb8b38cb7880f307fc990 Mon Sep 17 00:00:00 2001
From: Simon Zhao
Date: Mon, 4 Sep 2023 17:02:06 +0800
Subject: [PATCH 27/74] Add python 3.11

Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com>
---
 .github/workflows/sarplus.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/sarplus.yml b/.github/workflows/sarplus.yml
index e0e483ab5..66dd7d829 100644
--- a/.github/workflows/sarplus.yml
+++ b/.github/workflows/sarplus.yml
@@ -39,7 +39,7 @@ jobs:
    runs-on: ubuntu-22.04
    strategy:
      matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
    steps:
      - uses: actions/checkout@v3

From 5c2a1e6fcbcf8b1f1b7b1c2d3d4e5f60718293a4 Mon Sep 17 00:00:00 2001
From: Simon Zhao
Date: Mon, 4 Sep 2023 17:07:24 +0800
Subject: [PATCH 28/74] Add python 3.11

---
 setup.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index f8e0f5bef..384d34cf5 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@
 import sys
 import time
 
-# workaround for enabling editable user pip installs
+# Workaround for enabling editable user pip installs
 site.ENABLE_USER_SITE = "--user" in sys.argv[1:]
 
 # Version
@@ -111,10 +111,10 @@
         "Topic :: Scientific/Engineering :: Artificial Intelligence",
         "Topic :: Software Development :: Libraries :: Python Modules",
         "License :: OSI Approved :: MIT License",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
         "Operating System :: POSIX :: Linux",
     ],
     extras_require=extras_require,
@@ -122,7 +122,7 @@
     "machine learning python spark gpu",
     install_requires=install_requires,
     package_dir={"recommenders": "recommenders"},
-    python_requires=">=3.8, <3.11",
+    python_requires=">=3.8, <=3.11",
     packages=find_packages(
         where=".",
         exclude=["contrib", "docs", "examples", "scenarios", "tests", "tools"],

From c3a70302f3b6f530d6737f097d7fc5a494790e70 Mon Sep 17 00:00:00 2001
From: Simon Zhao
Date: Mon, 4 Sep 2023 19:20:23 +0800
Subject: [PATCH 29/74] Remove python 3.11

---
 .github/workflows/azureml-cpu-nightly.yml   | 2 +-
 .github/workflows/azureml-gpu-nightly.yml   | 2 +-
 .github/workflows/azureml-spark-nightly.yml | 2 +-
 .github/workflows/azureml-unit-tests.yml    | 2 +-
 .github/workflows/sarplus.yml               | 2 +-
 setup.py                                    | 3 +--
 6 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/azureml-cpu-nightly.yml b/.github/workflows/azureml-cpu-nightly.yml
index 337035242..14f45531c 100644
--- a/.github/workflows/azureml-cpu-nightly.yml
+++ b/.github/workflows/azureml-cpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-gpu-nightly.yml b/.github/workflows/azureml-gpu-nightly.yml
index a342b45c8..1b5e7ce74 100644
--- a/.github/workflows/azureml-gpu-nightly.yml
+++ b/.github/workflows/azureml-gpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-spark-nightly.yml b/.github/workflows/azureml-spark-nightly.yml
index b784a3bea..6a12190c0 100644
--- a/.github/workflows/azureml-spark-nightly.yml
+++ b/.github/workflows/azureml-spark-nightly.yml
@@ -66,7 +66,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-unit-tests.yml b/.github/workflows/azureml-unit-tests.yml
index 977961a1e..92c59f7ef 100644
--- a/.github/workflows/azureml-unit-tests.yml
+++ b/.github/workflows/azureml-unit-tests.yml
@@ -54,7 +54,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/sarplus.yml b/.github/workflows/sarplus.yml
index 66dd7d829..e0e483ab5 100644
--- a/.github/workflows/sarplus.yml
+++ b/.github/workflows/sarplus.yml
@@ -39,7 +39,7 @@ jobs:
    runs-on: ubuntu-22.04
    strategy:
      matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10"]
    steps:
      - uses: actions/checkout@v3
diff --git a/setup.py b/setup.py
index 384d34cf5..166049dca 100644
--- a/setup.py
+++ b/setup.py
@@ -114,7 +114,6 @@
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
-        "Programming Language :: Python :: 3.11",
         "Operating System :: POSIX :: Linux",
     ],
     extras_require=extras_require,
@@ -122,7 +121,7 @@
     "machine learning python spark gpu",
     install_requires=install_requires,
     package_dir={"recommenders": "recommenders"},
-    python_requires=">=3.8, <=3.11",
+    python_requires=">=3.8, <=3.10",
     packages=find_packages(
         where=".",
         exclude=["contrib", "docs", "examples", "scenarios", "tests", "tools"],

From 26641987cd7dacd0c2b14a4d07f09c688bbb72fe Mon Sep 17 00:00:00 2001
From: Drobin Max
Date: Fri, 5 Jan 2024 12:49:17 +0300
Subject: [PATCH 30/74] Update util.py

Fixed a bug when reading a dataset split with timestamps
---
 recommenders/models/sasrec/util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recommenders/models/sasrec/util.py b/recommenders/models/sasrec/util.py
index 8c81b563c..965c76467 100644
--- a/recommenders/models/sasrec/util.py
+++ b/recommenders/models/sasrec/util.py
@@ -35,7 +35,7 @@ def __init__(self, **kwargs):
         if self.filename:
             with open(self.filename, "r") as fr:
                 sample = fr.readline()
-            ncols = sample.strip().split(self.col_sep)
+            ncols = len(sample.strip().split(self.col_sep))
             if ncols == 3:
                 self.with_time = True
             else:

From 292fdf5fcab92202bf0ceaa48dd3f736813c56eb Mon Sep 17 00:00:00 2001
From: Drobin Max
Date: Fri, 5 Jan 2024 13:03:35 +0300
Subject: [PATCH 31/74] Update util.py

added exception for unknown count of columns
---
 recommenders/models/sasrec/util.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/recommenders/models/sasrec/util.py b/recommenders/models/sasrec/util.py
index 965c76467..e1b692446 100644
--- a/recommenders/models/sasrec/util.py
+++ b/recommenders/models/sasrec/util.py
@@ -38,8 +38,10 @@ def __init__(self, **kwargs):
             ncols = len(sample.strip().split(self.col_sep))
             if ncols == 3:
                 self.with_time = True
-            else:
+            elif ncol == 2:
                 self.with_time = False
+            else:
+                raise ValueError(f'3 or 2 columns must be in dataset. Given {ncol} columns')
 
     def split(self, **kwargs):
         self.filename = kwargs.get("filename", self.filename)

From 0c1933acf87877dcef03652cda700dafcd20bf69 Mon Sep 17 00:00:00 2001
From: Drobin Max
Date: Fri, 5 Jan 2024 13:04:48 +0300
Subject: [PATCH 32/74] Update util.py

fixed typo
---
 recommenders/models/sasrec/util.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recommenders/models/sasrec/util.py b/recommenders/models/sasrec/util.py
index e1b692446..2aadb6435 100644
--- a/recommenders/models/sasrec/util.py
+++ b/recommenders/models/sasrec/util.py
@@ -38,10 +38,10 @@ def __init__(self, **kwargs):
             ncols = len(sample.strip().split(self.col_sep))
             if ncols == 3:
                 self.with_time = True
-            elif ncol == 2:
+            elif ncols == 2:
                 self.with_time = False
             else:
-                raise ValueError(f'3 or 2 columns must be in dataset. Given {ncol} columns')
+                raise ValueError(f'3 or 2 columns must be in dataset. Given {ncols} columns')

 
     def split(self, **kwargs):
         self.filename = kwargs.get("filename", self.filename)
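[PATCHES 30-32 converge on sniffing the first line of the ratings file to decide whether a timestamp column is present. The same logic extracted into a standalone sketch of mine (toy helper with an assumed tab separator, not the actual SASRecDataSet class):]

    def has_timestamp_column(path, col_sep="\t"):
        """Return True if the first line has user, item, timestamp columns."""
        with open(path, "r") as fr:
            sample = fr.readline()
        ncols = len(sample.strip().split(col_sep))
        if ncols == 3:
            return True   # user, item, timestamp
        elif ncols == 2:
            return False  # user, item only
        raise ValueError(f"3 or 2 columns must be in dataset. Given {ncols} columns")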
From fd865a31f73cfa6a6421800090c8e2202eb1e068 Mon Sep 17 00:00:00 2001
From: Andreas Argyriou
Date: Tue, 30 Jan 2024 11:26:33 +0000
Subject: [PATCH 33/74] Update documentation URL in setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index bb9db1688..03e84fed3 100644
--- a/setup.py
+++ b/setup.py
@@ -100,7 +100,7 @@
     long_description_content_type="text/markdown",
     url="https://github.com/recommenders-team/recommenders",
     project_urls={
-        "Documentation": "https://microsoft-recommenders.readthedocs.io/en/stable/",
+        "Documentation": "https://recommenders-team.github.io/recommenders/intro.html",
         "Wiki": "https://github.com/recommenders-team/recommenders/wiki",
     },
     author="Recommenders contributors",

From 1249199dba78a6eac267e0b714e509c899c34238 Mon Sep 17 00:00:00 2001
From: miguelgfierro
Date: Wed, 31 Jan 2024 11:49:56 +0100
Subject: [PATCH 34/74] Bug in github actions in python version

Signed-off-by: miguelgfierro
---
 .github/workflows/update_documentation.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/update_documentation.yml b/.github/workflows/update_documentation.yml
index 9779e5eed..a6627dae1 100644
--- a/.github/workflows/update_documentation.yml
+++ b/.github/workflows/update_documentation.yml
@@ -21,7 +21,7 @@
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: 3.10
+          python-version: "3.10"
 
       - name: Install dependencies
         run: |
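[The quoting in PATCH 34 matters because YAML scans a bare 3.10 as the float 3.1, so actions/setup-python would resolve Python 3.1 instead of 3.10. Demonstrable with PyYAML:]

    import yaml

    print(yaml.safe_load("python-version: 3.10"))    # {'python-version': 3.1}
    print(yaml.safe_load('python-version: "3.10"'))  # {'python-version': '3.10'}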
Given {ncols} columns") def split(self, **kwargs): self.filename = kwargs.get("filename", self.filename) From f5a15c83d8cf9622ef4342439e60892c446355c9 Mon Sep 17 00:00:00 2001 From: Andreas Argyriou Date: Fri, 9 Feb 2024 13:43:10 +0000 Subject: [PATCH 36/74] Fix pandera in Python 3.7 Signed-off-by: Andreas Argyriou --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3ce8b5d4b..013ac8af3 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,8 @@ "cornac>=1.1.2,<1.15.2;python_version<='3.7'", "cornac>=1.15.2,<2;python_version>='3.8'", # After 1.15.2, Cornac requires python 3.8 "retrying>=1.3.3", - "pandera[strategies]>=0.6.5", # For generating fake datasets + "pandera[strategies]>=0.6.5,<0.18;python_version<='3.7'", # For generating fake datasets + "pandera[strategies]>=0.6.5;python_version>='3.8'", "scikit-surprise>=1.0.6", "hyperopt>=0.1.2,<1", "ipykernel>=4.6.1,<7", From 00bc2863f00532e4fc84031f124d362065b98af5 Mon Sep 17 00:00:00 2001 From: Andreas Argyriou Date: Tue, 13 Feb 2024 11:21:10 +0000 Subject: [PATCH 37/74] Update setup.py Signed-off-by: Andreas Argyriou --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 40d781af3..13e06d06d 100644 --- a/setup.py +++ b/setup.py @@ -46,8 +46,8 @@ "cornac>=1.1.2,<1.15.2;python_version<='3.7'", "cornac>=1.15.2,<2;python_version>='3.8'", # After 1.15.2, Cornac requires python 3.8 "retrying>=1.3.3", - "pandera[strategies]>=0.6.5,<0.18;python_version<='3.7'", # For generating fake datasets - "pandera[strategies]>=0.6.5;python_version>='3.8'", + "pandera[strategies]>=0.6.5,<0.18;python_version<='3.8'", # For generating fake datasets + "pandera[strategies]>=0.6.5;python_version>='3.9'", "scikit-surprise>=1.0.6", "hyperopt>=0.1.2,<1", "ipykernel>=4.6.1,<7", From c736241b0af62025d603343e184b0041384d011b Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Tue, 20 Feb 2024 01:39:09 +0800 Subject: [PATCH 38/74] Resolve issue #2018 (#2022) * Issue with TF Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Comment out the PR gate affected tests with the upgrade to TF>2.10.1 Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Comment out the nightly builds affected tests with the upgrade to TF>2.10.1 Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :bug: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Comment out the nightly builds affected tests with the upgrade to TF>2.10.1 Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * revert the breaking tests with TF 2.10.1 Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * temporary pin to TF=2.8.4 Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Update security tests Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Try to resolve #2018 Signed-off-by: Simon Zhao * Exclude tensorflow versions that are not supported Signed-off-by: Simon Zhao * Correct version comparison using packaging.version.Version Signed-off-by: Simon Zhao * Capture importerror Signed-off-by: Simon Zhao * Restrict tensorflow < 2.13 Signed-off-by: Simon Zhao * Set tensorflow < 2.12 Signed-off-by: Simon Zhao * Not triggering unit tests on Draft PR (#2033) * Not triggering unit tests on Draft PR Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Change a PR-triggering file to test Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> --------- Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Signed-off-by: Simon Zhao * Refactor 
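[PATCHES 36-37 select between two pandera pins with PEP 508 environment markers, which pip evaluates against the running interpreter at install time. A minimal sketch of mine, using the `packaging` library, of that evaluation:]

    from packaging.markers import Marker

    old_branch = Marker("python_version <= '3.8'")
    new_branch = Marker("python_version >= '3.9'")

    # Evaluated against the current interpreter; exactly one is True.
    print(old_branch.evaluate(), new_branch.evaluate())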
ranking metric `map` to be the same as Spark's (#2004) * Announcement LF Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Update email Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Update README.md Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * security Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * license and contribution notice Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * update author link Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Add new code of conduct from LF Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Replacing references GRU4Rec to GRU Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Replacing references GRU4Rec to GRU Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Replacing references GRU4Rec in config files Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Update references Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Delete conda.md Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * refactor map_at_k and map to be the same as Spark's Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * list of test failing to fix Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Update readme LF feedback @wutaomsft Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Update NEWS.md Co-authored-by: Andreas Argyriou Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Update README.md Co-authored-by: Andreas Argyriou Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Fix test errors, Refactor column check utils to be simpler Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Rename ranking tests to be _at_k suffixed Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Change test names in the test group Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * add comment to mocked fn in a test Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * :memo: Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * remove unused input Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * :memo: Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * no need to output the logs twice Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * packages Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * skipping flaky test Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Issue with TF Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Comment out the PR gate affected tests with the upgrade to TF>2.10.1 Signed-off-by: miguelgfierro Signed-off-by: Jun 
Ki Min <42475935+loomlike@users.noreply.github.com> * Comment out the nightly builds affected tests with the upgrade to TF>2.10.1 Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * :bug: Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Comment out the nightly builds affected tests with the upgrade to TF>2.10.1 Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * revert the breaking tests with TF 2.10.1 Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * temporary pin to TF=2.8.4 Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Update security tests Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Update expected values to not use fixture Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * list of test failing to fix Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> * Fix missing fixture error Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> --------- Signed-off-by: miguelgfierro Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Co-authored-by: miguelgfierro Co-authored-by: Andreas Argyriou Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com> Signed-off-by: Simon Zhao * Add missing kernelspec language Signed-off-by: Simon Zhao * Remove scrapbook and papermill deps Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * notebook utils programmatic execution Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Test notebook programmatic Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Added test notebook for utils Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * data notebooks Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Replace papermill and scrapbook for new internal function Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Replace papermill and scrapbook for new internal function Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Update new programmatic execution code Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Update new programmatic execution code Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Update notebooks with new utility Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :bug: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Issue with xDeepFM WIP Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :bug: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :bug: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Document the tests in programmatic notebook Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * WIP Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * WIP Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Import missing store_metadata Signed-off-by: Simon Zhao * Correct pattern matching and substitution Signed-off-by: Simon Zhao * Merge multiline parameters into one line Signed-off-by: Simon Zhao * Increase timeout Signed-off-by: Simon Zhao * Fix nightly test errors (#2045) * Revert tests tolerance * Fix notebook parameter parsing * Add notebook utils tests to test groups * Fix notebooks * Fix notebook unit tests * Update evaluation metrics name map. 
Handle None for exp_var * Fix smoke tests * cleanup * Fix functional test errors * make notebook parameter update function to be private * Fix benchmark notebook bug * fix remaining bugs --------- Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Signed-off-by: Simon Zhao * Fix benchmarks last cell to store value, not [value] Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Signed-off-by: Simon Zhao * :memo: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: remove papermill and scrapbook references Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: remove papermill and scrapbook references Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: remove papermill and scrapbook references Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: remove papermill and scrapbook references Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: remove papermill and scrapbook references Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Updated PR template Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Updated contributing Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Updated PR template and contributing Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Updated contributing Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * [Fix] correct MIND data construction of user behavior history Signed-off-by: Simon Zhao * change path hybrid Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Update hybrid to CF Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * change path hybrid Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * change path hybrid Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Replace LayerRNNCell with AbstractRNNCell Signed-off-by: Simon Zhao * Stop testing for deeprec Signed-off-by: Simon Zhao * Refactor ranking metric `map` to be the same as Spark's (#2004)
* notebook utils programmatic execution Signed-off-by: miguelgfierro * Test notebook programmatic Signed-off-by: miguelgfierro * Added test notebook for utils Signed-off-by: miguelgfierro * Replace papermill and scrapbook for new internal function Signed-off-by: miguelgfierro * Replace papermill and scrapbook for new internal function Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Update new programmatic execution code Signed-off-by: miguelgfierro * Update new programmatic execution code Signed-off-by: miguelgfierro * Update notebooks with new utility Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :bug: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Issue with xDeepFM WIP Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :bug: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :bug: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Document the tests in programmatic notebook Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * WIP Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * WIP Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Import missing store_metadata Signed-off-by: Simon Zhao * Correct pattern matching and substitution Signed-off-by: Simon Zhao * Increase timeout Signed-off-by: Simon Zhao * Fix nightly test errors (#2045) * Revert tests tolerance * Fix notebook parameter parsing * Add notebook utils tests to test groups * Fix notebooks * Fix notebook unit tests * Update evaluation metrics name map. Handle None for exp_var * Fix smoke tests * cleanup * Fix functional test errors * make notebook parameter update function to be private * Fix benchmark notebook bug * fix remaining bugs --------- Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Signed-off-by: Simon Zhao * Fix benchmarks last cell to store value, not [value] Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Signed-off-by: Simon Zhao * :memo: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: remove papermill and scrapbook references Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: remove papermill and scrapbook references Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: remove papermill and scrapbook references Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Updated PR template Signed-off-by: miguelgfierro * Updated contributing Signed-off-by: miguelgfierro * Updated PR template and contributing Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Updated contributing Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * change path hybrid Signed-off-by: miguelgfierro * change path hybrid Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Creating a jupyter book Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Creating documentation Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * WIP Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Added rst files Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * license Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Weird warning with a link in the docstrings Signed-off-by: miguelgfierro Signed-off-by:
Simon Zhao * Fix docstring errors and replace .. note:: with Note: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Automatic build of documentation Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Automatic build of documentation dev Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Automatic build of documentation deps Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Automatic build of documentation deps Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Automatic build of documentation deps Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Delete workflow and try via UI Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Added again the workflow Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * git add * -rf Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * git add * -f Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * add git info Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * actions to automatically update documentation Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * actions to automatically update documentation Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * actions to automatically update documentation :bug: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * actions to automatically update documentation :bug: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * trying github token Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * trying github token Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * trying github token and pull before pushing Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * pull rebase Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * pull rebase and -Xtheirs Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * clean Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Update documentation badge Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * install all deps Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * try adding other sphinx extensions Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Refact model rst Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * comment geoimc and rlrmc docs until issue is fixed Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * :memo: Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Adding init and other special members Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Adding init and other special members Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Reviewing other rst Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Change sphinx version Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Change sphinx version and jupyter book Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Change the way we compile the documentation Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Using the latest JB release Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Documentation working Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Update docs/_config.yml Co-authored-by: Simon Zhao Signed-off-by: Simon Zhao * Update docs/requirements-doc.txt Co-authored-by: Simon Zhao Signed-off-by: Simon Zhao * Update docs/_config.yml Co-authored-by: Simon Zhao Signed-off-by: Simon Zhao * Added comments by @SimonYansenZhao Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao * Upgrade versions 
of GitHub Actions See https://github.blog/changelog/2023-09-22-github-actions-transitioning-from-node-16-to-node-20/ Signed-off-by: Simon Zhao * Update setup.py Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com> Signed-off-by: Simon Zhao * Try to disable sum and sum_component only Signed-off-by: Simon Zhao * Upgrade AzureML docker image Signed-off-by: Simon Zhao * Correct variable names Signed-off-by: Simon Zhao * Install git in the Conda env Signed-off-by: Simon Zhao * Disable test_xdeepfm_component_definition Signed-off-by: Simon Zhao * Use latest CUDA Signed-off-by: Simon Zhao * Correct GPU selection Signed-off-by: Simon Zhao * Remove leading whitespaces in Dockerfile Signed-off-by: Simon Zhao * Simplify azureml-test/action.yml Signed-off-by: Simon Zhao * Install wget in Docker image Signed-off-by: Simon Zhao * Update Signed-off-by: Simon Zhao --------- Signed-off-by: miguelgfierro Signed-off-by: Simon Zhao Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Co-authored-by: miguelgfierro Co-authored-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Co-authored-by: Andreas Argyriou Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com> Co-authored-by: thaiminhpv --- .github/actions/azureml-test/action.yml | 49 +++++++--------- .github/workflows/azureml-cpu-nightly.yml | 4 +- .github/workflows/azureml-gpu-nightly.yml | 4 +- .../workflows/azureml-release-pipeline.yml | 4 +- .github/workflows/azureml-spark-nightly.yml | 4 +- .github/workflows/azureml-unit-tests.yml | 4 +- .github/workflows/sarplus.yml | 12 ++-- .github/workflows/update_documentation.yml | 4 +- .../models/sequential/rnn_cell_implement.py | 4 +- setup.py | 3 +- .../submit_groupwise_azureml_pytest.py | 56 +++++++++++-------- tests/ci/azureml_tests/test_groups.py | 20 +++---- tests/security/test_dependency_security.py | 12 ++-- 13 files changed, 92 insertions(+), 88 deletions(-) diff --git a/.github/actions/azureml-test/action.yml b/.github/actions/azureml-test/action.yml index 3770298d2..d74b88160 100644 --- a/.github/actions/azureml-test/action.yml +++ b/.github/actions/azureml-test/action.yml @@ -69,7 +69,7 @@ runs: using: "composite" steps: - name: Setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.8" - name: Install azureml-core and azure-cli on a GitHub hosted server @@ -82,43 +82,32 @@ runs: - name: Install wheel package shell: bash run: pip install --quiet wheel - - name: Submit CPU tests to AzureML + - name: Submit tests to AzureML shell: bash - if: contains(inputs.TEST_GROUP, 'cpu') run: >- - python tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py --clustername ${{inputs.CPU_CLUSTER_NAME}} - --subid ${{inputs.AZUREML_TEST_SUBID}} --reponame "recommenders" --branch ${{ github.ref }} - --rg ${{inputs.RG}} --wsname ${{inputs.WS}} --expname ${{inputs.EXP_NAME}}_${{inputs.TEST_GROUP}} - --testlogs ${{inputs.TEST_LOGS_PATH}} --testkind ${{inputs.TEST_KIND}} - --conda_pkg_python ${{inputs.PYTHON_VERSION}} --testgroup ${{inputs.TEST_GROUP}} - --disable-warnings --sha "${GITHUB_SHA}" - - name: Submit GPU tests to AzureML - shell: bash - if: contains(inputs.TEST_GROUP, 'gpu') - run: >- - python tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py --clustername ${{inputs.GPU_CLUSTER_NAME}} - --subid ${{inputs.AZUREML_TEST_SUBID}} --reponame "recommenders" --branch ${{ github.ref }} - --rg ${{inputs.RG}} --wsname ${{inputs.WS}} --expname ${{inputs.EXP_NAME}}_${{inputs.TEST_GROUP}} - 
--testlogs ${{inputs.TEST_LOGS_PATH}} --add_gpu_dependencies --testkind ${{inputs.TEST_KIND}} - --conda_pkg_python ${{inputs.PYTHON_VERSION}} --testgroup ${{inputs.TEST_GROUP}} - --disable-warnings --sha "${GITHUB_SHA}" - - name: Submit PySpark tests to AzureML - shell: bash - if: contains(inputs.TEST_GROUP, 'spark') - run: >- - python tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py --clustername ${{inputs.CPU_CLUSTER_NAME}} - --subid ${{inputs.AZUREML_TEST_SUBID}} --reponame "recommenders" --branch ${{ github.ref }} - --rg ${{inputs.RG}} --wsname ${{inputs.WS}} --expname ${{inputs.EXP_NAME}}_${{inputs.TEST_GROUP}} - --testlogs ${{inputs.TEST_LOGS_PATH}} --add_spark_dependencies --testkind ${{inputs.TEST_KIND}} - --conda_pkg_python ${{inputs.PYTHON_VERSION}} --testgroup ${{inputs.TEST_GROUP}} - --disable-warnings --sha "${GITHUB_SHA}" + python tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py \ + --subid ${{inputs.AZUREML_TEST_SUBID}} \ + --reponame "recommenders" \ + --branch ${{ github.ref }} \ + --rg ${{inputs.RG}} \ + --wsname ${{inputs.WS}} \ + --expname ${{inputs.EXP_NAME}}_${{inputs.TEST_GROUP}} \ + --testlogs ${{inputs.TEST_LOGS_PATH}} \ + --testkind ${{inputs.TEST_KIND}} \ + --conda_pkg_python ${{inputs.PYTHON_VERSION}} \ + --testgroup ${{inputs.TEST_GROUP}} \ + --disable-warnings \ + --sha "${GITHUB_SHA}" \ + --clustername $(if [[ ${{inputs.TEST_GROUP}} =~ "gpu" ]]; then echo "${{inputs.GPU_CLUSTER_NAME}}"; else echo "${{inputs.CPU_CLUSTER_NAME}}"; fi) \ + $(if [[ ${{inputs.TEST_GROUP}} =~ "gpu" ]]; then echo "--add_gpu_dependencies"; fi) \ + $(if [[ ${{inputs.TEST_GROUP}} =~ "spark" ]]; then echo "--add_spark_dependencies"; fi) - name: Get exit status shell: bash id: exit_status run: echo "code=$(cat ${{inputs.PYTEST_EXIT_CODE}})" >> $GITHUB_OUTPUT - name: Check Success/Failure if: ${{ steps.exit_status.outputs.code != 0 }} - uses: actions/github-script@v3 + uses: actions/github-script@v7 with: script: | core.setFailed('All tests did not pass!') diff --git a/.github/workflows/azureml-cpu-nightly.yml b/.github/workflows/azureml-cpu-nightly.yml index e2142a231..72bb700cf 100644 --- a/.github/workflows/azureml-cpu-nightly.yml +++ b/.github/workflows/azureml-cpu-nightly.yml @@ -47,7 +47,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out repository code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Get test group names id: get_test_groups uses: ./.github/actions/get-test-groups @@ -71,7 +71,7 @@ jobs: test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }} steps: - name: Check out repository code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Execute tests uses: ./.github/actions/azureml-test id: execute_tests diff --git a/.github/workflows/azureml-gpu-nightly.yml b/.github/workflows/azureml-gpu-nightly.yml index dcf0bc102..efac48774 100644 --- a/.github/workflows/azureml-gpu-nightly.yml +++ b/.github/workflows/azureml-gpu-nightly.yml @@ -47,7 +47,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out repository code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Get test group names id: get_test_groups uses: ./.github/actions/get-test-groups @@ -71,7 +71,7 @@ jobs: test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }} steps: - name: Check out repository code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Execute tests uses: ./.github/actions/azureml-test id: execute_tests diff --git a/.github/workflows/azureml-release-pipeline.yml 
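The consolidated step above replaces the three per-platform submit steps with one command whose cluster and extra flags are derived from the test group name via inline bash conditionals. A sketch of that selection logic in Python (the function name and cluster strings are illustrative, not part of the repository):

def submit_args(test_group, cpu_cluster, gpu_cluster):
    # Mirrors the bash conditionals: any group whose name contains "gpu" runs
    # on the GPU cluster with GPU dependencies; "spark" groups add Spark deps.
    args = ["--testgroup", test_group]
    if "gpu" in test_group:
        args += ["--clustername", gpu_cluster, "--add_gpu_dependencies"]
    else:
        args += ["--clustername", cpu_cluster]
    if "spark" in test_group:
        args.append("--add_spark_dependencies")
    return args

assert submit_args("group_gpu_001", "cpu-c", "gpu-c")[3] == "gpu-c"
assert "--add_spark_dependencies" in submit_args("group_spark_001", "cpu-c", "gpu-c")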
b/.github/workflows/azureml-release-pipeline.yml index 8475a9a2f..d9899658e 100644 --- a/.github/workflows/azureml-release-pipeline.yml +++ b/.github/workflows/azureml-release-pipeline.yml @@ -33,9 +33,9 @@ jobs: needs: [unit-test-workflow, cpu-nightly-workflow, gpu-nightly-workflow, spark-nightly-workflow] steps: - name: Check out repository code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.8" - name: Install wheel package diff --git a/.github/workflows/azureml-spark-nightly.yml b/.github/workflows/azureml-spark-nightly.yml index 434929c2e..b3a76f9ea 100644 --- a/.github/workflows/azureml-spark-nightly.yml +++ b/.github/workflows/azureml-spark-nightly.yml @@ -46,7 +46,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out repository code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Get test group names id: get_test_groups uses: ./.github/actions/get-test-groups @@ -70,7 +70,7 @@ jobs: test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }} steps: - name: Check out repository code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Execute tests uses: ./.github/actions/azureml-test id: execute_tests diff --git a/.github/workflows/azureml-unit-tests.yml b/.github/workflows/azureml-unit-tests.yml index a175b7247..13ed56005 100644 --- a/.github/workflows/azureml-unit-tests.yml +++ b/.github/workflows/azureml-unit-tests.yml @@ -36,7 +36,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out repository code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Get test group names id: get_test_groups uses: ./.github/actions/get-test-groups @@ -60,7 +60,7 @@ jobs: test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }} steps: - name: Check out repository code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Execute tests uses: ./.github/actions/azureml-test id: execute_tests diff --git a/.github/workflows/sarplus.yml b/.github/workflows/sarplus.yml index 6df1c6c9b..766b31645 100644 --- a/.github/workflows/sarplus.yml +++ b/.github/workflows/sarplus.yml @@ -41,10 +41,10 @@ jobs: matrix: python-version: ["3.8", "3.9"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -96,7 +96,7 @@ jobs: - name: Upload Python wheel as GitHub artifact when merged into main # Upload the whl file of the specific python version if: github.ref == 'refs/heads/main' - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: pysarplus-${{ env.sarplus_version }}-cp${{ matrix.python-version }}-wheel path: ${{ env.PYTHON_ROOT }}/dist/*.whl @@ -104,7 +104,7 @@ jobs: - name: Upload Python source as GitHub artifact when merged into main # Only one pysarplus source tar file is needed if: github.ref == 'refs/heads/main' && matrix.python-version == '3.10' - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: pysarplus-${{ env.sarplus_version }}-source path: ${{ env.PYTHON_ROOT }}/dist/*.tar.gz @@ -131,7 +131,7 @@ jobs: hadoop-version: "3.3.1" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Test run: | @@ -180,7 +180,7 @@ jobs: - name: Upload Scala bundle as GitHub artifact when merged into main if: github.ref == 'refs/heads/main' - uses: 
actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: sarplus-${{ env.sarplus_version }}-bundle_2.12-spark-${{ matrix.spark-version }}-jar path: ${{ env.SCALA_ROOT }}/target/scala-2.12/*bundle*.jar diff --git a/.github/workflows/update_documentation.yml b/.github/workflows/update_documentation.yml index a6627dae1..30e2eadf1 100644 --- a/.github/workflows/update_documentation.yml +++ b/.github/workflows/update_documentation.yml @@ -16,10 +16,10 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.10" diff --git a/recommenders/models/deeprec/models/sequential/rnn_cell_implement.py b/recommenders/models/deeprec/models/sequential/rnn_cell_implement.py index 8d8f4c782..e48aaafb0 100644 --- a/recommenders/models/deeprec/models/sequential/rnn_cell_implement.py +++ b/recommenders/models/deeprec/models/sequential/rnn_cell_implement.py @@ -601,9 +601,9 @@ def __init__( ): self._build_bias = build_bias - if args is None or (nest.is_sequence(args) and not args): + if args is None or (nest.is_nested(args) and not args): raise ValueError("`args` must be specified") - if not nest.is_sequence(args): + if not nest.is_nested(args): args = [args] self._is_sequence = False else: diff --git a/setup.py b/setup.py index 13e06d06d..758765690 100644 --- a/setup.py +++ b/setup.py @@ -59,7 +59,7 @@ extras_require = { "gpu": [ "nvidia-ml-py3>=7.352.0", - "tensorflow==2.8.4", # FIXME: Temporarily pinned due to issue with TF version > 2.10.1 See #2018 + "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<3", "tf-slim>=1.1.0", "torch>=1.13.1", # for CUDA 11 support "fastai>=1.0.46,<2", @@ -73,6 +73,7 @@ "pytest>=3.6.4", "pytest-cov>=2.12.1", "pytest-mock>=3.6.1", # for access to mock fixtures in pytest + "packaging>=20.9", # for version comparison in test_dependency_security.py ], } # For the brave of heart diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py index 4fe1e5f8e..86d0c80ab 100644 --- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py +++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py @@ -37,7 +37,6 @@ """ import argparse import logging -import glob from azureml.core.authentication import AzureCliAuthentication from azureml.core import Workspace @@ -146,7 +145,6 @@ def setup_persistent_compute_target(workspace, cluster_name, vm_size, max_nodes) def create_run_config( cpu_cluster, - docker_proc_type, add_gpu_dependencies, add_spark_dependencies, conda_pkg_jdk, @@ -165,7 +163,6 @@ def create_run_config( the following: - Reco_cpu_test - Reco_gpu_test - docker_proc_type (str) : processor type, cpu or gpu add_gpu_dependencies (bool) : True if gpu packages should be added to the conda environment, else False add_spark_dependencies (bool) : True if PySpark packages should be @@ -179,7 +176,39 @@ def create_run_config( run_azuremlcompute = RunConfiguration() run_azuremlcompute.target = cpu_cluster run_azuremlcompute.environment.docker.enabled = True - run_azuremlcompute.environment.docker.base_image = docker_proc_type + if not add_gpu_dependencies: + # https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04 + run_azuremlcompute.environment.docker.base_image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04" + else: + run_azuremlcompute.environment.docker.base_image = None + # Use the latest CUDA + # 
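The new TensorFlow constraint chains several PEP 440 exclusions to reject specific 2.9.x and 2.10.0 builds while allowing everything else below 3. One way to sanity-check a multi-clause specifier like this is the packaging library, which this same patch adds to the dev extras (the check below is illustrative, not part of the test suite):

from packaging.specifiers import SpecifierSet

spec = SpecifierSet(">=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<3")
assert "2.8.4" in spec and "2.11.0" in spec          # allowed releases pass
assert "2.9.1" not in spec and "2.10.0" not in spec  # excluded builds rejected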
See + # * https://learn.microsoft.com/en-us/azure/machine-learning/how-to-train-with-custom-image?view=azureml-api-1#use-a-custom-dockerfile-optional + # * https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04 + run_azuremlcompute.environment.docker.base_dockerfile = r""" +FROM nvcr.io/nvidia/cuda:12.3.1-devel-ubuntu22.04 +USER root:root +ENV NVIDIA_VISIBLE_DEVICES all +ENV NVIDIA_DRIVER_CAPABILITIES compute,utility +ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 +ENV DEBIAN_FRONTEND noninteractive +RUN apt-get update && \ + apt-get install -y wget git-all && \ + apt-get clean -y && \ + rm -rf /var/lib/apt/lists/* +# Conda Environment +ENV MINICONDA_VERSION py38_23.3.1-0 +ENV PATH /opt/miniconda/bin:$PATH +ENV CONDA_PACKAGE 23.5.0 +RUN wget -qO /tmp/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh && \ + bash /tmp/miniconda.sh -bf -p /opt/miniconda && \ + conda install conda=${CONDA_PACKAGE} -y && \ + conda update --all -c conda-forge -y && \ + conda clean -ay && \ + rm -rf /opt/miniconda/pkgs && \ + rm /tmp/miniconda.sh && \ + find / -type d -name __pycache__ | xargs rm -rf +""" # Use conda_dependencies.yml to create a conda environment in # the Docker image for execution @@ -195,6 +224,7 @@ def create_run_config( # install recommenders reco_extras = "dev" + conda_dep.add_conda_package("anaconda::git") if add_gpu_dependencies and add_spark_dependencies: conda_dep.add_channel("conda-forge") conda_dep.add_conda_package(conda_pkg_jdk) @@ -326,13 +356,6 @@ def create_arg_parser(): default="STANDARD_D3_V2", help="Set the size of the VM either STANDARD_D3_V2", ) - # cpu or gpu - parser.add_argument( - "--dockerproc", - action="store", - default="cpu", - help="Base image used in docker container", - ) # Azure subscription id, when used in a pipeline, it is stored in keyvault parser.add_argument( "--subid", action="store", default="123456", help="Azure Subscription ID" @@ -421,16 +444,6 @@ def create_arg_parser(): logger = logging.getLogger("submit_groupwise_azureml_pytest.py") args = create_arg_parser() - - if args.dockerproc == "cpu": - from azureml.core.runconfig import DEFAULT_CPU_IMAGE - - docker_proc_type = DEFAULT_CPU_IMAGE - else: - from azureml.core.runconfig import DEFAULT_GPU_IMAGE - - docker_proc_type = DEFAULT_GPU_IMAGE - cli_auth = AzureCliAuthentication() workspace = setup_workspace( @@ -450,7 +463,6 @@ def create_arg_parser(): run_config = create_run_config( cpu_cluster=cpu_cluster, - docker_proc_type=docker_proc_type, add_gpu_dependencies=args.add_gpu_dependencies, add_spark_dependencies=args.add_spark_dependencies, conda_pkg_jdk=args.conda_pkg_jdk, diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py index 6c44411fe..f05e27a9f 100644 --- a/tests/ci/azureml_tests/test_groups.py +++ b/tests/ci/azureml_tests/test_groups.py @@ -69,23 +69,23 @@ "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_FFM_iterator", # 0.74s "tests/smoke/recommenders/recommender/test_newsrec_utils.py::test_news_iterator", # 3.04s # - "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_lightgcn", # 6.03s # FIXME: Issue with TF version > 2.10.1 See #2018 - "tests/functional/examples/test_notebooks_gpu.py::test_lightgcn_deep_dive_functional", # 19.45s # FIXME: Issue with TF version > 2.10.1 See #2018 + "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_lightgcn", # 6.03s + 
"tests/functional/examples/test_notebooks_gpu.py::test_lightgcn_deep_dive_functional", # 19.45s # - "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_sum", # 27.23s # FIXME: Issue with TF version > 2.10.1 See #2018 + # "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_sum", # 27.23s # FIXME: Disabled due to the issue with TF version > 2.10.1 See #2018 # "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_dkn", # 187.20s "tests/functional/examples/test_notebooks_gpu.py::test_dkn_quickstart_functional", # 1167.93s # - "tests/functional/examples/test_notebooks_gpu.py::test_slirec_quickstart_functional", # 175.00s # FIXME: Issue with TF version > 2.10.1 See #2018 - "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_slirec", # 346.72s # FIXME: Issue with TF version > 2.10.1 See #2018 + "tests/functional/examples/test_notebooks_gpu.py::test_slirec_quickstart_functional", # 175.00s + "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_slirec", # 346.72s ], "group_gpu_002": [ # Total group time: 1896.76s "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works) "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_xdeepfm", # 3.10s # FIXME: https://github.com/microsoft/recommenders/issues/1883 # "tests/smoke/examples/test_notebooks_gpu.py::test_xdeepfm_smoke", # 77.93s - "tests/functional/examples/test_notebooks_gpu.py::test_xdeepfm_functional", # FIXME: Issue with TF version > 2.10.1 See #2018 + "tests/functional/examples/test_notebooks_gpu.py::test_xdeepfm_functional", # "tests/smoke/examples/test_notebooks_gpu.py::test_cornac_bivae_smoke", # 67.84s "tests/functional/examples/test_notebooks_gpu.py::test_cornac_bivae_functional", # 453.21s @@ -426,12 +426,12 @@ ], "group_gpu_002": [ # Total group time: "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works) - "tests/unit/recommenders/models/test_deeprec_model.py::test_xdeepfm_component_definition", # FIXME: Issue with TF version > 2.10.1 See #2018 + # "tests/unit/recommenders/models/test_deeprec_model.py::test_xdeepfm_component_definition", # FIXME: Disabled due to the issue with TF version > 2.10.1 See #2018 "tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_component_definition", "tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_item2item_component_definition", - "tests/unit/recommenders/models/test_deeprec_model.py::test_slirec_component_definition", # FIXME: Issue with TF version > 2.10.1 See #2018 + "tests/unit/recommenders/models/test_deeprec_model.py::test_slirec_component_definition", "tests/unit/recommenders/models/test_deeprec_model.py::test_nextitnet_component_definition", - "tests/unit/recommenders/models/test_deeprec_model.py::test_sum_component_definition", # FIXME: Issue with TF version > 2.10.1 See #2018 + # "tests/unit/recommenders/models/test_deeprec_model.py::test_sum_component_definition", # FIXME: Disabled due to the issue with TF version > 2.10.1 See #2018 "tests/unit/recommenders/models/test_deeprec_model.py::test_lightgcn_component_definition", "tests/unit/recommenders/models/test_deeprec_utils.py::test_prepare_hparams", "tests/unit/recommenders/models/test_deeprec_utils.py::test_load_yaml_file", @@ -449,7 +449,7 @@ "group_notebooks_gpu_002": [ # Total group time: 241.15s "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to 
check the GPU works) "tests/unit/examples/test_notebooks_gpu.py::test_wide_deep", - "tests/unit/examples/test_notebooks_gpu.py::test_xdeepfm", # FIXME: Issue with TF version > 2.10.1 See #2018 + "tests/unit/examples/test_notebooks_gpu.py::test_xdeepfm", "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", ], } diff --git a/tests/security/test_dependency_security.py b/tests/security/test_dependency_security.py index 82a4f0596..fccb0b22b 100644 --- a/tests/security/test_dependency_security.py +++ b/tests/security/test_dependency_security.py @@ -7,6 +7,8 @@ import numpy as np import pandas as pd +from packaging.version import Version + try: import tensorflow as tf import torch @@ -16,17 +18,17 @@ def test_requests(): # Security issue: https://github.com/psf/requests/releases/tag/v2.31.0 - assert requests.__version__ >= "2.31.0" + assert Version(requests.__version__) >= Version("2.31.0") def test_numpy(): # Security issue: https://github.com/advisories/GHSA-frgw-fgh6-9g52 - assert np.__version__ >= "1.13.3" + assert Version(np.__version__) >= Version("1.13.3") def test_pandas(): # Security issue: https://github.com/advisories/GHSA-cmm9-mgm5-9r42 - assert pd.__version__ >= "1.0.3" + assert Version(pd.__version__) >= Version("1.0.3") @pytest.mark.gpu @@ -34,10 +36,10 @@ def test_tensorflow(): # Security issue: https://github.com/advisories/GHSA-w5gh-2wr2-pm6g # Security issue: https://github.com/advisories/GHSA-r6jx-9g48-2r5r # Security issue: https://github.com/advisories/GHSA-xxcj-rhqg-m46g - assert tf.__version__ >= "2.8.4" + assert Version(tf.__version__) >= Version("2.8.4") @pytest.mark.gpu def test_torch(): # Security issue: https://github.com/advisories/GHSA-47fc-vmwq-366v - assert torch.__version__ >= "1.13.1" + assert Version(torch.__version__) >= Version("1.13.1") From 15fbf90eb993b17f592d8c980dece4de1bbdb2ad Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Thu, 22 Feb 2024 11:37:49 +0800 Subject: [PATCH 39/74] Pin pip=20.1.1 --- tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py index 86d0c80ab..af8b22b19 100644 --- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py +++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py @@ -197,12 +197,14 @@ def create_run_config( apt-get clean -y && \ rm -rf /var/lib/apt/lists/* # Conda Environment +# Pin pip=20.1.1 due to the issue: No module named 'ruamel' +# See https://learn.microsoft.com/en-us/python/api/overview/azure/ml/install?view=azure-ml-py#troubleshooting ENV MINICONDA_VERSION py38_23.3.1-0 ENV PATH /opt/miniconda/bin:$PATH ENV CONDA_PACKAGE 23.5.0 RUN wget -qO /tmp/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh && \ bash /tmp/miniconda.sh -bf -p /opt/miniconda && \ - conda install conda=${CONDA_PACKAGE} -y && \ + conda install -y conda=${CONDA_PACKAGE} python=3.8 pip=20.1.1 && \ conda update --all -c conda-forge -y && \ conda clean -ay && \ rm -rf /opt/miniconda/pkgs && \ From a7f8346b3ef4842c7e6c7056f3e62e8141922845 Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Thu, 22 Feb 2024 13:21:34 +0800 Subject: [PATCH 40/74] Update dep versions --- setup.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index f3dd7854f..f6875cdba 100644 --- a/setup.py +++ b/setup.py @@ -27,19 +27,19 @@ version += ".post" + 
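The packaging.version.Version change in the security tests above fixes a real bug: comparing version strings lexicographically gives wrong answers once a component reaches two digits. A minimal illustration:

from packaging.version import Version

assert "2.9.1" >= "2.31.0"                    # string comparison: wrong, since '9' > '3'
assert Version("2.9.1") < Version("2.31.0")   # PEP 440 comparison: correct
assert Version("2.31.0") >= Version("2.31.0")

This is also why the dev extras gain packaging>=20.9 in the same patch.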
str(int(time.time())) install_requires = [ - "pandas>1.5.2,<1.6", # requires numpy + "pandas>1.5.3,<3.0.0", # requires numpy "scikit-learn>=1.1.3,<2", # requires scipy "numba>=0.57.0,<1", "lightfm>=1.17,<2", - "lightgbm>=3.3.2,<5", + "lightgbm>=4.0.0,<5", "memory-profiler>=0.61.0,<1", "nltk>=3.8.1,<4", # requires tqdm - "seaborn>=0.12.0,<1", # requires matplotlib + "seaborn>=0.13.0,<1", # requires matplotlib "transformers>=4.27.0,<5", # requires pyyaml, tqdm "category-encoders>=2.6.0,<3", "jinja2>=3.1.0,<3.2", "cornac>=1.15.2,<2", # requires tqdm - "retrying>=1.3.4", + "retrying>=1.3.4,<2", "pandera[strategies]>=0.6.5,<0.18;python_version<='3.8'", # For generating fake datasets "pandera[strategies]>=0.15.0;python_version>='3.9'", "scikit-surprise>=1.1.3", @@ -51,22 +51,22 @@ # shared dependencies extras_require = { "gpu": [ - "nvidia-ml-py>=11.510.69", + "nvidia-ml-py>=11.525.84", "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<3", - "tf-slim>=1.1.0", - "torch>=2.0.1", + "tf-slim>=1.1.0", # No python_requires in its setup.py + "torch>=2.0.1,<3", "fastai>=2.7.11,<3", ], "spark": [ "pyarrow>=10.0.1", - "pyspark>=3.0.1,<=3.4.0", + "pyspark>=3.3.0,<=4", ], "dev": [ - "black>=23.3.0,<24", + "black>=23.3.0", "pytest>=7.2.1", "pytest-cov>=4.1.0", "pytest-mock>=3.10.0", # for access to mock fixtures in pytest - "packaging>=20.9", # for version comparison in test_dependency_security.py + "packaging>=22.0", # for version comparison in test_dependency_security.py ], } # For the brave of heart From 9f9c81531b86211af0227e166851821959c0486b Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Thu, 22 Feb 2024 14:53:47 +0800 Subject: [PATCH 41/74] Fix pandas import --- setup.py | 2 +- tests/unit/recommenders/evaluation/test_spark_evaluation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index f6875cdba..cc784959f 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ version += ".post" + str(int(time.time())) install_requires = [ - "pandas>1.5.3,<3.0.0", # requires numpy + "pandas>2.0.0,<3.0.0", # requires numpy "scikit-learn>=1.1.3,<2", # requires scipy "numba>=0.57.0,<1", "lightfm>=1.17,<2", diff --git a/tests/unit/recommenders/evaluation/test_spark_evaluation.py b/tests/unit/recommenders/evaluation/test_spark_evaluation.py index 278a2e287..55c064e8b 100644 --- a/tests/unit/recommenders/evaluation/test_spark_evaluation.py +++ b/tests/unit/recommenders/evaluation/test_spark_evaluation.py @@ -5,7 +5,7 @@ import pytest import numpy as np import pandas as pd -from pandas.util.testing import assert_frame_equal +from pandas.testing import assert_frame_equal from recommenders.evaluation.python_evaluation import ( precision_at_k, From 2fdf5901aaaabf59c687163e46dbfe8ff86826c7 Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Thu, 22 Feb 2024 20:30:38 +0800 Subject: [PATCH 42/74] Set scipy <1.11.0 and sort dependencies alphabetically --- setup.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/setup.py b/setup.py index cc784959f..7a558ca73 100644 --- a/setup.py +++ b/setup.py @@ -27,35 +27,36 @@ version += ".post" + str(int(time.time())) install_requires = [ - "pandas>2.0.0,<3.0.0", # requires numpy - "scikit-learn>=1.1.3,<2", # requires scipy - "numba>=0.57.0,<1", + "category-encoders>=2.6.0,<3", + "cornac>=1.15.2,<2", # requires tqdm + "hyperopt>=0.2.7,<1", + "jinja2>=3.1.0,<3.2", "lightfm>=1.17,<2", "lightgbm>=4.0.0,<5", + "locust>=2.12.2,<3", "memory-profiler>=0.61.0,<1", "nltk>=3.8.1,<4", # requires tqdm - 
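The pandas import fix is needed because pandas.util.testing, deprecated since pandas 1.0, was removed in pandas 2.0, which the updated pin now allows. The public replacement behaves the same (the toy frame below is made up):

import pandas as pd
from pandas.testing import assert_frame_equal  # pandas.util.testing is gone in pandas>=2

df = pd.DataFrame({"userID": [1, 2], "rating": [3.0, 4.0]})
assert_frame_equal(df, df.copy())  # silent on equality, raises AssertionError on mismatch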
"seaborn>=0.13.0,<1", # requires matplotlib - "transformers>=4.27.0,<5", # requires pyyaml, tqdm - "category-encoders>=2.6.0,<3", - "jinja2>=3.1.0,<3.2", - "cornac>=1.15.2,<2", # requires tqdm - "retrying>=1.3.4,<2", + "notebook>=7.0.0,<8", # requires jupyter, ipykernel + "numba>=0.57.0,<1", + "pandas>2.0.0,<3.0.0", # requires numpy "pandera[strategies]>=0.6.5,<0.18;python_version<='3.8'", # For generating fake datasets "pandera[strategies]>=0.15.0;python_version>='3.9'", + "retrying>=1.3.4,<2", + "scikit-learn>=1.1.3,<2", # requires scipy "scikit-surprise>=1.1.3", - "hyperopt>=0.2.7,<1", - "notebook>=7.0.0,<8", # requires jupyter, ipykernel - "locust>=2.12.2,<3", + "scipy>=1.7.2,<1.11.0", # FIXME: We limit <1.11.0 until #1954 is fixed + "seaborn>=0.13.0,<1", # requires matplotlib + "transformers>=4.27.0,<5", # requires pyyaml, tqdm ] # shared dependencies extras_require = { "gpu": [ + "fastai>=2.7.11,<3", "nvidia-ml-py>=11.525.84", "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<3", "tf-slim>=1.1.0", # No python_requires in its setup.py "torch>=2.0.1,<3", - "fastai>=2.7.11,<3", ], "spark": [ "pyarrow>=10.0.1", @@ -63,10 +64,10 @@ ], "dev": [ "black>=23.3.0", + "packaging>=22.0", # for version comparison in test_dependency_security.py "pytest>=7.2.1", "pytest-cov>=4.1.0", "pytest-mock>=3.10.0", # for access to mock fixtures in pytest - "packaging>=22.0", # for version comparison in test_dependency_security.py ], } # For the brave of heart From b2fef7a9d4705200e9be8c89a7290b1d36204acb Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Thu, 22 Feb 2024 22:04:48 +0800 Subject: [PATCH 43/74] Fix error caused by changes in scikit-learn --- recommenders/models/tfidf/tfidf_utils.py | 4 ++-- setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/recommenders/models/tfidf/tfidf_utils.py b/recommenders/models/tfidf/tfidf_utils.py index 24575121c..6a6d22389 100644 --- a/recommenders/models/tfidf/tfidf_utils.py +++ b/recommenders/models/tfidf/tfidf_utils.py @@ -115,7 +115,7 @@ def clean_dataframe(self, df, cols_to_clean, new_col_name="cleaned_text"): return df def tokenize_text( - self, df_clean, text_col="cleaned_text", ngram_range=(1, 3), min_df=0 + self, df_clean, text_col="cleaned_text", ngram_range=(1, 3), min_df=0.0 ): """Tokenize the input text. For more details on the TfidfVectorizer, see https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html @@ -124,7 +124,7 @@ def tokenize_text( df_clean (pandas.DataFrame): Dataframe with cleaned text in the new column. text_col (str): Name of column containing the cleaned text. ngram_range (tuple of int): The lower and upper boundary of the range of n-values for different n-grams to be extracted. - min_df (int): When building the vocabulary ignore terms that have a document frequency strictly lower than the given threshold. + min_df (float): When building the vocabulary ignore terms that have a document frequency strictly lower than the given threshold. 
Returns: TfidfVectorizer, pandas.Series: diff --git a/setup.py b/setup.py index 7a558ca73..72fc94819 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ "pandera[strategies]>=0.6.5,<0.18;python_version<='3.8'", # For generating fake datasets "pandera[strategies]>=0.15.0;python_version>='3.9'", "retrying>=1.3.4,<2", - "scikit-learn>=1.1.3,<2", # requires scipy + "scikit-learn>=1.2.0,<2", # requires scipy, and introduce breaking change affects feature_extraction.text.TfidfVectorizer.min_df "scikit-surprise>=1.1.3", "scipy>=1.7.2,<1.11.0", # FIXME: We limit <1.11.0 until #1954 is fixed "seaborn>=0.13.0,<1", # requires matplotlib From 9a225d5464cc2bc9961dce41d4014dba8574deb8 Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Fri, 23 Feb 2024 11:01:33 +0800 Subject: [PATCH 44/74] Replace CollabDataBunch with CollabDataLoaders --- examples/00_quick_start/fastai_movielens.ipynb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb index 517673178..f32c93be6 100644 --- a/examples/00_quick_start/fastai_movielens.ipynb +++ b/examples/00_quick_start/fastai_movielens.ipynb @@ -46,7 +46,7 @@ "import fastai\n", "from tempfile import TemporaryDirectory\n", "\n", - "from fastai.collab import collab_learner, CollabDataBunch, load_learner\n", + "from fastai.collab import collab_learner, CollabDataLoaders, load_learner\n", "\n", "from recommenders.utils.constants import (\n", " DEFAULT_USER_COL as USER, \n", @@ -258,11 +258,11 @@ "outputs": [], "source": [ "with Timer() as preprocess_time:\n", - " data = CollabDataBunch.from_df(train_valid_df, \n", - " user_name=USER, \n", - " item_name=ITEM, \n", - " rating_name=RATING, \n", - " valid_pct=0)\n" + " data = CollabDataLoaders.from_df(train_valid_df, \n", + " user_name=USER, \n", + " item_name=ITEM, \n", + " rating_name=RATING, \n", + " valid_pct=0)\n" ] }, { From 5484d9bced6f1eae0d0e467a28f0db081550e7b0 Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Fri, 23 Feb 2024 12:17:37 +0800 Subject: [PATCH 45/74] Replace max_lr with lr_max --- examples/00_quick_start/fastai_movielens.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb index f32c93be6..227effc97 100644 --- a/examples/00_quick_start/fastai_movielens.ipynb +++ b/examples/00_quick_start/fastai_movielens.ipynb @@ -432,7 +432,7 @@ ], "source": [ "with Timer() as train_time:\n", - " learn.fit_one_cycle(EPOCHS, max_lr=5e-3)\n", + " learn.fit_one_cycle(EPOCHS, lr_max=5e-3)\n", "\n", "print(\"Took {} seconds for training.\".format(train_time))" ] From 69444044e4beda8b8d86d3c38decbed58172dfe3 Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Fri, 23 Feb 2024 15:02:15 +0800 Subject: [PATCH 46/74] Correct usage of load_learner in fastai --- examples/00_quick_start/fastai_movielens.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb index 227effc97..0417e59eb 100644 --- a/examples/00_quick_start/fastai_movielens.ipynb +++ b/examples/00_quick_start/fastai_movielens.ipynb @@ -478,7 +478,7 @@ "metadata": {}, "outputs": [], "source": [ - "learner = load_learner(tmp.name, \"movielens_model.pkl\")" + "learner = load_learner(model_path)" ] }, { From 0e69106333c11b84a7d69e159191a514f3996b71 Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Fri, 23 Feb 2024 17:24:23 +0800 Subject: 
[PATCH 47/74] Replace learner.data.train_ds.x.classes.values() with learner.dls.classes.values() --- examples/00_quick_start/fastai_movielens.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb index 0417e59eb..b475d09cf 100644 --- a/examples/00_quick_start/fastai_movielens.ipynb +++ b/examples/00_quick_start/fastai_movielens.ipynb @@ -494,7 +494,7 @@ "metadata": {}, "outputs": [], "source": [ - "total_users, total_items = learner.data.train_ds.x.classes.values()\n", + "total_users, total_items = learner.dls.classes.values()\n", "total_items = total_items[1:]\n", "total_users = total_users[1:]" ] From 22ef9b9902525053b82c586cb2bbd73f6511696d Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Fri, 23 Feb 2024 18:22:38 +0800 Subject: [PATCH 48/74] Replace learner.data.train_ds.x.classes.values() with learner.dls.classes.values() in fastai_utils --- recommenders/models/fastai/fastai_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py index ab756c7e8..60500b1e7 100644 --- a/recommenders/models/fastai/fastai_utils.py +++ b/recommenders/models/fastai/fastai_utils.py @@ -51,7 +51,7 @@ def score( pandas.DataFrame: Result of recommendation """ # replace values not known to the model with NaN - total_users, total_items = learner.data.train_ds.x.classes.values() + total_users, total_items = learner.dls.classes.values() test_df.loc[~test_df[user_col].isin(total_users), user_col] = np.nan test_df.loc[~test_df[item_col].isin(total_items), item_col] = np.nan From a5fea78dd48952193d70abd221ac0c1b1709e572 Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Fri, 23 Feb 2024 20:02:16 +0800 Subject: [PATCH 49/74] Upgrade fastai code --- recommenders/models/fastai/fastai_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py index 60500b1e7..ccc7232d0 100644 --- a/recommenders/models/fastai/fastai_utils.py +++ b/recommenders/models/fastai/fastai_utils.py @@ -6,6 +6,7 @@ import pandas as pd import fastai import fastprogress +import torch from fastprogress.fastprogress import force_console_behavior from recommenders.utils import constants as cc @@ -56,11 +57,11 @@ def score( test_df.loc[~test_df[item_col].isin(total_items), item_col] = np.nan # map ids to embedding ids - u = learner.get_idx(test_df[user_col], is_item=False) - m = learner.get_idx(test_df[item_col], is_item=True) + u = learner._get_idx(test_df[user_col], is_item=False) + m = learner._get_idx(test_df[item_col], is_item=True) # score the pytorch model - pred = learner.model.forward(u, m) + pred = learner.model.forward(torch.column_stack(u, m)) scores = pd.DataFrame( {user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred} ) From dccac172e9692f42b51cf970d1da2d4555d3ec9d Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Fri, 23 Feb 2024 21:19:56 +0800 Subject: [PATCH 50/74] Correct the usage of torch.column_stack() --- recommenders/models/fastai/fastai_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py index ccc7232d0..3188cb460 100644 --- a/recommenders/models/fastai/fastai_utils.py +++ b/recommenders/models/fastai/fastai_utils.py @@ -61,7 +61,7 @@ def score( m = 
learner._get_idx(test_df[item_col], is_item=True) # score the pytorch model - pred = learner.model.forward(torch.column_stack(u, m)) + pred = learner.model.forward(torch.column_stack((u, m))) scores = pd.DataFrame( {user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred} ) From d3b0ad7e71b2f88981cb30281dd0b7eab4eb8de7 Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Sat, 24 Feb 2024 09:16:59 +0800 Subject: [PATCH 51/74] Correct conversion from tensor to numpy --- recommenders/models/fastai/fastai_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py index 3188cb460..e5dc502aa 100644 --- a/recommenders/models/fastai/fastai_utils.py +++ b/recommenders/models/fastai/fastai_utils.py @@ -61,7 +61,7 @@ def score( m = learner._get_idx(test_df[item_col], is_item=True) # score the pytorch model - pred = learner.model.forward(torch.column_stack((u, m))) + pred = learner.model.forward(torch.column_stack((u, m))).detach().numpy() scores = pd.DataFrame( {user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred} ) From d249bfe6aa5814d7219160ce0b850dd7b5dbe15b Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Sat, 24 Feb 2024 11:33:27 +0800 Subject: [PATCH 52/74] Remove duplicate dependencies jinja2 and packaging required other packages --- setup.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index 72fc94819..57d22117e 100644 --- a/setup.py +++ b/setup.py @@ -27,16 +27,15 @@ version += ".post" + str(int(time.time())) install_requires = [ - "category-encoders>=2.6.0,<3", - "cornac>=1.15.2,<2", # requires tqdm + "category-encoders>=2.6.0,<3", # requires packaging + "cornac>=1.15.2,<2", # requires packaging, tqdm "hyperopt>=0.2.7,<1", - "jinja2>=3.1.0,<3.2", - "lightfm>=1.17,<2", + "lightfm>=1.17,<2", # requires requests "lightgbm>=4.0.0,<5", - "locust>=2.12.2,<3", + "locust>=2.12.2,<3", # requires jinja2 "memory-profiler>=0.61.0,<1", "nltk>=3.8.1,<4", # requires tqdm - "notebook>=7.0.0,<8", # requires jupyter, ipykernel + "notebook>=7.0.0,<8", # requires ipykernel, jinja2, jupyter, nbconvert, nbformat, packaging, requests "numba>=0.57.0,<1", "pandas>2.0.0,<3.0.0", # requires numpy "pandera[strategies]>=0.6.5,<0.18;python_version<='3.8'", # For generating fake datasets @@ -44,9 +43,9 @@ "retrying>=1.3.4,<2", "scikit-learn>=1.2.0,<2", # requires scipy, and introduce breaking change affects feature_extraction.text.TfidfVectorizer.min_df "scikit-surprise>=1.1.3", - "scipy>=1.7.2,<1.11.0", # FIXME: We limit <1.11.0 until #1954 is fixed - "seaborn>=0.13.0,<1", # requires matplotlib - "transformers>=4.27.0,<5", # requires pyyaml, tqdm + "scipy>=1.10.1,<1.11.0", # FIXME: We limit <1.11.0 until #1954 is fixed + "seaborn>=0.13.0,<1", # requires matplotlib, packaging + "transformers>=4.27.0,<5", # requires packaging, pyyaml, requests, tqdm ] # shared dependencies @@ -64,7 +63,6 @@ ], "dev": [ "black>=23.3.0", - "packaging>=22.0", # for version comparison in test_dependency_security.py "pytest>=7.2.1", "pytest-cov>=4.1.0", "pytest-mock>=3.10.0", # for access to mock fixtures in pytest From 547ab6663628742efa5bab7924f964a35f9a04fb Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Sat, 24 Feb 2024 11:36:19 +0800 Subject: [PATCH 53/74] Try Python 3.11 --- .github/workflows/azureml-cpu-nightly.yml | 2 +- .github/workflows/azureml-gpu-nightly.yml | 2 +- .github/workflows/azureml-spark-nightly.yml | 2 +- 
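The two fastai scoring fixes above address separate pitfalls: torch.column_stack takes a single sequence of tensors, hence the doubled parentheses, and a tensor that tracks gradients must be detached before .numpy(). Both in one standalone sketch (the weight tensor is a made-up stand-in for the model, not fastai code):

import torch

u = torch.tensor([0, 1, 2])               # user indices
m = torch.tensor([5, 6, 7])               # item indices
x = torch.column_stack((u, m))            # one tuple argument -> shape (3, 2)
assert x.shape == (3, 2)

w = torch.ones(2, 1, requires_grad=True)  # stand-in for trained weights
pred = (x.float() @ w).squeeze(1)         # output still tracks gradients
scores = pred.detach().numpy()            # plain .numpy() would raise RuntimeError here
assert scores.shape == (3,)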
.github/workflows/azureml-unit-tests.yml | 2 +- .github/workflows/sarplus.yml | 2 +- setup.py | 1 + 6 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/azureml-cpu-nightly.yml b/.github/workflows/azureml-cpu-nightly.yml index f6c3837b8..93e414564 100644 --- a/.github/workflows/azureml-cpu-nightly.yml +++ b/.github/workflows/azureml-cpu-nightly.yml @@ -67,7 +67,7 @@ jobs: strategy: max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration matrix: - python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"'] + python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"'] test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }} steps: - name: Check out repository code diff --git a/.github/workflows/azureml-gpu-nightly.yml b/.github/workflows/azureml-gpu-nightly.yml index 0f122bda5..3b9f6d6b4 100644 --- a/.github/workflows/azureml-gpu-nightly.yml +++ b/.github/workflows/azureml-gpu-nightly.yml @@ -67,7 +67,7 @@ jobs: strategy: max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration matrix: - python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"'] + python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"'] test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }} steps: - name: Check out repository code diff --git a/.github/workflows/azureml-spark-nightly.yml b/.github/workflows/azureml-spark-nightly.yml index 30f88a52d..8f28be6f2 100644 --- a/.github/workflows/azureml-spark-nightly.yml +++ b/.github/workflows/azureml-spark-nightly.yml @@ -66,7 +66,7 @@ jobs: strategy: max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration matrix: - python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"'] + python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"'] test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }} steps: - name: Check out repository code diff --git a/.github/workflows/azureml-unit-tests.yml b/.github/workflows/azureml-unit-tests.yml index 385bf005a..b39268318 100644 --- a/.github/workflows/azureml-unit-tests.yml +++ b/.github/workflows/azureml-unit-tests.yml @@ -56,7 +56,7 @@ jobs: strategy: max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration matrix: - python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"'] + python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"'] test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }} steps: - name: Check out repository code diff --git a/.github/workflows/sarplus.yml b/.github/workflows/sarplus.yml index 0e743d792..90d03fef6 100644 --- a/.github/workflows/sarplus.yml +++ b/.github/workflows/sarplus.yml @@ -39,7 +39,7 @@ jobs: runs-on: ubuntu-22.04 strategy: matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v4 diff --git a/setup.py b/setup.py index 57d22117e..9d606c8dc 100644 --- a/setup.py +++ b/setup.py @@ -110,6 +110,7 @@ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Operating System :: POSIX :: Linux", ], extras_require=extras_require, From 
ed3b632ef0f1406204ce080cf99b320c3e8680b4 Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Sat, 24 Feb 2024 13:41:29 +0800 Subject: [PATCH 54/74] Allow Python 3.11 for sarplus --- contrib/sarplus/python/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/contrib/sarplus/python/setup.py b/contrib/sarplus/python/setup.py index 4009ec751..f755f5310 100644 --- a/contrib/sarplus/python/setup.py +++ b/contrib/sarplus/python/setup.py @@ -42,6 +42,7 @@ def __str__(self): "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "Topic :: Scientific/Engineering :: Mathematics", @@ -49,7 +50,7 @@ def __str__(self): setup_requires=["pytest-runner"], install_requires=DEPENDENCIES, tests_require=["pytest"], - python_requires=">=3.6,<3.11", + python_requires=">=3.6,<3.12", packages=["pysarplus"], package_data={"": ["VERSION"]}, ext_modules=[ From c8d90f755b08a18ec3f03554c6b92feadc7ec50b Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Sat, 24 Feb 2024 19:23:32 +0100 Subject: [PATCH 55/74] Rerun and fix fastai movielens notebook Signed-off-by: miguelgfierro --- .../00_quick_start/fastai_movielens.ipynb | 353 ++++++++++-------- 1 file changed, 205 insertions(+), 148 deletions(-) diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb index b475d09cf..944b92623 100644 --- a/examples/00_quick_start/fastai_movielens.ipynb +++ b/examples/00_quick_start/fastai_movielens.ipynb @@ -27,17 +27,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "System version: 3.6.11 | packaged by conda-forge | (default, Aug 5 2020, 20:09:42) \n", - "[GCC 7.5.0]\n", - "Pandas version: 0.25.3\n", - "Fast AI version: 1.0.46\n", - "Torch version: 1.4.0\n", - "Cuda Available: False\n", + "System version: 3.9.16 (main, May 15 2023, 23:46:34) \n", + "[GCC 11.2.0]\n", + "Pandas version: 1.5.3\n", + "Fast AI version: 2.7.11\n", + "Torch version: 1.13.1+cu117\n", + "CUDA Available: True\n", "CuDNN Enabled: True\n" ] } ], "source": [ + "# Suppress all warnings\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "\n", "import os\n", "import sys\n", "import numpy as np\n", @@ -67,7 +71,7 @@ "print(\"Pandas version: {}\".format(pd.__version__))\n", "print(\"Fast AI version: {}\".format(fastai.__version__))\n", "print(\"Torch version: {}\".format(torch.__version__))\n", - "print(\"Cuda Available: {}\".format(torch.cuda.is_available()))\n", + "print(\"CUDA Available: {}\".format(torch.cuda.is_available()))\n", "print(\"CuDNN Enabled: {}\".format(torch.backends.cudnn.enabled))" ] }, @@ -80,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "tags": [ "parameters" @@ -101,14 +105,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 4.81k/4.81k [00:01<00:00, 4.49kKB/s]\n" + "100%|██████████| 4.81k/4.81k [00:01<00:00, 3.52kKB/s]\n" ] }, { @@ -132,10 +136,10 @@ " \n", " \n", " \n", - " UserId\n", - " MovieId\n", - " Rating\n", - " Timestamp\n", + " userID\n", + " itemID\n", + " rating\n", + " timestamp\n", " \n", " \n", " \n", @@ -179,15 +183,15 @@ "" ], "text/plain": [ - " UserId MovieId Rating Timestamp\n", - "0 196 242 3.0 881250949\n", - "1 186 302 3.0 891717742\n", - "2 22 377 1.0 
878887116\n", - "3 244 51 2.0 880606923\n", - "4 166 346 1.0 886397596" + " userID itemID rating timestamp\n", + "0 196 242 3.0 881250949\n", + "1 186 302 3.0 891717742\n", + "2 22 377 1.0 878887116\n", + "3 244 51 2.0 880606923\n", + "4 166 346 1.0 886397596" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -207,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -224,7 +228,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -276,37 +280,73 @@ "\n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", "
[data preview table: HTML cell markup stripped during extraction; the diff renames the header cells UserId / MovieId / target to userID / itemID / rating and re-renders the sample rows, whose individual cell values are no longer recoverable]
" ], @@ -369,6 +409,33 @@ "execution_count": 10, "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ @@ -383,34 +450,34 @@ " \n", " \n", " \n", + " 0\n", + " 0.961789\n", + " None\n", + " 00:09\n", + " \n", + " \n", " 1\n", - " 0.985993\n", - " \n", - " 00:05\n", + " 0.863359\n", + " None\n", + " 00:08\n", " \n", " \n", " 2\n", - " 0.885496\n", - " \n", - " 00:05\n", + " 0.750853\n", + " None\n", + " 00:07\n", " \n", " \n", " 3\n", - " 0.777637\n", - " \n", - " 00:05\n", + " 0.637868\n", + " None\n", + " 00:08\n", " \n", " \n", " 4\n", - " 0.628971\n", - " \n", - " 00:05\n", - " \n", - " \n", - " 5\n", - " 0.532328\n", - " \n", - " 00:06\n", + " 0.526907\n", + " None\n", + " 00:09\n", " \n", " \n", "" @@ -426,7 +493,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Took 29.5549 seconds for training.\n" + "Took 51.5260 seconds for training.\n" ] } ], @@ -446,7 +513,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -456,7 +523,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -474,7 +541,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -490,7 +557,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -508,7 +575,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -525,7 +592,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -545,7 +612,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": { "scrolled": false }, @@ -564,14 +631,14 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Took 1.9734 seconds for 1511060 predictions.\n" + "Took 5.1570 seconds for 1511060 predictions.\n" ] } ], @@ -595,7 +662,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -606,7 +673,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -617,7 +684,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -628,7 +695,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -639,27 +706,27 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Model:\tCollabLearner\n", - "Top K:\t10\n", - "MAP:\t0.026115\n", - "NDCG:\t0.155065\n", - "Precision@K:\t0.136691\n", - "Recall@K:\t0.054940\n" + "Model:\t\tLearner\n", + "Top K:\t\t10\n", + "MAP:\t\t0.024119\n", + "NDCG:\t\t0.152808\n", + "Precision@K:\t0.139130\n", + "Recall@K:\t0.054943\n" ] } ], "source": [ - "print(\"Model:\\t\" + learn.__class__.__name__,\n", - " \"Top K:\\t%d\" % TOP_K,\n", - " \"MAP:\\t%f\" % eval_map,\n", - " \"NDCG:\\t%f\" % eval_ndcg,\n", + "print(\"Model:\\t\\t\" + learn.__class__.__name__,\n", + " 
\"Top K:\\t\\t%d\" % TOP_K,\n", + " \"MAP:\\t\\t%f\" % eval_map,\n", + " \"NDCG:\\t\\t%f\" % eval_ndcg,\n", " \"Precision@K:\\t%f\" % eval_precision,\n", " \"Recall@K:\\t%f\" % eval_recall, sep='\\n')" ] @@ -673,7 +740,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -693,18 +760,18 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Model:\tCollabLearner\n", - "RMSE:\t0.902379\n", - "MAE:\t0.712163\n", - "Explained variance:\t0.346523\n", - "R squared:\t0.345672\n" + "Model:\t\t\tLearner\n", + "RMSE:\t\t\t0.904589\n", + "MAE:\t\t\t0.715827\n", + "Explained variance:\t0.356082\n", + "R squared:\t\t0.355173\n" ] } ], @@ -714,36 +781,35 @@ "eval_mae = mae(test_df, scores, col_user=USER, col_item=ITEM, col_rating=RATING, col_prediction=PREDICTION)\n", "eval_exp_var = exp_var(test_df, scores, col_user=USER, col_item=ITEM, col_rating=RATING, col_prediction=PREDICTION)\n", "\n", - "print(\"Model:\\t\" + learn.__class__.__name__,\n", - " \"RMSE:\\t%f\" % eval_rmse,\n", - " \"MAE:\\t%f\" % eval_mae,\n", + "print(\"Model:\\t\\t\\t\" + learn.__class__.__name__,\n", + " \"RMSE:\\t\\t\\t%f\" % eval_rmse,\n", + " \"MAE:\\t\\t\\t%f\" % eval_mae,\n", " \"Explained variance:\\t%f\" % eval_exp_var,\n", - " \"R squared:\\t%f\" % eval_r2, sep='\\n')" + " \"R squared:\\t\\t%f\" % eval_r2, sep='\\n')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "That RMSE is actually quite good when compared to these benchmarks: https://www.librec.net/release/v1.3/example.html" + "That RMSE is competitive in comparison with other models." ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "metadata": {}, "outputs": [ { "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.02611475567509659, + "application/notebook_utils.json+json": { + "data": 0.024118782738867094, "encoder": "json", - "name": "map", - "version": 1 + "name": "map" } }, "metadata": { - "scrapbook": { + "notebook_utils": { "data": true, "display": false, "name": "map" @@ -753,15 +819,14 @@ }, { "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.15506533130248687, + "application/notebook_utils.json+json": { + "data": 0.1528081472533914, "encoder": "json", - "name": "ndcg", - "version": 1 + "name": "ndcg" } }, "metadata": { - "scrapbook": { + "notebook_utils": { "data": true, "display": false, "name": "ndcg" @@ -771,15 +836,14 @@ }, { "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.13669141039236482, + "application/notebook_utils.json+json": { + "data": 0.13913043478260873, "encoder": "json", - "name": "precision", - "version": 1 + "name": "precision" } }, "metadata": { - "scrapbook": { + "notebook_utils": { "data": true, "display": false, "name": "precision" @@ -789,15 +853,14 @@ }, { "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.05493986799753499, + "application/notebook_utils.json+json": { + "data": 0.05494302697544413, "encoder": "json", - "name": "recall", - "version": 1 + "name": "recall" } }, "metadata": { - "scrapbook": { + "notebook_utils": { "data": true, "display": false, "name": "recall" @@ -807,15 +870,14 @@ }, { "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.9023793356156464, + "application/notebook_utils.json+json": { + "data": 0.9045892929999733, "encoder": "json", - "name": "rmse", - "version": 1 + "name": "rmse" } }, 
"metadata": { - "scrapbook": { + "notebook_utils": { "data": true, "display": false, "name": "rmse" @@ -825,15 +887,14 @@ }, { "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.7121634655740025, + "application/notebook_utils.json+json": { + "data": 0.7158267242352735, "encoder": "json", - "name": "mae", - "version": 1 + "name": "mae" } }, "metadata": { - "scrapbook": { + "notebook_utils": { "data": true, "display": false, "name": "mae" @@ -843,15 +904,14 @@ }, { "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.34652281723228295, + "application/notebook_utils.json+json": { + "data": 0.3560824305444269, "encoder": "json", - "name": "exp_var", - "version": 1 + "name": "exp_var" } }, "metadata": { - "scrapbook": { + "notebook_utils": { "data": true, "display": false, "name": "exp_var" @@ -861,15 +921,14 @@ }, { "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.3456716162958503, + "application/notebook_utils.json+json": { + "data": 0.35517333876960555, "encoder": "json", - "name": "rsquared", - "version": 1 + "name": "rsquared" } }, "metadata": { - "scrapbook": { + "notebook_utils": { "data": true, "display": false, "name": "rsquared" @@ -879,15 +938,14 @@ }, { "data": { - "application/scrapbook.scrap.json+json": { - "data": 29.554921820759773, + "application/notebook_utils.json+json": { + "data": 51.52598460000445, "encoder": "json", - "name": "train_time", - "version": 1 + "name": "train_time" } }, "metadata": { - "scrapbook": { + "notebook_utils": { "data": true, "display": false, "name": "train_time" @@ -897,15 +955,14 @@ }, { "data": { - "application/scrapbook.scrap.json+json": { - "data": 1.973397959023714, + "application/notebook_utils.json+json": { + "data": 5.156951100005244, "encoder": "json", - "name": "test_time", - "version": 1 + "name": "test_time" } }, "metadata": { - "scrapbook": { + "notebook_utils": { "data": true, "display": false, "name": "test_time" @@ -930,7 +987,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -946,9 +1003,9 @@ "metadata": { "celltoolbar": "Tags", "kernelspec": { - "display_name": "Python (reco_gpu)", + "display_name": "recommenders", "language": "python", - "name": "reco_gpu" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -960,7 +1017,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.11" + "version": "3.9.16" } }, "nbformat": 4, From d9ec1cd1ccd7fb77b45a0c4d43559041539fc6c3 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Sat, 24 Feb 2024 19:32:50 +0100 Subject: [PATCH 56/74] Fixed deprecated attribute in fastai Signed-off-by: miguelgfierro --- recommenders/models/fastai/fastai_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py index e5dc502aa..6e805ae17 100644 --- a/recommenders/models/fastai/fastai_utils.py +++ b/recommenders/models/fastai/fastai_utils.py @@ -78,7 +78,7 @@ def hide_fastai_progress_bar(): fastprogress.fastprogress.NO_BAR = True fastprogress.fastprogress.WRITER_FN = str master_bar, progress_bar = force_console_behavior() - fastai.basic_train.master_bar, fastai.basic_train.progress_bar = ( + fastai.callback.progress.master_bar, fastai.callback.progress.progress_bar = ( master_bar, progress_bar, ) From fda52655762580e68e500784c307d7110682e119 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Mon, 4 Mar 2024 17:38:57 +0100 
Subject: [PATCH 57/74] Fixing breaking changes in fastai Signed-off-by: miguelgfierro --- examples/06_benchmarks/benchmark_utils.py | 6 +++--- examples/06_benchmarks/movielens.ipynb | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/06_benchmarks/benchmark_utils.py b/examples/06_benchmarks/benchmark_utils.py index e28fa6ab7..90bcfb822 100644 --- a/examples/06_benchmarks/benchmark_utils.py +++ b/examples/06_benchmarks/benchmark_utils.py @@ -15,7 +15,7 @@ except ImportError: pass # skip this import if we are not in a Spark environment try: - from fastai.collab import collab_learner, CollabDataBunch + from fastai.collab import collab_learner, CollabDataLoaders except ImportError: pass # skip this import if we are not in a GPU environment @@ -181,7 +181,7 @@ def prepare_training_fastai(train, test): data = train.copy() data[DEFAULT_USER_COL] = data[DEFAULT_USER_COL].astype("str") data[DEFAULT_ITEM_COL] = data[DEFAULT_ITEM_COL].astype("str") - data = CollabDataBunch.from_df( + data = CollabDataLoaders.from_df( data, user_name=DEFAULT_USER_COL, item_name=DEFAULT_ITEM_COL, @@ -196,7 +196,7 @@ def train_fastai(params, data): data, n_factors=params["n_factors"], y_range=params["y_range"], wd=params["wd"] ) with Timer() as t: - model.fit_one_cycle(cyc_len=params["epochs"], max_lr=params["max_lr"]) + model.fit_one_cycle(params["epochs"], lr_max=params["lr_max"]) return model, t diff --git a/examples/06_benchmarks/movielens.ipynb b/examples/06_benchmarks/movielens.ipynb index 2f7a857ce..8c8ee6d2f 100644 --- a/examples/06_benchmarks/movielens.ipynb +++ b/examples/06_benchmarks/movielens.ipynb @@ -299,7 +299,7 @@ " \"n_factors\": 40, \n", " \"y_range\": [0,5.5], \n", " \"wd\": 1e-1,\n", - " \"max_lr\": 5e-3,\n", + " \"lr_max\": 5e-3,\n", " \"epochs\": 15\n", "}\n", "\n", From ac90e543e0b5c77315a452e69ce1b9efb654349e Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Tue, 12 Mar 2024 15:22:23 +0800 Subject: [PATCH 58/74] Upgrade GitHub Action azure/login --- .github/actions/azureml-test/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/azureml-test/action.yml b/.github/actions/azureml-test/action.yml index d74b88160..85ae9f84a 100644 --- a/.github/actions/azureml-test/action.yml +++ b/.github/actions/azureml-test/action.yml @@ -76,7 +76,7 @@ runs: shell: bash run: pip install --quiet "azureml-core>1,<2" "azure-cli>2,<3" - name: Log in to Azure - uses: azure/login@v1 + uses: azure/login@v2 with: creds: ${{inputs.AZUREML_TEST_CREDENTIALS}} - name: Install wheel package From 1d0fe7d22431b75840c5e725ec44753431fef70b Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Fri, 15 Mar 2024 11:03:48 +0800 Subject: [PATCH 59/74] Update fastai usage in utils --- examples/06_benchmarks/benchmark_utils.py | 6 +++--- recommenders/models/fastai/fastai_utils.py | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/examples/06_benchmarks/benchmark_utils.py b/examples/06_benchmarks/benchmark_utils.py index 90bcfb822..c62518838 100644 --- a/examples/06_benchmarks/benchmark_utils.py +++ b/examples/06_benchmarks/benchmark_utils.py @@ -221,9 +221,9 @@ def predict_fastai(model, test): def recommend_k_fastai(model, test, train, top_k=DEFAULT_K, remove_seen=True): with Timer() as t: - total_users, total_items = model.data.train_ds.x.classes.values() - total_items = total_items[1:] - total_users = total_users[1:] + total_users, total_items = model.dls.classes.values() + total_items = np.array(total_items[1:]) + total_users = np.array(total_users[1:]) 
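        # Hedged note on the np.array(...) wrappers above: in fastai 2.x,
        # model.dls.classes maps each column name to a CategoryMap, a list-like
        # vocabulary whose first entry is the "#na#" placeholder (hence the [1:]
        # slices). Slicing yields plain lists, while np.intersect1d and
        # cartesian_product below expect ndarrays. A minimal sketch, assuming
        # fastai 2.x and the recommenders column constants:
        #
        #     classes = model.dls.classes   # e.g. {'userID': CategoryMap, 'itemID': CategoryMap}
        #     total_users = np.array(classes[DEFAULT_USER_COL][1:])  # drop "#na#", enable numpy set ops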
test_users = test[DEFAULT_USER_COL].unique() test_users = np.intersect1d(test_users, total_users) users_items = cartesian_product(test_users, total_items) diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py index 6e805ae17..44705924c 100644 --- a/recommenders/models/fastai/fastai_utils.py +++ b/recommenders/models/fastai/fastai_utils.py @@ -61,7 +61,10 @@ def score( m = learner._get_idx(test_df[item_col], is_item=True) # score the pytorch model - pred = learner.model.forward(torch.column_stack((u, m))).detach().numpy() + x = torch.column_stack((u, m)) + if torch.cuda.is_available() + x = x.to('cuda') + pred = learner.model.forward(x).detach().cpu().numpy() scores = pd.DataFrame( {user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred} ) From 0740b1687a088d4cc2e13a5ea1aba43b9ef2877f Mon Sep 17 00:00:00 2001 From: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Date: Fri, 15 Mar 2024 10:13:43 -0700 Subject: [PATCH 60/74] change deprecated azureml option (#2069) Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> --- .../submit_groupwise_azureml_pytest.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py index af8b22b19..adda7e172 100644 --- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py +++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py @@ -29,11 +29,12 @@ Example: Usually, this script is run by a DevOps pipeline. It can also be run from cmd line. - >>> python tests/ci/refac.py --clustername 'cluster-d3-v2' - --subid '12345678-9012-3456-abcd-123456789012' - --pr '666' - --reponame 'Recommenders' - --branch 'staging' + >>> python tests/ci/submit_groupwise_azureml_pytest.py \ + --clustername 'cluster-d3-v2' \ + --subid '12345678-9012-3456-abcd-123456789012' \ + --pr '666' \ + --reponame 'Recommenders' \ + --branch 'staging' """ import argparse import logging @@ -41,7 +42,7 @@ from azureml.core.authentication import AzureCliAuthentication from azureml.core import Workspace from azureml.core import Experiment -from azureml.core.runconfig import RunConfiguration +from azureml.core.runconfig import RunConfiguration, DockerConfiguration from azureml.core.conda_dependencies import CondaDependencies from azureml.core.script_run_config import ScriptRunConfig from azureml.core.compute import ComputeTarget, AmlCompute @@ -175,7 +176,6 @@ def create_run_config( run_azuremlcompute = RunConfiguration() run_azuremlcompute.target = cpu_cluster - run_azuremlcompute.environment.docker.enabled = True if not add_gpu_dependencies: # https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04 run_azuremlcompute.environment.docker.base_image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04" @@ -292,8 +292,10 @@ def submit_experiment_to_azureml( source_directory=".", script=test, run_config=run_config, + docker_runtime_config=DockerConfiguration(use_docker=True), arguments=arguments, ) + run = experiment.submit(script_run_config) # waits only for configuration to complete run.wait_for_completion(show_output=True, wait_post_processing=True) From 89cc98514badca994b8e61cdaca895aa0ba7b54d Mon Sep 17 00:00:00 2001 From: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Date: Fri, 15 Mar 2024 11:35:29 -0700 Subject: [PATCH 61/74] Update SP creation doc Signed-off-by: Jun Ki Min 
<42475935+loomlike@users.noreply.github.com> --- tests/README.md | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tests/README.md b/tests/README.md index 813b433ba..650990c9e 100644 --- a/tests/README.md +++ b/tests/README.md @@ -216,8 +216,23 @@ Then, follow the steps below to create the AzureML infrastructure: 3. Add the subscription ID to GitHub action secrets [here](https://github.com/microsoft/recommenders/settings/secrets/actions). Create a new repository secret called `AZUREML_TEST_SUBID` and add the subscription ID as the value. 4. Make sure you have installed [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli), and that you are logged in: `az login`. 5. Select your subscription: `az account set -s $AZURE_SUBSCRIPTION_ID`. -5. Create a Service Principal: `az ad sp create-for-rbac --name "recommenders-cicd" --role contributor --scopes /subscriptions/$AZURE_SUBSCRIPTION_ID --sdk-auth`. -6. Add the output from the Service Principal (should be a JSON blob) as an action secret `AZUREML_TEST_CREDENTIALS`. +5. Create a Service Principal: `az ad sp create-for-rbac --name $SERVICE_PRINCIPAL_NAME --role contributor --scopes /subscriptions/$AZURE_SUBSCRIPTION_ID --json-auth`. This will output a JSON blob with the credentials of the Service Principal: + ``` + { + "clientId": ..., + "clientSecret": ..., + "subscriptionId": ..., + "tenantId": ..., + "activeDirectoryEndpointUrl": "https://login.microsoftonline.com", + "resourceManagerEndpointUrl": "https://management.azure.com/", + "activeDirectoryGraphResourceId": "https://graph.windows.net/", + "sqlManagementEndpointUrl": "https://management.core.windows.net:8443/", + "galleryEndpointUrl": "https://gallery.azure.com/", + "managementEndpointUrl": "https://management.core.windows.net/" + } + ``` +6. Add the output as github's action secret `AZUREML_TEST_CREDENTIALS` under repository's **Settings > Security > Secrets and variables > Actions**. + ## How to execute tests in your local environment From 55433c56241bb7b3d53faf7ee1066b6e12f18092 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Sat, 16 Mar 2024 20:42:36 +0100 Subject: [PATCH 62/74] :memo: Signed-off-by: miguelgfierro --- tests/README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/README.md b/tests/README.md index 650990c9e..a6068daec 100644 --- a/tests/README.md +++ b/tests/README.md @@ -213,16 +213,16 @@ Then, follow the steps below to create the AzureML infrastructure: 2. Create two new clusters: `cpu-cluster` and `gpu-cluster`. Go to compute, then compute cluster, then new. - Select the CPU VM base. Anything above 64GB of RAM, and 8 cores should be fine. - Select the GPU VM base. Anything above 56GB of RAM, and 6 cores, and an NVIDIA K80 should be fine. -3. Add the subscription ID to GitHub action secrets [here](https://github.com/microsoft/recommenders/settings/secrets/actions). Create a new repository secret called `AZUREML_TEST_SUBID` and add the subscription ID as the value. +3. Add the subscription ID to GitHub action secrets [here](https://github.com/recommenders-team/recommenders/settings/secrets/actions). Create a new repository secret called `AZUREML_TEST_SUBID` and add the subscription ID as the value. 4. Make sure you have installed [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli), and that you are logged in: `az login`. 5. Select your subscription: `az account set -s $AZURE_SUBSCRIPTION_ID`. -5. 
Create a Service Principal: `az ad sp create-for-rbac --name $SERVICE_PRINCIPAL_NAME --role contributor --scopes /subscriptions/$AZURE_SUBSCRIPTION_ID --json-auth`. This will output a JSON blob with the credentials of the Service Principal: +6. Create a Service Principal: `az ad sp create-for-rbac --name $SERVICE_PRINCIPAL_NAME --role contributor --scopes /subscriptions/$AZURE_SUBSCRIPTION_ID --json-auth`. This will output a JSON blob with the credentials of the Service Principal: ``` { - "clientId": ..., - "clientSecret": ..., - "subscriptionId": ..., - "tenantId": ..., + "clientId": "XXXXXXXXXXXXXXXXXXXXX", + "clientSecret": "XXXXXXXXXXXXXXXXXXXXX", + "subscriptionId": "XXXXXXXXXXXXXXXXXXXXX", + "tenantId": "XXXXXXXXXXXXXXXXXXXXX", "activeDirectoryEndpointUrl": "https://login.microsoftonline.com", "resourceManagerEndpointUrl": "https://management.azure.com/", "activeDirectoryGraphResourceId": "https://graph.windows.net/", @@ -231,7 +231,7 @@ Then, follow the steps below to create the AzureML infrastructure: "managementEndpointUrl": "https://management.core.windows.net/" } ``` -6. Add the output as github's action secret `AZUREML_TEST_CREDENTIALS` under repository's **Settings > Security > Secrets and variables > Actions**. +7. Add the output as github's action secret `AZUREML_TEST_CREDENTIALS` under repository's **Settings > Security > Secrets and variables > Actions**. ## How to execute tests in your local environment From 730a5e98d7ad8b2adf5d6a14ce64984c3041a251 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Mon, 18 Mar 2024 20:16:32 +0100 Subject: [PATCH 63/74] Fixing TF to < 2.16 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9d606c8dc..89d3af503 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ "gpu": [ "fastai>=2.7.11,<3", "nvidia-ml-py>=11.525.84", - "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<3", + "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<2.16", "tf-slim>=1.1.0", # No python_requires in its setup.py "torch>=2.0.1,<3", ], From 657531ac355d87cc319a849cdd0c7fa18a3ae552 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Mon, 18 Mar 2024 21:06:05 +0100 Subject: [PATCH 64/74] :bug: Signed-off-by: miguelgfierro --- recommenders/models/fastai/fastai_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py index 44705924c..0742857f3 100644 --- a/recommenders/models/fastai/fastai_utils.py +++ b/recommenders/models/fastai/fastai_utils.py @@ -62,8 +62,8 @@ def score( # score the pytorch model x = torch.column_stack((u, m)) - if torch.cuda.is_available() - x = x.to('cuda') + if torch.cuda.is_available(): + x = x.to("cuda") pred = learner.model.forward(x).detach().cpu().numpy() scores = pd.DataFrame( {user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred} From e99b8d04f46861daf60181b956a4a42714b52957 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Mon, 18 Mar 2024 22:18:12 +0100 Subject: [PATCH 65/74] model to CUDA as well as data Signed-off-by: miguelgfierro --- recommenders/models/fastai/fastai_utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py index 0742857f3..f6b6a8986 100644 --- a/recommenders/models/fastai/fastai_utils.py +++ b/recommenders/models/fastai/fastai_utils.py @@ -62,17 +62,22 @@ def score( # score the pytorch model x = torch.column_stack((u, m)) + 
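    # Hedged rationale for the device moves added here: PyTorch requires a
    # model's parameters and its inputs to live on the same device, so scoring
    # a CUDA tensor through a CPU-resident model raises a RuntimeError
    # ("Expected all tensors to be on the same device"). Module.to("cuda") is
    # idempotent, so repeated score() calls stay safe:
    #
    #     x = x.to("cuda")                          # move the input batch
    #     learner.model = learner.model.to("cuda")  # no-op if already on CUDA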
if torch.cuda.is_available(): x = x.to("cuda") + learner.model = learner.model.to("cuda") + pred = learner.model.forward(x).detach().cpu().numpy() scores = pd.DataFrame( {user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred} ) scores = scores.sort_values([user_col, prediction_col], ascending=[True, False]) + if top_k is not None: top_scores = scores.groupby(user_col).head(top_k).reset_index(drop=True) else: top_scores = scores + return top_scores From 03554deff45d0d18b90c11ea64136f67147e892d Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Tue, 19 Mar 2024 09:25:35 +0800 Subject: [PATCH 66/74] Set tensorflow <= 2.15.0 Signed-off-by: Simon Zhao --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9d606c8dc..75b44f775 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ "gpu": [ "fastai>=2.7.11,<3", "nvidia-ml-py>=11.525.84", - "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<3", + "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<=2.15.0", "tf-slim>=1.1.0", # No python_requires in its setup.py "torch>=2.0.1,<3", ], From 47281c8a31db03959345c391c8a756ea2ec72475 Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Tue, 19 Mar 2024 10:30:32 +0800 Subject: [PATCH 67/74] Add missing colon Signed-off-by: Simon Zhao --- recommenders/models/fastai/fastai_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py index 44705924c..61b576f3b 100644 --- a/recommenders/models/fastai/fastai_utils.py +++ b/recommenders/models/fastai/fastai_utils.py @@ -62,7 +62,7 @@ def score( # score the pytorch model x = torch.column_stack((u, m)) - if torch.cuda.is_available() + if torch.cuda.is_available(): x = x.to('cuda') pred = learner.model.forward(x).detach().cpu().numpy() scores = pd.DataFrame( From b255fae97203ee0f82bbc5faba1f971d07a25f80 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Tue, 19 Mar 2024 07:14:39 +0100 Subject: [PATCH 68/74] :memo: Signed-off-by: miguelgfierro --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 89d3af503..c5fc49bb8 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ "gpu": [ "fastai>=2.7.11,<3", "nvidia-ml-py>=11.525.84", - "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<2.16", + "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<2.16", # Fixed TF due to constant security problems and breaking changes #2073 "tf-slim>=1.1.0", # No python_requires in its setup.py "torch>=2.0.1,<3", ], From 85899cf7f9bad8ad84f405074dbe4bd6acea0371 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Tue, 19 Mar 2024 09:22:46 +0100 Subject: [PATCH 69/74] Reducing DKN batch size to 200 Signed-off-by: miguelgfierro --- tests/functional/examples/test_notebooks_gpu.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/functional/examples/test_notebooks_gpu.py b/tests/functional/examples/test_notebooks_gpu.py index 2007cc1a7..05b53c68e 100644 --- a/tests/functional/examples/test_notebooks_gpu.py +++ b/tests/functional/examples/test_notebooks_gpu.py @@ -247,7 +247,9 @@ def test_wide_deep_functional( os.path.join("tests", "resources", "deeprec", "slirec"), 10, 400, - {"auc": 0.7183}, # Don't do logloss check as SLi-Rec uses ranking loss, not a point-wise loss + { + "auc": 0.7183 + }, # Don't do logloss check as SLi-Rec uses ranking loss, not a point-wise loss 42, ) ], @@ -278,7 +280,7 @@ def 
test_slirec_quickstart_functional( results = read_notebook(output_notebook) assert results["auc"] == pytest.approx(expected_values["auc"], rel=TOL, abs=ABS_TOL) - + @pytest.mark.gpu @pytest.mark.notebooks @@ -567,7 +569,7 @@ def test_dkn_quickstart_functional(notebooks, output_notebook, kernel_name): notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict(EPOCHS=5, BATCH_SIZE=500), + parameters=dict(EPOCHS=5, BATCH_SIZE=200), ) results = read_notebook(output_notebook) From d8e8ac30e8eca6ff33f4c9a90a1fdfdeb36942be Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Tue, 19 Mar 2024 17:17:07 +0800 Subject: [PATCH 70/74] Move learner.model to cuda if cuda is available Signed-off-by: Simon Zhao --- recommenders/models/fastai/fastai_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py index 61b576f3b..062a8e5e7 100644 --- a/recommenders/models/fastai/fastai_utils.py +++ b/recommenders/models/fastai/fastai_utils.py @@ -63,7 +63,8 @@ def score( # score the pytorch model x = torch.column_stack((u, m)) if torch.cuda.is_available(): - x = x.to('cuda') + x = x.to("cuda") + learner.model = learner.model.to("cuda") pred = learner.model.forward(x).detach().cpu().numpy() scores = pd.DataFrame( {user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred} From e0495646e38c96f96457d6dbefef159acb1e8869 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Dav=C3=B3?= Date: Wed, 20 Mar 2024 14:18:34 +0100 Subject: [PATCH 71/74] Merged two concats into one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: David Davó --- recommenders/models/rlrmc/RLRMCdataset.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/recommenders/models/rlrmc/RLRMCdataset.py b/recommenders/models/rlrmc/RLRMCdataset.py index 7670105b3..673b0fe7c 100644 --- a/recommenders/models/rlrmc/RLRMCdataset.py +++ b/recommenders/models/rlrmc/RLRMCdataset.py @@ -68,8 +68,7 @@ def _data_processing(self, train, validation=None, test=None, mean_center=True): """ # Data processing and reindexing code is adopted from https://github.com/Microsoft/Recommenders/blob/main/recommenders/models/ncf/dataset.py # If validation dataset is None - df = train if validation is None else pd.concat([train, validation]) - df = df if test is None else pd.concat([df, test]) + df = pd.concat(filter(lambda x: x is not None, [train, validation, test])) # Reindex user and item index if self.user_idx is None: From 83933448851395ddb6d2a3bb4d4dd4a9ae4d0170 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Dav=C3=B3?= Date: Wed, 20 Mar 2024 19:18:33 +0100 Subject: [PATCH 72/74] Further simplified merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: David Davó --- recommenders/models/rlrmc/RLRMCdataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recommenders/models/rlrmc/RLRMCdataset.py b/recommenders/models/rlrmc/RLRMCdataset.py index 673b0fe7c..4627eebe1 100644 --- a/recommenders/models/rlrmc/RLRMCdataset.py +++ b/recommenders/models/rlrmc/RLRMCdataset.py @@ -68,7 +68,7 @@ def _data_processing(self, train, validation=None, test=None, mean_center=True): """ # Data processing and reindexing code is adopted from https://github.com/Microsoft/Recommenders/blob/main/recommenders/models/ncf/dataset.py # If validation dataset is None - df = pd.concat(filter(lambda x: x is not None, [train, 
validation, test])) + df = pd.concat([train, validation, test]) # Reindex user and item index if self.user_idx is None: From bd1a95f546174317f60b0febe9a8330dbfbe7ad0 Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Thu, 4 Apr 2024 21:34:04 +0800 Subject: [PATCH 73/74] Catch import error separately for SUMModel Signed-off-by: Simon Zhao --- tests/smoke/recommenders/recommender/test_deeprec_model.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/smoke/recommenders/recommender/test_deeprec_model.py b/tests/smoke/recommenders/recommender/test_deeprec_model.py index 81e6f589c..860e45bd6 100644 --- a/tests/smoke/recommenders/recommender/test_deeprec_model.py +++ b/tests/smoke/recommenders/recommender/test_deeprec_model.py @@ -20,7 +20,6 @@ from recommenders.models.deeprec.io.dkn_iterator import DKNTextIterator from recommenders.models.deeprec.io.sequential_iterator import SequentialIterator from recommenders.models.deeprec.models.sequential.sli_rec import SLI_RECModel - from recommenders.models.deeprec.models.sequential.sum import SUMModel from recommenders.datasets.amazon_reviews import ( download_and_extract, data_preprocessing, @@ -31,6 +30,11 @@ except ImportError: pass # disable error while collecting tests for non-gpu environments +try: + from recommenders.models.deeprec.models.sequential.sum import SUMModel +except ImportError: + pass # disable error while collecting tests for SUMModel + @pytest.mark.gpu def test_FFM_iterator(deeprec_resource_path): From 813d1419114d6c93ede46f66c74cd3a52b6040af Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Thu, 4 Apr 2024 17:44:24 +0200 Subject: [PATCH 74/74] Reco book update Signed-off-by: miguelgfierro --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index bdc82c96a..89ef90ecf 100644 --- a/README.md +++ b/README.md @@ -158,7 +158,7 @@ The nightly build tests are run daily on AzureML. ## References -- D. Li, J. Lian, L. Zhang, K. Ren, D. Lu, T. Wu, X. Xie, "Recommender Systems: Frontiers and Practices" (in Chinese), Publishing House of Electronics Industry, Beijing 2022. +- D. Li, J. Lian, L. Zhang, K. Ren, D. Lu, T. Wu, X. Xie, "Recommender Systems: Frontiers and Practices", Springer, Beijing, 2024. [Available on this link](https://www.amazon.com/Recommender-Systems-Frontiers-Practices-Dongsheng/dp/9819989639/). - A. Argyriou, M. González-Fierro, and L. Zhang, "Microsoft Recommenders: Best Practices for Production-Ready Recommendation Systems", *WWW 2020: International World Wide Web Conference Taipei*, 2020. Available online: https://dl.acm.org/doi/abs/10.1145/3366424.3382692 -- L. Zhang, T. Wu, X. Xie, A. Argyriou, M. González-Fierro and J. Lian, "Building Production-Ready Recommendation System at Scale", *ACM SIGKDD Conference on Knowledge Discovery and Data Mining 2019 (KDD 2019)*, 2019. - S. Graham, J.K. Min, T. Wu, "Microsoft recommenders: tools to accelerate developing recommender systems", *RecSys '19: Proceedings of the 13th ACM Conference on Recommender Systems*, 2019. Available online: https://dl.acm.org/doi/10.1145/3298689.3346967 +- L. Zhang, T. Wu, X. Xie, A. Argyriou, M. González-Fierro and J. Lian, "Building Production-Ready Recommendation System at Scale", *ACM SIGKDD Conference on Knowledge Discovery and Data Mining 2019 (KDD 2019)*, 2019.
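Note on PATCH 71/72: the simplification to a bare pd.concat() is safe because
pandas documents that None entries in the objs list are dropped silently (a
ValueError is raised only when every entry is None), which is exactly the case
the old train/validation/test conditionals handled. A minimal sketch of the
behavior the patch relies on, with illustrative frames that are not from the
repository:

    import pandas as pd

    train = pd.DataFrame({"userID": [1, 2], "rating": [3.0, 4.0]})
    validation = None  # e.g. no validation split was supplied
    test = pd.DataFrame({"userID": [3], "rating": [5.0]})

    # None entries are ignored; only the non-None frames are concatenated.
    df = pd.concat([train, validation, test])
    assert len(df) == 3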