Upgrade dependencies, remove support for python 3.7 and add python 3.11
Signed-off-by: Simon Zhao <[email protected]>
SimonYansenZhao committed Sep 5, 2023
1 parent 2ff0713 · commit eb78929
Showing 10 changed files with 70 additions and 74 deletions.
.github/workflows/azureml-cpu-nightly.yml (2 changes: 1 addition & 1 deletion)
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
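Note the double layer of quoting in the matrix entries: each is a YAML string whose value itself contains double quotes, so the inner `"python=3.8"` survives into whatever shell command the workflow later builds from it. A minimal sketch of the effect, with a hypothetical conda invocation standing in for the real workflow step:

```python
# Hypothetical illustration: the matrix value keeps its inner quotes, so an
# interpolated conda command receives a single quoted version spec.
matrix_entry = '"python=3.8"'  # as written in the matrix above
command = f"conda create -n test -y {matrix_entry} pip"
print(command)  # conda create -n test -y "python=3.8" pip
```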
.github/workflows/azureml-gpu-nightly.yml (2 changes: 1 addition & 1 deletion)
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
.github/workflows/azureml-spark-nightly.yml (2 changes: 1 addition & 1 deletion)
@@ -66,7 +66,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
.github/workflows/azureml-unit-tests.yml (2 changes: 1 addition & 1 deletion)
@@ -54,7 +54,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
.github/workflows/sarplus.yml (6 changes: 3 additions & 3 deletions)
@@ -36,10 +36,10 @@ jobs:
     # Test pysarplus with different versions of Python.
     # Package pysarplus and upload as GitHub workflow artifact when merged into
     # the main branch.
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
-        python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
     steps:
       - uses: actions/checkout@v3

@@ -111,7 +111,7 @@ jobs:

   scala:
     # Test sarplus with different versions of Databricks and Synapse runtime
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
         include:
recommenders/models/rlrmc/RLRMCdataset.py (4 changes: 2 additions & 2 deletions)
@@ -68,8 +68,8 @@ def _data_processing(self, train, validation=None, test=None, mean_center=True):
         """
         # Data processing and reindexing code is adopted from https://github.com/Microsoft/Recommenders/blob/main/recommenders/models/ncf/dataset.py
         # If validation dataset is None
-        df = train if validation is None else train.append(validation)
-        df = df if test is None else df.append(test)
+        df = train if validation is None else pd.concat([train, validation])
+        df = df if test is None else pd.concat([df, test])

         # Reindex user and item index
         if self.user_idx is None:
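The append-to-concat swap tracks pandas itself: `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0, which the relaxed `pandas` pin in setup.py below now allows. A minimal sketch of the replacement, with hypothetical frames standing in for the real train/validation splits:

```python
import pandas as pd

# Hypothetical splits standing in for the real train/validation DataFrames.
train = pd.DataFrame({"userID": [1, 2], "itemID": [10, 20]})
validation = pd.DataFrame({"userID": [3], "itemID": [30]})

# pd.concat is the supported replacement for the removed DataFrame.append;
# it takes a list of frames, mirroring the expressions in the diff above.
df = train if validation is None else pd.concat([train, validation])
print(len(df))  # 3
```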
setup.py (79 changes: 37 additions & 42 deletions)
@@ -8,7 +8,7 @@
 import sys
 import time

-# Workround for enabling editable user pip installs
+# Workaround for enabling editable user pip installs
 site.ENABLE_USER_SITE = "--user" in sys.argv[1:]

 # Version
@@ -27,55 +27,49 @@
     version += ".post" + str(int(time.time()))

 install_requires = [
-    "numpy>=1.19", # 1.19 required by tensorflow 2.6
-    "pandas>1.0.3,<2",
-    "scipy>=1.0.0,<1.11.0", # FIXME: We limit <1.11.0 until #1954 is fixed
-    "tqdm>=4.31.1,<5",
-    "matplotlib>=2.2.2,<4",
-    "scikit-learn>=0.22.1,<1.0.3",
-    "numba>=0.38.1,<1",
-    "lightfm>=1.15,<2",
-    "lightgbm>=2.2.1",
-    "memory_profiler>=0.54.0,<1",
-    "nltk>=3.4,<4",
-    "seaborn>=0.8.1,<1",
-    "transformers>=2.5.0,<5",
-    "category_encoders>=1.3.0,<2",
-    "jinja2>=2,<3.1",
-    "requests>=2.0.0,<3",
-    "cornac>=1.1.2,<1.15.2;python_version<='3.7'",
-    "cornac>=1.15.2,<2;python_version>='3.8'", # After 1.15.2, Cornac requires python 3.8
-    "retrying>=1.3.3",
-    "pandera[strategies]>=0.6.5", # For generating fake datasets
-    "scikit-surprise>=1.0.6",
-    "scrapbook>=0.5.0,<1.0.0",
-    "hyperopt>=0.1.2,<1",
-    "ipykernel>=4.6.1,<7",
-    "jupyter>=1,<2",
-    "locust>=1,<2",
-    "papermill>=2.1.2,<3",
+    "pandas>1.5.2,<3", # requires numpy
+    "scikit-learn>=1.1.3,<2", # requires scipy
+    "numba>=0.57.0,<1",
+    "lightfm>=1.17,<2",
+    "lightgbm>=3.3.2,<5",
+    "memory-profiler>=0.61.0,<1",
+    "nltk>=3.8.1,<4", # requires tqdm
+    "seaborn>=0.12.0,<1", # requires matplotlib
+    "transformers>=4.27.0,<5", # requires pyyaml, tqdm
+    "category-encoders>=2.6.0,<3",
+    "jinja2>=3.1.0,<3.2",
+    "cornac>=1.15.2,<2", # requires tqdm
+    "retrying>=1.3.4",
+    "pandera[strategies]>=0.15.0", # For generating fake datasets
+    "scikit-surprise>=1.1.3",
+    "scrapbook>=0.5.0,<1.0.0", # requires tqdm, papermill
+    "hyperopt>=0.2.7,<1",
+    "notebook>=7.0.0,<8", # requires jupyter, ipykernel
+    "locust>=2.12.2,<3",
+    # hypothesis 6.83.1 introduced a non-existent attribute '_deferred_pprinters' of IPython.lib.pretty in
+    # https://github.com/HypothesisWorks/hypothesis/commit/5ea8e0c3e6da1cd9fb3f302124dc74791c14db11
+    "hypothesis<6.83.1",
 ]

 # shared dependencies
 extras_require = {
     "gpu": [
-        "nvidia-ml-py3>=7.352.0",
-        # TensorFlow compiled with CUDA 11.2, cudnn 8.1
-        "tensorflow~=2.6.1;python_version=='3.6'",
-        "tensorflow~=2.7.0;python_version>='3.7'",
+        "nvidia-ml-py>=11.510.69",
+        # TensorFlow compiled with CUDA 11.8, cudnn 8.6
+        "tensorflow>=2.12.0",
         "tf-slim>=1.1.0",
-        "torch>=1.8", # for CUDA 11 support
-        "fastai>=1.0.46,<2",
+        "torch>=2.0.1",
+        "fastai>=2.7.11,<3",
     ],
     "spark": [
-        "pyarrow>=0.12.1,<7.0.0",
-        "pyspark>=2.4.5,<3.3.0",
+        "pyarrow>=10.0.1",
+        "pyspark>=3.4.0",
     ],
     "dev": [
-        "black>=18.6b4,<21",
-        "pytest>=3.6.4",
-        "pytest-cov>=2.12.1",
-        "pytest-mock>=3.6.1", # for access to mock fixtures in pytest
+        "black>=23.3.0,<24",
+        "pytest>=7.2.1",
+        "pytest-cov>=4.1.0",
+        "pytest-mock>=3.10.0", # for access to mock fixtures in pytest
     ],
 }
 # For the brave of heart
@@ -116,17 +110,18 @@
         "Topic :: Scientific/Engineering :: Artificial Intelligence",
         "Topic :: Software Development :: Libraries :: Python Modules",
         "License :: OSI Approved :: MIT License",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
         "Operating System :: POSIX :: Linux",
     ],
     extras_require=extras_require,
     keywords="recommendations recommendation recommenders recommender system engine "
     "machine learning python spark gpu",
     install_requires=install_requires,
     package_dir={"recommenders": "recommenders"},
-    python_requires=">=3.6, <3.10",
+    python_requires=">=3.8, <=3.11",
     packages=find_packages(
         where=".",
         exclude=["contrib", "docs", "examples", "scenarios", "tests", "tools"],
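With Python 3.7 gone, the two conditional cornac pins collapse into one: the old `;python_version<='3.7'` and `;python_version>='3.8'` suffixes are PEP 508 environment markers that pip evaluates against the running interpreter. A small sketch of that evaluation, assuming the `packaging` library (which pip vendors) is available:

```python
from packaging.requirements import Requirement

# One of the old conditional pins; pip keeps it only if the marker is true.
req = Requirement("cornac>=1.15.2,<2; python_version>='3.8'")
print(req.specifier)          # the version constraint, without the marker
print(req.marker.evaluate())  # True on every interpreter the package now targets
```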
tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py (35 changes: 21 additions & 14 deletions)
@@ -37,7 +37,6 @@
 """
 import argparse
 import logging
-import glob

 from azureml.core.authentication import AzureCliAuthentication
 from azureml.core import Workspace
@@ -146,8 +145,7 @@ def setup_persistent_compute_target(workspace, cluster_name, vm_size, max_nodes)

 def create_run_config(
     cpu_cluster,
-    docker_proc_type,
-    workspace,
+    docker_image,
     add_gpu_dependencies,
     add_spark_dependencies,
     conda_pkg_jdk,
@@ -166,8 +164,7 @@
         the following:
             - Reco_cpu_test
             - Reco_gpu_test
-        docker_proc_type (str) : processor type, cpu or gpu
-        workspace : workspace reference
+        docker_image (str) : docker image for cpu or gpu
         add_gpu_dependencies (bool) : True if gpu packages should be
             added to the conda environment, else False
         add_spark_dependencies (bool) : True if PySpark packages should be
@@ -181,7 +178,20 @@
     run_azuremlcompute = RunConfiguration()
     run_azuremlcompute.target = cpu_cluster
     run_azuremlcompute.environment.docker.enabled = True
-    run_azuremlcompute.environment.docker.base_image = docker_proc_type
+    # See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-train-with-custom-image?view=azureml-api-1#use-a-custom-dockerfile-optional
+    run_azuremlcompute.environment.docker.base_image = None
+    run_azuremlcompute.environment.docker.base_dockerfile = f"""
+    FROM {docker_image}
+    # Install system-level deps for scipy. See
+    # https://docs.scipy.org/doc/scipy/dev/contributor/building.html
+    RUN apt-get update && \
+        apt-get install -y \
+            gfortran \
+            libopenblas-dev \
+            liblapack-dev \
+            pkg-config
+    RUN apt-get install -y git
+    """

     # Use conda_dependencies.yml to create a conda environment in
     # the Docker image for execution
@@ -425,13 +435,11 @@ def create_arg_parser():
     args = create_arg_parser()

     if args.dockerproc == "cpu":
-        from azureml.core.runconfig import DEFAULT_CPU_IMAGE
-
-        docker_proc_type = DEFAULT_CPU_IMAGE
+        # https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04
+        docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04"
     else:
-        from azureml.core.runconfig import DEFAULT_GPU_IMAGE
-
-        docker_proc_type = DEFAULT_GPU_IMAGE
+        # https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04
+        docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04"

     cli_auth = AzureCliAuthentication()

@@ -452,8 +460,7 @@

     run_config = create_run_config(
         cpu_cluster=cpu_cluster,
-        docker_proc_type=docker_proc_type,
-        workspace=workspace,
+        docker_image=docker_image,
         add_gpu_dependencies=args.add_gpu_dependencies,
         add_spark_dependencies=args.add_spark_dependencies,
         conda_pkg_jdk=args.conda_pkg_jdk,
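The new `base_dockerfile` path follows the azureml-core API: a run configuration's Docker section takes either a prebuilt image name (`base_image`) or an inline Dockerfile (`base_dockerfile`), which is why the diff clears `base_image` to `None` before assigning the Dockerfile. A condensed sketch of the same pattern, assuming `azureml-core` is installed (image tag copied from the diff):

```python
from azureml.core.runconfig import RunConfiguration

run_config = RunConfiguration()
run_config.environment.docker.enabled = True
# base_image and base_dockerfile are alternatives: clear the image so the
# service builds the environment from the inline Dockerfile instead.
run_config.environment.docker.base_image = None
run_config.environment.docker.base_dockerfile = (
    "FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04\n"
    "RUN apt-get update && apt-get install -y git\n"
)
```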
tests/unit/recommenders/evaluation/test_python_evaluation.py (2 changes: 1 addition & 1 deletion)
@@ -6,7 +6,7 @@
 import pytest
 from unittest.mock import Mock
 from sklearn.preprocessing import minmax_scale
-from pandas.util.testing import assert_frame_equal
+from pandas.testing import assert_frame_equal

 from recommenders.utils.constants import (
     DEFAULT_USER_COL,
tests/unit/recommenders/evaluation/test_spark_evaluation.py (10 changes: 2 additions & 8 deletions)
@@ -4,7 +4,7 @@
 import numpy as np
 import pandas as pd
 import pytest
-from pandas.util.testing import assert_frame_equal
+from pandas.testing import assert_frame_equal

 from recommenders.evaluation.python_evaluation import (
     precision_at_k,
@@ -441,7 +441,7 @@ def test_item_novelty(spark_diversity_data, target_metrics):
     )
     actual = evaluator.historical_item_novelty().toPandas()
     assert_frame_equal(
-        target_metrics["item_novelty"], actual, check_exact=False, check_less_precise=4
+        target_metrics["item_novelty"], actual, check_exact=False
     )
     assert np.all(actual["item_novelty"].values >= 0)
     # Test that novelty is zero when data includes only one item
@@ -482,7 +482,6 @@ def test_user_diversity(spark_diversity_data, target_metrics):
         target_metrics["user_diversity"],
         actual,
         check_exact=False,
-        check_less_precise=4,
     )


@@ -510,7 +509,6 @@ def test_user_item_serendipity(spark_diversity_data, target_metrics):
         target_metrics["user_item_serendipity"],
         actual,
         check_exact=False,
-        check_less_precise=4,
     )


@@ -529,7 +527,6 @@ def test_user_serendipity(spark_diversity_data, target_metrics):
         target_metrics["user_serendipity"],
         actual,
         check_exact=False,
-        check_less_precise=4,
     )


@@ -562,7 +559,6 @@ def test_user_diversity_item_feature_vector(spark_diversity_data, target_metrics
         target_metrics["user_diversity_item_feature_vector"],
         actual,
         check_exact=False,
-        check_less_precise=4,
     )


@@ -599,7 +595,6 @@ def test_user_item_serendipity_item_feature_vector(
         target_metrics["user_item_serendipity_item_feature_vector"],
         actual,
         check_exact=False,
-        check_less_precise=4,
     )


@@ -620,7 +615,6 @@ def test_user_serendipity_item_feature_vector(spark_diversity_data, target_metri
         target_metrics["user_serendipity_item_feature_vector"],
         actual,
         check_exact=False,
-        check_less_precise=4,
     )


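Both test files make the same two pandas API migrations: `pandas.util.testing` (removed in pandas 2.0) becomes `pandas.testing`, and `check_less_precise` (deprecated in pandas 1.1 and later removed) is dropped, leaving the default tolerances of `assert_frame_equal`. If the looser four-decimal comparison were still wanted, the modern replacement is the `rtol`/`atol` parameters; a minimal sketch with made-up frames:

```python
import pandas as pd
from pandas.testing import assert_frame_equal  # pandas.util.testing is gone in 2.0

expected = pd.DataFrame({"item_novelty": [1.0000, 2.0000]})
actual = pd.DataFrame({"item_novelty": [1.00001, 1.99998]})

# check_less_precise=4 roughly meant "compare to ~4 decimal places"; today the
# same intent is expressed with an explicit relative tolerance.
assert_frame_equal(expected, actual, check_exact=False, rtol=1e-4)
```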
