Merge pull request #1998 from recommenders-team/staging
Staging to main: Remove support for Python 3.7 and redefinition of tests
miguelgfierro committed Sep 26, 2023
2 parents 57f37ea + 6a2b0a3 commit 2aca74f
Showing 101 changed files with 1,207 additions and 1,575 deletions.
2 changes: 1 addition & 1 deletion .github/actions/get-test-groups/action.yml
@@ -29,6 +29,6 @@ runs:
   if [[ ${{ inputs.TEST_KIND }} == "nightly" ]]; then
     test_groups_str=$(python -c 'from tests.ci.azureml_tests.test_groups import nightly_test_groups; print([t for t in nightly_test_groups.keys() if "${{inputs.TEST_ENV}}" in t])')
   else
-    test_groups_str=$(python -c 'from tests.ci.azureml_tests.test_groups import unit_test_groups; print(list(unit_test_groups.keys()))')
+    test_groups_str=$(python -c 'from tests.ci.azureml_tests.test_groups import pr_gate_test_groups; print(list(pr_gate_test_groups.keys()))')
  fi
  echo "test_groups=$test_groups_str" >> $GITHUB_OUTPUT
2 changes: 1 addition & 1 deletion .github/workflows/azureml-cpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
   strategy:
     max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
     matrix:
-      python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+      python-version: ['"python=3.8"', '"python=3.9"']
       test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
   steps:
     - name: Check out repository code
2 changes: 1 addition & 1 deletion .github/workflows/azureml-gpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
   strategy:
     max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
     matrix:
-      python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+      python-version: ['"python=3.8"', '"python=3.9"']
       test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
   steps:
     - name: Check out repository code
2 changes: 1 addition & 1 deletion .github/workflows/azureml-spark-nightly.yml
@@ -66,7 +66,7 @@ jobs:
   strategy:
     max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
     matrix:
-      python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+      python-version: ['"python=3.8"', '"python=3.9"']
       test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
   steps:
     - name: Check out repository code
2 changes: 1 addition & 1 deletion .github/workflows/azureml-unit-tests.yml
@@ -54,7 +54,7 @@ jobs:
   strategy:
     max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
     matrix:
-      python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+      python-version: ['"python=3.8"', '"python=3.9"']
       test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
   steps:
     - name: Check out repository code
6 changes: 3 additions & 3 deletions .github/workflows/sarplus.yml
@@ -36,10 +36,10 @@ jobs:
   # Test pysarplus with different versions of Python.
   # Package pysarplus and upload as GitHub workflow artifact when merged into
   # the main branch.
-  runs-on: ubuntu-20.04
+  runs-on: ubuntu-22.04
   strategy:
     matrix:
-      python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"]
+      python-version: ["3.8", "3.9"]
   steps:
     - uses: actions/checkout@v3
 
@@ -111,7 +111,7 @@ jobs:
 
 scala:
   # Test sarplus with different versions of Databricks and Synapse runtime
-  runs-on: ubuntu-latest
+  runs-on: ubuntu-22.04
   strategy:
     matrix:
       include:
5 changes: 0 additions & 5 deletions CONTRIBUTING.md
@@ -68,8 +68,3 @@ Try to be empathic.
 
 </details>
 
-## Microsoft Contributor License Agreement
-
-Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.microsoft.com.
-
-When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA.
29 changes: 13 additions & 16 deletions README.md
@@ -6,7 +6,7 @@
 
 ## What's New (August, 2023)
 
-We moved to a new organization! Now to access the repo, instead of going to https://github.com/microsoft/recommenders, you need to go to https://github.com/recommenders-team/recommenders. The old URL will still resolve to the new one, but we recommend you to update your bookmarks.
+We moved to a new organization! Now to access the repo, instead of going to https://github.com/microsoft/recommenders, you need to go to https://github.com/recommenders-team/recommenders. The old URL will still resolve to the new one, but we recommend that you update your bookmarks.
 
 Starting with release 0.6.0, Recommenders has been available on PyPI and can be installed using pip!
 
@@ -18,11 +18,11 @@ Here you can find the package documentation: https://microsoft-recommenders.read
 
 This repository contains examples and best practices for building recommendation systems, provided as Jupyter notebooks. The examples detail our learnings on five key tasks:
 
-- [Prepare Data](examples/01_prepare_data): Preparing and loading data for each recommender algorithm
+- [Prepare Data](examples/01_prepare_data): Preparing and loading data for each recommender algorithm.
 - [Model](examples/00_quick_start): Building models using various classical and deep learning recommender algorithms such as Alternating Least Squares ([ALS](https://spark.apache.org/docs/latest/api/python/_modules/pyspark/ml/recommendation.html#ALS)) or eXtreme Deep Factorization Machines ([xDeepFM](https://arxiv.org/abs/1803.05170)).
-- [Evaluate](examples/03_evaluate): Evaluating algorithms with offline metrics
-- [Model Select and Optimize](examples/04_model_select_and_optimize): Tuning and optimizing hyperparameters for recommender models
-- [Operationalize](examples/05_operationalize): Operationalizing models in a production environment on Azure
+- [Evaluate](examples/03_evaluate): Evaluating algorithms with offline metrics.
+- [Model Select and Optimize](examples/04_model_select_and_optimize): Tuning and optimizing hyperparameters for recommender models.
+- [Operationalize](examples/05_operationalize): Operationalizing models in a production environment on Azure.
 
 Several utilities are provided in [recommenders](recommenders) to support common tasks such as loading datasets in the format expected by different algorithms, evaluating model outputs, and splitting training/test data. Implementations of several state-of-the-art algorithms are included for self-study and customization in your own applications. See the [Recommenders documentation](https://readthedocs.org/projects/microsoft-recommenders/).
 
@@ -40,16 +40,16 @@ We recommend [conda](https://docs.conda.io/projects/conda/en/latest/glossary.htm
 conda create -n <environment_name> python=3.9
 conda activate <environment_name>
 
-# 3. Install the recommenders package with examples
-pip install recommenders[examples]
+# 3. Install the core recommenders package. It can run all the CPU notebooks.
+pip install recommenders
 
 # 4. create a Jupyter kernel
 python -m ipykernel install --user --name <environment_name> --display-name <kernel_name>
 
-# 5. Clone this repo within vscode or using command:
-git clone https://github.com/microsoft/recommenders.git
+# 5. Clone this repo within VSCode or using command line:
+git clone https://github.com/recommenders-team/recommenders.git
 
-# 6. Within VS Code:
+# 6. Within VSCode:
 # a. Open a notebook, e.g., examples/00_quick_start/sar_movielens.ipynb;
 # b. Select Jupyter kernel <kernel_name>;
 # c. Run the notebook.
@@ -58,14 +58,11 @@ git clone https://github.com/microsoft/recommenders.git
 For more information about setup on other platforms (e.g., Windows and macOS) and different configurations (e.g., GPU, Spark and experimental features), see the [Setup Guide](SETUP.md).
 
 In addition to the core package, several extras are also provided, including:
-+ `[examples]`: Needed for running examples.
 + `[gpu]`: Needed for running GPU models.
 + `[spark]`: Needed for running Spark models.
 + `[dev]`: Needed for development for the repo.
-+ `[all]`: `[examples]`|`[gpu]`|`[spark]`|`[dev]`
++ `[all]`: `[gpu]`|`[spark]`|`[dev]`
 + `[experimental]`: Models that are not thoroughly tested and/or may require additional steps in installation.
 + `[nni]`: Needed for running models integrated with [NNI](https://nni.readthedocs.io/en/stable/).
-
 
 ## Algorithms
 
@@ -138,13 +135,13 @@ This project adheres to [Microsoft's Open Source Code of Conduct](CODE_OF_CONDUC
 
 ## Build Status
 
-These tests are the nightly builds, which compute the smoke and integration tests. `main` is our principal branch and `staging` is our development branch. We use [pytest](https://docs.pytest.org/) for testing python utilities in [recommenders](recommenders) and [Papermill](https://github.com/nteract/papermill) and [Scrapbook](https://nteract-scrapbook.readthedocs.io/en/latest/) for the [notebooks](examples).
+These tests are the nightly builds, which compute the asynchronous tests. `main` is our principal branch and `staging` is our development branch. We use [pytest](https://docs.pytest.org/) for testing python utilities in [recommenders](recommenders) and [Papermill](https://github.com/nteract/papermill) and [Scrapbook](https://nteract-scrapbook.readthedocs.io/en/latest/) for the [notebooks](examples).
 
 For more information about the testing pipelines, please see the [test documentation](tests/README.md).
 
 ### AzureML Nightly Build Status
 
-Smoke and integration tests are run daily on AzureML.
+The nightly build tests are run daily on AzureML.
 
 | Build Type | Branch | Status | | Branch | Status |
 | --- | --- | --- | --- | --- | --- |
35 changes: 0 additions & 35 deletions SECURITY.md

This file was deleted.

31 changes: 21 additions & 10 deletions SETUP.md
@@ -1,23 +1,34 @@
 # Setup Guide
 
-The repo, including this guide, is tested on Linux. Where applicable, we document differences in [Windows](#windows-specific-instructions) and [macOS](#macos-specific-instructions) although
+The repo, including this guide, is tested on Linux. Where applicable, we document differences in [Windows](#windows-specific-instructions) and [MacOS](#macos-specific-instructions) although
 such documentation may not always be up to date.
 
 ## Extras
 
 In addition to the pip installable package, several extras are provided, including:
-+ `[examples]`: Needed for running examples.
 + `[gpu]`: Needed for running GPU models.
 + `[spark]`: Needed for running Spark models.
 + `[dev]`: Needed for development.
-+ `[all]`: `[examples]`|`[gpu]`|`[spark]`|`[dev]`
++ `[all]`: `[gpu]`|`[spark]`|`[dev]`
 + `[experimental]`: Models that are not thoroughly tested and/or may require additional steps in installation).
 + `[nni]`: Needed for running models integrated with [NNI](https://nni.readthedocs.io/en/stable/).
-
 
 ## Setup for Core Package
 
 Follow the [Getting Started](./README.md#Getting-Started) section in the [README](./README.md) to install the package and run the examples.
 
+## Setup for GPU
+
+```bash
+# 1. Make sure CUDA is installed.
+
+# 2. Follow Steps 1-5 in the Getting Started section in README.md to install the package and Jupyter kernel, adding the gpu extra to the pip install command:
+pip install recommenders[gpu]
+
+# 3. Within VSCode:
+# a. Open a notebook with a GPU model, e.g., examples/00_quick_start/wide_deep_movielens.ipynb;
+# b. Select Jupyter kernel <kernel_name>;
+# c. Run the notebook.
+```
+
 ## Setup for Spark
 
@@ -26,9 +37,9 @@ Follow the [Getting Started](./README.md#Getting-Started) section in the [README
 # sudo apt-get install openjdk-11-jdk
 
 # 2. Follow Steps 1-5 in the Getting Started section in README.md to install the package and Jupyter kernel, adding the spark extra to the pip install command:
-pip install recommenders[examples,spark]
+pip install recommenders[spark]
 
-# 3. Within VS Code:
+# 3. Within VSCode:
 # a. Open a notebook with a Spark model, e.g., examples/00_quick_start/als_movielens.ipynb;
 # b. Select Jupyter kernel <kernel_name>;
 # c. Run the notebook.
@@ -99,7 +110,7 @@ The `xlearn` package has dependency on `cmake`. If one uses the `xlearn` related
 
 For Spark features to work, make sure Java and Spark are installed and respective environment varialbes such as `JAVA_HOME`, `SPARK_HOME` and `HADOOP_HOME` are set properly. Also make sure environment variables `PYSPARK_PYTHON` and `PYSPARK_DRIVER_PYTHON` are set to the the same python executable.
 
-## macOS-Specific Instructions
+## MacOS-Specific Instructions
 
 We recommend using [Homebrew](https://brew.sh/) to install the dependencies on macOS, including conda (please remember to add conda's path to `$PATH`). One may also need to install lightgbm using Homebrew before pip install the package.
 
@@ -145,9 +156,9 @@ First make sure that the tag that you want to add, e.g. `0.6.0`, is added in [`r
 1. Make sure that the code in main passes all the tests (unit and nightly tests).
 1. Create a tag with the version number: e.g. `git tag -a 0.6.0 -m "Recommenders 0.6.0"`.
 1. Push the tag to the remote server: `git push origin 0.6.0`.
-1. When the new tag is pushed, a release pipeline is executed. This pipeline runs all the tests again (unit, smoke and integration), generates a wheel and a tar.gz which are uploaded to a [GitHub draft release](https://github.com/microsoft/recommenders/releases).
+1. When the new tag is pushed, a release pipeline is executed. This pipeline runs all the tests again (PR gate and nightly builds), generates a wheel and a tar.gz which are uploaded to a [GitHub draft release](https://github.com/microsoft/recommenders/releases).
 1. Fill up the draft release with all the recent changes in the code.
 1. Download the wheel and tar.gz locally, these files shouldn't have any bug, since they passed all the tests.
 1. Install twine: `pip install twine`
-1. Publish the wheel and tar.gz to pypi: `twine upload recommenders*`
+1. Publish the wheel and tar.gz to PyPI: `twine upload recommenders*`
 
4 changes: 1 addition & 3 deletions contrib/sarplus/python/setup.py
@@ -39,8 +39,6 @@ def __str__(self):
     classifiers=[
         "Development Status :: 5 - Production/Stable",
         "License :: OSI Approved :: MIT License",
-        "Programming Language :: Python :: 3.6",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
@@ -51,7 +49,7 @@ def __str__(self):
     setup_requires=["pytest-runner"],
     install_requires=DEPENDENCIES,
     tests_require=["pytest"],
-    python_requires=">=3.6",
+    python_requires=">=3.6,<3.11",
     packages=["pysarplus"],
     package_data={"": ["VERSION"]},
     ext_modules=[
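The dropped classifiers are cosmetic, but the new `python_requires` upper bound changes installability: pip will now refuse to install pysarplus on Python 3.11 and later. A quick sanity check of what the specifier admits, using the `packaging` library (a sketch for illustration, not sarplus code):

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

# The specifier introduced by this commit.
spec = SpecifierSet(">=3.6,<3.11")

for v in ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"]:
    print(v, Version(v) in spec)
# 3.6 through 3.10 print True; 3.11 prints False.
```

Note that the metadata still admits 3.6 and 3.7 at install time even though the sarplus CI matrix above now only exercises 3.8 and 3.9.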
2 changes: 1 addition & 1 deletion (Jupyter notebook; file name not captured in this view)
@@ -19,7 +19,7 @@
 {
 "cell_type": "markdown",
 "source": [
-"Hyperparameter tuning for Spark based recommender algorithm is important to select a model with the optimal performance. This notebook introduces good practices in performing hyperparameter tuning for building recommender models with the utility functions provided in the [Microsoft/Recommenders](https://github.com/Microsoft/Recommenders.git) repository.\n",
+"Hyperparameter tuning for Spark based recommender algorithm is important to select a model with the optimal performance. This notebook introduces good practices in performing hyperparameter tuning for building recommender models with the utility functions provided in the [Microsoft/Recommenders](https://github.com/recommenders-team/recommenders.git) repository.\n",
 "\n",
 "Three different approaches are introduced and comparatively studied.\n",
 "* Spark native/custom constructs (`ParamGridBuilder`, `TrainValidationSplit`).\n",
3 changes: 0 additions & 3 deletions pyproject.toml
@@ -14,10 +14,7 @@ build-backend = "setuptools.build_meta"
 [tool.pytest.ini_options]
 markers = [
     "experimental: tests that will not be executed and may need extra dependencies",
-    "flaky: flaky tests that can fail unexpectedly",
     "gpu: tests running on GPU",
-    "integration: integration tests",
     "notebooks: tests for notebooks",
-    "smoke: smoke tests",
     "spark: tests that requires Spark",
 ]
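With the `flaky`, `integration`, and `smoke` markers removed, the remaining markers (`experimental`, `gpu`, `notebooks`, `spark`) describe what a test needs rather than which tier it belongs to. A hedged sketch of how the surviving markers are typically applied; the test functions are invented, only the marker names come from this file:

```python
import pytest


@pytest.mark.gpu
def test_model_trains_on_gpu():
    ...  # hypothetical GPU-only test


@pytest.mark.spark
@pytest.mark.notebooks
def test_als_notebook_runs():
    ...  # hypothetical notebook test that needs Spark

# Selection is then done with -m expressions and the test groups above,
# rather than smoke/integration tiers, e.g.:
#   pytest -m "gpu and not experimental"
#   pytest -m "spark and notebooks"
```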
2 changes: 1 addition & 1 deletion recommenders/README.md
@@ -9,7 +9,7 @@ Some dependencies require compilation during pip installation. On Linux this can
 ```bash
 sudo apt-get install -y build-essential libpython<version>
 ```
-where `<version>` should be the Python version (e.g. `3.6`).
+where `<version>` should be the Python version (e.g. `3.8`).
 
 On Windows you will need [Microsoft C++ Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/)
 
8 changes: 4 additions & 4 deletions recommenders/utils/gpu_utils.py
@@ -97,7 +97,7 @@ def get_cuda_version():
             data = f.read().replace("\n", "")
         return data
     else:
-        return "Cannot find CUDA in this machine"
+        return None
 
 
 def get_cudnn_version():
@@ -125,14 +125,14 @@ def find_cudnn_in_headers(candiates):
             if version:
                 return version
             else:
-                return "Cannot find CUDNN version"
+                return None
         else:
-            return "Cannot find CUDNN version"
+            return None
 
     try:
         import torch
 
-        return torch.backends.cudnn.version()
+        return str(torch.backends.cudnn.version())
     except (ImportError, ModuleNotFoundError):
         if sys.platform == "win32":
             candidates = [r"C:\NVIDIA\cuda\include\cudnn.h"]
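Returning `None` instead of a sentinel string makes the not-found case falsy, so callers can branch on it directly, and `str(...)` keeps the cuDNN result a string whether it comes from the headers or from torch. A sketch of the calling pattern this enables (assumes the package is installed; not code from this commit):

```python
from recommenders.utils.gpu_utils import get_cuda_version, get_cudnn_version

# Before this change the not-found case was a truthy string such as
# "Cannot find CUDA in this machine", so a check like this could never fire.
cuda = get_cuda_version()
cudnn = get_cudnn_version()

if cuda is None or cudnn is None:
    print("CUDA/cuDNN not detected; falling back to CPU.")
else:
    print(f"Found CUDA {cuda} with cuDNN {cudnn}.")
```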