From 94415774f194b8e6608d25fee0fd955e1985382d Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Fri, 1 Sep 2023 12:41:28 +0200 Subject: [PATCH 1/4] Integrate examples in core package Signed-off-by: miguelgfierro --- README.md | 23 ++++++++++------------- setup.py | 24 ++++++++++-------------- 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index b999eab65..b1d20bdeb 100644 --- a/README.md +++ b/README.md @@ -18,11 +18,11 @@ Here you can find the package documentation: https://microsoft-recommenders.read This repository contains examples and best practices for building recommendation systems, provided as Jupyter notebooks. The examples detail our learnings on five key tasks: -- [Prepare Data](examples/01_prepare_data): Preparing and loading data for each recommender algorithm +- [Prepare Data](examples/01_prepare_data): Preparing and loading data for each recommender algorithm. - [Model](examples/00_quick_start): Building models using various classical and deep learning recommender algorithms such as Alternating Least Squares ([ALS](https://spark.apache.org/docs/latest/api/python/_modules/pyspark/ml/recommendation.html#ALS)) or eXtreme Deep Factorization Machines ([xDeepFM](https://arxiv.org/abs/1803.05170)). -- [Evaluate](examples/03_evaluate): Evaluating algorithms with offline metrics -- [Model Select and Optimize](examples/04_model_select_and_optimize): Tuning and optimizing hyperparameters for recommender models -- [Operationalize](examples/05_operationalize): Operationalizing models in a production environment on Azure +- [Evaluate](examples/03_evaluate): Evaluating algorithms with offline metrics. +- [Model Select and Optimize](examples/04_model_select_and_optimize): Tuning and optimizing hyperparameters for recommender models. +- [Operationalize](examples/05_operationalize): Operationalizing models in a production environment on Azure. 
Several utilities are provided in [recommenders](recommenders) to support common tasks such as loading datasets in the format expected by different algorithms, evaluating model outputs, and splitting training/test data. Implementations of several state-of-the-art algorithms are included for self-study and customization in your own applications. See the [Recommenders documentation](https://readthedocs.org/projects/microsoft-recommenders/). @@ -40,16 +40,16 @@ We recommend [conda](https://docs.conda.io/projects/conda/en/latest/glossary.htm conda create -n <environment_name> python=3.9 conda activate <environment_name> -# 3. Install the recommenders package with examples -pip install recommenders[examples] +# 3. Install the core recommenders package. It can run all the CPU notebooks. +pip install recommenders # 4. create a Jupyter kernel python -m ipykernel install --user --name <environment_name> --display-name <kernel_name> -# 5. Clone this repo within vscode or using command: -git clone https://github.com/microsoft/recommenders.git +# 5. Clone this repo within VSCode or using command line: +git clone https://github.com/recommenders-team/recommenders.git -# 6. Within VS Code: +# 6. Within VSCode: # a. Open a notebook, e.g., examples/00_quick_start/sar_movielens.ipynb; # b. Select Jupyter kernel <kernel_name>; # c. Run the notebook. @@ -58,14 +58,11 @@ git clone https://github.com/microsoft/recommenders.git For more information about setup on other platforms (e.g., Windows and macOS) and different configurations (e.g., GPU, Spark and experimental features), see the [Setup Guide](SETUP.md). In addition to the core package, several extras are also provided, including: -+ `[examples]`: Needed for running examples. + `[gpu]`: Needed for running GPU models. + `[spark]`: Needed for running Spark models. + `[dev]`: Needed for development for the repo. -+ `[all]`: `[examples]`|`[gpu]`|`[spark]`|`[dev]` ++ `[all]`: `[gpu]`|`[spark]`|`[dev]` + `[experimental]`: Models that are not thoroughly tested and/or may require additional steps in installation. 
-+ `[nni]`: Needed for running models integrated with [NNI](https://nni.readthedocs.io/en/stable/). - ## Algorithms diff --git a/setup.py b/setup.py index c34d4d716..d84c85d81 100644 --- a/setup.py +++ b/setup.py @@ -8,10 +8,10 @@ import sys import time -# workround for enabling editable user pip installs +# Workaround for enabling editable user pip installs site.ENABLE_USER_SITE = "--user" in sys.argv[1:] -# version +# Version here = Path(__file__).absolute().parent version_data = {} with open(here.joinpath("recommenders", "__init__.py"), "r") as f: @@ -29,7 +29,7 @@ install_requires = [ "numpy>=1.19", # 1.19 required by tensorflow 2.6 "pandas>1.0.3,<2", - "scipy>=1.0.0,<1.11.0", #FIXME: We limit <1.11.0 until #1954 is fixed + "scipy>=1.0.0,<1.11.0", # FIXME: We limit <1.11.0 until #1954 is fixed "tqdm>=4.31.1,<5", "matplotlib>=2.2.2,<4", "scikit-learn>=0.22.1,<1.0.3", @@ -49,17 +49,15 @@ "pandera[strategies]>=0.6.5", # For generating fake datasets "scikit-surprise>=1.0.6", "scrapbook>=0.5.0,<1.0.0", + "hyperopt>=0.1.2,<1", + "ipykernel>=4.6.1,<7", + "jupyter>=1,<2", + "locust>=1,<2", + "papermill>=2.1.2,<3", ] # shared dependencies extras_require = { - "examples": [ - "hyperopt>=0.1.2,<1", - "ipykernel>=4.6.1,<7", - "jupyter>=1,<2", - "locust>=1,<2", - "papermill>=2.1.2,<3", - ], "gpu": [ "nvidia-ml-py3>=7.352.0", # TensorFlow compiled with CUDA 11.2, cudnn 8.1 @@ -80,17 +78,15 @@ "pytest-mock>=3.6.1", # for access to mock fixtures in pytest ], } -# for the brave of heart +# For the brave of heart extras_require["all"] = list(set(sum([*extras_require.values()], []))) -# the following dependencies need additional testing +# The following dependencies need additional testing extras_require["experimental"] = [ # xlearn requires cmake to be pre-installed "xlearn==0.40a1", # VW C++ binary needs to be installed manually for some code to work "vowpalwabbit>=8.9.0,<9", -] -extras_require["nni"] = [ # nni needs to be upgraded "nni==1.5", ] From 
339f25b7700c6e0396eb1553c02899bc6fdba29f Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Fri, 1 Sep 2023 12:46:43 +0200 Subject: [PATCH 2/4] Add GPU installation details Signed-off-by: miguelgfierro --- SETUP.md | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/SETUP.md b/SETUP.md index ed8b657c9..2cd2d3679 100644 --- a/SETUP.md +++ b/SETUP.md @@ -1,23 +1,34 @@ # Setup Guide -The repo, including this guide, is tested on Linux. Where applicable, we document differences in [Windows](#windows-specific-instructions) and [macOS](#macos-specific-instructions) although +The repo, including this guide, is tested on Linux. Where applicable, we document differences in [Windows](#windows-specific-instructions) and [MacOS](#macos-specific-instructions) although such documentation may not always be up to date. ## Extras + In addition to the pip installable package, several extras are provided, including: -+ `[examples]`: Needed for running examples. + `[gpu]`: Needed for running GPU models. + `[spark]`: Needed for running Spark models. + `[dev]`: Needed for development. -+ `[all]`: `[examples]`|`[gpu]`|`[spark]`|`[dev]` ++ `[all]`: `[gpu]`|`[spark]`|`[dev]` + `[experimental]`: Models that are not thoroughly tested and/or may require additional steps in installation). -+ `[nni]`: Needed for running models integrated with [NNI](https://nni.readthedocs.io/en/stable/). - ## Setup for Core Package Follow the [Getting Started](./README.md#Getting-Started) section in the [README](./README.md) to install the package and run the examples. +## Setup for GPU + +```bash +# 1. Make sure CUDA is installed. + +# 2. Follow Steps 1-5 in the Getting Started section in README.md to install the package and Jupyter kernel, adding the gpu extra to the pip install command: +pip install recommenders[gpu] + +# 3. Within VSCode: +# a. Open a notebook with a GPU model, e.g., examples/00_quick_start/wide_deep_movielens.ipynb; +# b. 
Select Jupyter kernel ; +# c. Run the notebook. +``` ## Setup for Spark @@ -26,9 +37,9 @@ Follow the [Getting Started](./README.md#Getting-Started) section in the [README # sudo apt-get install openjdk-11-jdk # 2. Follow Steps 1-5 in the Getting Started section in README.md to install the package and Jupyter kernel, adding the spark extra to the pip install command: -pip install recommenders[examples,spark] +pip install recommenders[spark] -# 3. Within VS Code: +# 3. Within VSCode: # a. Open a notebook with a Spark model, e.g., examples/00_quick_start/als_movielens.ipynb; # b. Select Jupyter kernel ; # c. Run the notebook. From 5b6ac71cd7c00c8979efb8a02a5c1e1cae32daad Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Fri, 1 Sep 2023 12:48:25 +0200 Subject: [PATCH 3/4] :memo: Signed-off-by: miguelgfierro --- SETUP.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SETUP.md b/SETUP.md index 2cd2d3679..64f5f2090 100644 --- a/SETUP.md +++ b/SETUP.md @@ -110,7 +110,7 @@ The `xlearn` package has dependency on `cmake`. If one uses the `xlearn` related For Spark features to work, make sure Java and Spark are installed and respective environment varialbes such as `JAVA_HOME`, `SPARK_HOME` and `HADOOP_HOME` are set properly. Also make sure environment variables `PYSPARK_PYTHON` and `PYSPARK_DRIVER_PYTHON` are set to the the same python executable. -## macOS-Specific Instructions +## MacOS-Specific Instructions We recommend using [Homebrew](https://brew.sh/) to install the dependencies on macOS, including conda (please remember to add conda's path to `$PATH`). One may also need to install lightgbm using Homebrew before pip install the package. 
From 19cfa7d0d22fd97b931f1b490ed1a3126a7114b7 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Fri, 1 Sep 2023 12:50:49 +0200 Subject: [PATCH 4/4] Remove examples from tests Signed-off-by: miguelgfierro --- tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py index bdc869b0c..79a189ccc 100644 --- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py +++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py @@ -196,7 +196,7 @@ def create_run_config( ) # install recommenders - reco_extras = "dev,examples" + reco_extras = "dev" if add_gpu_dependencies and add_spark_dependencies: conda_dep.add_channel("conda-forge") conda_dep.add_conda_package(conda_pkg_jdk)