From 5f18cae5e3eb319ae22a265144b0f4f48e40c3f3 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Thu, 7 Sep 2023 10:54:17 +0200
Subject: [PATCH 01/43] Refactor data validation tests

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 .github/actions/get-test-groups/action.yml    |  2 +-
 .../ci/azureml_tests/run_groupwise_pytest.py  |  4 +--
 tests/ci/azureml_tests/test_groups.py         | 30 ++++++++++---------
 .../recommenders/datasets/test_covid_utils.py |  0
 .../recommenders/datasets/test_movielens.py   |  0
 .../recommenders/datasets/test_wikidata.py    |  0
 6 files changed, 19 insertions(+), 17 deletions(-)
 rename tests/{unit => data_validation}/recommenders/datasets/test_covid_utils.py (100%)
 rename tests/{unit => data_validation}/recommenders/datasets/test_movielens.py (100%)
 rename tests/{unit => data_validation}/recommenders/datasets/test_wikidata.py (100%)

diff --git a/.github/actions/get-test-groups/action.yml b/.github/actions/get-test-groups/action.yml
index 3e803c800e..39364fa81f 100644
--- a/.github/actions/get-test-groups/action.yml
+++ b/.github/actions/get-test-groups/action.yml
@@ -29,6 +29,6 @@ runs:
         if [[ ${{ inputs.TEST_KIND }} == "nightly" ]]; then
           test_groups_str=$(python -c 'from tests.ci.azureml_tests.test_groups import nightly_test_groups; print([t for t in nightly_test_groups.keys() if "${{inputs.TEST_ENV}}" in t])')
         else
-          test_groups_str=$(python -c 'from tests.ci.azureml_tests.test_groups import unit_test_groups; print(list(unit_test_groups.keys()))')
+          test_groups_str=$(python -c 'from tests.ci.azureml_tests.test_groups import pr_gate_test_groups; print(list(pr_gate_test_groups.keys()))')
         fi
         echo "test_groups=$test_groups_str" >> $GITHUB_OUTPUT
diff --git a/tests/ci/azureml_tests/run_groupwise_pytest.py b/tests/ci/azureml_tests/run_groupwise_pytest.py
index 64c9895809..f038567be8 100644
--- a/tests/ci/azureml_tests/run_groupwise_pytest.py
+++ b/tests/ci/azureml_tests/run_groupwise_pytest.py
@@ -13,7 +13,7 @@
 import argparse
 import glob
 from azureml.core import Run
-from test_groups import nightly_test_groups, unit_test_groups
+from test_groups import nightly_test_groups, pr_gate_test_groups
 
 if __name__ == "__main__":
 
@@ -46,7 +46,7 @@
     if args.testkind == "nightly":
         test_group = nightly_test_groups[args.testgroup]
     else:
-        test_group = unit_test_groups[args.testgroup]
+        test_group = pr_gate_test_groups[args.testgroup]
 
     logger.info("Tests to be executed")
     logger.info(str(test_group))
diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index ec3bbea622..503be5236f 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -11,7 +11,7 @@
 # FOR INTEGRATION, NO GROUP SHOULD SURPASS 45MIN = 2700s !!!
 # FOR UNIT, NO GROUP SHOULD SURPASS 15MIN = 900s !!!
 
-global nightly_test_groups, unit_test_groups
+global nightly_test_groups, pr_gate_test_groups
 
 nightly_test_groups = {
     "group_cpu_001": [  # Total group time: 1883s
@@ -151,15 +151,17 @@
     ],
 }
 
-unit_test_groups = {
+pr_gate_test_groups = {
     "group_spark_001": [  # Total group time: 270.41s
-        "tests/unit/recommenders/datasets/test_movielens.py::test_load_spark_df_mock_100__with_custom_param__succeed",
-        "tests/unit/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__return_success",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_spark_df_mock_100__with_custom_param__succeed",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__return_success",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__data_serialization_default_param",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__store_tmp_file",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_spark_df_mock_100__with_default_param__succeed",
+
         "tests/unit/recommenders/datasets/test_spark_splitter.py::test_stratified_splitter",
         "tests/unit/recommenders/datasets/test_spark_splitter.py::test_chrono_splitter",
-        "tests/unit/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__data_serialization_default_param",
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_user_diversity_item_feature_vector",
-        "tests/unit/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__store_tmp_file",
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_spark_python_match",
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_spark_precision",
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_spark_exp_var",
@@ -170,7 +172,6 @@
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_user_serendipity",
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_diversity_item_feature_vector",
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_diversity",
-        "tests/unit/recommenders/datasets/test_movielens.py::test_load_spark_df_mock_100__with_default_param__succeed",
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_user_serendipity_item_feature_vector",
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_serendipity_item_feature_vector",
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_user_item_serendipity_item_feature_vector",
@@ -257,17 +258,18 @@
         "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",
     ],
     "group_cpu_001": [  # Total group time: 525.96s
-        "tests/unit/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_default_param__succeed",
-        "tests/unit/recommenders/datasets/test_dataset.py::test_maybe_download_wrong_bytes",
-        "tests/unit/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__has_default_col_names",
-        "tests/unit/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_custom_param__succeed",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_default_param__succeed",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_custom_param__succeed",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__has_default_col_names",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df__return_success",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df_remove_default_col__return_success",
+        "tests/data_validation/recommenders/datasets/test_wikidata.py::test_find_wikidata_id",
+
         "tests/unit/recommenders/datasets/test_dataset.py::test_maybe_download_retry",
-        "tests/unit/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df__return_success",
+        "tests/unit/recommenders/datasets/test_dataset.py::test_maybe_download_wrong_bytes",
         "tests/unit/recommenders/utils/test_timer.py::test_timer",
         "tests/unit/recommenders/tuning/test_ncf_utils.py::test_compute_test_results__return_success",
-        "tests/unit/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df_remove_default_col__return_success",
         "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem",
-        "tests/unit/recommenders/datasets/test_wikidata.py::test_find_wikidata_id",
         "tests/unit/recommenders/models/test_sar_singlenode.py::test_sar_item_similarity",
         "tests/unit/recommenders/models/test_tfidf_utils.py::test_tokenize_text",
         "tests/unit/recommenders/models/test_tfidf_utils.py::test_get_tokens",
diff --git a/tests/unit/recommenders/datasets/test_covid_utils.py b/tests/data_validation/recommenders/datasets/test_covid_utils.py
similarity index 100%
rename from tests/unit/recommenders/datasets/test_covid_utils.py
rename to tests/data_validation/recommenders/datasets/test_covid_utils.py
diff --git a/tests/unit/recommenders/datasets/test_movielens.py b/tests/data_validation/recommenders/datasets/test_movielens.py
similarity index 100%
rename from tests/unit/recommenders/datasets/test_movielens.py
rename to tests/data_validation/recommenders/datasets/test_movielens.py
diff --git a/tests/unit/recommenders/datasets/test_wikidata.py b/tests/data_validation/recommenders/datasets/test_wikidata.py
similarity index 100%
rename from tests/unit/recommenders/datasets/test_wikidata.py
rename to tests/data_validation/recommenders/datasets/test_wikidata.py

From 5457f508d5a12e24abe3fe73bd2bc2e4d92cd6ae Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Fri, 15 Sep 2023 14:01:12 +0200
Subject: [PATCH 02/43] Changed test_dataset to test_download_utils FYI
 @looklike

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py                     | 8 ++++----
 .../datasets/{test_dataset.py => test_download_utils.py}  | 3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)
 rename tests/unit/recommenders/datasets/{test_dataset.py => test_download_utils.py} (99%)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 503be5236f..41f34b045e 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -158,7 +158,6 @@
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__data_serialization_default_param",
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__store_tmp_file",
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_spark_df_mock_100__with_default_param__succeed",
-
         "tests/unit/recommenders/datasets/test_spark_splitter.py::test_stratified_splitter",
         "tests/unit/recommenders/datasets/test_spark_splitter.py::test_chrono_splitter",
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_user_diversity_item_feature_vector",
@@ -264,9 +263,10 @@
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df__return_success",
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df_remove_default_col__return_success",
         "tests/data_validation/recommenders/datasets/test_wikidata.py::test_find_wikidata_id",
-
-        "tests/unit/recommenders/datasets/test_dataset.py::test_maybe_download_retry",
-        "tests/unit/recommenders/datasets/test_dataset.py::test_maybe_download_wrong_bytes",
+        "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_wrong_bytes",
+        "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_maybe",
+        "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_retry",
+        "tests/unit/recommenders/datasets/test_download_utils.py::test_download_path",
         "tests/unit/recommenders/utils/test_timer.py::test_timer",
         "tests/unit/recommenders/tuning/test_ncf_utils.py::test_compute_test_results__return_success",
         "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem",
diff --git a/tests/unit/recommenders/datasets/test_dataset.py b/tests/unit/recommenders/datasets/test_download_utils.py
similarity index 99%
rename from tests/unit/recommenders/datasets/test_dataset.py
rename to tests/unit/recommenders/datasets/test_download_utils.py
index 942ac1d35f..9a8e8857aa 100644
--- a/tests/unit/recommenders/datasets/test_dataset.py
+++ b/tests/unit/recommenders/datasets/test_download_utils.py
@@ -4,8 +4,9 @@
 import os
 import pytest
 import requests
-from tempfile import TemporaryDirectory
 import logging
+from tempfile import TemporaryDirectory
+
 from recommenders.datasets.download_utils import maybe_download, download_path
 
 

From 1fc7b85817280f9203273d3435834164df3e4bcc Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Fri, 15 Sep 2023 14:12:05 +0200
Subject: [PATCH 03/43] Performance tests

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py             | 15 ++++++++++++++-
 .../test_python_evaluation_time_performance.py    |  8 +++-----
 2 files changed, 17 insertions(+), 6 deletions(-)
 rename tests/{unit => performance}/recommenders/evaluation/test_python_evaluation_time_performance.py (99%)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 41f34b045e..5fdf5a1b28 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -274,7 +274,20 @@
         "tests/unit/recommenders/models/test_tfidf_utils.py::test_tokenize_text",
         "tests/unit/recommenders/models/test_tfidf_utils.py::test_get_tokens",
         "tests/unit/recommenders/models/test_cornac_utils.py::test_recommend_k_items",
-        "tests/unit/recommenders/evaluation/test_python_evaluation_time_performance.py",  # 297.91s
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_rating",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_ranking",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_rmse",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_mae",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_rsquared",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_exp_var",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_get_top_k_items",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_get_top_k_items_largek",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_ndcg_at_k",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_map_at_k",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_precision",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_recall",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_auc",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_logloss",
     ],
     "group_notebooks_cpu_001": [  # Total group time: 226.42s
         "tests/unit/examples/test_notebooks_python.py::test_rlrmc_quickstart_runs",
diff --git a/tests/unit/recommenders/evaluation/test_python_evaluation_time_performance.py b/tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py
similarity index 99%
rename from tests/unit/recommenders/evaluation/test_python_evaluation_time_performance.py
rename to tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py
index d4dea22086..7c9070c5f3 100644
--- a/tests/unit/recommenders/evaluation/test_python_evaluation_time_performance.py
+++ b/tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py
@@ -4,8 +4,10 @@
 import numpy as np
 import pandas as pd
 import pytest
+import random
 from sklearn.preprocessing import minmax_scale
 
+from recommenders.utils.timer import Timer
 from recommenders.utils.constants import (
     DEFAULT_USER_COL,
     DEFAULT_ITEM_COL,
@@ -28,15 +30,13 @@
     auc,
     logloss,
 )
-import random
-from recommenders.utils.timer import Timer
+
 
 random.seed(SEED)
 DATA_USER_NUM = 5000
 DATA_ITEM_NUM = DATA_USER_NUM * 2
 DATA_SAMPLE_NUM = DATA_USER_NUM * 1000
 DATA_RATING_MAX = 5
-
 TOL = 0.1
 
 
@@ -84,8 +84,6 @@ def rating_pred_binary(rating_pred):
 
 # The following time thresholds are benchmarked on Azure
 # Standard_A8m_v2 with 8 vCPUs and 64 GiB memory.
-
-
 def test_merge_rating(rating_true, rating_pred):
     with Timer() as t:
         merge_rating_true_pred(

From 2799676bb5745ad035af68a7f13fdde570fb9524 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Fri, 15 Sep 2023 14:51:17 +0200
Subject: [PATCH 04/43] Security tests

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 setup.py                                   |  2 +-
 tests/ci/azureml_tests/test_groups.py      | 88 +++++++++++-----------
 tests/security/test_dependency_security.py | 27 +++++++
 3 files changed, 72 insertions(+), 45 deletions(-)
 create mode 100644 tests/security/test_dependency_security.py

diff --git a/setup.py b/setup.py
index 5930d3be23..e20f6080c4 100644
--- a/setup.py
+++ b/setup.py
@@ -42,7 +42,7 @@
     "transformers>=2.5.0,<5",
     "category_encoders>=1.3.0,<2",
     "jinja2>=2,<3.1",
-    "requests>=2.0.0,<3",
+    "requests>=2.31.0,<3",
     "cornac>=1.1.2,<1.15.2;python_version<='3.7'",
     "cornac>=1.15.2,<2;python_version>='3.8'",  # After 1.15.2, Cornac requires python 3.8
     "retrying>=1.3.3",
diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 5fdf5a1b28..53f6064471 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -152,6 +152,50 @@
 }
 
 pr_gate_test_groups = {
+    "group_cpu_001": [  # Total group time: 525.96s
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_default_param__succeed",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_custom_param__succeed",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__has_default_col_names",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df__return_success",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df_remove_default_col__return_success",
+        "tests/data_validation/recommenders/datasets/test_wikidata.py::test_find_wikidata_id",
+        "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_wrong_bytes",
+        "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_maybe",
+        "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_retry",
+        "tests/unit/recommenders/datasets/test_download_utils.py::test_download_path",
+        "tests/unit/recommenders/utils/test_timer.py::test_timer",
+        "tests/unit/recommenders/tuning/test_ncf_utils.py::test_compute_test_results__return_success",
+        "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_sar_item_similarity",
+        "tests/unit/recommenders/models/test_tfidf_utils.py::test_tokenize_text",
+        "tests/unit/recommenders/models/test_tfidf_utils.py::test_get_tokens",
+        "tests/unit/recommenders/models/test_cornac_utils.py::test_recommend_k_items",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_rating",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_ranking",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_rmse",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_mae",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_rsquared",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_exp_var",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_get_top_k_items",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_get_top_k_items_largek",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_ndcg_at_k",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_map_at_k",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_precision",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_recall",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_auc",
+        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_logloss",
+    ],
+    "group_notebooks_cpu_001": [  # Total group time: 226.42s
+        "tests/unit/examples/test_notebooks_python.py::test_rlrmc_quickstart_runs",
+        "tests/unit/examples/test_notebooks_python.py::test_sar_deep_dive_runs",
+        "tests/unit/examples/test_notebooks_python.py::test_baseline_deep_dive_runs",
+        "tests/unit/examples/test_notebooks_python.py::test_template_runs",
+        "tests/unit/recommenders/utils/test_notebook_utils.py::test_is_jupyter",
+        "tests/unit/examples/test_notebooks_python.py::test_surprise_deep_dive_runs",
+        "tests/unit/examples/test_notebooks_python.py::test_lightgbm",
+        "tests/unit/examples/test_notebooks_python.py::test_cornac_deep_dive_runs",
+        "tests/unit/examples/test_notebooks_python.py::test_sar_single_node_runs",
+    ],
     "group_spark_001": [  # Total group time: 270.41s
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_spark_df_mock_100__with_custom_param__succeed",
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__return_success",
@@ -256,48 +300,4 @@
         "tests/unit/examples/test_notebooks_gpu.py::test_xdeepfm",
         "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",
     ],
-    "group_cpu_001": [  # Total group time: 525.96s
-        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_default_param__succeed",
-        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_custom_param__succeed",
-        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__has_default_col_names",
-        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df__return_success",
-        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df_remove_default_col__return_success",
-        "tests/data_validation/recommenders/datasets/test_wikidata.py::test_find_wikidata_id",
-        "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_wrong_bytes",
-        "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_maybe",
-        "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_retry",
-        "tests/unit/recommenders/datasets/test_download_utils.py::test_download_path",
-        "tests/unit/recommenders/utils/test_timer.py::test_timer",
-        "tests/unit/recommenders/tuning/test_ncf_utils.py::test_compute_test_results__return_success",
-        "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem",
-        "tests/unit/recommenders/models/test_sar_singlenode.py::test_sar_item_similarity",
-        "tests/unit/recommenders/models/test_tfidf_utils.py::test_tokenize_text",
-        "tests/unit/recommenders/models/test_tfidf_utils.py::test_get_tokens",
-        "tests/unit/recommenders/models/test_cornac_utils.py::test_recommend_k_items",
-        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_rating",
-        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_ranking",
-        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_rmse",
-        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_mae",
-        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_rsquared",
-        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_exp_var",
-        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_get_top_k_items",
-        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_get_top_k_items_largek",
-        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_ndcg_at_k",
-        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_map_at_k",
-        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_precision",
-        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_recall",
-        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_auc",
-        "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_logloss",
-    ],
-    "group_notebooks_cpu_001": [  # Total group time: 226.42s
-        "tests/unit/examples/test_notebooks_python.py::test_rlrmc_quickstart_runs",
-        "tests/unit/examples/test_notebooks_python.py::test_sar_deep_dive_runs",
-        "tests/unit/examples/test_notebooks_python.py::test_baseline_deep_dive_runs",
-        "tests/unit/examples/test_notebooks_python.py::test_template_runs",
-        "tests/unit/recommenders/utils/test_notebook_utils.py::test_is_jupyter",
-        "tests/unit/examples/test_notebooks_python.py::test_surprise_deep_dive_runs",
-        "tests/unit/examples/test_notebooks_python.py::test_lightgbm",
-        "tests/unit/examples/test_notebooks_python.py::test_cornac_deep_dive_runs",
-        "tests/unit/examples/test_notebooks_python.py::test_sar_single_node_runs",
-    ],
 }
diff --git a/tests/security/test_dependency_security.py b/tests/security/test_dependency_security.py
new file mode 100644
index 0000000000..bfbdcae258
--- /dev/null
+++ b/tests/security/test_dependency_security.py
@@ -0,0 +1,27 @@
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+
+import pytest
+import requests
+import pandas as pd
+
+try:
+    import tensorflow as tf
+except ImportError:
+    pass  # skip this import if we are in cpu environment
+
+
+def test_requests():
+    # Security issue: https://github.com/psf/requests/releases/tag/v2.31.0
+    assert requests.__version__ >= "2.31.0"
+
+
+def test_pandas():
+    # Security issue: https://github.com/advisories/GHSA-cmm9-mgm5-9r42
+    assert pd.__version__ >= "1.0.3"
+
+
+@pytest.mark.gpu
+def test_tensorflow():
+    assert tf.__version__ >= "2.6.0"

From 8b61faff0eb1ecb6612739c630c479edab281e09 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Fri, 15 Sep 2023 15:01:52 +0200
Subject: [PATCH 05/43] Security tests

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 setup.py                                   |  2 +-
 tests/ci/azureml_tests/test_groups.py      |  5 +++++
 tests/security/test_dependency_security.py | 17 ++++++++++++++++-
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index e20f6080c4..04c0dad111 100644
--- a/setup.py
+++ b/setup.py
@@ -64,7 +64,7 @@
         "tensorflow~=2.6.1;python_version=='3.6'",
         "tensorflow~=2.7.0;python_version>='3.7'",
         "tf-slim>=1.1.0",
-        "torch>=1.8",  # for CUDA 11 support
+        "torch>=1.13.1",  # for CUDA 11 support
         "fastai>=1.0.46,<2",
     ],
     "spark": [
diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 53f6064471..76849f0429 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -184,6 +184,9 @@
         "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_recall",
         "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_auc",
         "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_logloss",
+        "tests/security/test_dependency_security.py::test_requests",
+        "tests/security/test_dependency_security.py::test_numpy",
+        "tests/security/test_dependency_security.py::test_pandas",
     ],
     "group_notebooks_cpu_001": [  # Total group time: 226.42s
         "tests/unit/examples/test_notebooks_python.py::test_rlrmc_quickstart_runs",
@@ -286,6 +289,8 @@
         "tests/unit/recommenders/models/test_deeprec_model.py::test_nextitnet_component_definition",
         "tests/unit/recommenders/models/test_deeprec_model.py::test_sum_component_definition",
         "tests/unit/recommenders/models/test_deeprec_model.py::test_lightgcn_component_definition",
+        "tests/security/test_dependency_security.py::test_tensorflow",
+        "tests/security/test_dependency_security.py::test_torch",
     ],
     "group_notebooks_gpu_001": [  # Total group time: 563.35s
         "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",  # 0.76s (Always the first test to check the GPU works)
diff --git a/tests/security/test_dependency_security.py b/tests/security/test_dependency_security.py
index bfbdcae258..d706f94718 100644
--- a/tests/security/test_dependency_security.py
+++ b/tests/security/test_dependency_security.py
@@ -4,10 +4,12 @@
 
 import pytest
 import requests
+import numpy as np
 import pandas as pd
 
 try:
     import tensorflow as tf
+    import torch
 except ImportError:
     pass  # skip this import if we are in cpu environment
 
@@ -17,6 +19,11 @@ def test_requests():
     assert requests.__version__ >= "2.31.0"
 
 
+def test_numpy():
+    # Security issue: https://github.com/advisories/GHSA-frgw-fgh6-9g52
+    assert np.__version__ >= "1.13.3"
+
+
 def test_pandas():
     # Security issue: https://github.com/advisories/GHSA-cmm9-mgm5-9r42
     assert pd.__version__ >= "1.0.3"
@@ -24,4 +31,12 @@ def test_pandas():
 
 @pytest.mark.gpu
 def test_tensorflow():
-    assert tf.__version__ >= "2.6.0"
+    # Security issue: https://github.com/advisories/GHSA-w5gh-2wr2-pm6g
+    # Security issue: https://github.com/advisories/GHSA-r6jx-9g48-2r5r
+    assert tf.__version__ >= "2.5.1"
+
+
+@pytest.mark.gpu
+def test_torch():
+    # Security issue: https://github.com/advisories/GHSA-47fc-vmwq-366v
+    assert torch.__version__ >= "1.13.1"

From bb66531c97c6a4c31568c1a8df359d1ee374ee67 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Fri, 15 Sep 2023 15:11:16 +0200
Subject: [PATCH 06/43] Regression tests

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/regression/test_compatibility_tf.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 tests/regression/test_compatibility_tf.py

diff --git a/tests/regression/test_compatibility_tf.py b/tests/regression/test_compatibility_tf.py
new file mode 100644
index 0000000000..c949f42312
--- /dev/null
+++ b/tests/regression/test_compatibility_tf.py
@@ -0,0 +1,14 @@
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+
+import pytest
+
+
+@pytest.mark.gpu
+def test_compatibility_tf():
+    """Some of our code uses TF1 and some TF2. Here we just check that we
+    can import both versions.
+    """
+    import tensorflow as tf
+    from tensorflow.compat.v1 import placeholder

From 7bd6bd7628fa102578533972ba4fef32a8663e13 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Fri, 15 Sep 2023 15:16:10 +0200
Subject: [PATCH 07/43] Regression tests

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 76849f0429..49c9b7371b 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -291,6 +291,7 @@
         "tests/unit/recommenders/models/test_deeprec_model.py::test_lightgcn_component_definition",
         "tests/security/test_dependency_security.py::test_tensorflow",
         "tests/security/test_dependency_security.py::test_torch",
+        "tests/regression/test_compatibility_tf.py",
     ],
     "group_notebooks_gpu_001": [  # Total group time: 563.35s
         "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",  # 0.76s (Always the first test to check the GPU works)

From e483b62f5130bd9abf28dc175fde36b53857bc5d Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Fri, 15 Sep 2023 15:32:05 +0200
Subject: [PATCH 08/43] Criteo responsible AI

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py             |  1 +
 .../recommenders/datasets/test_criteo_privacy.py  | 15 +++++++++++++++
 2 files changed, 16 insertions(+)
 create mode 100644 tests/responsible_ai/recommenders/datasets/test_criteo_privacy.py

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 49c9b7371b..3900e4e113 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -187,6 +187,7 @@
         "tests/security/test_dependency_security.py::test_requests",
         "tests/security/test_dependency_security.py::test_numpy",
         "tests/security/test_dependency_security.py::test_pandas",
+        "test/responsible_ai/recommenders/datasets/test_criteo_privacy.py",
     ],
     "group_notebooks_cpu_001": [  # Total group time: 226.42s
         "tests/unit/examples/test_notebooks_python.py::test_rlrmc_quickstart_runs",
diff --git a/tests/responsible_ai/recommenders/datasets/test_criteo_privacy.py b/tests/responsible_ai/recommenders/datasets/test_criteo_privacy.py
new file mode 100644
index 0000000000..61dfc22956
--- /dev/null
+++ b/tests/responsible_ai/recommenders/datasets/test_criteo_privacy.py
@@ -0,0 +1,15 @@
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+
+import pandas as pd
+
+from recommenders.datasets import criteo
+
+
+def test_criteo_privacy(criteo_first_row):
+    """Check that there are no privacy concerns. In Criteo, we check that the
+    data is anonymized.
+    """
+    df = criteo.load_pandas_df(size="sample")
+    assert df.loc[0].equals(pd.Series(criteo_first_row))

From f985b66f651551d42e9bfcb54bc7c3ac999d7aa5 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Fri, 15 Sep 2023 15:42:41 +0200
Subject: [PATCH 09/43] Movielens responsible AI

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 .../datasets/test_movielens_privacy.py             | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 tests/responsible_ai/recommenders/datasets/test_movielens_privacy.py

diff --git a/tests/responsible_ai/recommenders/datasets/test_movielens_privacy.py b/tests/responsible_ai/recommenders/datasets/test_movielens_privacy.py
new file mode 100644
index 0000000000..dd6a16ccc6
--- /dev/null
+++ b/tests/responsible_ai/recommenders/datasets/test_movielens_privacy.py
@@ -0,0 +1,14 @@
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+
+from recommenders.datasets import movielens
+
+
+def test_movielens_privacy():
+    """Check that there are no privacy concerns. In Movielens, we check that all the
+    userID are numbers.
+    """
+    df = movielens.load_pandas_df(size="100k")
+    users = df["userID"].values.tolist()
+    assert all(isinstance(x, int) for x in users)

From d6e5dbd45cae95db86eca2e8830dbfaf306f2a9d Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Fri, 15 Sep 2023 15:44:36 +0200
Subject: [PATCH 10/43] :bug:

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 3900e4e113..de35f7334e 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -188,6 +188,7 @@
         "tests/security/test_dependency_security.py::test_numpy",
         "tests/security/test_dependency_security.py::test_pandas",
         "test/responsible_ai/recommenders/datasets/test_criteo_privacy.py",
+        "test/responsible_ai/recommenders/datasets/test_movielens_privacy.py",
     ],
     "group_notebooks_cpu_001": [  # Total group time: 226.42s
         "tests/unit/examples/test_notebooks_python.py::test_rlrmc_quickstart_runs",

From 0ddd3f9e0a57a87c54b906b1f758bc6db82c15af Mon Sep 17 00:00:00 2001
From: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com>
Date: Fri, 15 Sep 2023 19:10:27 +0200
Subject: [PATCH 11/43] Forgot s

---
 tests/ci/azureml_tests/test_groups.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index de35f7334e..8fd37eea76 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -187,8 +187,8 @@
         "tests/security/test_dependency_security.py::test_requests",
         "tests/security/test_dependency_security.py::test_numpy",
         "tests/security/test_dependency_security.py::test_pandas",
-        "test/responsible_ai/recommenders/datasets/test_criteo_privacy.py",
-        "test/responsible_ai/recommenders/datasets/test_movielens_privacy.py",
+        "tests/responsible_ai/recommenders/datasets/test_criteo_privacy.py",
+        "tests/responsible_ai/recommenders/datasets/test_movielens_privacy.py",
     ],
     "group_notebooks_cpu_001": [  # Total group time: 226.42s
         "tests/unit/examples/test_notebooks_python.py::test_rlrmc_quickstart_runs",

From 46c6ffc46a69c7f025440c49fea6677e83a704ef Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 09:09:36 +0200
Subject: [PATCH 12/43] criteo

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py                |  8 ++++----
 .../recommenders/datasets}/test_criteo.py            | 12 ++++--------
 2 files changed, 8 insertions(+), 12 deletions(-)
 rename tests/{smoke/recommenders/dataset => data_validation/recommenders/datasets}/test_criteo.py (78%)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 8fd37eea76..edea1e261d 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -48,9 +48,9 @@
         "tests/integration/examples/test_notebooks_python.py::test_benchmark_movielens_cpu",  # 58s
     ],
     "group_cpu_003": [  # Total group time: 2253s
-        "tests/smoke/recommenders/dataset/test_criteo.py::test_download_criteo",  # 1.05s
-        "tests/smoke/recommenders/dataset/test_criteo.py::test_extract_criteo",  # 1.22s
-        "tests/smoke/recommenders/dataset/test_criteo.py::test_criteo_load_pandas_df",  # 1.73s
+        "tests/data_validation/recommenders/dataset/test_criteo.py::test_download_criteo_sample",  # 1.05s
+        "tests/data_validation/recommenders/dataset/test_criteo.py::test_extract_criteo_sample",  # 1.22s
+        "tests/data_validation/recommenders/dataset/test_criteo.py::test_criteo_load_pandas_df_sample",  # 1.73s
         "tests/integration/recommenders/datasets/test_criteo.py::test_criteo_load_pandas_df",  # 1368.63s
         #
         "tests/smoke/examples/test_notebooks_python.py::test_sar_single_node_smoke",  # 12.58s
@@ -141,7 +141,7 @@
         "tests/smoke/recommenders/dataset/test_movielens.py::test_load_spark_df",  # 4.33s
         "tests/integration/recommenders/datasets/test_movielens.py::test_load_spark_df",  # 25.58s + 101.99s + 139.23s
         #
-        "tests/smoke/recommenders/dataset/test_criteo.py::test_criteo_load_spark_df",  # 6.83s
+        "tests/data_validation/recommenders/dataset/test_criteo.py::test_criteo_load_spark_df_sample",  # 6.83s
         "tests/smoke/examples/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_smoke",  # 32.45s
         "tests/integration/recommenders/datasets/test_criteo.py::test_criteo_load_spark_df",  # 374.64s
         #
diff --git a/tests/smoke/recommenders/dataset/test_criteo.py b/tests/data_validation/recommenders/datasets/test_criteo.py
similarity index 78%
rename from tests/smoke/recommenders/dataset/test_criteo.py
rename to tests/data_validation/recommenders/datasets/test_criteo.py
index ad92e52f45..7adc54c4d5 100644
--- a/tests/smoke/recommenders/dataset/test_criteo.py
+++ b/tests/data_validation/recommenders/datasets/test_criteo.py
@@ -7,17 +7,15 @@
 from recommenders.datasets import criteo
 
 
-@pytest.mark.smoke
-def test_criteo_load_pandas_df(criteo_first_row):
+def test_criteo_load_pandas_df_sample(criteo_first_row):
     df = criteo.load_pandas_df(size="sample")
     assert df.shape[0] == 100000
     assert df.shape[1] == 40
     assert df.loc[0].equals(pd.Series(criteo_first_row))
 
 
-@pytest.mark.smoke
 @pytest.mark.spark
-def test_criteo_load_spark_df(spark, criteo_first_row):
+def test_criteo_load_spark_df_sample(spark, criteo_first_row):
     df = criteo.load_spark_df(spark, size="sample")
     assert df.count() == 100000
     assert len(df.columns) == 40
@@ -25,15 +23,13 @@ def test_criteo_load_spark_df(spark, criteo_first_row):
     assert first_row == criteo_first_row
 
 
-@pytest.mark.smoke
-def test_download_criteo(tmp_path):
+def test_download_criteo_sample(tmp_path):
     filepath = criteo.download_criteo(size="sample", work_directory=tmp_path)
     statinfo = os.stat(filepath)
     assert statinfo.st_size == 8787154
 
 
-@pytest.mark.smoke
-def test_extract_criteo(tmp_path):
+def test_extract_criteo_sample(tmp_path):
     filepath = criteo.download_criteo(size="sample", work_directory=tmp_path)
     filename = criteo.extract_criteo(size="sample", compressed_file=filepath)
     statinfo = os.stat(filename)

From 5d3b1ab3332151461c97c2c9660424a70b2eab70 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 09:18:53 +0200
Subject: [PATCH 13/43] criteo

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py         |  4 +-
 .../recommenders/datasets/test_criteo.py      | 44 ++++++++++++++-----
 .../recommenders/datasets/test_criteo.py      | 29 ------------
 3 files changed, 35 insertions(+), 42 deletions(-)
 delete mode 100644 tests/integration/recommenders/datasets/test_criteo.py

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index edea1e261d..63b3cd7bbf 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -51,7 +51,7 @@
         "tests/data_validation/recommenders/dataset/test_criteo.py::test_download_criteo_sample",  # 1.05s
         "tests/data_validation/recommenders/dataset/test_criteo.py::test_extract_criteo_sample",  # 1.22s
         "tests/data_validation/recommenders/dataset/test_criteo.py::test_criteo_load_pandas_df_sample",  # 1.73s
-        "tests/integration/recommenders/datasets/test_criteo.py::test_criteo_load_pandas_df",  # 1368.63s
+        "tests/data_validation/recommenders/datasets/test_criteo.py::test_criteo_load_pandas_df_full",  # 1368.63s
         #
         "tests/smoke/examples/test_notebooks_python.py::test_sar_single_node_smoke",  # 12.58s
         "tests/integration/examples/test_notebooks_python.py::test_sar_single_node_integration",  # 57.67s + 808.83s
@@ -142,8 +142,8 @@
         "tests/integration/recommenders/datasets/test_movielens.py::test_load_spark_df",  # 25.58s + 101.99s + 139.23s
         #
         "tests/data_validation/recommenders/dataset/test_criteo.py::test_criteo_load_spark_df_sample",  # 6.83s
+        "tests/data_validation/recommenders/datasets/test_criteo.py::test_criteo_load_spark_df_full",  # 374.64s
         "tests/smoke/examples/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_smoke",  # 32.45s
-        "tests/integration/recommenders/datasets/test_criteo.py::test_criteo_load_spark_df",  # 374.64s
         #
         "tests/smoke/examples/test_notebooks_pyspark.py::test_als_pyspark_smoke",  # 49.53s
         "tests/integration/examples/test_notebooks_pyspark.py::test_als_pyspark_integration",  # 110.58s
diff --git a/tests/data_validation/recommenders/datasets/test_criteo.py b/tests/data_validation/recommenders/datasets/test_criteo.py
index 7adc54c4d5..11666c4983 100644
--- a/tests/data_validation/recommenders/datasets/test_criteo.py
+++ b/tests/data_validation/recommenders/datasets/test_criteo.py
@@ -2,11 +2,26 @@
 # Licensed under the MIT License.
 
 import os
+import gc
 import pytest
 import pandas as pd
+
 from recommenders.datasets import criteo
 
 
+def test_download_criteo_sample(tmp_path):
+    filepath = criteo.download_criteo(size="sample", work_directory=tmp_path)
+    statinfo = os.stat(filepath)
+    assert statinfo.st_size == 8787154
+
+
+def test_extract_criteo_sample(tmp_path):
+    filepath = criteo.download_criteo(size="sample", work_directory=tmp_path)
+    filename = criteo.extract_criteo(size="sample", compressed_file=filepath)
+    statinfo = os.stat(filename)
+    assert statinfo.st_size == 24328072
+
+
 def test_criteo_load_pandas_df_sample(criteo_first_row):
     df = criteo.load_pandas_df(size="sample")
     assert df.shape[0] == 100000
@@ -14,6 +29,15 @@ def test_criteo_load_pandas_df_sample(criteo_first_row):
     assert df.loc[0].equals(pd.Series(criteo_first_row))
 
 
+def test_criteo_load_pandas_df_full(criteo_first_row):
+    df = criteo.load_pandas_df(size="full")
+    assert df.shape[0] == 45840617
+    assert df.shape[1] == 40
+    assert df.loc[0].equals(pd.Series(criteo_first_row))
+    del df
+    gc.collect()
+
+
 @pytest.mark.spark
 def test_criteo_load_spark_df_sample(spark, criteo_first_row):
     df = criteo.load_spark_df(spark, size="sample")
@@ -23,14 +47,12 @@ def test_criteo_load_spark_df_sample(spark, criteo_first_row):
     assert first_row == criteo_first_row
 
 
-def test_download_criteo_sample(tmp_path):
-    filepath = criteo.download_criteo(size="sample", work_directory=tmp_path)
-    statinfo = os.stat(filepath)
-    assert statinfo.st_size == 8787154
-
-
-def test_extract_criteo_sample(tmp_path):
-    filepath = criteo.download_criteo(size="sample", work_directory=tmp_path)
-    filename = criteo.extract_criteo(size="sample", compressed_file=filepath)
-    statinfo = os.stat(filename)
-    assert statinfo.st_size == 24328072
+@pytest.mark.spark
+def test_criteo_load_spark_df_full(spark, criteo_first_row):
+    df = criteo.load_spark_df(spark, size="full")
+    assert df.count() == 45840617
+    assert len(df.columns) == 40
+    first_row = df.limit(1).collect()[0].asDict()
+    assert first_row == criteo_first_row
+    del df
+    gc.collect()
diff --git a/tests/integration/recommenders/datasets/test_criteo.py b/tests/integration/recommenders/datasets/test_criteo.py
deleted file mode 100644
index bba0f1f1e6..0000000000
--- a/tests/integration/recommenders/datasets/test_criteo.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright (c) Recommenders contributors.
-# Licensed under the MIT License.
-
-import pytest
-import pandas as pd
-from recommenders.datasets import criteo
-import gc
-
-
-@pytest.mark.integration
-def test_criteo_load_pandas_df(criteo_first_row):
-    df = criteo.load_pandas_df(size="full")
-    assert df.shape[0] == 45840617
-    assert df.shape[1] == 40
-    assert df.loc[0].equals(pd.Series(criteo_first_row))
-    del df
-    gc.collect()
-
-
-@pytest.mark.spark
-@pytest.mark.integration
-def test_criteo_load_spark_df(spark, criteo_first_row):
-    df = criteo.load_spark_df(spark, size="full")
-    assert df.count() == 45840617
-    assert len(df.columns) == 40
-    first_row = df.limit(1).collect()[0].asDict()
-    assert first_row == criteo_first_row
-    del df
-    gc.collect()

From 79de76fe7b2b175282910cb91de30450489e3d5a Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 10:19:41 +0200
Subject: [PATCH 14/43] mind

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py         |  15 +-
 tests/data_validation/examples/test_mind.py   |  37 +++++
 .../recommenders/datasets/test_mind.py        | 157 ++++++++++++++++++
 .../recommenders/datasets/test_mind.py        |  65 --------
 tests/smoke/examples/test_notebooks_python.py |  12 --
 tests/smoke/recommenders/dataset/test_mind.py | 108 ------------
 6 files changed, 203 insertions(+), 191 deletions(-)
 create mode 100644 tests/data_validation/examples/test_mind.py
 create mode 100644 tests/data_validation/recommenders/datasets/test_mind.py
 delete mode 100644 tests/integration/recommenders/datasets/test_mind.py
 delete mode 100644 tests/smoke/recommenders/dataset/test_mind.py

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 63b3cd7bbf..c90bc31ffb 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -24,12 +24,15 @@
         "tests/integration/recommenders/datasets/test_movielens.py::test_load_item_df",  # 0.59s + 3.59s + 8.44s
         "tests/integration/recommenders/datasets/test_movielens.py::test_load_pandas_df",  # 37.33s + 352.99s + 673.61s
         #
-        "tests/smoke/recommenders/dataset/test_mind.py::test_mind_url",  # 0.38s
-        "tests/smoke/recommenders/dataset/test_mind.py::test_extract_mind",  # 10.23s
-        "tests/smoke/examples/test_notebooks_python.py::test_mind_utils",  # 219.77s
-        "tests/integration/recommenders/datasets/test_mind.py::test_download_mind",  # 37.63s
-        "tests/integration/recommenders/datasets/test_mind.py::test_extract_mind",  # 56.30s
-        "tests/integration/recommenders/datasets/test_mind.py::test_mind_utils_integration",  # 219.26s
+        "tests/data_validation/recommenders/dataset/test_mind.py::test_mind_url",  # 0.38s
+        "tests/data_validation/recommenders/dataset/test_mind.py::test_download_mind_demo",
+        "tests/data_validation/recommenders/datasets/test_mind.py::test_extract_mind_demo",
+        "tests/data_validation/recommenders/datasets/test_mind.py::test_download_mind_small",
+        "tests/data_validation/recommenders/datasets/test_mind.py::test_extract_mind_small",
+        "tests/data_validation/recommenders/datasets/test_mind.py::test_download_mind_large",
+        "tests/data_validation/recommenders/datasets/test_mind.py::test_extract_mind_large",
+        "tests/data_validation/examples/test_mind.py::test_mind_utils_runs",  # 219.77s
+        "tests/data_validation/examples/test_mind.py::test_mind_utils_values",  # 219.26s
         #
         "tests/smoke/examples/test_notebooks_python.py::test_lightgbm_quickstart_smoke",  # 46.42s
         #
diff --git a/tests/data_validation/examples/test_mind.py b/tests/data_validation/examples/test_mind.py
new file mode 100644
index 0000000000..e03162bf9b
--- /dev/null
+++ b/tests/data_validation/examples/test_mind.py
@@ -0,0 +1,37 @@
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+import pytest
+import papermill as pm
+import scrapbook as sb
+
+
+def test_mind_utils_runs(notebooks, output_notebook, kernel_name, tmp):
+    notebook_path = notebooks["mind_utils"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(mind_type="small", word_embedding_dim=300),
+    )
+
+
+def test_mind_utils_values(notebooks, output_notebook, kernel_name, tmp):
+    notebook_path = notebooks["mind_utils"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(mind_type="small", word_embedding_dim=300),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    assert results["utils_state"]["vert_num"] == 17
+    assert results["utils_state"]["subvert_num"] == 17
+    assert results["utils_state"]["word_num"] == 23404
+    assert results["utils_state"]["word_num_all"] == 41074
+    assert results["utils_state"]["embedding_exist_num"] == 22408
+    assert results["utils_state"]["embedding_exist_num_all"] == 37634
+    assert results["utils_state"]["uid2index"] == 5000
diff --git a/tests/data_validation/recommenders/datasets/test_mind.py b/tests/data_validation/recommenders/datasets/test_mind.py
new file mode 100644
index 0000000000..944b65df9d
--- /dev/null
+++ b/tests/data_validation/recommenders/datasets/test_mind.py
@@ -0,0 +1,157 @@
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+import os
+import pytest
+import requests
+import papermill as pm
+import scrapbook as sb
+
+from recommenders.datasets.mind import download_mind, extract_mind
+
+
+@pytest.mark.parametrize(
+    "url, content_length, etag",
+    [
+        (
+            "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip",
+            "17372879",
+            '"0x8D8B8AD5B233930"',
+        ),  # NOTE: the z20 blob returns the etag with ""
+        (
+            "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_dev.zip",
+            "10080022",
+            '"0x8D8B8AD5B188839"',
+        ),
+        (
+            "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_utils.zip",
+            "97292694",
+            '"0x8D8B8AD5B126C3B"',
+        ),
+        (
+            "https://mind201910small.blob.core.windows.net/release/MINDsmall_train.zip",
+            "52952752",
+            "0x8D834F2EB31BDEC",
+        ),
+        (
+            "https://mind201910small.blob.core.windows.net/release/MINDsmall_dev.zip",
+            "30945572",
+            "0x8D834F2EBA8D865",
+        ),
+        (
+            "https://mind201910small.blob.core.windows.net/release/MINDsmall_utils.zip",
+            "155178106",
+            "0x8D87F67F4AEB960",
+        ),
+        (
+            "https://mind201910small.blob.core.windows.net/release/MINDlarge_train.zip",
+            "530196631",
+            "0x8D8244E90C15C07",
+        ),
+        (
+            "https://mind201910small.blob.core.windows.net/release/MINDlarge_dev.zip",
+            "103456245",
+            "0x8D8244E92005849",
+        ),
+        (
+            "https://mind201910small.blob.core.windows.net/release/MINDlarge_utils.zip",
+            "150359301",
+            "0x8D87F67E6CA4364",
+        ),
+    ],
+)
+def test_mind_url(url, content_length, etag):
+    url_headers = requests.head(url).headers
+    assert url_headers["Content-Length"] == content_length
+    assert url_headers["ETag"] == etag
+
+
+def test_download_mind_demo(tmp):
+    train_path, valid_path = download_mind(size="demo", dest_path=tmp)
+    statinfo = os.stat(train_path)
+    assert statinfo.st_size == 17372879
+    statinfo = os.stat(valid_path)
+    assert statinfo.st_size == 10080022
+
+
+def test_download_mind_small(tmp):
+    train_path, valid_path = download_mind(size="small", dest_path=tmp)
+    statinfo = os.stat(train_path)
+    assert statinfo.st_size == 52952752
+    statinfo = os.stat(valid_path)
+    assert statinfo.st_size == 30945572
+
+
+def test_extract_mind_demo(tmp):
+    train_zip, valid_zip = download_mind(size="demo", dest_path=tmp)
+    train_path, valid_path = extract_mind(train_zip, valid_zip, clean_zip_file=False)
+
+    statinfo = os.stat(os.path.join(train_path, "behaviors.tsv"))
+    assert statinfo.st_size == 14707247
+    statinfo = os.stat(os.path.join(train_path, "entity_embedding.vec"))
+    assert statinfo.st_size == 16077470
+    statinfo = os.stat(os.path.join(train_path, "news.tsv"))
+    assert statinfo.st_size == 23120370
+    statinfo = os.stat(os.path.join(train_path, "relation_embedding.vec"))
+    assert statinfo.st_size == 1044588
+    statinfo = os.stat(os.path.join(valid_path, "behaviors.tsv"))
+    assert statinfo.st_size == 4434762
+    statinfo = os.stat(os.path.join(valid_path, "entity_embedding.vec"))
+    assert statinfo.st_size == 11591565
+    statinfo = os.stat(os.path.join(valid_path, "news.tsv"))
+    assert statinfo.st_size == 15624320
+    statinfo = os.stat(os.path.join(valid_path, "relation_embedding.vec"))
+    assert statinfo.st_size == 1044588
+
+
+def test_extract_mind_small(tmp):
+    train_zip, valid_zip = download_mind(size="small", dest_path=tmp)
+    train_path, valid_path = extract_mind(train_zip, valid_zip, clean_zip_file=False)
+
+    statinfo = os.stat(os.path.join(train_path, "behaviors.tsv"))
+    assert statinfo.st_size == 92019716
+    statinfo = os.stat(os.path.join(train_path, "entity_embedding.vec"))
+    assert statinfo.st_size == 25811015
+    statinfo = os.stat(os.path.join(train_path, "news.tsv"))
+    assert statinfo.st_size == 41202121
+    statinfo = os.stat(os.path.join(train_path, "relation_embedding.vec"))
+    assert statinfo.st_size == 1044588
+    statinfo = os.stat(os.path.join(valid_path, "behaviors.tsv"))
+    assert statinfo.st_size == 42838544
+    statinfo = os.stat(os.path.join(valid_path, "entity_embedding.vec"))
+    assert statinfo.st_size == 21960998
+    statinfo = os.stat(os.path.join(valid_path, "news.tsv"))
+    assert statinfo.st_size == 33519092
+    statinfo = os.stat(os.path.join(valid_path, "relation_embedding.vec"))
+    assert statinfo.st_size == 1044588
+
+
+def test_download_mind_large(tmp_path):
+    train_path, valid_path = download_mind(size="large", dest_path=tmp_path)
+    statinfo = os.stat(train_path)
+    assert statinfo.st_size == 530196631
+    statinfo = os.stat(valid_path)
+    assert statinfo.st_size == 103456245
+
+
+def test_extract_mind_large(tmp):
+    train_zip, valid_zip = download_mind(size="large", dest_path=tmp)
+    train_path, valid_path = extract_mind(train_zip, valid_zip)
+
+    statinfo = os.stat(os.path.join(train_path, "behaviors.tsv"))
+    assert statinfo.st_size == 1373844151
+    statinfo = os.stat(os.path.join(train_path, "entity_embedding.vec"))
+    assert statinfo.st_size == 40305151
+    statinfo = os.stat(os.path.join(train_path, "news.tsv"))
+    assert statinfo.st_size == 84881998
+    statinfo = os.stat(os.path.join(train_path, "relation_embedding.vec"))
+    assert statinfo.st_size == 1044588
+
+    statinfo = os.stat(os.path.join(valid_path, "behaviors.tsv"))
+    assert statinfo.st_size == 230662527
+    statinfo = os.stat(os.path.join(valid_path, "entity_embedding.vec"))
+    assert statinfo.st_size == 31958202
+    statinfo = os.stat(os.path.join(valid_path, "news.tsv"))
+    assert statinfo.st_size == 59055351
+    statinfo = os.stat(os.path.join(valid_path, "relation_embedding.vec"))
+    assert statinfo.st_size == 1044588
diff --git a/tests/integration/recommenders/datasets/test_mind.py b/tests/integration/recommenders/datasets/test_mind.py
deleted file mode 100644
index 17953c4678..0000000000
--- a/tests/integration/recommenders/datasets/test_mind.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# Copyright (c) Recommenders contributors.
-# Licensed under the MIT License.
-
-import os
-import pytest
-import papermill as pm
-import scrapbook as sb
-
-from recommenders.datasets.mind import download_mind, extract_mind
-
-
-@pytest.mark.integration
-def test_download_mind(tmp_path):
-    train_path, valid_path = download_mind(size="large", dest_path=tmp_path)
-    statinfo = os.stat(train_path)
-    assert statinfo.st_size == 530196631
-    statinfo = os.stat(valid_path)
-    assert statinfo.st_size == 103456245
-
-
-@pytest.mark.integration
-def test_extract_mind(tmp):
-    train_zip, valid_zip = download_mind(size="large", dest_path=tmp)
-    train_path, valid_path = extract_mind(train_zip, valid_zip)
-
-    statinfo = os.stat(os.path.join(train_path, "behaviors.tsv"))
-    assert statinfo.st_size == 1373844151
-    statinfo = os.stat(os.path.join(train_path, "entity_embedding.vec"))
-    assert statinfo.st_size == 40305151
-    statinfo = os.stat(os.path.join(train_path, "news.tsv"))
-    assert statinfo.st_size == 84881998
-    statinfo = os.stat(os.path.join(train_path, "relation_embedding.vec"))
-    assert statinfo.st_size == 1044588
-
-    statinfo = os.stat(os.path.join(valid_path, "behaviors.tsv"))
-    assert statinfo.st_size == 230662527
-    statinfo = os.stat(os.path.join(valid_path, "entity_embedding.vec"))
-    assert statinfo.st_size == 31958202
-    statinfo = os.stat(os.path.join(valid_path, "news.tsv"))
-    assert statinfo.st_size == 59055351
-    statinfo = os.stat(os.path.join(valid_path, "relation_embedding.vec"))
-    assert statinfo.st_size == 1044588
-
-
-@pytest.mark.notebooks
-@pytest.mark.integration
-def test_mind_utils_integration(notebooks, output_notebook, kernel_name, tmp):
-    notebook_path = notebooks["mind_utils"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(mind_type="small", word_embedding_dim=300),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    assert results["utils_state"]["vert_num"] == 17
-    assert results["utils_state"]["subvert_num"] == 17
-    assert results["utils_state"]["word_num"] == 23404
-    assert results["utils_state"]["word_num_all"] == 41074
-    assert results["utils_state"]["embedding_exist_num"] == 22408
-    assert results["utils_state"]["embedding_exist_num_all"] == 37634
-    assert results["utils_state"]["uid2index"] == 5000
diff --git a/tests/smoke/examples/test_notebooks_python.py b/tests/smoke/examples/test_notebooks_python.py
index 4673d71c8b..b30c46524e 100644
--- a/tests/smoke/examples/test_notebooks_python.py
+++ b/tests/smoke/examples/test_notebooks_python.py
@@ -156,15 +156,3 @@ def test_cornac_bpr_smoke(notebooks, output_notebook, kernel_name):
     assert results["ndcg"] == pytest.approx(0.4034, rel=TOL, abs=ABS_TOL)
     assert results["precision"] == pytest.approx(0.3550, rel=TOL, abs=ABS_TOL)
     assert results["recall"] == pytest.approx(0.1802, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.smoke
-@pytest.mark.notebooks
-def test_mind_utils(notebooks, output_notebook, kernel_name, tmp):
-    notebook_path = notebooks["mind_utils"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(mind_type="small", word_embedding_dim=300),
-    )
diff --git a/tests/smoke/recommenders/dataset/test_mind.py b/tests/smoke/recommenders/dataset/test_mind.py
deleted file mode 100644
index 332815128a..0000000000
--- a/tests/smoke/recommenders/dataset/test_mind.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# Copyright (c) Recommenders contributors.
-# Licensed under the MIT License.
-
-import pytest
-import os
-import requests
-from recommenders.datasets.mind import download_mind, extract_mind
-
-
-@pytest.mark.smoke
-@pytest.mark.parametrize(
-    "url, content_length, etag",
-    [
-        (
-            "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip",
-            "17372879",
-            '"0x8D8B8AD5B233930"',
-        ),  # NOTE: the z20 blob returns the etag with ""
-        (
-            "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_dev.zip",
-            "10080022",
-            '"0x8D8B8AD5B188839"',
-        ),
-        (
-            "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_utils.zip",
-            "97292694",
-            '"0x8D8B8AD5B126C3B"',
-        ),
-        (
-            "https://mind201910small.blob.core.windows.net/release/MINDsmall_train.zip",
-            "52952752",
-            "0x8D834F2EB31BDEC",
-        ),
-        (
-            "https://mind201910small.blob.core.windows.net/release/MINDsmall_dev.zip",
-            "30945572",
-            "0x8D834F2EBA8D865",
-        ),
-        (
-            "https://mind201910small.blob.core.windows.net/release/MINDsmall_utils.zip",
-            "155178106",
-            "0x8D87F67F4AEB960",
-        ),
-        (
-            "https://mind201910small.blob.core.windows.net/release/MINDlarge_train.zip",
-            "530196631",
-            "0x8D8244E90C15C07",
-        ),
-        (
-            "https://mind201910small.blob.core.windows.net/release/MINDlarge_dev.zip",
-            "103456245",
-            "0x8D8244E92005849",
-        ),
-        (
-            "https://mind201910small.blob.core.windows.net/release/MINDlarge_utils.zip",
-            "150359301",
-            "0x8D87F67E6CA4364",
-        ),
-    ],
-)
-def test_mind_url(url, content_length, etag):
-    url_headers = requests.head(url).headers
-    assert url_headers["Content-Length"] == content_length
-    assert url_headers["ETag"] == etag
-
-
-@pytest.mark.smoke
-@pytest.mark.parametrize("size", [("demo"), ("small")])
-def test_extract_mind(size, tmp):
-    train_zip, valid_zip = download_mind(size, dest_path=tmp)
-    train_path, valid_path = extract_mind(train_zip, valid_zip, clean_zip_file=False)
-
-    if size == "demo":
-        statinfo = os.stat(os.path.join(train_path, "behaviors.tsv"))
-        assert statinfo.st_size == 14707247
-        statinfo = os.stat(os.path.join(train_path, "entity_embedding.vec"))
-        assert statinfo.st_size == 16077470
-        statinfo = os.stat(os.path.join(train_path, "news.tsv"))
-        assert statinfo.st_size == 23120370
-        statinfo = os.stat(os.path.join(train_path, "relation_embedding.vec"))
-        assert statinfo.st_size == 1044588
-        statinfo = os.stat(os.path.join(valid_path, "behaviors.tsv"))
-        assert statinfo.st_size == 4434762
-        statinfo = os.stat(os.path.join(valid_path, "entity_embedding.vec"))
-        assert statinfo.st_size == 11591565
-        statinfo = os.stat(os.path.join(valid_path, "news.tsv"))
-        assert statinfo.st_size == 15624320
-        statinfo = os.stat(os.path.join(valid_path, "relation_embedding.vec"))
-        assert statinfo.st_size == 1044588
-    elif size == "small":
-        statinfo = os.stat(os.path.join(train_path, "behaviors.tsv"))
-        assert statinfo.st_size == 92019716
-        statinfo = os.stat(os.path.join(train_path, "entity_embedding.vec"))
-        assert statinfo.st_size == 25811015
-        statinfo = os.stat(os.path.join(train_path, "news.tsv"))
-        assert statinfo.st_size == 41202121
-        statinfo = os.stat(os.path.join(train_path, "relation_embedding.vec"))
-        assert statinfo.st_size == 1044588
-        statinfo = os.stat(os.path.join(valid_path, "behaviors.tsv"))
-        assert statinfo.st_size == 42838544
-        statinfo = os.stat(os.path.join(valid_path, "entity_embedding.vec"))
-        assert statinfo.st_size == 21960998
-        statinfo = os.stat(os.path.join(valid_path, "news.tsv"))
-        assert statinfo.st_size == 33519092
-        statinfo = os.stat(os.path.join(valid_path, "relation_embedding.vec"))
-        assert statinfo.st_size == 1044588
-    else:
-        assert False

From abaa152d1d3180d09cea165426250cdcf28f57f4 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 10:23:59 +0200
Subject: [PATCH 15/43] movielens

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 .../recommenders/datasets/test_mind.py                |  2 --
 .../recommenders/datasets/test_movielens.py           | 11 +++++++----
 .../recommenders/datasets/test_wikidata.py            |  2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tests/data_validation/recommenders/datasets/test_mind.py b/tests/data_validation/recommenders/datasets/test_mind.py
index 944b65df9d..d4f5f8c1f4 100644
--- a/tests/data_validation/recommenders/datasets/test_mind.py
+++ b/tests/data_validation/recommenders/datasets/test_mind.py
@@ -4,8 +4,6 @@
 import os
 import pytest
 import requests
-import papermill as pm
-import scrapbook as sb
 
 from recommenders.datasets.mind import download_mind, extract_mind
 
diff --git a/tests/data_validation/recommenders/datasets/test_movielens.py b/tests/data_validation/recommenders/datasets/test_movielens.py
index 81ba7e02ca..555981b1b5 100644
--- a/tests/data_validation/recommenders/datasets/test_movielens.py
+++ b/tests/data_validation/recommenders/datasets/test_movielens.py
@@ -1,5 +1,11 @@
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+
 import os
 import pandas
+from pandas.core.series import Series
+from pytest_mock import MockerFixture
 import pytest
 
 from recommenders.datasets.movielens import MockMovielensSchema
@@ -9,13 +15,10 @@
     MOCK_DATA_FORMAT,
     DEFAULT_HEADER,
     DEFAULT_ITEM_COL,
-    DEFAULT_USER_COL
+    DEFAULT_USER_COL,
 )
 from recommenders.utils.constants import DEFAULT_GENRE_COL, DEFAULT_TITLE_COL
 
-from pandas.core.series import Series
-from pytest_mock import MockerFixture
-
 
 @pytest.mark.parametrize("size", [10, 100])
 def test_mock_movielens_schema__has_default_col_names(size):
diff --git a/tests/data_validation/recommenders/datasets/test_wikidata.py b/tests/data_validation/recommenders/datasets/test_wikidata.py
index 2162e7a495..1df27671ff 100644
--- a/tests/data_validation/recommenders/datasets/test_wikidata.py
+++ b/tests/data_validation/recommenders/datasets/test_wikidata.py
@@ -1,8 +1,8 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-import pytest
 
+import pytest
 
 from recommenders.datasets.wikidata import (
     find_wikidata_id,

From da9dffc4e5ba87dafc8fbcfb36278407995af425 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 10:50:02 +0200
Subject: [PATCH 16/43] movielens WIP

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py         | 16 ++--
 .../recommenders/datasets/test_movielens.py   | 95 +++++++++++++------
 .../recommenders/datasets/test_movielens.py   | 20 ----
 .../recommenders/dataset/test_movielens.py    | 30 ------
 4 files changed, 74 insertions(+), 87 deletions(-)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index c90bc31ffb..467bbb7f17 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -15,12 +15,12 @@
 
 nightly_test_groups = {
     "group_cpu_001": [  # Total group time: 1883s
-        "tests/smoke/recommenders/dataset/test_movielens.py::test_download_and_extract_movielens",  # 0.45s
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_download_and_extract_movielens",  # 0.45s + 0.61s + 3.47s + 8.28s
+        #
         "tests/smoke/recommenders/dataset/test_movielens.py::test_load_item_df",  # 0.47s
         "tests/smoke/recommenders/dataset/test_movielens.py::test_load_pandas_df",  # 2.45s
         #
         "tests/integration/recommenders/datasets/test_movielens.py::test_load_pandas_df",  # 16.87s
-        "tests/integration/recommenders/datasets/test_movielens.py::test_download_and_extract_movielens",  # 0.61s + 3.47s + 8.28s
         "tests/integration/recommenders/datasets/test_movielens.py::test_load_item_df",  # 0.59s + 3.59s + 8.44s
         "tests/integration/recommenders/datasets/test_movielens.py::test_load_pandas_df",  # 37.33s + 352.99s + 673.61s
         #
@@ -156,11 +156,13 @@
 
 pr_gate_test_groups = {
     "group_cpu_001": [  # Total group time: 525.96s
-        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_default_param__succeed",
-        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_custom_param__succeed",
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__has_default_col_names",
-        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df__return_success",
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df_remove_default_col__return_success",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df_invalid_param__return_failure",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df__return_success",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_data__no_name_collision",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_default_param__succeed",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_custom_param__succeed",
         "tests/data_validation/recommenders/datasets/test_wikidata.py::test_find_wikidata_id",
         "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_wrong_bytes",
         "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_maybe",
@@ -205,11 +207,11 @@
         "tests/unit/examples/test_notebooks_python.py::test_sar_single_node_runs",
     ],
     "group_spark_001": [  # Total group time: 270.41s
-        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_spark_df_mock_100__with_custom_param__succeed",
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__return_success",
-        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__data_serialization_default_param",
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__store_tmp_file",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__data_serialization_default_param",
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_spark_df_mock_100__with_default_param__succeed",
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_spark_df_mock_100__with_custom_param__succeed",
         "tests/unit/recommenders/datasets/test_spark_splitter.py::test_stratified_splitter",
         "tests/unit/recommenders/datasets/test_spark_splitter.py::test_chrono_splitter",
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_user_diversity_item_feature_vector",
diff --git a/tests/data_validation/recommenders/datasets/test_movielens.py b/tests/data_validation/recommenders/datasets/test_movielens.py
index 555981b1b5..d31e7679f9 100644
--- a/tests/data_validation/recommenders/datasets/test_movielens.py
+++ b/tests/data_validation/recommenders/datasets/test_movielens.py
@@ -5,11 +5,27 @@
 import os
 import pandas
 from pandas.core.series import Series
-from pytest_mock import MockerFixture
 import pytest
+from pytest_mock import MockerFixture
+
+try:
+    from pyspark.sql.types import (
+        StructType,
+        StructField,
+        IntegerType,
+    )
+    from pyspark.sql.functions import col
+except ImportError:
+    pass  # skip this import if we are in pure python environment
 
 from recommenders.datasets.movielens import MockMovielensSchema
-from recommenders.datasets.movielens import load_pandas_df, load_spark_df
+from recommenders.datasets.movielens import (
+    load_pandas_df,
+    load_spark_df,
+    load_item_df,
+    download_movielens,
+    extract_movielens,
+)
 from recommenders.datasets.movielens import (
     DATA_FORMAT,
     MOCK_DATA_FORMAT,
@@ -66,6 +82,53 @@ def test_mock_movielens_schema__get_df__return_success(
         assert len(df[DEFAULT_GENRE_COL]) == size
 
 
+def test_mock_movielens_data__no_name_collision():
+    """
+    Making sure that no common names are shared between the mock and real dataset sizes
+    """
+    dataset_name = set(DATA_FORMAT.keys())
+    dataset_name_mock = set(MOCK_DATA_FORMAT.keys())
+    collision = dataset_name.intersection(dataset_name_mock)
+    assert not collision
+
+
+def test_load_pandas_df_mock_100__with_default_param__succeed():
+    df = load_pandas_df("mock100")
+    assert type(df) == pandas.DataFrame
+    assert len(df) == 100
+    assert not df[[DEFAULT_USER_COL, DEFAULT_ITEM_COL]].duplicated().any()
+
+
+def test_load_pandas_df_mock_100__with_custom_param__succeed():
+    df = load_pandas_df(
+        "mock100", title_col=DEFAULT_TITLE_COL, genres_col=DEFAULT_GENRE_COL
+    )
+    assert type(df[DEFAULT_TITLE_COL]) == Series
+    assert type(df[DEFAULT_GENRE_COL]) == Series
+    assert len(df) == 100
+    assert "|" in df.loc[0, DEFAULT_GENRE_COL]
+    assert df.loc[0, DEFAULT_TITLE_COL] == "foo"
+
+
+@pytest.mark.parametrize("size", ["100k", "1m", "10m", "20m"])
+def test_download_and_extract_movielens(size, tmp):
+    """Test movielens data download and extract"""
+    zip_path = os.path.join(tmp, "ml.zip")
+    download_movielens(size, dest_path=zip_path)
+    assert len(os.listdir(tmp)) == 1
+    assert os.path.exists(zip_path)
+
+    rating_path = os.path.join(tmp, "rating.dat")
+    item_path = os.path.join(tmp, "item.dat")
+    extract_movielens(
+        size, rating_path=rating_path, item_path=item_path, zip_path=zip_path
+    )
+    # Test if raw-zip file, rating file, and item file are cached
+    assert len(os.listdir(tmp)) == 3
+    assert os.path.exists(rating_path)
+    assert os.path.exists(item_path)
+
+
 @pytest.mark.spark
 @pytest.mark.parametrize("keep_genre_col", [True, False])
 @pytest.mark.parametrize("keep_title_col", [True, False])
@@ -109,29 +172,12 @@ def test_mock_movielens_schema__get_spark_df__data_serialization_default_param(
     assert df.count() == data_size
 
 
-def test_mock_movielens_data__no_name_collision():
-    """
-    Making sure that no common names are shared between the mock and real dataset sizes
-    """
-    dataset_name = set(DATA_FORMAT.keys())
-    dataset_name_mock = set(MOCK_DATA_FORMAT.keys())
-    collision = dataset_name.intersection(dataset_name_mock)
-    assert not collision
-
-
 @pytest.mark.spark
 def test_load_spark_df_mock_100__with_default_param__succeed(spark):
     df = load_spark_df(spark, "mock100")
     assert df.count() == 100
 
 
-def test_load_pandas_df_mock_100__with_default_param__succeed():
-    df = load_pandas_df("mock100")
-    assert type(df) == pandas.DataFrame
-    assert len(df) == 100
-    assert not df[[DEFAULT_USER_COL, DEFAULT_ITEM_COL]].duplicated().any()
-
-
 @pytest.mark.spark
 def test_load_spark_df_mock_100__with_custom_param__succeed(spark):
     df = load_spark_df(
@@ -142,14 +188,3 @@ def test_load_spark_df_mock_100__with_custom_param__succeed(spark):
     assert df.count() == 100
     assert "|" in df.take(1)[0][DEFAULT_GENRE_COL]
     assert df.take(1)[0][DEFAULT_TITLE_COL] == "foo"
-
-
-def test_load_pandas_df_mock_100__with_custom_param__succeed():
-    df = load_pandas_df(
-        "mock100", title_col=DEFAULT_TITLE_COL, genres_col=DEFAULT_GENRE_COL
-    )
-    assert type(df[DEFAULT_TITLE_COL]) == Series
-    assert type(df[DEFAULT_GENRE_COL]) == Series
-    assert len(df) == 100
-    assert "|" in df.loc[0, DEFAULT_GENRE_COL]
-    assert df.loc[0, DEFAULT_TITLE_COL] == "foo"
diff --git a/tests/integration/recommenders/datasets/test_movielens.py b/tests/integration/recommenders/datasets/test_movielens.py
index 886418ee23..11dc5388b7 100644
--- a/tests/integration/recommenders/datasets/test_movielens.py
+++ b/tests/integration/recommenders/datasets/test_movielens.py
@@ -271,23 +271,3 @@ def test_load_spark_df(
     assert len(df.columns) == 4
     del df
     gc.collect()
-
-
-@pytest.mark.integration
-@pytest.mark.parametrize("size", ["1m", "10m", "20m"])
-def test_download_and_extract_movielens(size, tmp):
-    """Test movielens data download and extract"""
-    zip_path = os.path.join(tmp, "ml.zip")
-    download_movielens(size, dest_path=zip_path)
-    assert len(os.listdir(tmp)) == 1
-    assert os.path.exists(zip_path)
-
-    rating_path = os.path.join(tmp, "rating.dat")
-    item_path = os.path.join(tmp, "item.dat")
-    extract_movielens(
-        size, rating_path=rating_path, item_path=item_path, zip_path=zip_path
-    )
-    # Test if raw-zip file, rating file, and item file are cached
-    assert len(os.listdir(tmp)) == 3
-    assert os.path.exists(rating_path)
-    assert os.path.exists(item_path)
diff --git a/tests/smoke/recommenders/dataset/test_movielens.py b/tests/smoke/recommenders/dataset/test_movielens.py
index 0649c2ab25..393e94f5de 100644
--- a/tests/smoke/recommenders/dataset/test_movielens.py
+++ b/tests/smoke/recommenders/dataset/test_movielens.py
@@ -11,16 +11,6 @@
     extract_movielens,
 )
 
-try:
-    from pyspark.sql.types import (
-        StructType,
-        StructField,
-        IntegerType,
-    )
-    from pyspark.sql.functions import col
-except ImportError:
-    pass  # skip this import if we are in pure python environment
-
 
 @pytest.mark.smoke
 @pytest.mark.parametrize(
@@ -210,23 +200,3 @@ def test_load_spark_df(
     assert df.count() == num_samples
     # user, item, rating and timestamp
     assert len(df.columns) == 4
-
-
-@pytest.mark.smoke
-@pytest.mark.parametrize("size", ["100k"])
-def test_download_and_extract_movielens(size, tmp):
-    """Test movielens data download and extract"""
-    zip_path = os.path.join(tmp, "ml.zip")
-    download_movielens(size, dest_path=zip_path)
-    assert len(os.listdir(tmp)) == 1
-    assert os.path.exists(zip_path)
-
-    rating_path = os.path.join(tmp, "rating.dat")
-    item_path = os.path.join(tmp, "item.dat")
-    extract_movielens(
-        size, rating_path=rating_path, item_path=item_path, zip_path=zip_path
-    )
-    # Test if raw-zip file, rating file, and item file are cached
-    assert len(os.listdir(tmp)) == 3
-    assert os.path.exists(rating_path)
-    assert os.path.exists(item_path)

From 51bca98b6f8e60840ff4515486645ff79d8f3702 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 17:58:17 +0200
Subject: [PATCH 17/43] movielens

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py         |  16 +-
 .../recommenders/datasets/test_movielens.py   | 269 ++++++++++++++++-
 tests/integration/recommenders/__init__.py    |   1 -
 .../recommenders/datasets/__init__.py         |   1 -
 .../recommenders/datasets/test_movielens.py   | 273 ------------------
 tests/smoke/recommenders/dataset/__init__.py  |   1 -
 .../recommenders/dataset/test_movielens.py    | 202 -------------
 7 files changed, 273 insertions(+), 490 deletions(-)
 delete mode 100644 tests/integration/recommenders/__init__.py
 delete mode 100644 tests/integration/recommenders/datasets/__init__.py
 delete mode 100644 tests/integration/recommenders/datasets/test_movielens.py
 delete mode 100644 tests/smoke/recommenders/dataset/__init__.py
 delete mode 100644 tests/smoke/recommenders/dataset/test_movielens.py

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 467bbb7f17..660991897f 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -16,16 +16,11 @@
 nightly_test_groups = {
     "group_cpu_001": [  # Total group time: 1883s
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_download_and_extract_movielens",  # 0.45s + 0.61s + 3.47s + 8.28s
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_item_df",  # 0.47s + 0.59s + 3.59s + 8.44s
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df",  # 16.87s + 37.33s + 352.99s + 673.61s
         #
-        "tests/smoke/recommenders/dataset/test_movielens.py::test_load_item_df",  # 0.47s
-        "tests/smoke/recommenders/dataset/test_movielens.py::test_load_pandas_df",  # 2.45s
-        #
-        "tests/integration/recommenders/datasets/test_movielens.py::test_load_pandas_df",  # 16.87s
-        "tests/integration/recommenders/datasets/test_movielens.py::test_load_item_df",  # 0.59s + 3.59s + 8.44s
-        "tests/integration/recommenders/datasets/test_movielens.py::test_load_pandas_df",  # 37.33s + 352.99s + 673.61s
-        #
-        "tests/data_validation/recommenders/dataset/test_mind.py::test_mind_url",  # 0.38s
-        "tests/data_validation/recommenders/dataset/test_mind.py::test_download_mind_demo",
+        "tests/data_validation/recommenders/datasets/test_mind.py::test_mind_url",  # 0.38s
+        "tests/data_validation/recommenders/datasets/test_mind.py::test_download_mind_demo",
         "tests/data_validation/recommenders/datasets/test_mind.py::test_extract_mind_demo",
         "tests/data_validation/recommenders/datasets/test_mind.py::test_download_mind_small",
         "tests/data_validation/recommenders/datasets/test_mind.py::test_extract_mind_small",
@@ -141,8 +136,7 @@
         "tests/smoke/recommenders/recommender/test_newsrec_model.py::test_model_lstur",  # 194.88s
     ],
     "group_spark_001": [  # Total group time: 987.16s
-        "tests/smoke/recommenders/dataset/test_movielens.py::test_load_spark_df",  # 4.33s
-        "tests/integration/recommenders/datasets/test_movielens.py::test_load_spark_df",  # 25.58s + 101.99s + 139.23s
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_spark_df",  # 4.33s+ 25.58s + 101.99s + 139.23s
         #
         "tests/data_validation/recommenders/dataset/test_criteo.py::test_criteo_load_spark_df_sample",  # 6.83s
         "tests/data_validation/recommenders/datasets/test_criteo.py::test_criteo_load_spark_df_full",  # 374.64s
diff --git a/tests/data_validation/recommenders/datasets/test_movielens.py b/tests/data_validation/recommenders/datasets/test_movielens.py
index d31e7679f9..5af7e9673f 100644
--- a/tests/data_validation/recommenders/datasets/test_movielens.py
+++ b/tests/data_validation/recommenders/datasets/test_movielens.py
@@ -3,6 +3,7 @@
 
 
 import os
+import gc
 import pandas
 from pandas.core.series import Series
 import pytest
@@ -18,6 +19,7 @@
 except ImportError:
     pass  # skip this import if we are in pure python environment
 
+from recommenders.utils.constants import DEFAULT_GENRE_COL, DEFAULT_TITLE_COL
 from recommenders.datasets.movielens import MockMovielensSchema
 from recommenders.datasets.movielens import (
     load_pandas_df,
@@ -33,7 +35,6 @@
     DEFAULT_ITEM_COL,
     DEFAULT_USER_COL,
 )
-from recommenders.utils.constants import DEFAULT_GENRE_COL, DEFAULT_TITLE_COL
 
 
 @pytest.mark.parametrize("size", [10, 100])
@@ -129,6 +130,161 @@ def test_download_and_extract_movielens(size, tmp):
     assert os.path.exists(item_path)
 
 
+@pytest.mark.parametrize(
+    "size, num_samples, num_movies, movie_example, title_example, genres_example, year_example",
+    [
+        (
+            "100k",
+            100000,
+            1682,
+            1,
+            "Toy Story (1995)",
+            "Animation|Children's|Comedy",
+            "1995",
+        ),
+        (
+            "1m",
+            1000209,
+            3883,
+            1,
+            "Toy Story (1995)",
+            "Animation|Children's|Comedy",
+            "1995",
+        ),
+        (
+            "10m",
+            10000054,
+            10681,
+            1,
+            "Toy Story (1995)",
+            "Adventure|Animation|Children|Comedy|Fantasy",
+            "1995",
+        ),
+        (
+            "20m",
+            20000263,
+            27278,
+            1,
+            "Toy Story (1995)",
+            "Adventure|Animation|Children|Comedy|Fantasy",
+            "1995",
+        ),
+    ],
+)
+def test_load_pandas_df(
+    size,
+    num_samples,
+    num_movies,
+    movie_example,
+    title_example,
+    genres_example,
+    year_example,
+    tmp,
+):
+    """Test MovieLens dataset load as pd.DataFrame"""
+    # Test if correct data are loaded
+    header = ["a", "b", "c"]
+    df = load_pandas_df(size=size, local_cache_path=tmp, header=header)
+    assert len(df) == num_samples
+    assert len(df.columns) == len(header)
+    # Test if raw-zip file, rating file, and item file are cached
+    assert len(os.listdir(tmp)) == 3
+
+    # Test title, genres, and released year load
+    header = ["a", "b", "c", "d", "e"]
+    with pytest.warns(Warning):
+        df = load_pandas_df(
+            size=size,
+            header=header,
+            local_cache_path=tmp,
+            title_col="Title",
+            genres_col="Genres",
+            year_col="Year",
+        )
+        assert len(df) == num_samples
+        assert (
+            len(df.columns) == 7
+        )  # 4 header columns (user, item, rating, timestamp) and 3 feature columns
+        assert "e" not in df.columns  # only the first 4 header columns are used
+        # Get two records of the same items and check if the item-features are the same.
+        head = df.loc[df["b"] == movie_example][:2]
+        title = head["Title"].values
+        assert title[0] == title[1]
+        assert title[0] == title_example
+        genres = head["Genres"].values
+        assert genres[0] == genres[1]
+        assert genres[0] == genres_example
+        year = head["Year"].values
+        assert year[0] == year[1]
+        assert year[0] == year_example
+
+    # Test default arguments
+    df = load_pandas_df(size)
+    assert len(df) == num_samples
+    # user, item, rating and timestamp
+    assert len(df.columns) == 4
+    del df
+    gc.collect()
+
+
+@pytest.mark.parametrize(
+    "size, num_movies, movie_example, title_example, genres_example, year_example",
+    [
+        ("100k", 1682, 1, "Toy Story (1995)", "Animation|Children's|Comedy", "1995"),
+        ("1m", 3883, 1, "Toy Story (1995)", "Animation|Children's|Comedy", "1995"),
+        (
+            "10m",
+            10681,
+            1,
+            "Toy Story (1995)",
+            "Adventure|Animation|Children|Comedy|Fantasy",
+            "1995",
+        ),
+        (
+            "20m",
+            27278,
+            1,
+            "Toy Story (1995)",
+            "Adventure|Animation|Children|Comedy|Fantasy",
+            "1995",
+        ),
+    ],
+)
+def test_load_item_df(
+    size,
+    num_movies,
+    movie_example,
+    title_example,
+    genres_example,
+    year_example,
+    tmp,
+):
+    """Test movielens item data load (not rating data)"""
+    df = load_item_df(size, local_cache_path=tmp, title_col="title")
+    assert len(df) == num_movies
+    # movie_col and title_col should be loaded
+    assert len(df.columns) == 2
+    assert df["title"][0] == title_example
+
+    # Test title and genres
+    df = load_item_df(
+        size,
+        local_cache_path=tmp,
+        movie_col="item",
+        genres_col="genres",
+        year_col="year",
+    )
+    assert len(df) == num_movies
+    # movile_col, genres_col and year_col
+    assert len(df.columns) == 3
+
+    assert df["item"][0] == movie_example
+    assert df["genres"][0] == genres_example
+    assert df["year"][0] == year_example
+    del df
+    gc.collect()
+
+
 @pytest.mark.spark
 @pytest.mark.parametrize("keep_genre_col", [True, False])
 @pytest.mark.parametrize("keep_title_col", [True, False])
@@ -188,3 +344,114 @@ def test_load_spark_df_mock_100__with_custom_param__succeed(spark):
     assert df.count() == 100
     assert "|" in df.take(1)[0][DEFAULT_GENRE_COL]
     assert df.take(1)[0][DEFAULT_TITLE_COL] == "foo"
+
+
+@pytest.mark.spark
+@pytest.mark.parametrize(
+    "size, num_samples, num_movies, movie_example, title_example, genres_example, year_example",
+    [
+        (
+            "100k",
+            100000,
+            1682,
+            1,
+            "Toy Story (1995)",
+            "Animation|Children's|Comedy",
+            "1995",
+        ),
+        (
+            "1m",
+            1000209,
+            3883,
+            1,
+            "Toy Story (1995)",
+            "Animation|Children's|Comedy",
+            "1995",
+        ),
+        (
+            "10m",
+            10000054,
+            10681,
+            1,
+            "Toy Story (1995)",
+            "Adventure|Animation|Children|Comedy|Fantasy",
+            "1995",
+        ),
+        (
+            "20m",
+            20000263,
+            27278,
+            1,
+            "Toy Story (1995)",
+            "Adventure|Animation|Children|Comedy|Fantasy",
+            "1995",
+        ),
+    ],
+)
+def test_load_spark_df(
+    size,
+    num_samples,
+    num_movies,
+    movie_example,
+    title_example,
+    genres_example,
+    year_example,
+    tmp,
+    spark,
+):
+    """Test MovieLens dataset load into pySpark.DataFrame"""
+
+    # Test if correct data are loaded
+    header = ["1", "2", "3"]
+    schema = StructType(
+        [
+            StructField("u", IntegerType()),
+            StructField("m", IntegerType()),
+        ]
+    )
+    with pytest.warns(Warning):
+        df = load_spark_df(
+            spark, size=size, local_cache_path=tmp, header=header, schema=schema
+        )
+        assert df.count() == num_samples
+        # Test if schema is used when both schema and header are provided
+        assert len(df.columns) == len(schema)
+        # Test if raw-zip file, rating file, and item file are cached
+        assert len(os.listdir(tmp)) == 3
+
+    # Test title, genres, and released year load
+    header = ["a", "b", "c", "d", "e"]
+    with pytest.warns(Warning):
+        df = load_spark_df(
+            spark,
+            size=size,
+            local_cache_path=tmp,
+            header=header,
+            title_col="Title",
+            genres_col="Genres",
+            year_col="Year",
+        )
+        assert df.count() == num_samples
+        assert (
+            len(df.columns) == 7
+        )  # 4 header columns (user, item, rating, timestamp) and 3 feature columns
+        assert "e" not in df.columns  # only the first 4 header columns are used
+        # Get two records of the same items and check if the item-features are the same.
+        head = df.filter(col("b") == movie_example).limit(2)
+        title = head.select("Title").collect()
+        assert title[0][0] == title[1][0]
+        assert title[0][0] == title_example
+        genres = head.select("Genres").collect()
+        assert genres[0][0] == genres[1][0]
+        assert genres[0][0] == genres_example
+        year = head.select("Year").collect()
+        assert year[0][0] == year[1][0]
+        assert year[0][0] == year_example
+
+    # Test default arguments
+    df = load_spark_df(spark, size)
+    assert df.count() == num_samples
+    # user, item, rating and timestamp
+    assert len(df.columns) == 4
+    del df
+    gc.collect()
diff --git a/tests/integration/recommenders/__init__.py b/tests/integration/recommenders/__init__.py
deleted file mode 100644
index 25dc11fb93..0000000000
--- a/tests/integration/recommenders/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Needed to execute the AzureML tests
\ No newline at end of file
diff --git a/tests/integration/recommenders/datasets/__init__.py b/tests/integration/recommenders/datasets/__init__.py
deleted file mode 100644
index 25dc11fb93..0000000000
--- a/tests/integration/recommenders/datasets/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Needed to execute the AzureML tests
\ No newline at end of file
diff --git a/tests/integration/recommenders/datasets/test_movielens.py b/tests/integration/recommenders/datasets/test_movielens.py
deleted file mode 100644
index 11dc5388b7..0000000000
--- a/tests/integration/recommenders/datasets/test_movielens.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# Copyright (c) Recommenders contributors.
-# Licensed under the MIT License.
-
-import os
-import pytest
-from recommenders.datasets.movielens import (
-    load_pandas_df,
-    load_spark_df,
-    load_item_df,
-    download_movielens,
-    extract_movielens,
-)
-import gc
-
-try:
-    from pyspark.sql.types import (
-        StructType,
-        StructField,
-        IntegerType,
-    )
-    from pyspark.sql.functions import col
-except ImportError:
-    pass  # skip this import if we are in pure python environment
-
-
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, num_samples, num_movies, movie_example, title_example, genres_example, year_example",
-    [
-        (
-            "1m",
-            1000209,
-            3883,
-            1,
-            "Toy Story (1995)",
-            "Animation|Children's|Comedy",
-            "1995",
-        ),
-        (
-            "10m",
-            10000054,
-            10681,
-            1,
-            "Toy Story (1995)",
-            "Adventure|Animation|Children|Comedy|Fantasy",
-            "1995",
-        ),
-        (
-            "20m",
-            20000263,
-            27278,
-            1,
-            "Toy Story (1995)",
-            "Adventure|Animation|Children|Comedy|Fantasy",
-            "1995",
-        ),
-    ],
-)
-def test_load_pandas_df(
-    size,
-    num_samples,
-    num_movies,
-    movie_example,
-    title_example,
-    genres_example,
-    year_example,
-    tmp,
-):
-    """Test MovieLens dataset load as pd.DataFrame"""
-    # Test if correct data are loaded
-    header = ["a", "b", "c"]
-    df = load_pandas_df(size=size, local_cache_path=tmp, header=header)
-    assert len(df) == num_samples
-    assert len(df.columns) == len(header)
-    # Test if raw-zip file, rating file, and item file are cached
-    assert len(os.listdir(tmp)) == 3
-
-    # Test title, genres, and released year load
-    header = ["a", "b", "c", "d", "e"]
-    with pytest.warns(Warning):
-        df = load_pandas_df(
-            size=size,
-            header=header,
-            local_cache_path=tmp,
-            title_col="Title",
-            genres_col="Genres",
-            year_col="Year",
-        )
-        assert len(df) == num_samples
-        assert (
-            len(df.columns) == 7
-        )  # 4 header columns (user, item, rating, timestamp) and 3 feature columns
-        assert "e" not in df.columns  # only the first 4 header columns are used
-        # Get two records of the same items and check if the item-features are the same.
-        head = df.loc[df["b"] == movie_example][:2]
-        title = head["Title"].values
-        assert title[0] == title[1]
-        assert title[0] == title_example
-        genres = head["Genres"].values
-        assert genres[0] == genres[1]
-        assert genres[0] == genres_example
-        year = head["Year"].values
-        assert year[0] == year[1]
-        assert year[0] == year_example
-
-    # Test default arguments
-    df = load_pandas_df(size)
-    assert len(df) == num_samples
-    # user, item, rating and timestamp
-    assert len(df.columns) == 4
-    del df
-    gc.collect()
-
-
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, num_movies, movie_example, title_example, genres_example, year_example",
-    [
-        ("1m", 3883, 1, "Toy Story (1995)", "Animation|Children's|Comedy", "1995"),
-        (
-            "10m",
-            10681,
-            1,
-            "Toy Story (1995)",
-            "Adventure|Animation|Children|Comedy|Fantasy",
-            "1995",
-        ),
-        (
-            "20m",
-            27278,
-            1,
-            "Toy Story (1995)",
-            "Adventure|Animation|Children|Comedy|Fantasy",
-            "1995",
-        ),
-    ],
-)
-def test_load_item_df(
-    size,
-    num_movies,
-    movie_example,
-    title_example,
-    genres_example,
-    year_example,
-    tmp,
-):
-    """Test movielens item data load (not rating data)"""
-    df = load_item_df(size, local_cache_path=tmp, title_col="title")
-    assert len(df) == num_movies
-    # movie_col and title_col should be loaded
-    assert len(df.columns) == 2
-    assert df["title"][0] == title_example
-
-    # Test title and genres
-    df = load_item_df(
-        size,
-        local_cache_path=tmp,
-        movie_col="item",
-        genres_col="genres",
-        year_col="year",
-    )
-    assert len(df) == num_movies
-    # movile_col, genres_col and year_col
-    assert len(df.columns) == 3
-
-    assert df["item"][0] == movie_example
-    assert df["genres"][0] == genres_example
-    assert df["year"][0] == year_example
-    del df
-    gc.collect()
-
-
-@pytest.mark.integration
-@pytest.mark.spark
-@pytest.mark.parametrize(
-    "size, num_samples, num_movies, movie_example, title_example, genres_example, year_example",
-    [
-        (
-            "1m",
-            1000209,
-            3883,
-            1,
-            "Toy Story (1995)",
-            "Animation|Children's|Comedy",
-            "1995",
-        ),
-        (
-            "10m",
-            10000054,
-            10681,
-            1,
-            "Toy Story (1995)",
-            "Adventure|Animation|Children|Comedy|Fantasy",
-            "1995",
-        ),
-        (
-            "20m",
-            20000263,
-            27278,
-            1,
-            "Toy Story (1995)",
-            "Adventure|Animation|Children|Comedy|Fantasy",
-            "1995",
-        ),
-    ],
-)
-def test_load_spark_df(
-    size,
-    num_samples,
-    num_movies,
-    movie_example,
-    title_example,
-    genres_example,
-    year_example,
-    tmp,
-    spark,
-):
-    """Test MovieLens dataset load into pySpark.DataFrame"""
-
-    # Test if correct data are loaded
-    header = ["1", "2", "3"]
-    schema = StructType(
-        [
-            StructField("u", IntegerType()),
-            StructField("m", IntegerType()),
-        ]
-    )
-    with pytest.warns(Warning):
-        df = load_spark_df(
-            spark, size=size, local_cache_path=tmp, header=header, schema=schema
-        )
-        assert df.count() == num_samples
-        # Test if schema is used when both schema and header are provided
-        assert len(df.columns) == len(schema)
-        # Test if raw-zip file, rating file, and item file are cached
-        assert len(os.listdir(tmp)) == 3
-
-    # Test title, genres, and released year load
-    header = ["a", "b", "c", "d", "e"]
-    with pytest.warns(Warning):
-        df = load_spark_df(
-            spark,
-            size=size,
-            local_cache_path=tmp,
-            header=header,
-            title_col="Title",
-            genres_col="Genres",
-            year_col="Year",
-        )
-        assert df.count() == num_samples
-        assert (
-            len(df.columns) == 7
-        )  # 4 header columns (user, item, rating, timestamp) and 3 feature columns
-        assert "e" not in df.columns  # only the first 4 header columns are used
-        # Get two records of the same items and check if the item-features are the same.
-        head = df.filter(col("b") == movie_example).limit(2)
-        title = head.select("Title").collect()
-        assert title[0][0] == title[1][0]
-        assert title[0][0] == title_example
-        genres = head.select("Genres").collect()
-        assert genres[0][0] == genres[1][0]
-        assert genres[0][0] == genres_example
-        year = head.select("Year").collect()
-        assert year[0][0] == year[1][0]
-        assert year[0][0] == year_example
-
-    # Test default arguments
-    df = load_spark_df(spark, size)
-    assert df.count() == num_samples
-    # user, item, rating and timestamp
-    assert len(df.columns) == 4
-    del df
-    gc.collect()
diff --git a/tests/smoke/recommenders/dataset/__init__.py b/tests/smoke/recommenders/dataset/__init__.py
deleted file mode 100644
index 25dc11fb93..0000000000
--- a/tests/smoke/recommenders/dataset/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Needed to execute the AzureML tests
\ No newline at end of file
diff --git a/tests/smoke/recommenders/dataset/test_movielens.py b/tests/smoke/recommenders/dataset/test_movielens.py
deleted file mode 100644
index 393e94f5de..0000000000
--- a/tests/smoke/recommenders/dataset/test_movielens.py
+++ /dev/null
@@ -1,202 +0,0 @@
-# Copyright (c) Recommenders contributors.
-# Licensed under the MIT License.
-
-import os
-import pytest
-from recommenders.datasets.movielens import (
-    load_pandas_df,
-    load_spark_df,
-    load_item_df,
-    download_movielens,
-    extract_movielens,
-)
-
-
-@pytest.mark.smoke
-@pytest.mark.parametrize(
-    "size, num_samples, num_movies, movie_example, title_example, genres_example, year_example",
-    [
-        (
-            "100k",
-            100000,
-            1682,
-            1,
-            "Toy Story (1995)",
-            "Animation|Children's|Comedy",
-            "1995",
-        )
-    ],
-)
-def test_load_pandas_df(
-    size,
-    num_samples,
-    num_movies,
-    movie_example,
-    title_example,
-    genres_example,
-    year_example,
-    tmp,
-):
-    """Test MovieLens dataset load as pd.DataFrame"""
-    # Test if correct data are loaded
-    header = ["a", "b", "c"]
-    df = load_pandas_df(size=size, local_cache_path=tmp, header=header)
-    assert len(df) == num_samples
-    assert len(df.columns) == len(header)
-    # Test if raw-zip file, rating file, and item file are cached
-    assert len(os.listdir(tmp)) == 3
-
-    # Test title, genres, and released year load
-    header = ["a", "b", "c", "d", "e"]
-    with pytest.warns(Warning):
-        df = load_pandas_df(
-            size=size,
-            header=header,
-            local_cache_path=tmp,
-            title_col="Title",
-            genres_col="Genres",
-            year_col="Year",
-        )
-        assert len(df) == num_samples
-        assert (
-            len(df.columns) == 7
-        )  # 4 header columns (user, item, rating, timestamp) and 3 feature columns
-        assert "e" not in df.columns  # only the first 4 header columns are used
-        # Get two records of the same items and check if the item-features are the same.
-        head = df.loc[df["b"] == movie_example][:2]
-        title = head["Title"].values
-        assert title[0] == title[1]
-        assert title[0] == title_example
-        genres = head["Genres"].values
-        assert genres[0] == genres[1]
-        assert genres[0] == genres_example
-        year = head["Year"].values
-        assert year[0] == year[1]
-        assert year[0] == year_example
-
-    # Test default arguments
-    df = load_pandas_df(size)
-    assert len(df) == num_samples
-    # user, item, rating and timestamp
-    assert len(df.columns) == 4
-
-
-@pytest.mark.smoke
-@pytest.mark.parametrize(
-    "size, num_movies, movie_example, title_example, genres_example, year_example",
-    [("100k", 1682, 1, "Toy Story (1995)", "Animation|Children's|Comedy", "1995")],
-)
-def test_load_item_df(
-    size,
-    num_movies,
-    movie_example,
-    title_example,
-    genres_example,
-    year_example,
-    tmp,
-):
-    """Test movielens item data load (not rating data)"""
-    df = load_item_df(size, local_cache_path=tmp, title_col="title")
-    assert len(df) == num_movies
-    # movie_col and title_col should be loaded
-    assert len(df.columns) == 2
-    assert df["title"][0] == title_example
-
-    # Test title and genres
-    df = load_item_df(
-        size,
-        local_cache_path=tmp,
-        movie_col="item",
-        genres_col="genres",
-        year_col="year",
-    )
-    assert len(df) == num_movies
-    # movile_col, genres_col and year_col
-    assert len(df.columns) == 3
-
-    assert df["item"][0] == movie_example
-    assert df["genres"][0] == genres_example
-    assert df["year"][0] == year_example
-
-
-@pytest.mark.smoke
-@pytest.mark.spark
-@pytest.mark.parametrize(
-    "size, num_samples, num_movies, movie_example, title_example, genres_example, year_example",
-    [
-        (
-            "100k",
-            100000,
-            1682,
-            1,
-            "Toy Story (1995)",
-            "Animation|Children's|Comedy",
-            "1995",
-        )
-    ],
-)
-def test_load_spark_df(
-    size,
-    num_samples,
-    num_movies,
-    movie_example,
-    title_example,
-    genres_example,
-    year_example,
-    tmp,
-    spark,
-):
-    """Test MovieLens dataset load into pySpark.DataFrame"""
-
-    # Test if correct data are loaded
-    header = ["1", "2", "3"]
-    schema = StructType(
-        [
-            StructField("u", IntegerType()),
-            StructField("m", IntegerType()),
-        ]
-    )
-    with pytest.warns(Warning):
-        df = load_spark_df(
-            spark, size=size, local_cache_path=tmp, header=header, schema=schema
-        )
-        assert df.count() == num_samples
-        # Test if schema is used when both schema and header are provided
-        assert len(df.columns) == len(schema)
-        # Test if raw-zip file, rating file, and item file are cached
-        assert len(os.listdir(tmp)) == 3
-
-    # Test title, genres, and released year load
-    header = ["a", "b", "c", "d", "e"]
-    with pytest.warns(Warning):
-        df = load_spark_df(
-            spark,
-            size=size,
-            local_cache_path=tmp,
-            header=header,
-            title_col="Title",
-            genres_col="Genres",
-            year_col="Year",
-        )
-        assert df.count() == num_samples
-        assert (
-            len(df.columns) == 7
-        )  # 4 header columns (user, item, rating, timestamp) and 3 feature columns
-        assert "e" not in df.columns  # only the first 4 header columns are used
-        # Get two records of the same items and check if the item-features are the same.
-        head = df.filter(col("b") == movie_example).limit(2)
-        title = head.select("Title").collect()
-        assert title[0][0] == title[1][0]
-        assert title[0][0] == title_example
-        genres = head.select("Genres").collect()
-        assert genres[0][0] == genres[1][0]
-        assert genres[0][0] == genres_example
-        year = head.select("Year").collect()
-        assert year[0][0] == year[1][0]
-        assert year[0][0] == year_example
-
-    # Test default arguments
-    df = load_spark_df(spark, size)
-    assert df.count() == num_samples
-    # user, item, rating and timestamp
-    assert len(df.columns) == 4

From a42cea04ba7f7a7f52b25366b4b2f26596318d5a Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 18:00:11 +0200
Subject: [PATCH 18/43] :bug:

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 660991897f..9064ae619d 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -140,10 +140,12 @@
         #
         "tests/data_validation/recommenders/dataset/test_criteo.py::test_criteo_load_spark_df_sample",  # 6.83s
         "tests/data_validation/recommenders/datasets/test_criteo.py::test_criteo_load_spark_df_full",  # 374.64s
+        #
         "tests/smoke/examples/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_smoke",  # 32.45s
         #
         "tests/smoke/examples/test_notebooks_pyspark.py::test_als_pyspark_smoke",  # 49.53s
         "tests/integration/examples/test_notebooks_pyspark.py::test_als_pyspark_integration",  # 110.58s
+        #
         "tests/integration/examples/test_notebooks_pyspark.py::test_benchmark_movielens_pyspark",  # 142s
     ],
 }

From c243389699dd817acfc2586b1e7ce48772a69517 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 18:09:11 +0200
Subject: [PATCH 19/43] integration to functional

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py         |  48 +-
 tests/integration/__init__.py                 |   1 -
 tests/integration/examples/__init__.py        |   1 -
 .../examples/test_notebooks_gpu.py            | 754 ------------------
 .../examples/test_notebooks_pyspark.py        |  97 ---
 .../examples/test_notebooks_python.py         | 336 --------
 6 files changed, 24 insertions(+), 1213 deletions(-)
 delete mode 100644 tests/integration/__init__.py
 delete mode 100644 tests/integration/examples/__init__.py
 delete mode 100644 tests/integration/examples/test_notebooks_gpu.py
 delete mode 100644 tests/integration/examples/test_notebooks_pyspark.py
 delete mode 100644 tests/integration/examples/test_notebooks_python.py

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 9064ae619d..08715249d1 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -32,18 +32,18 @@
         "tests/smoke/examples/test_notebooks_python.py::test_lightgbm_quickstart_smoke",  # 46.42s
         #
         "tests/smoke/examples/test_notebooks_python.py::test_cornac_bpr_smoke",  # 16.62s
-        "tests/integration/examples/test_notebooks_python.py::test_cornac_bpr_integration",  # 165.72s
+        "tests/functional/examples/test_notebooks_python.py::test_cornac_bpr_functional",  # 165.72s
     ],
     "group_cpu_002": [  # Total group time: 1801s
         "tests/smoke/examples/test_notebooks_python.py::test_baseline_deep_dive_smoke",  # 15.98s
-        "tests/integration/examples/test_notebooks_python.py::test_baseline_deep_dive_integration",  # 170.73s
+        "tests/functional/examples/test_notebooks_python.py::test_baseline_deep_dive_functional",  # 170.73s
         #
         "tests/smoke/examples/test_notebooks_python.py::test_surprise_svd_smoke",  # 45.88s
-        "tests/integration/examples/test_notebooks_python.py::test_surprise_svd_integration",  # 503.54s
+        "tests/functional/examples/test_notebooks_python.py::test_surprise_svd_functional",  # 503.54s
         #
-        "tests/integration/examples/test_notebooks_python.py::test_geoimc_integration",  # 1006.19s
+        "tests/functional/examples/test_notebooks_python.py::test_geoimc_functional",  # 1006.19s
         #
-        "tests/integration/examples/test_notebooks_python.py::test_benchmark_movielens_cpu",  # 58s
+        "tests/functional/examples/test_notebooks_python.py::test_benchmark_movielens_cpu",  # 58s
     ],
     "group_cpu_003": [  # Total group time: 2253s
         "tests/data_validation/recommenders/dataset/test_criteo.py::test_download_criteo_sample",  # 1.05s
@@ -52,9 +52,9 @@
         "tests/data_validation/recommenders/datasets/test_criteo.py::test_criteo_load_pandas_df_full",  # 1368.63s
         #
         "tests/smoke/examples/test_notebooks_python.py::test_sar_single_node_smoke",  # 12.58s
-        "tests/integration/examples/test_notebooks_python.py::test_sar_single_node_integration",  # 57.67s + 808.83s
+        "tests/functional/examples/test_notebooks_python.py::test_sar_single_node_functional",  # 57.67s + 808.83s
         # FIXME: Add experimental tests in a later iteration
-        # "tests/integration/examples/test_notebooks_python.py::test_xlearn_fm_integration",  # 255.73s
+        # "tests/functional/examples/test_notebooks_python.py::test_xlearn_fm_functional",  # 255.73s
     ],
     "group_gpu_001": [  # Total group time: 1937.01s
         "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",  # 0.76s (Always the first test to check the GPU works)
@@ -63,14 +63,14 @@
         "tests/smoke/recommenders/recommender/test_newsrec_utils.py::test_news_iterator",  # 3.04s
         #
         "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_lightgcn",  # 6.03s
-        "tests/integration/examples/test_notebooks_gpu.py::test_lightgcn_deep_dive_integration",  # 19.45s
+        "tests/functional/examples/test_notebooks_gpu.py::test_lightgcn_deep_dive_functional",  # 19.45s
         #
         "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_sum",  # 27.23s
         #
         "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_dkn",  # 187.20s
-        "tests/integration/examples/test_notebooks_gpu.py::test_dkn_quickstart_integration",  # 1167.93s
+        "tests/functional/examples/test_notebooks_gpu.py::test_dkn_quickstart_functional",  # 1167.93s
         #
-        "tests/integration/examples/test_notebooks_gpu.py::test_slirec_quickstart_integration",  # 175.00s
+        "tests/functional/examples/test_notebooks_gpu.py::test_slirec_quickstart_functional",  # 175.00s
         "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_slirec",  # 346.72s
     ],
     "group_gpu_002": [  # Total group time: 1896.76s
@@ -78,22 +78,22 @@
         "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_xdeepfm",  # 3.10s
         # FIXME: https://github.com/microsoft/recommenders/issues/1883
         # "tests/smoke/examples/test_notebooks_gpu.py::test_xdeepfm_smoke",  # 77.93s
-        "tests/integration/examples/test_notebooks_gpu.py::test_xdeepfm_integration",  # 470.11s
+        "tests/functional/examples/test_notebooks_gpu.py::test_xdeepfm_functional",  # 470.11s
         #
         "tests/smoke/examples/test_notebooks_gpu.py::test_cornac_bivae_smoke",  # 67.84s
-        "tests/integration/examples/test_notebooks_gpu.py::test_cornac_bivae_integration",  # 453.21s
+        "tests/functional/examples/test_notebooks_gpu.py::test_cornac_bivae_functional",  # 453.21s
         #
         "tests/smoke/examples/test_notebooks_gpu.py::test_wide_deep_smoke",  # 122.71s
         #
         "tests/smoke/examples/test_notebooks_gpu.py::test_fastai_smoke",  # 33.22s
-        "tests/integration/examples/test_notebooks_gpu.py::test_fastai_integration",  # 667.88s
+        "tests/functional/examples/test_notebooks_gpu.py::test_fastai_functional",  # 667.88s
     ],
     "group_gpu_003": [  # Total group time: 2072.15s
         "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",  # 0.76s (Always the first test to check the GPU works)
         "tests/smoke/examples/test_notebooks_gpu.py::test_ncf_smoke",  # 114.39s
-        "tests/integration/examples/test_notebooks_gpu.py::test_ncf_integration",  # 1046.97s
+        "tests/functional/examples/test_notebooks_gpu.py::test_ncf_functional",  # 1046.97s
         "tests/smoke/examples/test_notebooks_gpu.py::test_ncf_deep_dive_smoke",  # 102.71s
-        "tests/integration/examples/test_notebooks_gpu.py::test_ncf_deep_dive_integration",  # 351.17s
+        "tests/functional/examples/test_notebooks_gpu.py::test_ncf_deep_dive_functional",  # 351.17s
         #
         "tests/smoke/recommenders/recommender/test_newsrec_utils.py::test_naml_iterator",  # 5.50s
         # FIXME: https://github.com/microsoft/recommenders/issues/1883
@@ -103,20 +103,20 @@
         "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",  # 0.76s (Always the first test to check the GPU works)
         "tests/smoke/examples/test_notebooks_gpu.py::test_nrms_smoke",  # 232.55s
         # FIXME: https://github.com/microsoft/recommenders/issues/1883
-        # "tests/integration/examples/test_notebooks_gpu.py::test_nrms_quickstart_integration",  # 857.05s
+        # "tests/functional/examples/test_notebooks_gpu.py::test_nrms_quickstart_functional",  # 857.05s
         #
         "tests/smoke/examples/test_notebooks_gpu.py::test_lstur_smoke",  # 246.46s
         # FIXME: https://github.com/microsoft/recommenders/issues/1883
-        # "tests/integration/examples/test_notebooks_gpu.py::test_lstur_quickstart_integration",  # 766.52s
+        # "tests/functional/examples/test_notebooks_gpu.py::test_lstur_quickstart_functional",  # 766.52s
     ],
     "group_gpu_005": [  # Total group time: 1844.05s
         "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",  # 0.76s (Always the first test to check the GPU works)
         # FIXME: https://github.com/microsoft/recommenders/issues/1883
-        # "tests/integration/examples/test_notebooks_gpu.py::test_wide_deep_integration",  # 1843.29s
+        # "tests/functional/examples/test_notebooks_gpu.py::test_wide_deep_functional",  # 1843.29s
         #
         "tests/smoke/examples/test_notebooks_gpu.py::test_npa_smoke",  # 366.22s
         # FIXME: https://github.com/microsoft/recommenders/issues/1883
-        # "tests/integration/examples/test_notebooks_gpu.py::test_npa_quickstart_integration",  # 810.92s
+        # "tests/functional/examples/test_notebooks_gpu.py::test_npa_quickstart_functional",  # 810.92s
     ],
     "group_gpu_006": [  # Total group time: 1763.99s
         "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",  # 0.76s (Always the first test to check the GPU works)
@@ -128,11 +128,11 @@
         # FIXME: https://github.com/microsoft/recommenders/issues/1883
         # "tests/smoke/examples/test_notebooks_gpu.py::test_naml_smoke",  # 620.13s
         #
-        "tests/integration/examples/test_notebooks_gpu.py::test_benchmark_movielens_gpu",  # 226s
+        "tests/functional/examples/test_notebooks_gpu.py::test_benchmark_movielens_gpu",  # 226s
         # FIXME: Reduce test time https://github.com/microsoft/recommenders/issues/1731
-        # "tests/integration/examples/test_notebooks_gpu.py::test_naml_quickstart_integration",  # 2033.85s
+        # "tests/functional/examples/test_notebooks_gpu.py::test_naml_quickstart_functional",  # 2033.85s
         # FIXME: https://github.com/microsoft/recommenders/issues/1716
-        # "tests/integration/examples/test_notebooks_gpu.py::test_sasrec_quickstart_integration",  # 448.06s + 614.69s
+        # "tests/functional/examples/test_notebooks_gpu.py::test_sasrec_quickstart_functional",  # 448.06s + 614.69s
         "tests/smoke/recommenders/recommender/test_newsrec_model.py::test_model_lstur",  # 194.88s
     ],
     "group_spark_001": [  # Total group time: 987.16s
@@ -144,9 +144,9 @@
         "tests/smoke/examples/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_smoke",  # 32.45s
         #
         "tests/smoke/examples/test_notebooks_pyspark.py::test_als_pyspark_smoke",  # 49.53s
-        "tests/integration/examples/test_notebooks_pyspark.py::test_als_pyspark_integration",  # 110.58s
+        "tests/functional/examples/test_notebooks_pyspark.py::test_als_pyspark_functional",  # 110.58s
         #
-        "tests/integration/examples/test_notebooks_pyspark.py::test_benchmark_movielens_pyspark",  # 142s
+        "tests/functional/examples/test_notebooks_pyspark.py::test_benchmark_movielens_pyspark",  # 142s
     ],
 }
 
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
deleted file mode 100644
index 25dc11fb93..0000000000
--- a/tests/integration/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Needed to execute the AzureML tests
\ No newline at end of file
diff --git a/tests/integration/examples/__init__.py b/tests/integration/examples/__init__.py
deleted file mode 100644
index 25dc11fb93..0000000000
--- a/tests/integration/examples/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Needed to execute the AzureML tests
\ No newline at end of file
diff --git a/tests/integration/examples/test_notebooks_gpu.py b/tests/integration/examples/test_notebooks_gpu.py
deleted file mode 100644
index 9b63315b5f..0000000000
--- a/tests/integration/examples/test_notebooks_gpu.py
+++ /dev/null
@@ -1,754 +0,0 @@
-# Copyright (c) Recommenders contributors.
-# Licensed under the MIT License.
-
-import os
-import pytest
-
-try:
-    import papermill as pm
-    import scrapbook as sb
-except ImportError:
-    pass  # disable error while collecting tests for non-notebook environments
-
-
-from recommenders.utils.gpu_utils import get_number_gpus
-
-
-TOL = 0.1
-ABS_TOL = 0.05
-
-
-@pytest.mark.gpu
-@pytest.mark.integration
-def test_gpu_vm():
-    assert get_number_gpus() >= 1
-
-
-@pytest.mark.gpu
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, epochs, expected_values, seed",
-    [
-        (
-            "1m",
-            10,
-            {
-                "map": 0.0255283,
-                "ndcg": 0.15656,
-                "precision": 0.145646,
-                "recall": 0.0557367,
-            },
-            42,
-        ),
-        # ("10m", 5, {"map": 0.024821, "ndcg": 0.153396, "precision": 0.143046, "recall": 0.056590})# takes too long
-    ],
-)
-def test_ncf_integration(
-    notebooks, output_notebook, kernel_name, size, epochs, expected_values, seed
-):
-    notebook_path = notebooks["ncf"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(
-            TOP_K=10, MOVIELENS_DATA_SIZE=size, EPOCHS=epochs, BATCH_SIZE=512, SEED=seed
-        ),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.gpu
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, epochs, batch_size, expected_values, seed",
-    [
-        (
-            "100k",
-            10,
-            512,
-            {
-                "map": 0.0435856,
-                "ndcg": 0.37586,
-                "precision": 0.169353,
-                "recall": 0.0923963,
-                "map2": 0.0510391,
-                "ndcg2": 0.202186,
-                "precision2": 0.179533,
-                "recall2": 0.106434,
-            },
-            42,
-        )
-    ],
-)
-def test_ncf_deep_dive_integration(
-    notebooks,
-    output_notebook,
-    kernel_name,
-    size,
-    epochs,
-    batch_size,
-    expected_values,
-    seed,
-):
-    notebook_path = notebooks["ncf_deep_dive"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(
-            TOP_K=10,
-            MOVIELENS_DATA_SIZE=size,
-            EPOCHS=epochs,
-            BATCH_SIZE=batch_size,
-            SEED=seed,
-        ),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.gpu
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, epochs, expected_values",
-    [
-        (
-            "1m",
-            10,
-            {
-                "map": 0.025739,
-                "ndcg": 0.183417,
-                "precision": 0.167246,
-                "recall": 0.054307,
-                "rmse": 0.881267,
-                "mae": 0.700747,
-                "rsquared": 0.379963,
-                "exp_var": 0.382842,
-            },
-        ),
-        # ("10m", 5, ), # it gets an OOM on pred = learner.model.forward(u, m)
-    ],
-)
-def test_fastai_integration(
-    notebooks, output_notebook, kernel_name, size, epochs, expected_values
-):
-    notebook_path = notebooks["fastai"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE=size, EPOCHS=epochs),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.gpu
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "epochs, expected_values, seed",
-    [
-        (
-            5,
-            {"auc": 0.742, "logloss": 0.4964},
-            42,
-        )
-    ],
-)
-def test_xdeepfm_integration(
-    notebooks,
-    output_notebook,
-    kernel_name,
-    epochs,
-    expected_values,
-    seed,
-):
-    notebook_path = notebooks["xdeepfm_quickstart"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(
-            EPOCHS=epochs,
-            BATCH_SIZE=1024,
-            RANDOM_SEED=seed,
-        ),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.gpu
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, steps, batch_size, expected_values, seed",
-    [
-        (
-            "100k",
-            10000,
-            32,
-            {
-                "rmse": 0.924958,
-                "mae": 0.741425,
-                "rsquared": 0.262963,
-                "exp_var": 0.268413,
-                "ndcg_at_k": 0.118114,
-                "map_at_k": 0.0139213,
-                "precision_at_k": 0.107087,
-                "recall_at_k": 0.0328638,
-            },
-            42,
-        )
-    ],
-)
-def test_wide_deep_integration(
-    notebooks,
-    output_notebook,
-    kernel_name,
-    size,
-    steps,
-    batch_size,
-    expected_values,
-    seed,
-    tmp,
-):
-    notebook_path = notebooks["wide_deep"]
-
-    params = {
-        "MOVIELENS_DATA_SIZE": size,
-        "STEPS": steps,
-        "BATCH_SIZE": batch_size,
-        "EVALUATE_WHILE_TRAINING": False,
-        "MODEL_DIR": tmp,
-        "EXPORT_DIR_BASE": tmp,
-        "RATING_METRICS": ["rmse", "mae", "rsquared", "exp_var"],
-        "RANKING_METRICS": ["ndcg_at_k", "map_at_k", "precision_at_k", "recall_at_k"],
-        "RANDOM_SEED": seed,
-    }
-    pm.execute_notebook(
-        notebook_path, output_notebook, kernel_name=kernel_name, parameters=params
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.gpu
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "yaml_file, data_path, epochs, batch_size, expected_values, seed",
-    [
-        (
-            "recommenders/models/deeprec/config/sli_rec.yaml",
-            os.path.join("tests", "resources", "deeprec", "slirec"),
-            10,
-            400,
-            {"res_syn": {"auc": 0.7183, "logloss": 0.6045}},
-            42,
-        )
-    ],
-)
-def test_slirec_quickstart_integration(
-    notebooks,
-    output_notebook,
-    kernel_name,
-    yaml_file,
-    data_path,
-    epochs,
-    batch_size,
-    expected_values,
-    seed,
-):
-    notebook_path = notebooks["slirec_quickstart"]
-
-    params = {
-        "yaml_file": yaml_file,
-        "data_path": data_path,
-        "EPOCHS": epochs,
-        "BATCH_SIZE": batch_size,
-        "RANDOM_SEED": seed,
-    }
-    pm.execute_notebook(
-        notebook_path, output_notebook, kernel_name=kernel_name, parameters=params
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key]["auc"] == pytest.approx(value["auc"], rel=TOL, abs=ABS_TOL)
-
-        ## disable logloss check, because so far SLi-Rec uses ranking loss, not a point-wise loss
-        # assert results[key]["logloss"] == pytest.approx(
-        #     value["logloss"], rel=TOL, abs=ABS_TOL
-        # )
-
-
-@pytest.mark.gpu
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "epochs, batch_size, seed, MIND_type, expected_values",
-    [
-        (
-            5,
-            64,
-            42,
-            "demo",
-            {
-                "res_syn": {
-                    "group_auc": 0.6217,
-                    "mean_mrr": 0.2783,
-                    "ndcg@5": 0.3024,
-                    "ndcg@10": 0.3719,
-                }
-            },
-        )
-    ],
-)
-def test_nrms_quickstart_integration(
-    notebooks,
-    output_notebook,
-    kernel_name,
-    epochs,
-    batch_size,
-    seed,
-    MIND_type,
-    expected_values,
-):
-    notebook_path = notebooks["nrms_quickstart"]
-
-    params = {
-        "epochs": epochs,
-        "batch_size": batch_size,
-        "seed": seed,
-        "MIND_type": MIND_type,
-    }
-    pm.execute_notebook(
-        notebook_path, output_notebook, kernel_name=kernel_name, parameters=params
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key]["group_auc"] == pytest.approx(
-            value["group_auc"], rel=TOL, abs=ABS_TOL
-        )
-        assert results[key]["mean_mrr"] == pytest.approx(
-            value["mean_mrr"], rel=TOL, abs=ABS_TOL
-        )
-        assert results[key]["ndcg@5"] == pytest.approx(
-            value["ndcg@5"], rel=TOL, abs=ABS_TOL
-        )
-        assert results[key]["ndcg@10"] == pytest.approx(
-            value["ndcg@10"], rel=TOL, abs=ABS_TOL
-        )
-
-
-@pytest.mark.gpu
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "epochs, batch_size, seed, MIND_type, expected_values",
-    [
-        (
-            5,
-            64,
-            42,
-            "demo",
-            {
-                "res_syn": {
-                    "group_auc": 0.6436,
-                    "mean_mrr": 0.2990,
-                    "ndcg@5": 0.3297,
-                    "ndcg@10": 0.3933,
-                }
-            },
-        )
-    ],
-)
-def test_naml_quickstart_integration(
-    notebooks,
-    output_notebook,
-    kernel_name,
-    batch_size,
-    epochs,
-    seed,
-    MIND_type,
-    expected_values,
-):
-    notebook_path = notebooks["naml_quickstart"]
-
-    params = {
-        "epochs": epochs,
-        "batch_size": batch_size,
-        "seed": seed,
-        "MIND_type": MIND_type,
-    }
-    pm.execute_notebook(
-        notebook_path, output_notebook, kernel_name=kernel_name, parameters=params
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key]["group_auc"] == pytest.approx(
-            value["group_auc"], rel=TOL, abs=ABS_TOL
-        )
-        assert results[key]["mean_mrr"] == pytest.approx(
-            value["mean_mrr"], rel=TOL, abs=ABS_TOL
-        )
-        assert results[key]["ndcg@5"] == pytest.approx(
-            value["ndcg@5"], rel=TOL, abs=ABS_TOL
-        )
-        assert results[key]["ndcg@10"] == pytest.approx(
-            value["ndcg@10"], rel=TOL, abs=ABS_TOL
-        )
-
-
-@pytest.mark.gpu
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "epochs, batch_size, seed, MIND_type, expected_values",
-    [
-        (
-            5,
-            64,
-            42,
-            "demo",
-            {
-                "res_syn": {
-                    "group_auc": 0.6444,
-                    "mean_mrr": 0.2983,
-                    "ndcg@5": 0.3287,
-                    "ndcg@10": 0.3938,
-                }
-            },
-        )
-    ],
-)
-def test_lstur_quickstart_integration(
-    notebooks,
-    output_notebook,
-    kernel_name,
-    epochs,
-    batch_size,
-    seed,
-    MIND_type,
-    expected_values,
-):
-    notebook_path = notebooks["lstur_quickstart"]
-
-    params = {
-        "epochs": epochs,
-        "batch_size": batch_size,
-        "seed": seed,
-        "MIND_type": MIND_type,
-    }
-    pm.execute_notebook(
-        notebook_path, output_notebook, kernel_name=kernel_name, parameters=params
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key]["group_auc"] == pytest.approx(
-            value["group_auc"], rel=TOL, abs=ABS_TOL
-        )
-        assert results[key]["mean_mrr"] == pytest.approx(
-            value["mean_mrr"], rel=TOL, abs=ABS_TOL
-        )
-        assert results[key]["ndcg@5"] == pytest.approx(
-            value["ndcg@5"], rel=TOL, abs=ABS_TOL
-        )
-        assert results[key]["ndcg@10"] == pytest.approx(
-            value["ndcg@10"], rel=TOL, abs=ABS_TOL
-        )
-
-
-@pytest.mark.gpu
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "epochs, batch_size, seed, MIND_type, expected_values",
-    [
-        (
-            5,
-            64,
-            42,
-            "demo",
-            {
-                "res_syn": {
-                    "group_auc": 0.6035,
-                    "mean_mrr": 0.2765,
-                    "ndcg@5": 0.2977,
-                    "ndcg@10": 0.3637,
-                }
-            },
-        )
-    ],
-)
-def test_npa_quickstart_integration(
-    notebooks,
-    output_notebook,
-    kernel_name,
-    epochs,
-    batch_size,
-    seed,
-    MIND_type,
-    expected_values,
-):
-    notebook_path = notebooks["npa_quickstart"]
-
-    params = {
-        "epochs": epochs,
-        "batch_size": batch_size,
-        "seed": seed,
-        "MIND_type": MIND_type,
-    }
-    pm.execute_notebook(
-        notebook_path, output_notebook, kernel_name=kernel_name, parameters=params
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key]["group_auc"] == pytest.approx(
-            value["group_auc"], rel=TOL, abs=ABS_TOL
-        )
-        assert results[key]["mean_mrr"] == pytest.approx(
-            value["mean_mrr"], rel=TOL, abs=ABS_TOL
-        )
-        assert results[key]["ndcg@5"] == pytest.approx(
-            value["ndcg@5"], rel=TOL, abs=ABS_TOL
-        )
-        assert results[key]["ndcg@10"] == pytest.approx(
-            value["ndcg@10"], rel=TOL, abs=ABS_TOL
-        )
-
-
-@pytest.mark.gpu
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "yaml_file, data_path, size, epochs, batch_size, expected_values, seed",
-    [
-        (
-            "recommenders/models/deeprec/config/lightgcn.yaml",
-            os.path.join("tests", "resources", "deeprec", "lightgcn"),
-            "100k",
-            5,
-            1024,
-            {
-                "map": 0.094794,
-                "ndcg": 0.354145,
-                "precision": 0.308165,
-                "recall": 0.163034,
-            },
-            42,
-        )
-    ],
-)
-def test_lightgcn_deep_dive_integration(
-    notebooks,
-    output_notebook,
-    kernel_name,
-    yaml_file,
-    data_path,
-    size,
-    epochs,
-    batch_size,
-    expected_values,
-    seed,
-):
-    notebook_path = notebooks["lightgcn_deep_dive"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(
-            TOP_K=10,
-            MOVIELENS_DATA_SIZE=size,
-            EPOCHS=epochs,
-            BATCH_SIZE=batch_size,
-            SEED=seed,
-            yaml_file=yaml_file,
-            user_file=os.path.join(data_path, r"user_embeddings"),
-            item_file=os.path.join(data_path, r"item_embeddings"),
-        ),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.gpu
-@pytest.mark.notebooks
-@pytest.mark.integration
-def test_dkn_quickstart_integration(notebooks, output_notebook, kernel_name):
-    notebook_path = notebooks["dkn_quickstart"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(EPOCHS=5, BATCH_SIZE=500),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    assert results["res"]["auc"] == pytest.approx(0.5651, rel=TOL, abs=ABS_TOL)
-    assert results["res"]["mean_mrr"] == pytest.approx(0.1639, rel=TOL, abs=ABS_TOL)
-    assert results["res"]["ndcg@5"] == pytest.approx(0.1735, rel=TOL, abs=ABS_TOL)
-    assert results["res"]["ndcg@10"] == pytest.approx(0.2301, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.gpu
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, expected_values",
-    [
-        ("1m", dict(map=0.081794, ndcg=0.400983, precision=0.367997, recall=0.138352)),
-        # 10m works but takes too long
-    ],
-)
-def test_cornac_bivae_integration(
-    notebooks, output_notebook, kernel_name, size, expected_values
-):
-    notebook_path = notebooks["cornac_bivae_deep_dive"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(MOVIELENS_DATA_SIZE=size),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.gpu
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "data_dir, num_epochs, batch_size, model_name, expected_values, seed",
-    [
-        (
-            os.path.join("tests", "recsys_data", "RecSys", "SASRec-tf2", "data"),
-            1,
-            128,
-            "sasrec",
-            {"ndcg@10": 0.2626, "Hit@10": 0.4244},
-            42,
-        ),
-        (
-            os.path.join("tests", "recsys_data", "RecSys", "SASRec-tf2", "data"),
-            1,
-            128,
-            "ssept",
-            {"ndcg@10": 0.2626, "Hit@10": 0.4244},
-            42,
-        ),
-    ],
-)
-def test_sasrec_quickstart_integration(
-    notebooks,
-    output_notebook,
-    kernel_name,
-    data_dir,
-    num_epochs,
-    batch_size,
-    model_name,
-    expected_values,
-    seed,
-):
-    notebook_path = notebooks["sasrec_quickstart"]
-    params = {
-        "data_dir": data_dir,
-        "num_epochs": num_epochs,
-        "batch_size": batch_size,
-        "model_name": model_name,
-        "seed": seed,
-    }
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=params,
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.gpu
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, algos, expected_values_ndcg",
-    [
-        (
-            ["100k"],
-            ["ncf", "fastai", "bivae", "lightgcn"],
-            [0.382793, 0.147583, 0.471722, 0.412664],
-        ),
-    ],
-)
-def test_benchmark_movielens_gpu(
-    notebooks, output_notebook, kernel_name, size, algos, expected_values_ndcg
-):
-    notebook_path = notebooks["benchmark_movielens"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(data_sizes=size, algorithms=algos),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-    assert len(results["results"]) == 4
-    for i, value in enumerate(results["results"]):
-        assert results["results"][i] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
diff --git a/tests/integration/examples/test_notebooks_pyspark.py b/tests/integration/examples/test_notebooks_pyspark.py
deleted file mode 100644
index 1dbc2f1399..0000000000
--- a/tests/integration/examples/test_notebooks_pyspark.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright (c) Recommenders contributors.
-# Licensed under the MIT License.
-
-import os
-import sys
-import pytest
-
-try:
-    import papermill as pm
-    import scrapbook as sb
-except ImportError:
-    pass  # disable error while collecting tests for non-notebook environments
-
-
-TOL = 0.05
-ABS_TOL = 0.05
-
-
-# This is a flaky test that can fail unexpectedly
-@pytest.mark.flaky(reruns=5, reruns_delay=2)
-@pytest.mark.spark
-@pytest.mark.notebooks
-@pytest.mark.integration
-def test_als_pyspark_integration(notebooks, output_notebook, kernel_name):
-    notebook_path = notebooks["als_pyspark"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="1m"),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    assert results["map"] == pytest.approx(0.00201, rel=TOL, abs=ABS_TOL)
-    assert results["ndcg"] == pytest.approx(0.02516, rel=TOL, abs=ABS_TOL)
-    assert results["precision"] == pytest.approx(0.03172, rel=TOL, abs=ABS_TOL)
-    assert results["recall"] == pytest.approx(0.009302, rel=TOL, abs=ABS_TOL)
-    assert results["rmse"] == pytest.approx(0.8621, rel=TOL, abs=ABS_TOL)
-    assert results["mae"] == pytest.approx(0.68023, rel=TOL, abs=ABS_TOL)
-    assert results["exp_var"] == pytest.approx(0.4094, rel=TOL, abs=ABS_TOL)
-    assert results["rsquared"] == pytest.approx(0.4038, rel=TOL, abs=ABS_TOL)
-
-
-# This is a flaky test that can fail unexpectedly
-@pytest.mark.flaky(reruns=5, reruns_delay=2)
-@pytest.mark.spark
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.skip(reason="It takes too long in the current test machine")
-@pytest.mark.skipif(sys.platform == "win32", reason="Not implemented on Windows")
-def test_mmlspark_lightgbm_criteo_integration(notebooks, output_notebook, kernel_name):
-    notebook_path = notebooks["mmlspark_lightgbm_criteo"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(DATA_SIZE="full", NUM_ITERATIONS=50),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    assert results["auc"] == pytest.approx(0.68895, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.spark
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, algos, expected_values_ndcg",
-    [
-        (["100k"], ["als"], [0.035812]),
-    ],
-)
-def test_benchmark_movielens_pyspark(
-    notebooks, output_notebook, kernel_name, size, algos, expected_values_ndcg
-):
-    notebook_path = notebooks["benchmark_movielens"]
-
-    os.environ["PYSPARK_PYTHON"] = sys.executable
-    os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable
-    os.environ.pop("SPARK_HOME", None)
-
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(data_sizes=size, algorithms=algos),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-    assert len(results["results"]) == 1
-    for i, value in enumerate(results["results"]):
-        assert results["results"][i] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
diff --git a/tests/integration/examples/test_notebooks_python.py b/tests/integration/examples/test_notebooks_python.py
deleted file mode 100644
index 870c7fc0c0..0000000000
--- a/tests/integration/examples/test_notebooks_python.py
+++ /dev/null
@@ -1,336 +0,0 @@
-# Copyright (c) Recommenders contributors.
-# Licensed under the MIT License.
-
-import pytest
-
-try:
-    import papermill as pm
-    import scrapbook as sb
-except ImportError:
-    pass  # disable error while collecting tests for non-notebook environments
-
-
-TOL = 0.05
-ABS_TOL = 0.05
-
-
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, expected_values",
-    [
-        (
-            "1m",
-            {
-                "map": 0.060579,
-                "ndcg": 0.299245,
-                "precision": 0.270116,
-                "recall": 0.104350,
-            },
-        ),
-        (
-            "10m",
-            {
-                "map": 0.098745,
-                "ndcg": 0.319625,
-                "precision": 0.275756,
-                "recall": 0.154014,
-            },
-        ),
-    ],
-)
-def test_sar_single_node_integration(
-    notebooks, output_notebook, kernel_name, size, expected_values
-):
-    notebook_path = notebooks["sar_single_node"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE=size),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, expected_values",
-    [
-        (
-            "1m",
-            {
-                "map": 0.033914,
-                "ndcg": 0.231570,
-                "precision": 0.211923,
-                "recall": 0.064663,
-            },
-        ),
-        # ("10m", {"map": , "ndcg": , "precision": , "recall": }), # OOM on test machine
-    ],
-)
-def test_baseline_deep_dive_integration(
-    notebooks, output_notebook, kernel_name, size, expected_values
-):
-    notebook_path = notebooks["baseline_deep_dive"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE=size),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, expected_values",
-    [
-        (
-            "1m",
-            dict(
-                rmse=0.89,
-                mae=0.70,
-                rsquared=0.36,
-                exp_var=0.36,
-                map=0.011,
-                ndcg=0.10,
-                precision=0.093,
-                recall=0.025,
-            ),
-        ),
-        # 10m works but takes too long
-    ],
-)
-def test_surprise_svd_integration(
-    notebooks, output_notebook, kernel_name, size, expected_values
-):
-    notebook_path = notebooks["surprise_svd_deep_dive"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(MOVIELENS_DATA_SIZE=size),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, expected_values",
-    [
-        (
-            "1m",
-            dict(
-                rmse=0.959885,
-                mae=0.690133,
-                rsquared=0.264014,
-                exp_var=0.264417,
-                map=0.004857,
-                ndcg=0.055128,
-                precision=0.061142,
-                recall=0.017789,
-            ),
-        )
-    ],
-)
-@pytest.mark.skip(reason="VW pip package has installation incompatibilities")
-def test_vw_deep_dive_integration(
-    notebooks, output_notebook, kernel_name, size, expected_values
-):
-    notebook_path = notebooks["vowpal_wabbit_deep_dive"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(MOVIELENS_DATA_SIZE=size, TOP_K=10),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.skip(reason="NNI pip package has installation incompatibilities")
-def test_nni_tuning_svd(notebooks, output_notebook, kernel_name, tmp):
-    notebook_path = notebooks["nni_tuning_svd"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(
-            MOVIELENS_DATA_SIZE="100k",
-            SURPRISE_READER="ml-100k",
-            TMP_DIR=tmp,
-            MAX_TRIAL_NUM=1,
-            NUM_EPOCHS=1,
-            WAITING_TIME=20,
-            MAX_RETRIES=50,
-        ),
-    )
-
-
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.skip(reason="Wikidata API is unstable")
-def test_wikidata_integration(notebooks, output_notebook, kernel_name, tmp):
-    notebook_path = notebooks["wikidata_knowledge_graph"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(
-            MOVIELENS_DATA_SIZE="100k", MOVIELENS_SAMPLE=True, MOVIELENS_SAMPLE_SIZE=5
-        ),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    # NOTE: The return number should be always 5, but sometimes we get less because wikidata is unstable
-    assert results["length_result"] >= 1
-
-
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, expected_values",
-    [
-        ("1m", dict(map=0.081390, ndcg=0.406627, precision=0.373228, recall=0.132444)),
-        # 10m works but takes too long
-    ],
-)
-def test_cornac_bpr_integration(
-    notebooks, output_notebook, kernel_name, size, expected_values
-):
-    notebook_path = notebooks["cornac_bpr_deep_dive"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(MOVIELENS_DATA_SIZE=size),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, epochs, expected_values",
-    [
-        (
-            "100k",
-            10,
-            dict(
-                eval_precision=0.131601,
-                eval_recall=0.038056,
-                eval_precision2=0.145599,
-                eval_recall2=0.051338,
-            ),
-        ),
-    ],
-)
-def test_lightfm_integration(
-    notebooks, output_notebook, kernel_name, size, epochs, expected_values
-):
-    notebook_path = notebooks["lightfm_deep_dive"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(MOVIELENS_DATA_SIZE=size, NO_EPOCHS=epochs),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.experimental
-@pytest.mark.parametrize(
-    "expected_values",
-    [({"rmse": 0.4969, "mae": 0.4761})],
-)
-def test_geoimc_integration(notebooks, output_notebook, kernel_name, expected_values):
-    notebook_path = notebooks["geoimc_quickstart"]
-    pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name)
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    for key, value in expected_values.items():
-        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.experimental
-def test_xlearn_fm_integration(notebooks, output_notebook, kernel_name):
-    notebook_path = notebooks["xlearn_fm_deep_dive"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(LEARNING_RATE=0.2, EPOCH=10),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    assert results["auc_score"] == pytest.approx(0.75, rel=TOL, abs=ABS_TOL)
-
-
-@pytest.mark.notebooks
-@pytest.mark.integration
-@pytest.mark.parametrize(
-    "size, algos, expected_values_ndcg",
-    [
-        (["100k"], ["svd", "sar", "bpr"], [0.094444, 0.393818, 0.444990]),
-    ],
-)
-def test_benchmark_movielens_cpu(
-    notebooks, output_notebook, kernel_name, size, algos, expected_values_ndcg
-):
-    notebook_path = notebooks["benchmark_movielens"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(data_sizes=size, algorithms=algos),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-    assert len(results["results"]) == 3
-    for i, value in enumerate(results["results"]):
-        assert results["results"][i] == pytest.approx(value, rel=TOL, abs=ABS_TOL)

From 6b011057b55c5cece2f39d6c25980a717e6d2e0c Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 18:09:31 +0200
Subject: [PATCH 20/43] integration to functional

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/functional/__init__.py                  |   1 +
 tests/functional/examples/__init__.py         |   1 +
 .../functional/examples/test_notebooks_gpu.py | 733 ++++++++++++++++++
 .../examples/test_notebooks_pyspark.py        |  89 +++
 .../examples/test_notebooks_python.py         | 321 ++++++++
 5 files changed, 1145 insertions(+)
 create mode 100644 tests/functional/__init__.py
 create mode 100644 tests/functional/examples/__init__.py
 create mode 100644 tests/functional/examples/test_notebooks_gpu.py
 create mode 100644 tests/functional/examples/test_notebooks_pyspark.py
 create mode 100644 tests/functional/examples/test_notebooks_python.py

diff --git a/tests/functional/__init__.py b/tests/functional/__init__.py
new file mode 100644
index 0000000000..25dc11fb93
--- /dev/null
+++ b/tests/functional/__init__.py
@@ -0,0 +1 @@
+# Needed to execute the AzureML tests
\ No newline at end of file
diff --git a/tests/functional/examples/__init__.py b/tests/functional/examples/__init__.py
new file mode 100644
index 0000000000..25dc11fb93
--- /dev/null
+++ b/tests/functional/examples/__init__.py
@@ -0,0 +1 @@
+# Needed to execute the AzureML tests
\ No newline at end of file
diff --git a/tests/functional/examples/test_notebooks_gpu.py b/tests/functional/examples/test_notebooks_gpu.py
new file mode 100644
index 0000000000..2d8c6b0a08
--- /dev/null
+++ b/tests/functional/examples/test_notebooks_gpu.py
@@ -0,0 +1,733 @@
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+import os
+import pytest
+import papermill as pm
+import scrapbook as sb
+
+from recommenders.utils.gpu_utils import get_number_gpus
+
+
+TOL = 0.1
+ABS_TOL = 0.05
+
+
+@pytest.mark.gpu
+def test_gpu_vm():
+    assert get_number_gpus() >= 1
+
+
+@pytest.mark.gpu
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "size, epochs, expected_values, seed",
+    [
+        (
+            "1m",
+            10,
+            {
+                "map": 0.0255283,
+                "ndcg": 0.15656,
+                "precision": 0.145646,
+                "recall": 0.0557367,
+            },
+            42,
+        ),
+        # ("10m", 5, {"map": 0.024821, "ndcg": 0.153396, "precision": 0.143046, "recall": 0.056590})# takes too long
+    ],
+)
+def test_ncf_functional(
+    notebooks, output_notebook, kernel_name, size, epochs, expected_values, seed
+):
+    notebook_path = notebooks["ncf"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(
+            TOP_K=10, MOVIELENS_DATA_SIZE=size, EPOCHS=epochs, BATCH_SIZE=512, SEED=seed
+        ),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.gpu
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "size, epochs, batch_size, expected_values, seed",
+    [
+        (
+            "100k",
+            10,
+            512,
+            {
+                "map": 0.0435856,
+                "ndcg": 0.37586,
+                "precision": 0.169353,
+                "recall": 0.0923963,
+                "map2": 0.0510391,
+                "ndcg2": 0.202186,
+                "precision2": 0.179533,
+                "recall2": 0.106434,
+            },
+            42,
+        )
+    ],
+)
+def test_ncf_deep_dive_functional(
+    notebooks,
+    output_notebook,
+    kernel_name,
+    size,
+    epochs,
+    batch_size,
+    expected_values,
+    seed,
+):
+    notebook_path = notebooks["ncf_deep_dive"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(
+            TOP_K=10,
+            MOVIELENS_DATA_SIZE=size,
+            EPOCHS=epochs,
+            BATCH_SIZE=batch_size,
+            SEED=seed,
+        ),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.gpu
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "size, epochs, expected_values",
+    [
+        (
+            "1m",
+            10,
+            {
+                "map": 0.025739,
+                "ndcg": 0.183417,
+                "precision": 0.167246,
+                "recall": 0.054307,
+                "rmse": 0.881267,
+                "mae": 0.700747,
+                "rsquared": 0.379963,
+                "exp_var": 0.382842,
+            },
+        ),
+        # ("10m", 5, ), # it gets an OOM on pred = learner.model.forward(u, m)
+    ],
+)
+def test_fastai_functional(
+    notebooks, output_notebook, kernel_name, size, epochs, expected_values
+):
+    notebook_path = notebooks["fastai"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE=size, EPOCHS=epochs),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.gpu
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "epochs, expected_values, seed",
+    [
+        (
+            5,
+            {"auc": 0.742, "logloss": 0.4964},
+            42,
+        )
+    ],
+)
+def test_xdeepfm_functional(
+    notebooks,
+    output_notebook,
+    kernel_name,
+    epochs,
+    expected_values,
+    seed,
+):
+    notebook_path = notebooks["xdeepfm_quickstart"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(
+            EPOCHS=epochs,
+            BATCH_SIZE=1024,
+            RANDOM_SEED=seed,
+        ),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.gpu
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "size, steps, batch_size, expected_values, seed",
+    [
+        (
+            "100k",
+            10000,
+            32,
+            {
+                "rmse": 0.924958,
+                "mae": 0.741425,
+                "rsquared": 0.262963,
+                "exp_var": 0.268413,
+                "ndcg_at_k": 0.118114,
+                "map_at_k": 0.0139213,
+                "precision_at_k": 0.107087,
+                "recall_at_k": 0.0328638,
+            },
+            42,
+        )
+    ],
+)
+def test_wide_deep_functional(
+    notebooks,
+    output_notebook,
+    kernel_name,
+    size,
+    steps,
+    batch_size,
+    expected_values,
+    seed,
+    tmp,
+):
+    notebook_path = notebooks["wide_deep"]
+
+    params = {
+        "MOVIELENS_DATA_SIZE": size,
+        "STEPS": steps,
+        "BATCH_SIZE": batch_size,
+        "EVALUATE_WHILE_TRAINING": False,
+        "MODEL_DIR": tmp,
+        "EXPORT_DIR_BASE": tmp,
+        "RATING_METRICS": ["rmse", "mae", "rsquared", "exp_var"],
+        "RANKING_METRICS": ["ndcg_at_k", "map_at_k", "precision_at_k", "recall_at_k"],
+        "RANDOM_SEED": seed,
+    }
+    pm.execute_notebook(
+        notebook_path, output_notebook, kernel_name=kernel_name, parameters=params
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.gpu
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "yaml_file, data_path, epochs, batch_size, expected_values, seed",
+    [
+        (
+            "recommenders/models/deeprec/config/sli_rec.yaml",
+            os.path.join("tests", "resources", "deeprec", "slirec"),
+            10,
+            400,
+            {"res_syn": {"auc": 0.7183, "logloss": 0.6045}},
+            42,
+        )
+    ],
+)
+def test_slirec_quickstart_functional(
+    notebooks,
+    output_notebook,
+    kernel_name,
+    yaml_file,
+    data_path,
+    epochs,
+    batch_size,
+    expected_values,
+    seed,
+):
+    notebook_path = notebooks["slirec_quickstart"]
+
+    params = {
+        "yaml_file": yaml_file,
+        "data_path": data_path,
+        "EPOCHS": epochs,
+        "BATCH_SIZE": batch_size,
+        "RANDOM_SEED": seed,
+    }
+    pm.execute_notebook(
+        notebook_path, output_notebook, kernel_name=kernel_name, parameters=params
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key]["auc"] == pytest.approx(value["auc"], rel=TOL, abs=ABS_TOL)
+
+        ## disable logloss check, because so far SLi-Rec uses ranking loss, not a point-wise loss
+        # assert results[key]["logloss"] == pytest.approx(
+        #     value["logloss"], rel=TOL, abs=ABS_TOL
+        # )
+
+
+@pytest.mark.gpu
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "epochs, batch_size, seed, MIND_type, expected_values",
+    [
+        (
+            5,
+            64,
+            42,
+            "demo",
+            {
+                "res_syn": {
+                    "group_auc": 0.6217,
+                    "mean_mrr": 0.2783,
+                    "ndcg@5": 0.3024,
+                    "ndcg@10": 0.3719,
+                }
+            },
+        )
+    ],
+)
+def test_nrms_quickstart_functional(
+    notebooks,
+    output_notebook,
+    kernel_name,
+    epochs,
+    batch_size,
+    seed,
+    MIND_type,
+    expected_values,
+):
+    notebook_path = notebooks["nrms_quickstart"]
+
+    params = {
+        "epochs": epochs,
+        "batch_size": batch_size,
+        "seed": seed,
+        "MIND_type": MIND_type,
+    }
+    pm.execute_notebook(
+        notebook_path, output_notebook, kernel_name=kernel_name, parameters=params
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key]["group_auc"] == pytest.approx(
+            value["group_auc"], rel=TOL, abs=ABS_TOL
+        )
+        assert results[key]["mean_mrr"] == pytest.approx(
+            value["mean_mrr"], rel=TOL, abs=ABS_TOL
+        )
+        assert results[key]["ndcg@5"] == pytest.approx(
+            value["ndcg@5"], rel=TOL, abs=ABS_TOL
+        )
+        assert results[key]["ndcg@10"] == pytest.approx(
+            value["ndcg@10"], rel=TOL, abs=ABS_TOL
+        )
+
+
+@pytest.mark.gpu
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "epochs, batch_size, seed, MIND_type, expected_values",
+    [
+        (
+            5,
+            64,
+            42,
+            "demo",
+            {
+                "res_syn": {
+                    "group_auc": 0.6436,
+                    "mean_mrr": 0.2990,
+                    "ndcg@5": 0.3297,
+                    "ndcg@10": 0.3933,
+                }
+            },
+        )
+    ],
+)
+def test_naml_quickstart_functional(
+    notebooks,
+    output_notebook,
+    kernel_name,
+    batch_size,
+    epochs,
+    seed,
+    MIND_type,
+    expected_values,
+):
+    notebook_path = notebooks["naml_quickstart"]
+
+    params = {
+        "epochs": epochs,
+        "batch_size": batch_size,
+        "seed": seed,
+        "MIND_type": MIND_type,
+    }
+    pm.execute_notebook(
+        notebook_path, output_notebook, kernel_name=kernel_name, parameters=params
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key]["group_auc"] == pytest.approx(
+            value["group_auc"], rel=TOL, abs=ABS_TOL
+        )
+        assert results[key]["mean_mrr"] == pytest.approx(
+            value["mean_mrr"], rel=TOL, abs=ABS_TOL
+        )
+        assert results[key]["ndcg@5"] == pytest.approx(
+            value["ndcg@5"], rel=TOL, abs=ABS_TOL
+        )
+        assert results[key]["ndcg@10"] == pytest.approx(
+            value["ndcg@10"], rel=TOL, abs=ABS_TOL
+        )
+
+
+@pytest.mark.gpu
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "epochs, batch_size, seed, MIND_type, expected_values",
+    [
+        (
+            5,
+            64,
+            42,
+            "demo",
+            {
+                "res_syn": {
+                    "group_auc": 0.6444,
+                    "mean_mrr": 0.2983,
+                    "ndcg@5": 0.3287,
+                    "ndcg@10": 0.3938,
+                }
+            },
+        )
+    ],
+)
+def test_lstur_quickstart_functional(
+    notebooks,
+    output_notebook,
+    kernel_name,
+    epochs,
+    batch_size,
+    seed,
+    MIND_type,
+    expected_values,
+):
+    notebook_path = notebooks["lstur_quickstart"]
+
+    params = {
+        "epochs": epochs,
+        "batch_size": batch_size,
+        "seed": seed,
+        "MIND_type": MIND_type,
+    }
+    pm.execute_notebook(
+        notebook_path, output_notebook, kernel_name=kernel_name, parameters=params
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key]["group_auc"] == pytest.approx(
+            value["group_auc"], rel=TOL, abs=ABS_TOL
+        )
+        assert results[key]["mean_mrr"] == pytest.approx(
+            value["mean_mrr"], rel=TOL, abs=ABS_TOL
+        )
+        assert results[key]["ndcg@5"] == pytest.approx(
+            value["ndcg@5"], rel=TOL, abs=ABS_TOL
+        )
+        assert results[key]["ndcg@10"] == pytest.approx(
+            value["ndcg@10"], rel=TOL, abs=ABS_TOL
+        )
+
+
+@pytest.mark.gpu
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "epochs, batch_size, seed, MIND_type, expected_values",
+    [
+        (
+            5,
+            64,
+            42,
+            "demo",
+            {
+                "res_syn": {
+                    "group_auc": 0.6035,
+                    "mean_mrr": 0.2765,
+                    "ndcg@5": 0.2977,
+                    "ndcg@10": 0.3637,
+                }
+            },
+        )
+    ],
+)
+def test_npa_quickstart_functional(
+    notebooks,
+    output_notebook,
+    kernel_name,
+    epochs,
+    batch_size,
+    seed,
+    MIND_type,
+    expected_values,
+):
+    notebook_path = notebooks["npa_quickstart"]
+
+    params = {
+        "epochs": epochs,
+        "batch_size": batch_size,
+        "seed": seed,
+        "MIND_type": MIND_type,
+    }
+    pm.execute_notebook(
+        notebook_path, output_notebook, kernel_name=kernel_name, parameters=params
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key]["group_auc"] == pytest.approx(
+            value["group_auc"], rel=TOL, abs=ABS_TOL
+        )
+        assert results[key]["mean_mrr"] == pytest.approx(
+            value["mean_mrr"], rel=TOL, abs=ABS_TOL
+        )
+        assert results[key]["ndcg@5"] == pytest.approx(
+            value["ndcg@5"], rel=TOL, abs=ABS_TOL
+        )
+        assert results[key]["ndcg@10"] == pytest.approx(
+            value["ndcg@10"], rel=TOL, abs=ABS_TOL
+        )
+
+
+@pytest.mark.gpu
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "yaml_file, data_path, size, epochs, batch_size, expected_values, seed",
+    [
+        (
+            "recommenders/models/deeprec/config/lightgcn.yaml",
+            os.path.join("tests", "resources", "deeprec", "lightgcn"),
+            "100k",
+            5,
+            1024,
+            {
+                "map": 0.094794,
+                "ndcg": 0.354145,
+                "precision": 0.308165,
+                "recall": 0.163034,
+            },
+            42,
+        )
+    ],
+)
+def test_lightgcn_deep_dive_functional(
+    notebooks,
+    output_notebook,
+    kernel_name,
+    yaml_file,
+    data_path,
+    size,
+    epochs,
+    batch_size,
+    expected_values,
+    seed,
+):
+    notebook_path = notebooks["lightgcn_deep_dive"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(
+            TOP_K=10,
+            MOVIELENS_DATA_SIZE=size,
+            EPOCHS=epochs,
+            BATCH_SIZE=batch_size,
+            SEED=seed,
+            yaml_file=yaml_file,
+            user_file=os.path.join(data_path, r"user_embeddings"),
+            item_file=os.path.join(data_path, r"item_embeddings"),
+        ),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.gpu
+@pytest.mark.notebooks
+def test_dkn_quickstart_functional(notebooks, output_notebook, kernel_name):
+    notebook_path = notebooks["dkn_quickstart"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(EPOCHS=5, BATCH_SIZE=500),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    assert results["res"]["auc"] == pytest.approx(0.5651, rel=TOL, abs=ABS_TOL)
+    assert results["res"]["mean_mrr"] == pytest.approx(0.1639, rel=TOL, abs=ABS_TOL)
+    assert results["res"]["ndcg@5"] == pytest.approx(0.1735, rel=TOL, abs=ABS_TOL)
+    assert results["res"]["ndcg@10"] == pytest.approx(0.2301, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.gpu
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "size, expected_values",
+    [
+        ("1m", dict(map=0.081794, ndcg=0.400983, precision=0.367997, recall=0.138352)),
+        # 10m works but takes too long
+    ],
+)
+def test_cornac_bivae_functional(
+    notebooks, output_notebook, kernel_name, size, expected_values
+):
+    notebook_path = notebooks["cornac_bivae_deep_dive"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(MOVIELENS_DATA_SIZE=size),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.gpu
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "data_dir, num_epochs, batch_size, model_name, expected_values, seed",
+    [
+        (
+            os.path.join("tests", "recsys_data", "RecSys", "SASRec-tf2", "data"),
+            1,
+            128,
+            "sasrec",
+            {"ndcg@10": 0.2626, "Hit@10": 0.4244},
+            42,
+        ),
+        (
+            os.path.join("tests", "recsys_data", "RecSys", "SASRec-tf2", "data"),
+            1,
+            128,
+            "ssept",
+            {"ndcg@10": 0.2626, "Hit@10": 0.4244},
+            42,
+        ),
+    ],
+)
+def test_sasrec_quickstart_functional(
+    notebooks,
+    output_notebook,
+    kernel_name,
+    data_dir,
+    num_epochs,
+    batch_size,
+    model_name,
+    expected_values,
+    seed,
+):
+    notebook_path = notebooks["sasrec_quickstart"]
+    params = {
+        "data_dir": data_dir,
+        "num_epochs": num_epochs,
+        "batch_size": batch_size,
+        "model_name": model_name,
+        "seed": seed,
+    }
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=params,
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.gpu
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "size, algos, expected_values_ndcg",
+    [
+        (
+            ["100k"],
+            ["ncf", "fastai", "bivae", "lightgcn"],
+            [0.382793, 0.147583, 0.471722, 0.412664],
+        ),
+    ],
+)
+def test_benchmark_movielens_gpu(
+    notebooks, output_notebook, kernel_name, size, algos, expected_values_ndcg
+):
+    notebook_path = notebooks["benchmark_movielens"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(data_sizes=size, algorithms=algos),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+    assert len(results["results"]) == 4
+    for i, value in enumerate(results["results"]):
+        assert results["results"][i] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
diff --git a/tests/functional/examples/test_notebooks_pyspark.py b/tests/functional/examples/test_notebooks_pyspark.py
new file mode 100644
index 0000000000..57bd879283
--- /dev/null
+++ b/tests/functional/examples/test_notebooks_pyspark.py
@@ -0,0 +1,89 @@
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+import os
+import sys
+import pytest
+import papermill as pm
+import scrapbook as sb
+
+TOL = 0.05
+ABS_TOL = 0.05
+
+
+# This is a flaky test that can fail unexpectedly
+@pytest.mark.flaky(reruns=5, reruns_delay=2)
+@pytest.mark.spark
+@pytest.mark.notebooks
+def test_als_pyspark_functional(notebooks, output_notebook, kernel_name):
+    notebook_path = notebooks["als_pyspark"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="1m"),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    assert results["map"] == pytest.approx(0.00201, rel=TOL, abs=ABS_TOL)
+    assert results["ndcg"] == pytest.approx(0.02516, rel=TOL, abs=ABS_TOL)
+    assert results["precision"] == pytest.approx(0.03172, rel=TOL, abs=ABS_TOL)
+    assert results["recall"] == pytest.approx(0.009302, rel=TOL, abs=ABS_TOL)
+    assert results["rmse"] == pytest.approx(0.8621, rel=TOL, abs=ABS_TOL)
+    assert results["mae"] == pytest.approx(0.68023, rel=TOL, abs=ABS_TOL)
+    assert results["exp_var"] == pytest.approx(0.4094, rel=TOL, abs=ABS_TOL)
+    assert results["rsquared"] == pytest.approx(0.4038, rel=TOL, abs=ABS_TOL)
+
+
+# This is a flaky test that can fail unexpectedly
+@pytest.mark.flaky(reruns=5, reruns_delay=2)
+@pytest.mark.spark
+@pytest.mark.notebooks
+@pytest.mark.skip(reason="It takes too long in the current test machine")
+@pytest.mark.skipif(sys.platform == "win32", reason="Not implemented on Windows")
+def test_mmlspark_lightgbm_criteo_functional(notebooks, output_notebook, kernel_name):
+    notebook_path = notebooks["mmlspark_lightgbm_criteo"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(DATA_SIZE="full", NUM_ITERATIONS=50),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    assert results["auc"] == pytest.approx(0.68895, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.spark
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "size, algos, expected_values_ndcg",
+    [
+        (["100k"], ["als"], [0.035812]),
+    ],
+)
+def test_benchmark_movielens_pyspark(
+    notebooks, output_notebook, kernel_name, size, algos, expected_values_ndcg
+):
+    notebook_path = notebooks["benchmark_movielens"]
+
+    os.environ["PYSPARK_PYTHON"] = sys.executable
+    os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable
+    os.environ.pop("SPARK_HOME", None)
+
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(data_sizes=size, algorithms=algos),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+    assert len(results["results"]) == 1
+    for i, value in enumerate(results["results"]):
+        assert results["results"][i] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
diff --git a/tests/functional/examples/test_notebooks_python.py b/tests/functional/examples/test_notebooks_python.py
new file mode 100644
index 0000000000..ab3dbd738a
--- /dev/null
+++ b/tests/functional/examples/test_notebooks_python.py
@@ -0,0 +1,321 @@
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+import pytest
+import papermill as pm
+import scrapbook as sb
+
+
+TOL = 0.05
+ABS_TOL = 0.05
+
+
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "size, expected_values",
+    [
+        (
+            "1m",
+            {
+                "map": 0.060579,
+                "ndcg": 0.299245,
+                "precision": 0.270116,
+                "recall": 0.104350,
+            },
+        ),
+        (
+            "10m",
+            {
+                "map": 0.098745,
+                "ndcg": 0.319625,
+                "precision": 0.275756,
+                "recall": 0.154014,
+            },
+        ),
+    ],
+)
+def test_sar_single_node_functional(
+    notebooks, output_notebook, kernel_name, size, expected_values
+):
+    notebook_path = notebooks["sar_single_node"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE=size),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "size, expected_values",
+    [
+        (
+            "1m",
+            {
+                "map": 0.033914,
+                "ndcg": 0.231570,
+                "precision": 0.211923,
+                "recall": 0.064663,
+            },
+        ),
+        # ("10m", {"map": , "ndcg": , "precision": , "recall": }), # OOM on test machine
+    ],
+)
+def test_baseline_deep_dive_functional(
+    notebooks, output_notebook, kernel_name, size, expected_values
+):
+    notebook_path = notebooks["baseline_deep_dive"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE=size),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "size, expected_values",
+    [
+        (
+            "1m",
+            dict(
+                rmse=0.89,
+                mae=0.70,
+                rsquared=0.36,
+                exp_var=0.36,
+                map=0.011,
+                ndcg=0.10,
+                precision=0.093,
+                recall=0.025,
+            ),
+        ),
+        # 10m works but takes too long
+    ],
+)
+def test_surprise_svd_functional(
+    notebooks, output_notebook, kernel_name, size, expected_values
+):
+    notebook_path = notebooks["surprise_svd_deep_dive"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(MOVIELENS_DATA_SIZE=size),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "size, expected_values",
+    [
+        (
+            "1m",
+            dict(
+                rmse=0.959885,
+                mae=0.690133,
+                rsquared=0.264014,
+                exp_var=0.264417,
+                map=0.004857,
+                ndcg=0.055128,
+                precision=0.061142,
+                recall=0.017789,
+            ),
+        )
+    ],
+)
+@pytest.mark.skip(reason="VW pip package has installation incompatibilities")
+def test_vw_deep_dive_functional(
+    notebooks, output_notebook, kernel_name, size, expected_values
+):
+    notebook_path = notebooks["vowpal_wabbit_deep_dive"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(MOVIELENS_DATA_SIZE=size, TOP_K=10),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.notebooks
+@pytest.mark.skip(reason="NNI pip package has installation incompatibilities")
+def test_nni_tuning_svd(notebooks, output_notebook, kernel_name, tmp):
+    notebook_path = notebooks["nni_tuning_svd"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(
+            MOVIELENS_DATA_SIZE="100k",
+            SURPRISE_READER="ml-100k",
+            TMP_DIR=tmp,
+            MAX_TRIAL_NUM=1,
+            NUM_EPOCHS=1,
+            WAITING_TIME=20,
+            MAX_RETRIES=50,
+        ),
+    )
+
+
+@pytest.mark.notebooks
+@pytest.mark.skip(reason="Wikidata API is unstable")
+def test_wikidata_functional(notebooks, output_notebook, kernel_name, tmp):
+    notebook_path = notebooks["wikidata_knowledge_graph"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(
+            MOVIELENS_DATA_SIZE="100k", MOVIELENS_SAMPLE=True, MOVIELENS_SAMPLE_SIZE=5
+        ),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    # NOTE: The return number should be always 5, but sometimes we get less because wikidata is unstable
+    assert results["length_result"] >= 1
+
+
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "size, expected_values",
+    [
+        ("1m", dict(map=0.081390, ndcg=0.406627, precision=0.373228, recall=0.132444)),
+        # 10m works but takes too long
+    ],
+)
+def test_cornac_bpr_functional(
+    notebooks, output_notebook, kernel_name, size, expected_values
+):
+    notebook_path = notebooks["cornac_bpr_deep_dive"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(MOVIELENS_DATA_SIZE=size),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "size, epochs, expected_values",
+    [
+        (
+            "100k",
+            10,
+            dict(
+                eval_precision=0.131601,
+                eval_recall=0.038056,
+                eval_precision2=0.145599,
+                eval_recall2=0.051338,
+            ),
+        ),
+    ],
+)
+def test_lightfm_functional(
+    notebooks, output_notebook, kernel_name, size, epochs, expected_values
+):
+    notebook_path = notebooks["lightfm_deep_dive"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(MOVIELENS_DATA_SIZE=size, NO_EPOCHS=epochs),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.notebooks
+@pytest.mark.experimental
+@pytest.mark.parametrize(
+    "expected_values",
+    [({"rmse": 0.4969, "mae": 0.4761})],
+)
+def test_geoimc_functional(notebooks, output_notebook, kernel_name, expected_values):
+    notebook_path = notebooks["geoimc_quickstart"]
+    pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name)
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    for key, value in expected_values.items():
+        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.notebooks
+@pytest.mark.experimental
+def test_xlearn_fm_functional(notebooks, output_notebook, kernel_name):
+    notebook_path = notebooks["xlearn_fm_deep_dive"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(LEARNING_RATE=0.2, EPOCH=10),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    assert results["auc_score"] == pytest.approx(0.75, rel=TOL, abs=ABS_TOL)
+
+
+@pytest.mark.notebooks
+@pytest.mark.parametrize(
+    "size, algos, expected_values_ndcg",
+    [
+        (["100k"], ["svd", "sar", "bpr"], [0.094444, 0.393818, 0.444990]),
+    ],
+)
+def test_benchmark_movielens_cpu(
+    notebooks, output_notebook, kernel_name, size, algos, expected_values_ndcg
+):
+    notebook_path = notebooks["benchmark_movielens"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(data_sizes=size, algorithms=algos),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+    assert len(results["results"]) == 3
+    for i, value in enumerate(results["results"]):
+        assert results["results"][i] == pytest.approx(value, rel=TOL, abs=ABS_TOL)

From 64929e7b7dd1a02ec880edce5dbb7131b378c901 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 18:21:39 +0200
Subject: [PATCH 21/43] functional CPU

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py         |  9 +++++--
 .../data_validation/examples/test_wikidata.py | 27 +++++++++++++++++++
 .../examples/test_notebooks_python.py         | 20 --------------
 3 files changed, 34 insertions(+), 22 deletions(-)
 create mode 100644 tests/data_validation/examples/test_wikidata.py

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 08715249d1..65c709a1aa 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -29,6 +29,8 @@
         "tests/data_validation/examples/test_mind.py::test_mind_utils_runs",  # 219.77s
         "tests/data_validation/examples/test_mind.py::test_mind_utils_values",  # 219.26s
         #
+        "tests/data_validation/examples/test_wikidata.py::test_wikidata_values",
+        #
         "tests/smoke/examples/test_notebooks_python.py::test_lightgbm_quickstart_smoke",  # 46.42s
         #
         "tests/smoke/examples/test_notebooks_python.py::test_cornac_bpr_smoke",  # 16.62s
@@ -44,6 +46,8 @@
         "tests/functional/examples/test_notebooks_python.py::test_geoimc_functional",  # 1006.19s
         #
         "tests/functional/examples/test_notebooks_python.py::test_benchmark_movielens_cpu",  # 58s
+        #
+        "tests/functional/examples/test_notebooks_python.py::test_lightfm_functional",
     ],
     "group_cpu_003": [  # Total group time: 2253s
         "tests/data_validation/recommenders/dataset/test_criteo.py::test_download_criteo_sample",  # 1.05s
@@ -53,8 +57,9 @@
         #
         "tests/smoke/examples/test_notebooks_python.py::test_sar_single_node_smoke",  # 12.58s
         "tests/functional/examples/test_notebooks_python.py::test_sar_single_node_functional",  # 57.67s + 808.83s
-        # FIXME: Add experimental tests in a later iteration
-        # "tests/functional/examples/test_notebooks_python.py::test_xlearn_fm_functional",  # 255.73s
+        "tests/functional/examples/test_notebooks_python.py::test_xlearn_fm_functional",  # 255.73s
+        "tests/functional/examples/test_notebooks_python.py::test_vw_deep_dive_functional",
+        "tests/functional/examples/test_notebooks_python.py::test_nni_tuning_svd",
     ],
     "group_gpu_001": [  # Total group time: 1937.01s
         "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",  # 0.76s (Always the first test to check the GPU works)
diff --git a/tests/data_validation/examples/test_wikidata.py b/tests/data_validation/examples/test_wikidata.py
new file mode 100644
index 0000000000..25fb7dd8e7
--- /dev/null
+++ b/tests/data_validation/examples/test_wikidata.py
@@ -0,0 +1,27 @@
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+
+import pytest
+import papermill as pm
+import scrapbook as sb
+
+
+@pytest.mark.notebooks
+@pytest.mark.skip(reason="Wikidata API is unstable")
+def test_wikidata_values(notebooks, output_notebook, kernel_name):
+    notebook_path = notebooks["wikidata_knowledge_graph"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(
+            MOVIELENS_DATA_SIZE="100k", MOVIELENS_SAMPLE=True, MOVIELENS_SAMPLE_SIZE=5
+        ),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    # NOTE: The return number should be always 5, but sometimes we get less because wikidata is unstable
+    assert results["length_result"] >= 1
diff --git a/tests/functional/examples/test_notebooks_python.py b/tests/functional/examples/test_notebooks_python.py
index ab3dbd738a..f232576271 100644
--- a/tests/functional/examples/test_notebooks_python.py
+++ b/tests/functional/examples/test_notebooks_python.py
@@ -182,26 +182,6 @@ def test_nni_tuning_svd(notebooks, output_notebook, kernel_name, tmp):
     )
 
 
-@pytest.mark.notebooks
-@pytest.mark.skip(reason="Wikidata API is unstable")
-def test_wikidata_functional(notebooks, output_notebook, kernel_name, tmp):
-    notebook_path = notebooks["wikidata_knowledge_graph"]
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(
-            MOVIELENS_DATA_SIZE="100k", MOVIELENS_SAMPLE=True, MOVIELENS_SAMPLE_SIZE=5
-        ),
-    )
-    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
-        "data"
-    ]
-
-    # NOTE: The return number should be always 5, but sometimes we get less because wikidata is unstable
-    assert results["length_result"] >= 1
-
-
 @pytest.mark.notebooks
 @pytest.mark.parametrize(
     "size, expected_values",

From 106aee100eff31fd46fe0fe9cc6a8d172d07dbbc Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 18:25:53 +0200
Subject: [PATCH 22/43] functional GPU and Spark

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 65c709a1aa..c7a99cb4b5 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -147,6 +147,7 @@
         "tests/data_validation/recommenders/datasets/test_criteo.py::test_criteo_load_spark_df_full",  # 374.64s
         #
         "tests/smoke/examples/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_smoke",  # 32.45s
+        "tests/functional/examples/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_functional",
         #
         "tests/smoke/examples/test_notebooks_pyspark.py::test_als_pyspark_smoke",  # 49.53s
         "tests/functional/examples/test_notebooks_pyspark.py::test_als_pyspark_functional",  # 110.58s

From 1eb331308e33acdec4ec561731a2480af887bec0 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 18:30:15 +0200
Subject: [PATCH 23/43] Integration

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py                            | 1 +
 tests/{unit => integration}/recommenders/utils/test_k8s_utils.py | 0
 2 files changed, 1 insertion(+)
 rename tests/{unit => integration}/recommenders/utils/test_k8s_utils.py (100%)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index c7a99cb4b5..709d40fed5 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -196,6 +196,7 @@
         "tests/security/test_dependency_security.py::test_pandas",
         "tests/responsible_ai/recommenders/datasets/test_criteo_privacy.py",
         "tests/responsible_ai/recommenders/datasets/test_movielens_privacy.py",
+        "tests/integration/recommenders/utils/test_k8s_utils.py",
     ],
     "group_notebooks_cpu_001": [  # Total group time: 226.42s
         "tests/unit/examples/test_notebooks_python.py::test_rlrmc_quickstart_runs",
diff --git a/tests/unit/recommenders/utils/test_k8s_utils.py b/tests/integration/recommenders/utils/test_k8s_utils.py
similarity index 100%
rename from tests/unit/recommenders/utils/test_k8s_utils.py
rename to tests/integration/recommenders/utils/test_k8s_utils.py

From b9b2a21eb8d6090032d6aca6eed42e38a56d49b0 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 18:33:13 +0200
Subject: [PATCH 24/43] Reviewing smoke

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/smoke/examples/test_notebooks_gpu.py | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/tests/smoke/examples/test_notebooks_gpu.py b/tests/smoke/examples/test_notebooks_gpu.py
index 9f9b7ef4f3..082b8664bb 100644
--- a/tests/smoke/examples/test_notebooks_gpu.py
+++ b/tests/smoke/examples/test_notebooks_gpu.py
@@ -3,12 +3,8 @@
 
 
 import pytest
-
-try:
-    import papermill as pm
-    import scrapbook as sb
-except ImportError:
-    pass  # disable error while collecting tests for non-notebook environments
+import papermill as pm
+import scrapbook as sb
 
 from recommenders.utils.gpu_utils import get_number_gpus
 
@@ -17,14 +13,12 @@
 ABS_TOL = 0.05
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_gpu_vm():
     assert get_number_gpus() >= 1
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_ncf_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["ncf"]
@@ -45,7 +39,6 @@ def test_ncf_smoke(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_ncf_deep_dive_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["ncf_deep_dive"]
@@ -73,7 +66,6 @@ def test_ncf_deep_dive_smoke(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_fastai_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["fastai"]
@@ -98,7 +90,6 @@ def test_fastai_smoke(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_xdeepfm_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["xdeepfm_quickstart"]
@@ -125,7 +116,6 @@ def test_xdeepfm_smoke(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_wide_deep_smoke(notebooks, output_notebook, kernel_name, tmp):
     notebook_path = notebooks["wide_deep"]
@@ -154,7 +144,6 @@ def test_wide_deep_smoke(notebooks, output_notebook, kernel_name, tmp):
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_naml_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["naml_quickstart"]
@@ -175,7 +164,6 @@ def test_naml_smoke(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_nrms_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["nrms_quickstart"]
@@ -196,7 +184,6 @@ def test_nrms_smoke(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_npa_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["npa_quickstart"]
@@ -217,7 +204,6 @@ def test_npa_smoke(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_lstur_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["lstur_quickstart"]
@@ -238,7 +224,6 @@ def test_lstur_smoke(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_cornac_bivae_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["cornac_bivae_deep_dive"]

From 1a8ab4a55bb2dd57a39a14abcdc76eea554e2257 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 18:36:13 +0200
Subject: [PATCH 25/43] Reviewing smoke

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py          |  1 +
 tests/smoke/examples/test_notebooks_pyspark.py | 11 +++--------
 tests/smoke/examples/test_notebooks_python.py  | 15 +++------------
 3 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 709d40fed5..db459ff5db 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -58,6 +58,7 @@
         "tests/smoke/examples/test_notebooks_python.py::test_sar_single_node_smoke",  # 12.58s
         "tests/functional/examples/test_notebooks_python.py::test_sar_single_node_functional",  # 57.67s + 808.83s
         "tests/functional/examples/test_notebooks_python.py::test_xlearn_fm_functional",  # 255.73s
+        "tests/smoke/examples/test_notebooks_python.py::test_vw_deep_dive_smoke",
         "tests/functional/examples/test_notebooks_python.py::test_vw_deep_dive_functional",
         "tests/functional/examples/test_notebooks_python.py::test_nni_tuning_svd",
     ],
diff --git a/tests/smoke/examples/test_notebooks_pyspark.py b/tests/smoke/examples/test_notebooks_pyspark.py
index 561b45b68f..2e521104a6 100644
--- a/tests/smoke/examples/test_notebooks_pyspark.py
+++ b/tests/smoke/examples/test_notebooks_pyspark.py
@@ -1,14 +1,11 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import sys
 import pytest
-
-try:
-    import papermill as pm
-    import scrapbook as sb
-except ImportError:
-    pass  # disable error while collecting tests for non-notebook environments
+import papermill as pm
+import scrapbook as sb
 
 
 TOL = 0.05
@@ -17,7 +14,6 @@
 
 # This is a flaky test that can fail unexpectedly
 @pytest.mark.flaky(reruns=5, reruns_delay=2)
-@pytest.mark.smoke
 @pytest.mark.spark
 @pytest.mark.notebooks
 def test_als_pyspark_smoke(notebooks, output_notebook, kernel_name):
@@ -45,7 +41,6 @@ def test_als_pyspark_smoke(notebooks, output_notebook, kernel_name):
 
 # This is a flaky test that can fail unexpectedly
 @pytest.mark.flaky(reruns=5, reruns_delay=2)
-@pytest.mark.smoke
 @pytest.mark.spark
 @pytest.mark.notebooks
 @pytest.mark.skipif(sys.platform == "win32", reason="Not implemented on Windows")
diff --git a/tests/smoke/examples/test_notebooks_python.py b/tests/smoke/examples/test_notebooks_python.py
index b30c46524e..0bd359ce39 100644
--- a/tests/smoke/examples/test_notebooks_python.py
+++ b/tests/smoke/examples/test_notebooks_python.py
@@ -1,20 +1,16 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-import pytest
 
-try:
-    import papermill as pm
-    import scrapbook as sb
-except ImportError:
-    pass  # disable error while collecting tests for non-notebook environments
+import pytest
+import papermill as pm
+import scrapbook as sb
 
 
 TOL = 0.05
 ABS_TOL = 0.05
 
 
-@pytest.mark.smoke
 @pytest.mark.notebooks
 def test_sar_single_node_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["sar_single_node"]
@@ -34,7 +30,6 @@ def test_sar_single_node_smoke(notebooks, output_notebook, kernel_name):
     assert results["recall"] == pytest.approx(0.176385, rel=TOL, abs=ABS_TOL)
 
 
-@pytest.mark.smoke
 @pytest.mark.notebooks
 def test_baseline_deep_dive_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["baseline_deep_dive"]
@@ -58,7 +53,6 @@ def test_baseline_deep_dive_smoke(notebooks, output_notebook, kernel_name):
     assert results["recall"] == pytest.approx(0.108826, rel=TOL, abs=ABS_TOL)
 
 
-@pytest.mark.smoke
 @pytest.mark.notebooks
 def test_surprise_svd_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["surprise_svd_deep_dive"]
@@ -82,7 +76,6 @@ def test_surprise_svd_smoke(notebooks, output_notebook, kernel_name):
     assert results["recall"] == pytest.approx(0.032, rel=TOL, abs=ABS_TOL)
 
 
-@pytest.mark.smoke
 @pytest.mark.notebooks
 @pytest.mark.skip(reason="VW pip package has installation incompatibilities")
 def test_vw_deep_dive_smoke(notebooks, output_notebook, kernel_name):
@@ -107,7 +100,6 @@ def test_vw_deep_dive_smoke(notebooks, output_notebook, kernel_name):
     assert results["recall"] == pytest.approx(0.037612, rel=TOL, abs=ABS_TOL)
 
 
-@pytest.mark.smoke
 @pytest.mark.notebooks
 def test_lightgbm_quickstart_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["lightgbm_quickstart"]
@@ -138,7 +130,6 @@ def test_lightgbm_quickstart_smoke(notebooks, output_notebook, kernel_name):
     )
 
 
-@pytest.mark.smoke
 @pytest.mark.notebooks
 def test_cornac_bpr_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["cornac_bpr_deep_dive"]

From 3fc4e0dcfa1f8a19a95acd865fcf10dfad279dad Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 22:43:11 +0200
Subject: [PATCH 26/43] unit tests notebooks

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py         |  2 +
 .../data_validation/examples/test_wikidata.py | 17 ++++++++
 tests/unit/examples/test_notebooks_gpu.py     |  7 +---
 tests/unit/examples/test_notebooks_pyspark.py |  7 +---
 tests/unit/examples/test_notebooks_python.py  | 42 +++++--------------
 5 files changed, 34 insertions(+), 41 deletions(-)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index db459ff5db..4cabcc48cd 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -29,6 +29,7 @@
         "tests/data_validation/examples/test_mind.py::test_mind_utils_runs",  # 219.77s
         "tests/data_validation/examples/test_mind.py::test_mind_utils_values",  # 219.26s
         #
+        "tests/data_validation/examples/test_wikidata.py::test_wikidata_runs",
         "tests/data_validation/examples/test_wikidata.py::test_wikidata_values",
         #
         "tests/smoke/examples/test_notebooks_python.py::test_lightgbm_quickstart_smoke",  # 46.42s
@@ -209,6 +210,7 @@
         "tests/unit/examples/test_notebooks_python.py::test_lightgbm",
         "tests/unit/examples/test_notebooks_python.py::test_cornac_deep_dive_runs",
         "tests/unit/examples/test_notebooks_python.py::test_sar_single_node_runs",
+        "tests/unit/examples/test_notebooks_python.py::test_vw_deep_dive_runs",
     ],
     "group_spark_001": [  # Total group time: 270.41s
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__return_success",
diff --git a/tests/data_validation/examples/test_wikidata.py b/tests/data_validation/examples/test_wikidata.py
index 25fb7dd8e7..cdee1699b9 100644
--- a/tests/data_validation/examples/test_wikidata.py
+++ b/tests/data_validation/examples/test_wikidata.py
@@ -7,6 +7,23 @@
 import scrapbook as sb
 
 
+@pytest.mark.notebooks
+@pytest.mark.skip(reason="Wikidata API is unstable")
+def test_wikidata_runs(notebooks, output_notebook, kernel_name, tmp):
+    notebook_path = notebooks["wikidata_knowledge_graph"]
+    MOVIELENS_SAMPLE_SIZE = 5
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(
+            MOVIELENS_DATA_SIZE="100k",
+            MOVIELENS_SAMPLE=True,
+            MOVIELENS_SAMPLE_SIZE=MOVIELENS_SAMPLE_SIZE,
+        ),
+    )
+
+
 @pytest.mark.notebooks
 @pytest.mark.skip(reason="Wikidata API is unstable")
 def test_wikidata_values(notebooks, output_notebook, kernel_name):
diff --git a/tests/unit/examples/test_notebooks_gpu.py b/tests/unit/examples/test_notebooks_gpu.py
index 251ef44eb3..45073daf5f 100644
--- a/tests/unit/examples/test_notebooks_gpu.py
+++ b/tests/unit/examples/test_notebooks_gpu.py
@@ -1,13 +1,10 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import os
 import pytest
-
-try:
-    import papermill as pm
-except ImportError:
-    pass  # disable error while collecting tests for non-notebook environments
+import papermill as pm
 
 from recommenders.utils.gpu_utils import get_number_gpus
 
diff --git a/tests/unit/examples/test_notebooks_pyspark.py b/tests/unit/examples/test_notebooks_pyspark.py
index 156aae7dc7..372fe6f238 100644
--- a/tests/unit/examples/test_notebooks_pyspark.py
+++ b/tests/unit/examples/test_notebooks_pyspark.py
@@ -1,13 +1,10 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import sys
 import pytest
-
-try:
-    import papermill as pm
-except ImportError:
-    pass  # disable error while collecting tests for non-notebook environments
+import papermill as pm
 
 from recommenders.utils.constants import (
     DEFAULT_RATING_COL,
diff --git a/tests/unit/examples/test_notebooks_python.py b/tests/unit/examples/test_notebooks_python.py
index 37b71d591d..ed3d494fdf 100644
--- a/tests/unit/examples/test_notebooks_python.py
+++ b/tests/unit/examples/test_notebooks_python.py
@@ -1,15 +1,11 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import sys
 import pytest
-
-try:
-    import papermill as pm
-    import scrapbook as sb
-except ImportError:
-    pass  # disable error while collecting tests for non-notebook environments
-
+import papermill as pm
+import scrapbook as sb
 
 TOL = 0.05
 ABS_TOL = 0.05
@@ -60,13 +56,6 @@ def test_surprise_deep_dive_runs(notebooks, output_notebook, kernel_name):
     )
 
 
-@pytest.mark.notebooks
-@pytest.mark.skip(reason="VW pip package has installation incompatibilities")
-def test_vw_deep_dive_runs(notebooks, output_notebook, kernel_name):
-    notebook_path = notebooks["vowpal_wabbit_deep_dive"]
-    pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name)
-
-
 @pytest.mark.notebooks
 def test_lightgbm(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["lightgbm_quickstart"]
@@ -86,24 +75,13 @@ def test_lightgbm(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-@pytest.mark.skip(reason="Wikidata API is unstable")
-def test_wikidata_runs(notebooks, output_notebook, kernel_name, tmp):
-    notebook_path = notebooks["wikidata_knowledge_graph"]
-    MOVIELENS_SAMPLE_SIZE = 5
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(
-            MOVIELENS_DATA_SIZE="100k",
-            MOVIELENS_SAMPLE=True,
-            MOVIELENS_SAMPLE_SIZE=MOVIELENS_SAMPLE_SIZE,
-        ),
-    )
+def test_cornac_deep_dive_runs(notebooks, output_notebook, kernel_name):
+    notebook_path = notebooks["cornac_bpr_deep_dive"]
+    pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name)
 
 
-@pytest.mark.experimental
 @pytest.mark.notebooks
+@pytest.mark.experimental
 def test_rlrmc_quickstart_runs(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["rlrmc_quickstart"]
     pm.execute_notebook(
@@ -115,6 +93,8 @@ def test_rlrmc_quickstart_runs(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-def test_cornac_deep_dive_runs(notebooks, output_notebook, kernel_name):
-    notebook_path = notebooks["cornac_bpr_deep_dive"]
+@pytest.mark.experimental
+@pytest.mark.skip(reason="VW pip package has installation incompatibilities")
+def test_vw_deep_dive_runs(notebooks, output_notebook, kernel_name):
+    notebook_path = notebooks["vowpal_wabbit_deep_dive"]
     pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name)

From 61868d243cb19a94511620d28fe1158f4d8825ab Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 22:56:36 +0200
Subject: [PATCH 27/43] unit tests dataset

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py         | 17 +++++++-
 .../datasets/test_pandas_df_utils.py          |  2 +-
 .../datasets/test_python_splitter.py          | 17 ++++----
 .../datasets/test_spark_splitter.py           | 41 ++++++++++---------
 .../unit/recommenders/datasets/test_sparse.py | 12 +++---
 5 files changed, 52 insertions(+), 37 deletions(-)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 4cabcc48cd..c12751ccd2 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -168,10 +168,26 @@
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_default_param__succeed",
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_custom_param__succeed",
         "tests/data_validation/recommenders/datasets/test_wikidata.py::test_find_wikidata_id",
+        "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download",
         "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_wrong_bytes",
         "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_maybe",
         "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_retry",
         "tests/unit/recommenders/datasets/test_download_utils.py::test_download_path",
+        "tests/unit/recommenders/datasets/test_pandas_df_utils.py::test_negative_feedback_sampler",
+        "tests/unit/recommenders/datasets/test_pandas_df_utils.py::test_filter_by",
+        "tests/unit/recommenders/datasets/test_pandas_df_utils.py::test_csv_to_libffm",
+        "tests/unit/recommenders/datasets/test_pandas_df_utils.py::test_has_columns",
+        "tests/unit/recommenders/datasets/test_pandas_df_utils.py::test_has_same_base_dtype",
+        "tests/unit/recommenders/datasets/test_pandas_df_utils.py::test_lru_cache_df",
+        "tests/unit/recommenders/datasets/test_python_splitters.py::test_split_pandas_data",
+        "tests/unit/recommenders/datasets/test_python_splitters.py::test_min_rating_filter",
+        "tests/unit/recommenders/datasets/test_python_splitters.py::test_random_splitter",
+        "tests/unit/recommenders/datasets/test_python_splitters.py::test_chrono_splitter",
+        "tests/unit/recommenders/datasets/test_python_splitters.py::test_stratified_splitter",
+        "tests/unit/recommenders/datasets/test_python_splitters.py::test_int_numpy_stratified_splitter",
+        "tests/unit/recommenders/datasets/test_python_splitters.py::test_float_numpy_stratified_splitter",
+        "tests/unit/recommenders/datasets/test_sparse.py::test_df_to_sparse",
+        "tests/unit/recommenders/datasets/test_sparse.py::test_sparse_to_df",
         "tests/unit/recommenders/utils/test_timer.py::test_timer",
         "tests/unit/recommenders/tuning/test_ncf_utils.py::test_compute_test_results__return_success",
         "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem",
@@ -247,7 +263,6 @@
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_init_spark_rating_eval",
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_catalog_coverage",
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_distributional_coverage",
-        "tests/unit/recommenders/datasets/test_spark_splitter.py::test_min_rating_filter",
     ],
     "group_notebooks_spark_001": [  # Total group time: 794s
         "tests/unit/examples/test_notebooks_pyspark.py::test_als_deep_dive_runs",  # 287.70s
diff --git a/tests/unit/recommenders/datasets/test_pandas_df_utils.py b/tests/unit/recommenders/datasets/test_pandas_df_utils.py
index d414936989..7fe502d188 100644
--- a/tests/unit/recommenders/datasets/test_pandas_df_utils.py
+++ b/tests/unit/recommenders/datasets/test_pandas_df_utils.py
@@ -1,11 +1,11 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+import os
 import numpy as np
 import pandas as pd
 import pytest
 from tempfile import TemporaryDirectory
-import os
 
 from recommenders.datasets.pandas_df_utils import (
     filter_by,
diff --git a/tests/unit/recommenders/datasets/test_python_splitter.py b/tests/unit/recommenders/datasets/test_python_splitter.py
index 16f4fd3b21..39e622ea97 100644
--- a/tests/unit/recommenders/datasets/test_python_splitter.py
+++ b/tests/unit/recommenders/datasets/test_python_splitter.py
@@ -1,15 +1,21 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
+import pytest
 import pandas as pd
 import numpy as np
-import pytest
 
+from recommenders.utils.constants import (
+    DEFAULT_USER_COL,
+    DEFAULT_ITEM_COL,
+    DEFAULT_RATING_COL,
+    DEFAULT_TIMESTAMP_COL,
+)
 from recommenders.datasets.split_utils import (
     min_rating_filter_pandas,
     split_pandas_data_with_ratios,
 )
-
 from recommenders.datasets.python_splitters import (
     python_chrono_split,
     python_random_split,
@@ -17,13 +23,6 @@
     numpy_stratified_split,
 )
 
-from recommenders.utils.constants import (
-    DEFAULT_USER_COL,
-    DEFAULT_ITEM_COL,
-    DEFAULT_RATING_COL,
-    DEFAULT_TIMESTAMP_COL,
-)
-
 
 @pytest.fixture(scope="module")
 def test_specs():
diff --git a/tests/unit/recommenders/datasets/test_spark_splitter.py b/tests/unit/recommenders/datasets/test_spark_splitter.py
index 9051d84440..9f6d402544 100644
--- a/tests/unit/recommenders/datasets/test_spark_splitter.py
+++ b/tests/unit/recommenders/datasets/test_spark_splitter.py
@@ -1,9 +1,10 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
+import pytest
 import numpy as np
 import pandas as pd
-import pytest
 from recommenders.utils.constants import (
     DEFAULT_USER_COL,
     DEFAULT_ITEM_COL,
@@ -48,6 +49,25 @@ def spark_dataset(spark):
     )
 
 
+def _if_later(data1, data2):
+    """Helper function to test if records in data1 are earlier than that in data2.
+    Returns:
+        bool: True or False indicating if data1 is earlier than data2.
+    """
+
+    max_times = data1.groupBy(DEFAULT_USER_COL).agg(
+        F.max(DEFAULT_TIMESTAMP_COL).alias("max")
+    )
+    min_times = data2.groupBy(DEFAULT_USER_COL).agg(
+        F.min(DEFAULT_TIMESTAMP_COL).alias("min")
+    )
+    all_times = max_times.join(min_times, on=DEFAULT_USER_COL).select(
+        (F.col("max") <= F.col("min"))
+    )
+
+    return all([x[0] for x in all_times.collect()])
+
+
 @pytest.mark.spark
 def test_min_rating_filter(spark_dataset):
     dfs_user = min_rating_filter_spark(spark_dataset, min_rating=5, filter_by="user")
@@ -190,22 +210,3 @@ def test_timestamp_splitter(spark_dataset):
     max_split1 = splits[1].agg(F.max(DEFAULT_TIMESTAMP_COL)).first()[0]
     min_split2 = splits[2].agg(F.min(DEFAULT_TIMESTAMP_COL)).first()[0]
     assert max_split1 <= min_split2
-
-
-def _if_later(data1, data2):
-    """Helper function to test if records in data1 are earlier than that in data2.
-    Returns:
-        bool: True or False indicating if data1 is earlier than data2.
-    """
-
-    max_times = data1.groupBy(DEFAULT_USER_COL).agg(
-        F.max(DEFAULT_TIMESTAMP_COL).alias("max")
-    )
-    min_times = data2.groupBy(DEFAULT_USER_COL).agg(
-        F.min(DEFAULT_TIMESTAMP_COL).alias("min")
-    )
-    all_times = max_times.join(min_times, on=DEFAULT_USER_COL).select(
-        (F.col("max") <= F.col("min"))
-    )
-
-    return all([x[0] for x in all_times.collect()])
diff --git a/tests/unit/recommenders/datasets/test_sparse.py b/tests/unit/recommenders/datasets/test_sparse.py
index 47ae4c5d5f..e4df1b9be0 100644
--- a/tests/unit/recommenders/datasets/test_sparse.py
+++ b/tests/unit/recommenders/datasets/test_sparse.py
@@ -80,7 +80,7 @@ def random_date_generator(start_date, range_in_days):
     return results
 
 
-def test_df_to_sparse(test_specs, python_dataset):
+def test_df_to_sparse(python_dataset):
     # initialize the splitter
     header = {
         "col_user": DEFAULT_USER_COL,
@@ -100,7 +100,7 @@ def test_df_to_sparse(test_specs, python_dataset):
     )
 
 
-def test_sparse_to_df(test_specs, python_dataset):
+def test_sparse_to_df(python_dataset):
     # initialize the splitter
     header = {
         "col_user": DEFAULT_USER_COL,
@@ -115,20 +115,20 @@ def test_sparse_to_df(test_specs, python_dataset):
     X, _, _ = am.gen_affinity_matrix()
 
     # use the inverse function to generate a pandas df from a sparse matrix ordered by userID
-    DF = am.map_back_sparse(X, kind="ratings")
+    df = am.map_back_sparse(X, kind="ratings")
 
     # tests: check that the two dataframes have the same elements in the same positions.
     assert (
-        DF.userID.values.all()
+        df.userID.values.all()
         == python_dataset.sort_values(by=["userID"]).userID.values.all()
     )
 
     assert (
-        DF.itemID.values.all()
+        df.itemID.values.all()
         == python_dataset.sort_values(by=["userID"]).itemID.values.all()
     )
 
     assert (
-        DF.rating.values.all()
+        df.rating.values.all()
         == python_dataset.sort_values(by=["userID"]).rating.values.all()
     )

From dfa0c55339a6eadb4b4aeabec7a7e1f2ea41736e Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 23:05:07 +0200
Subject: [PATCH 28/43] unit python evaluation

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py         | 30 +++++++++++++++++++
 .../evaluation/test_python_evaluation.py      |  2 ++
 2 files changed, 32 insertions(+)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index c12751ccd2..a92129840b 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -188,6 +188,36 @@
         "tests/unit/recommenders/datasets/test_python_splitters.py::test_float_numpy_stratified_splitter",
         "tests/unit/recommenders/datasets/test_sparse.py::test_df_to_sparse",
         "tests/unit/recommenders/datasets/test_sparse.py::test_sparse_to_df",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_column_dtypes_match",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_merge_rating",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_merge_ranking",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_rmse",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_mae",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_rsquared",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_exp_var",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_get_top_k_items",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_get_top_k_items_largek",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_ndcg_at_k",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_map_at_k",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_precision",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_recall",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_auc",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_logloss",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_errors",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_catalog_coverage",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_distributional_coverage",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_item_novelty",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_novelty",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_diversity",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_diversity",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_item_serendipity",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_serendipity",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_serendipity",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_diversity_item_feature_vector",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_diversity_item_feature_vector",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_item_serendipity_item_feature_vector",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_serendipity_item_feature_vector",
+        "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_serendipity_item_feature_vector",
         "tests/unit/recommenders/utils/test_timer.py::test_timer",
         "tests/unit/recommenders/tuning/test_ncf_utils.py::test_compute_test_results__return_success",
         "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem",
diff --git a/tests/unit/recommenders/evaluation/test_python_evaluation.py b/tests/unit/recommenders/evaluation/test_python_evaluation.py
index e5837fc663..72ac95ead4 100644
--- a/tests/unit/recommenders/evaluation/test_python_evaluation.py
+++ b/tests/unit/recommenders/evaluation/test_python_evaluation.py
@@ -1,6 +1,7 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import numpy as np
 import pandas as pd
 import pytest
@@ -40,6 +41,7 @@
     distributional_coverage,
 )
 
+
 TOL = 0.0001
 
 

From 94986422af8ad326186803a0ca913614b7b5f560 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Sep 2023 23:11:57 +0200
Subject: [PATCH 29/43] unit pyspark evaluation

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py                       | 1 +
 tests/unit/recommenders/evaluation/test_spark_evaluation.py | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index a92129840b..80e40637b9 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -259,6 +259,7 @@
         "tests/unit/examples/test_notebooks_python.py::test_vw_deep_dive_runs",
     ],
     "group_spark_001": [  # Total group time: 270.41s
+        "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_init_spark",
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__return_success",
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__store_tmp_file",
         "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__data_serialization_default_param",
diff --git a/tests/unit/recommenders/evaluation/test_spark_evaluation.py b/tests/unit/recommenders/evaluation/test_spark_evaluation.py
index 9cf35ee3ec..ada8c02e9e 100644
--- a/tests/unit/recommenders/evaluation/test_spark_evaluation.py
+++ b/tests/unit/recommenders/evaluation/test_spark_evaluation.py
@@ -1,9 +1,10 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
+import pytest
 import numpy as np
 import pandas as pd
-import pytest
 from pandas.util.testing import assert_frame_equal
 
 from recommenders.evaluation.python_evaluation import (

From 0eea4d8c184e83118b7ec7c8412a73456eaaeb2d Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sun, 17 Sep 2023 08:23:53 +0200
Subject: [PATCH 30/43] Added en extra s

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 80e40637b9..71e1942dc1 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -179,13 +179,13 @@
         "tests/unit/recommenders/datasets/test_pandas_df_utils.py::test_has_columns",
         "tests/unit/recommenders/datasets/test_pandas_df_utils.py::test_has_same_base_dtype",
         "tests/unit/recommenders/datasets/test_pandas_df_utils.py::test_lru_cache_df",
-        "tests/unit/recommenders/datasets/test_python_splitters.py::test_split_pandas_data",
-        "tests/unit/recommenders/datasets/test_python_splitters.py::test_min_rating_filter",
-        "tests/unit/recommenders/datasets/test_python_splitters.py::test_random_splitter",
-        "tests/unit/recommenders/datasets/test_python_splitters.py::test_chrono_splitter",
-        "tests/unit/recommenders/datasets/test_python_splitters.py::test_stratified_splitter",
-        "tests/unit/recommenders/datasets/test_python_splitters.py::test_int_numpy_stratified_splitter",
-        "tests/unit/recommenders/datasets/test_python_splitters.py::test_float_numpy_stratified_splitter",
+        "tests/unit/recommenders/datasets/test_python_splitter.py::test_split_pandas_data",
+        "tests/unit/recommenders/datasets/test_python_splitter.py::test_min_rating_filter",
+        "tests/unit/recommenders/datasets/test_python_splitter.py::test_random_splitter",
+        "tests/unit/recommenders/datasets/test_python_splitter.py::test_chrono_splitter",
+        "tests/unit/recommenders/datasets/test_python_splitter.py::test_stratified_splitter",
+        "tests/unit/recommenders/datasets/test_python_splitter.py::test_int_numpy_stratified_splitter",
+        "tests/unit/recommenders/datasets/test_python_splitter.py::test_float_numpy_stratified_splitter",
         "tests/unit/recommenders/datasets/test_sparse.py::test_df_to_sparse",
         "tests/unit/recommenders/datasets/test_sparse.py::test_sparse_to_df",
         "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_column_dtypes_match",

From 224e15b0c503676860ca2079ff2641eb4235b2d5 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sun, 17 Sep 2023 08:40:52 +0200
Subject: [PATCH 31/43] unit models WIP

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 setup.py                                        |  1 +
 tests/ci/azureml_tests/test_groups.py           | 17 +++++++++++++++--
 .../recommenders/models/test_cornac_utils.py    |  3 ++-
 .../recommenders/models/test_deeprec_model.py   |  1 +
 .../recommenders/models/test_deeprec_utils.py   |  1 +
 tests/unit/recommenders/models/test_geoimc.py   | 12 +++++++-----
 .../recommenders/models/test_lightfm_utils.py   |  2 ++
 7 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/setup.py b/setup.py
index 04c0dad111..679252462b 100644
--- a/setup.py
+++ b/setup.py
@@ -89,6 +89,7 @@
     "vowpalwabbit>=8.9.0,<9",
     # nni needs to be upgraded
     "nni==1.5",
+    "pymanopt>=0.2.5",
 ]
 
 # The following dependency can be installed as below, however PyPI does not allow direct URLs.
diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 71e1942dc1..7cacc1efd8 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -224,7 +224,19 @@
         "tests/unit/recommenders/models/test_sar_singlenode.py::test_sar_item_similarity",
         "tests/unit/recommenders/models/test_tfidf_utils.py::test_tokenize_text",
         "tests/unit/recommenders/models/test_tfidf_utils.py::test_get_tokens",
+        "tests/unit/recommenders/models/test_cornac_utils.py::test_predict",
         "tests/unit/recommenders/models/test_cornac_utils.py::test_recommend_k_items",
+        "tests/unit/recommenders/models/test_geoimc.py::test_dataptr",
+        "tests/unit/recommenders/models/test_geoimc.py::test_length_normalize",
+        "tests/unit/recommenders/models/test_geoimc.py::test_mean_center",
+        "tests/unit/recommenders/models/test_geoimc.py::test_reduce_dims",
+        "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem",
+        "tests/unit/recommenders/models/test_geoimc.py::test_inferer_init",
+        "tests/unit/recommenders/models/test_geoimc.py::test_inferer_infer",
+        "tests/unit/recommenders/models/test_lightfm_utils.py::test_interactions",
+        "tests/unit/recommenders/models/test_lightfm_utils.py::test_fitting",
+        "tests/unit/recommenders/models/test_lightfm_utils.py::test_sim_users",
+        "tests/unit/recommenders/models/test_lightfm_utils.py::test_sim_items",
         "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_rating",
         "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_ranking",
         "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_rmse",
@@ -313,16 +325,15 @@
         "tests/unit/recommenders/models/test_rbm.py::test_sampling_funct",
         "tests/unit/recommenders/models/test_rbm.py::test_train_param_init",
         "tests/unit/recommenders/models/test_rbm.py::test_save_load",
-        "tests/unit/recommenders/models/test_wide_deep_utils.py::test_wide_model",
         "tests/unit/recommenders/models/test_ncf_singlenode.py::test_neumf_save_load",
         "tests/unit/recommenders/models/test_ncf_singlenode.py::test_regular_save_load",
         "tests/unit/recommenders/utils/test_tf_utils.py::test_evaluation_log_hook",
         "tests/unit/recommenders/utils/test_tf_utils.py::test_pandas_input_fn_for_saved_model",
         "tests/unit/recommenders/models/test_wide_deep_utils.py::test_wide_deep_model",
+        "tests/unit/recommenders/models/test_wide_deep_utils.py::test_deep_model",
         "tests/unit/recommenders/models/test_newsrec_model.py::test_naml_component_definition",
         "tests/unit/recommenders/models/test_newsrec_model.py::test_lstur_component_definition",
         "tests/unit/recommenders/models/test_newsrec_model.py::test_nrms_component_definition",
-        "tests/unit/recommenders/models/test_wide_deep_utils.py::test_deep_model",
         "tests/unit/recommenders/models/test_newsrec_model.py::test_npa_component_definition",
         "tests/unit/recommenders/models/test_ncf_singlenode.py::test_fit",
         "tests/unit/recommenders/models/test_ncf_singlenode.py::test_init",
@@ -348,6 +359,8 @@
         "tests/unit/recommenders/models/test_deeprec_model.py::test_nextitnet_component_definition",
         "tests/unit/recommenders/models/test_deeprec_model.py::test_sum_component_definition",
         "tests/unit/recommenders/models/test_deeprec_model.py::test_lightgcn_component_definition",
+        "tests/unit/recommenders/models/test_deeprec_utils.py::test_prepare_hparams",
+        "tests/unit/recommenders/models/test_deeprec_utils.py::test_load_yaml_file",
         "tests/security/test_dependency_security.py::test_tensorflow",
         "tests/security/test_dependency_security.py::test_torch",
         "tests/regression/test_compatibility_tf.py",
diff --git a/tests/unit/recommenders/models/test_cornac_utils.py b/tests/unit/recommenders/models/test_cornac_utils.py
index dad7f3e446..51dde4a8fd 100644
--- a/tests/unit/recommenders/models/test_cornac_utils.py
+++ b/tests/unit/recommenders/models/test_cornac_utils.py
@@ -2,8 +2,8 @@
 # Licensed under the MIT License.
 
 
-import pandas as pd
 import pytest
+import pandas as pd
 import cornac
 
 from recommenders.utils.constants import (
@@ -14,6 +14,7 @@
 from recommenders.models.cornac.cornac_utils import predict, predict_ranking
 from recommenders.evaluation.python_evaluation import mae, rmse, ndcg_at_k, recall_at_k
 
+
 TOL = 0.001
 
 
diff --git a/tests/unit/recommenders/models/test_deeprec_model.py b/tests/unit/recommenders/models/test_deeprec_model.py
index 6bd672f417..8207e0bf16 100644
--- a/tests/unit/recommenders/models/test_deeprec_model.py
+++ b/tests/unit/recommenders/models/test_deeprec_model.py
@@ -1,6 +1,7 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import os
 import pytest
 
diff --git a/tests/unit/recommenders/models/test_deeprec_utils.py b/tests/unit/recommenders/models/test_deeprec_utils.py
index d54e470871..310e4ef3a3 100644
--- a/tests/unit/recommenders/models/test_deeprec_utils.py
+++ b/tests/unit/recommenders/models/test_deeprec_utils.py
@@ -1,6 +1,7 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import os
 import pytest
 
diff --git a/tests/unit/recommenders/models/test_geoimc.py b/tests/unit/recommenders/models/test_geoimc.py
index fd4dfc4da1..0eabc339d9 100644
--- a/tests/unit/recommenders/models/test_geoimc.py
+++ b/tests/unit/recommenders/models/test_geoimc.py
@@ -1,12 +1,13 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-try:
-    import collections
-    import pytest
-    import numpy as np
-    from scipy.sparse import csr_matrix
 
+import collections
+import pytest
+import numpy as np
+from scipy.sparse import csr_matrix
+
+try:
     from recommenders.models.geoimc.geoimc_data import DataPtr
     from recommenders.models.geoimc.geoimc_predict import Inferer
     from recommenders.models.geoimc.geoimc_algorithm import IMCProblem
@@ -19,6 +20,7 @@
 except:
     pass  # skip if pymanopt not installed
 
+
 _IMC_TEST_DATA = [
     (
         csr_matrix(np.array([[1, 5, 3], [7, 2, 1]])),
diff --git a/tests/unit/recommenders/models/test_lightfm_utils.py b/tests/unit/recommenders/models/test_lightfm_utils.py
index c0990f2055..dbd82a63d0 100644
--- a/tests/unit/recommenders/models/test_lightfm_utils.py
+++ b/tests/unit/recommenders/models/test_lightfm_utils.py
@@ -1,6 +1,7 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import pytest
 import itertools
 import numpy as np
@@ -13,6 +14,7 @@
     similar_items,
 )
 
+
 SEEDNO = 42
 TEST_PERCENTAGE = 0.25
 TEST_USER_ID = 2

From 34665b0b9270ef11364ece93eb6e8329d4a9e9c6 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sun, 17 Sep 2023 08:51:31 +0200
Subject: [PATCH 32/43] unit models WIP

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py         | 24 +++++++++++--------
 .../recommenders/models/test_lightfm_utils.py |  3 ++-
 .../recommenders/models/test_ncf_dataset.py   |  2 ++
 .../models/test_ncf_singlenode.py             |  3 ++-
 .../recommenders/models/test_newsrec_model.py | 16 ++++++-------
 tests/unit/recommenders/models/test_rbm.py    |  3 ++-
 6 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 7cacc1efd8..e311fac6e8 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -322,29 +322,33 @@
     ],
     "group_gpu_001": [  # Total group time: 492.62s
         "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",  # 0.76s (Always the first test to check the GPU works)
+        "tests/unit/recommenders/models/test_rbm.py::test_class_init",
         "tests/unit/recommenders/models/test_rbm.py::test_sampling_funct",
         "tests/unit/recommenders/models/test_rbm.py::test_train_param_init",
         "tests/unit/recommenders/models/test_rbm.py::test_save_load",
+        "tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_init",
+        "tests/unit/recommenders/models/test_ncf_dataset.py::test_train_loader",
+        "tests/unit/recommenders/models/test_ncf_dataset.py::test_test_loader",
+        "tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_init_unsorted",
+        "tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_init_empty",
+        "tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_missing_column",
+        "tests/unit/recommenders/models/test_ncf_dataset.py::test_negative_sampler",
+        "tests/unit/recommenders/models/test_ncf_singlenode.py::test_init",
+        "tests/unit/recommenders/models/test_ncf_singlenode.py::test_fit",
         "tests/unit/recommenders/models/test_ncf_singlenode.py::test_neumf_save_load",
         "tests/unit/recommenders/models/test_ncf_singlenode.py::test_regular_save_load",
+        "tests/unit/recommenders/models/test_ncf_singlenode.py::test_predict",
         "tests/unit/recommenders/utils/test_tf_utils.py::test_evaluation_log_hook",
         "tests/unit/recommenders/utils/test_tf_utils.py::test_pandas_input_fn_for_saved_model",
+        "tests/unit/recommenders/utils/test_tf_utils.py::test_pandas_input_fn",
         "tests/unit/recommenders/models/test_wide_deep_utils.py::test_wide_deep_model",
         "tests/unit/recommenders/models/test_wide_deep_utils.py::test_deep_model",
         "tests/unit/recommenders/models/test_newsrec_model.py::test_naml_component_definition",
         "tests/unit/recommenders/models/test_newsrec_model.py::test_lstur_component_definition",
         "tests/unit/recommenders/models/test_newsrec_model.py::test_nrms_component_definition",
         "tests/unit/recommenders/models/test_newsrec_model.py::test_npa_component_definition",
-        "tests/unit/recommenders/models/test_ncf_singlenode.py::test_fit",
-        "tests/unit/recommenders/models/test_ncf_singlenode.py::test_init",
-        "tests/unit/recommenders/models/test_ncf_dataset.py::test_test_loader",
-        "tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_init",
-        "tests/unit/recommenders/models/test_ncf_dataset.py::test_train_loader",
-        "tests/unit/recommenders/models/test_rbm.py::test_class_init",
-        "tests/unit/recommenders/utils/test_tf_utils.py::test_pandas_input_fn",
-        "tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_init_unsorted",
-        "tests/unit/recommenders/models/test_ncf_singlenode.py::test_predict",
-        "tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_missing_column",
+        "tests/unit/recommenders/models/test_newsrec_utils.py::test_prepare_hparams",
+        "tests/unit/recommenders/models/test_newsrec_utils.py::test_load_yaml_file",
         # "tests/unit/recommenders/models/test_sasrec_model.py::test_prepare_data", # FIXME: it takes too long to run
         # "tests/unit/recommenders/models/test_sasrec_model.py::test_sampler", # FIXME: it takes too long to run
         # "tests/unit/recommenders/models/test_sasrec_model.py::test_sasrec", # FIXME: it takes too long to run
diff --git a/tests/unit/recommenders/models/test_lightfm_utils.py b/tests/unit/recommenders/models/test_lightfm_utils.py
index dbd82a63d0..52f743fa65 100644
--- a/tests/unit/recommenders/models/test_lightfm_utils.py
+++ b/tests/unit/recommenders/models/test_lightfm_utils.py
@@ -6,8 +6,9 @@
 import itertools
 import numpy as np
 import pandas as pd
-from lightfm import LightFM, cross_validation
 from lightfm.data import Dataset
+from lightfm import LightFM, cross_validation
+
 from recommenders.models.lightfm.lightfm_utils import (
     track_model_metrics,
     similar_users,
diff --git a/tests/unit/recommenders/models/test_ncf_dataset.py b/tests/unit/recommenders/models/test_ncf_dataset.py
index 4a148a13d3..b48554ae84 100644
--- a/tests/unit/recommenders/models/test_ncf_dataset.py
+++ b/tests/unit/recommenders/models/test_ncf_dataset.py
@@ -1,9 +1,11 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import os
 import pytest
 import pandas as pd
+
 from recommenders.utils.constants import (
     DEFAULT_USER_COL,
     DEFAULT_ITEM_COL,
diff --git a/tests/unit/recommenders/models/test_ncf_singlenode.py b/tests/unit/recommenders/models/test_ncf_singlenode.py
index 80531d2939..918bd368d9 100644
--- a/tests/unit/recommenders/models/test_ncf_singlenode.py
+++ b/tests/unit/recommenders/models/test_ncf_singlenode.py
@@ -1,11 +1,12 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import os
 import shutil
+import pytest
 import numpy as np
 import pandas as pd
-import pytest
 
 try:
     from recommenders.models.ncf.ncf_singlenode import NCF
diff --git a/tests/unit/recommenders/models/test_newsrec_model.py b/tests/unit/recommenders/models/test_newsrec_model.py
index bb2dab7b5b..ac3c751a44 100644
--- a/tests/unit/recommenders/models/test_newsrec_model.py
+++ b/tests/unit/recommenders/models/test_newsrec_model.py
@@ -22,11 +22,11 @@ def test_nrms_component_definition(mind_resource_path):
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"nrms.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "nrms.yaml")
 
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
@@ -54,11 +54,11 @@ def test_naml_component_definition(mind_resource_path):
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict_all.pkl")
     vertDict_file = os.path.join(mind_resource_path, "utils", "vert_dict.pkl")
     subvertDict_file = os.path.join(mind_resource_path, "utils", "subvert_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"naml.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "naml.yaml")
 
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
@@ -86,11 +86,11 @@ def test_npa_component_definition(mind_resource_path):
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"npa.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "npa.yaml")
 
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
@@ -116,11 +116,11 @@ def test_lstur_component_definition(mind_resource_path):
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"lstur.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "lstur.yaml")
 
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "mind", "utils"),
             "MINDdemo_utils.zip",
         )
diff --git a/tests/unit/recommenders/models/test_rbm.py b/tests/unit/recommenders/models/test_rbm.py
index d9430d8cfd..7f0174e472 100644
--- a/tests/unit/recommenders/models/test_rbm.py
+++ b/tests/unit/recommenders/models/test_rbm.py
@@ -1,8 +1,9 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-import numpy as np
+
 import pytest
+import numpy as np
 
 try:
     from recommenders.models.rbm.rbm import RBM

From c26a6a3ca174644339948b6c6208399742110381 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sun, 17 Sep 2023 09:09:17 +0200
Subject: [PATCH 33/43] unit models

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py         | 33 +++++++++++++++--
 .../models/test_sar_singlenode.py             |  5 ++-
 .../recommenders/models/test_sasrec_model.py  |  6 ++--
 .../models/test_surprise_utils.py             | 30 ++++++++--------
 .../recommenders/models/test_tfidf_utils.py   | 35 ++++++++++---------
 .../recommenders/models/test_vowpal_wabbit.py | 13 +++----
 .../models/test_wide_deep_utils.py            |  2 ++
 7 files changed, 77 insertions(+), 47 deletions(-)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index e311fac6e8..4c8f92ec3f 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -221,9 +221,14 @@
         "tests/unit/recommenders/utils/test_timer.py::test_timer",
         "tests/unit/recommenders/tuning/test_ncf_utils.py::test_compute_test_results__return_success",
         "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem",
-        "tests/unit/recommenders/models/test_sar_singlenode.py::test_sar_item_similarity",
+        "tests/unit/recommenders/models/test_tfidf_utils.py::test_init",
+        "tests/unit/recommenders/models/test_tfidf_utils.py::test_clean_dataframe",
+        "tests/unit/recommenders/models/test_tfidf_utils.py::test_fit",
         "tests/unit/recommenders/models/test_tfidf_utils.py::test_tokenize_text",
         "tests/unit/recommenders/models/test_tfidf_utils.py::test_get_tokens",
+        "tests/unit/recommenders/models/test_tfidf_utils.py::test_get_stop_words",
+        "tests/unit/recommenders/models/test_tfidf_utils.py::test_recommend_top_k_items",
+        "tests/unit/recommenders/models/test_tfidf_utils.py::test_get_top_k_recommendations",
         "tests/unit/recommenders/models/test_cornac_utils.py::test_predict",
         "tests/unit/recommenders/models/test_cornac_utils.py::test_recommend_k_items",
         "tests/unit/recommenders/models/test_geoimc.py::test_dataptr",
@@ -237,6 +242,29 @@
         "tests/unit/recommenders/models/test_lightfm_utils.py::test_fitting",
         "tests/unit/recommenders/models/test_lightfm_utils.py::test_sim_users",
         "tests/unit/recommenders/models/test_lightfm_utils.py::test_sim_items",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_init",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_fit",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_predict",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_predict_all_items",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_sar_item_similarity",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_user_affinity",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_recommend_k_items",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_get_item_based_topk",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_get_popularity_based_topk",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_get_normalized_scores",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_match_similarity_type_from_json_file",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_dataset_with_duplicates",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_get_topk_most_similar_users",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_item_frequencies",
+        "tests/unit/recommenders/models/test_sar_singlenode.py::test_user_frequencies",
+        "tests/unit/recommenders/models/test_surprise_utils.py::test_predict",
+        "tests/unit/recommenders/models/test_surprise_utils.py::test_recommend_k_items",
+        "tests/unit/recommenders/models/test_vowpal_wabbit.py::test_vw_init_del",
+        "tests/unit/recommenders/models/test_vowpal_wabbit.py::test_to_vw_cmd",
+        "tests/unit/recommenders/models/test_vowpal_wabbit.py::test_parse_train_cmd",
+        "tests/unit/recommenders/models/test_vowpal_wabbit.py::test_parse_test_cmd",
+        "tests/unit/recommenders/models/test_vowpal_wabbit.py::test_to_vw_file",
+        "tests/unit/recommenders/models/test_vowpal_wabbit.py::test_fit_and_predict",
         "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_rating",
         "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_ranking",
         "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_rmse",
@@ -341,8 +369,9 @@
         "tests/unit/recommenders/utils/test_tf_utils.py::test_evaluation_log_hook",
         "tests/unit/recommenders/utils/test_tf_utils.py::test_pandas_input_fn_for_saved_model",
         "tests/unit/recommenders/utils/test_tf_utils.py::test_pandas_input_fn",
-        "tests/unit/recommenders/models/test_wide_deep_utils.py::test_wide_deep_model",
+        "tests/unit/recommenders/models/test_wide_deep_utils.py::test_wide_model",
         "tests/unit/recommenders/models/test_wide_deep_utils.py::test_deep_model",
+        "tests/unit/recommenders/models/test_wide_deep_utils.py::test_wide_deep_model",
         "tests/unit/recommenders/models/test_newsrec_model.py::test_naml_component_definition",
         "tests/unit/recommenders/models/test_newsrec_model.py::test_lstur_component_definition",
         "tests/unit/recommenders/models/test_newsrec_model.py::test_nrms_component_definition",
diff --git a/tests/unit/recommenders/models/test_sar_singlenode.py b/tests/unit/recommenders/models/test_sar_singlenode.py
index c5fe4ab1c6..19e79b2337 100644
--- a/tests/unit/recommenders/models/test_sar_singlenode.py
+++ b/tests/unit/recommenders/models/test_sar_singlenode.py
@@ -1,9 +1,10 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-import itertools
+
 import json
 import pytest
+import itertools
 import numpy as np
 import pandas as pd
 from pandas.testing import assert_frame_equal
@@ -250,7 +251,6 @@ def test_recommend_k_items(
 
 
 def test_get_item_based_topk(header, pandas_dummy):
-
     sar = SAR(**header)
     sar.fit(pandas_dummy)
 
@@ -299,7 +299,6 @@ def test_get_item_based_topk(header, pandas_dummy):
 
 
 def test_get_popularity_based_topk(header):
-
     train_df = pd.DataFrame(
         {
             header["col_user"]: [1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4],
diff --git a/tests/unit/recommenders/models/test_sasrec_model.py b/tests/unit/recommenders/models/test_sasrec_model.py
index 7f3cde7673..0e85d5f836 100644
--- a/tests/unit/recommenders/models/test_sasrec_model.py
+++ b/tests/unit/recommenders/models/test_sasrec_model.py
@@ -1,8 +1,9 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-import pytest
+
 import os
+import pytest
 from collections import defaultdict
 
 try:
@@ -15,7 +16,6 @@
         download_and_extract,
         _reviews_preprocessing,
     )
-
 except ImportError:
     pass  # skip if in cpu environment
 
@@ -180,7 +180,6 @@ def test_sampler():
 
 @pytest.mark.gpu
 def test_sasrec(model_parameters):
-
     params = model_parameters
 
     model = SASREC(
@@ -202,7 +201,6 @@ def test_sasrec(model_parameters):
 
 @pytest.mark.gpu
 def test_ssept(model_parameters):
-
     params = model_parameters
 
     model = SSEPT(
diff --git a/tests/unit/recommenders/models/test_surprise_utils.py b/tests/unit/recommenders/models/test_surprise_utils.py
index 2568fc4acc..c80e63818d 100644
--- a/tests/unit/recommenders/models/test_surprise_utils.py
+++ b/tests/unit/recommenders/models/test_surprise_utils.py
@@ -2,22 +2,20 @@
 # Licensed under the MIT License.
 
 
-try:
-    import pandas as pd
-    import pytest
-    import surprise
-
-    from recommenders.utils.constants import (
-        DEFAULT_USER_COL,
-        DEFAULT_ITEM_COL,
-        DEFAULT_RATING_COL,
-    )
-    from recommenders.models.surprise.surprise_utils import (
-        predict,
-        compute_ranking_predictions,
-    )
-except:
-    pass  # skip if surprise not installed
+import pytest
+import pandas as pd
+import surprise
+
+from recommenders.utils.constants import (
+    DEFAULT_USER_COL,
+    DEFAULT_ITEM_COL,
+    DEFAULT_RATING_COL,
+)
+from recommenders.models.surprise.surprise_utils import (
+    predict,
+    compute_ranking_predictions,
+)
+
 
 TOL = 0.001
 
diff --git a/tests/unit/recommenders/models/test_tfidf_utils.py b/tests/unit/recommenders/models/test_tfidf_utils.py
index 58450b138d..a52047950f 100644
--- a/tests/unit/recommenders/models/test_tfidf_utils.py
+++ b/tests/unit/recommenders/models/test_tfidf_utils.py
@@ -1,10 +1,13 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import pytest
-from recommenders.models.tfidf.tfidf_utils import TfidfRecommender
-import pandas as pd
 import scipy
+import pandas as pd
+
+from recommenders.models.tfidf.tfidf_utils import TfidfRecommender
+
 
 CLEAN_COL = "cleaned_text"
 K = 2
@@ -51,6 +54,20 @@ def model():
     return TfidfRecommender(id_col="cord_uid", tokenization_method="scibert")
 
 
+@pytest.fixture(scope="module")
+def df_clean(model, df):
+    return model.clean_dataframe(df, ["abstract", "full_text"], new_col_name=CLEAN_COL)
+
+
+@pytest.fixture(scope="module")
+def model_fit(model, df_clean):
+    model_fit = TfidfRecommender(id_col="cord_uid", tokenization_method="scibert")
+    tf, vectors_tokenized = model_fit.tokenize_text(df_clean)
+    model_fit.fit(tf, vectors_tokenized)
+
+    return model_fit
+
+
 def test_init(model):
     assert model.id_col == "cord_uid"
     assert model.tokenization_method == "scibert"
@@ -69,11 +86,6 @@ def test_clean_dataframe(model, df):
     assert False not in isalphanumeric
 
 
-@pytest.fixture(scope="module")
-def df_clean(model, df):
-    return model.clean_dataframe(df, ["abstract", "full_text"], new_col_name=CLEAN_COL)
-
-
 def test_tokenize_text(model, df_clean):
     _, vectors_tokenized = model.tokenize_text(df_clean)
     assert True not in list(df_clean[CLEAN_COL] == vectors_tokenized)
@@ -85,15 +97,6 @@ def test_fit(model, df_clean):
     assert type(model.tfidf_matrix) == scipy.sparse.csr.csr_matrix
 
 
-@pytest.fixture(scope="module")
-def model_fit(model, df_clean):
-    model_fit = TfidfRecommender(id_col="cord_uid", tokenization_method="scibert")
-    tf, vectors_tokenized = model_fit.tokenize_text(df_clean)
-    model_fit.fit(tf, vectors_tokenized)
-
-    return model_fit
-
-
 def test_get_tokens(model_fit):
     tokens = model_fit.get_tokens()
     assert type(tokens) == dict
diff --git a/tests/unit/recommenders/models/test_vowpal_wabbit.py b/tests/unit/recommenders/models/test_vowpal_wabbit.py
index b8c07c1867..0db5bdd704 100644
--- a/tests/unit/recommenders/models/test_vowpal_wabbit.py
+++ b/tests/unit/recommenders/models/test_vowpal_wabbit.py
@@ -1,6 +1,7 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import os
 import pytest
 from unittest import mock
@@ -23,7 +24,7 @@ def model():
     del model
 
 
-@pytest.mark.vw
+@pytest.mark.experimental
 def test_vw_init_del():
     model = VW()
     tempdir = model.tempdir.name
@@ -33,7 +34,7 @@ def test_vw_init_del():
     assert not os.path.exists(tempdir)
 
 
-@pytest.mark.vw
+@pytest.mark.experimental
 def test_to_vw_cmd():
     expected = [
         "vw",
@@ -60,7 +61,7 @@ def test_to_vw_cmd():
     assert VW.to_vw_cmd(params=params) == expected
 
 
-@pytest.mark.vw
+@pytest.mark.experimental
 def test_parse_train_cmd(model):
     expected = [
         "vw",
@@ -77,7 +78,7 @@ def test_parse_train_cmd(model):
     assert model.parse_train_params(params=params) == expected
 
 
-@pytest.mark.vw
+@pytest.mark.experimental
 def test_parse_test_cmd(model):
     expected = [
         "vw",
@@ -98,7 +99,7 @@ def test_parse_test_cmd(model):
     assert model.parse_test_params(params=params) == expected
 
 
-@pytest.mark.vw
+@pytest.mark.experimental
 def test_to_vw_file(model, df):
     expected = ["1 0|user 1 |item 8", "5 1|user 3 |item 7", "3 2|user 2 |item 7"]
     model.to_vw_file(df, train=True)
@@ -107,7 +108,7 @@ def test_to_vw_file(model, df):
     del model
 
 
-@pytest.mark.vw
+@pytest.mark.experimental
 def test_fit_and_predict(model, df):
     # generate fake predictions
     with open(model.prediction_file, "w") as f:
diff --git a/tests/unit/recommenders/models/test_wide_deep_utils.py b/tests/unit/recommenders/models/test_wide_deep_utils.py
index 85522f5e4f..2d4168e99f 100644
--- a/tests/unit/recommenders/models/test_wide_deep_utils.py
+++ b/tests/unit/recommenders/models/test_wide_deep_utils.py
@@ -1,9 +1,11 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import os
 import pytest
 import pandas as pd
+
 from recommenders.utils.constants import (
     DEFAULT_USER_COL,
     DEFAULT_ITEM_COL,

From 66e6dca6c2026bb506d1c8c37a4f71a427e4751b Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sun, 17 Sep 2023 09:16:35 +0200
Subject: [PATCH 34/43] unit tuning

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py          | 14 +++++++++++++-
 .../unit/recommenders/tuning/test_ncf_utils.py |  8 ++++++--
 .../unit/recommenders/tuning/test_nni_utils.py | 18 +++++++++++++++---
 tests/unit/recommenders/tuning/test_sweep.py   |  4 +---
 4 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 4c8f92ec3f..5849cbb41a 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -219,7 +219,6 @@
         "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_serendipity_item_feature_vector",
         "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_serendipity_item_feature_vector",
         "tests/unit/recommenders/utils/test_timer.py::test_timer",
-        "tests/unit/recommenders/tuning/test_ncf_utils.py::test_compute_test_results__return_success",
         "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem",
         "tests/unit/recommenders/models/test_tfidf_utils.py::test_init",
         "tests/unit/recommenders/models/test_tfidf_utils.py::test_clean_dataframe",
@@ -265,6 +264,19 @@
         "tests/unit/recommenders/models/test_vowpal_wabbit.py::test_parse_test_cmd",
         "tests/unit/recommenders/models/test_vowpal_wabbit.py::test_to_vw_file",
         "tests/unit/recommenders/models/test_vowpal_wabbit.py::test_fit_and_predict",
+        "tests/unit/recommenders/tuning/test_ncf_utils.py::test_compute_test_results__return_success",
+        "tests/unit/recommenders/tuning/test_nni_utils.py::test_get_experiment_status",
+        "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_experiment_status_done",
+        "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_experiment_status_tuner_no_more_trial",
+        "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_experiment_status_running",
+        "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_experiment_status_no_more_trial",
+        "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_experiment_status_failed",
+        "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_stopped_timeout",
+        "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_stopped",
+        "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_metrics_written",
+        "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_metrics_written_timeout",
+        "tests/unit/recommenders/tuning/test_nni_utils.py::test_get_trials",
+        "tests/unit/recommenders/tuning/test_sweep.py::test_param_sweep",
         "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_rating",
         "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_ranking",
         "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_rmse",
diff --git a/tests/unit/recommenders/tuning/test_ncf_utils.py b/tests/unit/recommenders/tuning/test_ncf_utils.py
index 1cfb334cd2..3f2039bc53 100644
--- a/tests/unit/recommenders/tuning/test_ncf_utils.py
+++ b/tests/unit/recommenders/tuning/test_ncf_utils.py
@@ -1,17 +1,21 @@
-import pytest
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
 
+import pytest
 from unittest.mock import Mock
 
 from recommenders.tuning.nni.ncf_utils import compute_test_results
 from recommenders.datasets.movielens import MockMovielensSchema
 
+
 DATA_SIZE = 1  # setting to 1 so all IDs are unique
 
 
 @pytest.fixture(scope="module")
 def mock_model():
     def mock_predict(*args, is_list=False):
-        """ Mock model predict method"""
+        """Mock model predict method"""
         if is_list:
             return [0] * DATA_SIZE
         else:
diff --git a/tests/unit/recommenders/tuning/test_nni_utils.py b/tests/unit/recommenders/tuning/test_nni_utils.py
index ca7f2e4886..bdd5fb166c 100644
--- a/tests/unit/recommenders/tuning/test_nni_utils.py
+++ b/tests/unit/recommenders/tuning/test_nni_utils.py
@@ -1,12 +1,13 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-import json
+
 import os
 import sys
-from tempfile import TemporaryDirectory
-from unittest.mock import patch
+import json
 import pytest
+from unittest.mock import patch
+from tempfile import TemporaryDirectory
 
 from recommenders.tuning.nni.nni_utils import (
     get_experiment_status,
@@ -52,6 +53,7 @@ def mock_exception():
     raise Exception()
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_get_experiment_status():
     content = "some_status"
@@ -64,6 +66,7 @@ def test_get_experiment_status():
         assert nni_status["errors"] == [""]
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_experiment_status_done():
     content = "DONE"
@@ -74,6 +77,7 @@ def test_check_experiment_status_done():
         check_experiment_status(wait=0.1, max_retries=1)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_experiment_status_tuner_no_more_trial():
     content = "TUNER_NO_MORE_TRIAL"
@@ -84,6 +88,7 @@ def test_check_experiment_status_tuner_no_more_trial():
         check_experiment_status(wait=0.1, max_retries=1)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_experiment_status_running():
     content = "RUNNING"
@@ -97,6 +102,7 @@ def test_check_experiment_status_running():
     assert "check_experiment_status() timed out" == str(excinfo.value)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_experiment_status_no_more_trial():
     content = "NO_MORE_TRIAL"
@@ -110,6 +116,7 @@ def test_check_experiment_status_no_more_trial():
     assert "check_experiment_status() timed out" == str(excinfo.value)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_experiment_status_failed():
     content = "some_failed_status"
@@ -126,6 +133,7 @@ def test_check_experiment_status_failed():
     )
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_stopped_timeout():
     content = "some_status"
@@ -139,12 +147,14 @@ def test_check_stopped_timeout():
     assert "check_stopped() timed out" == str(excinfo.value)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_stopped():
     with patch("requests.get", side_effect=mock_exception):
         check_stopped(wait=0.1, max_retries=1)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_metrics_written():
     content = [{"finalMetricData": None}, {"finalMetricData": None}]
@@ -152,6 +162,7 @@ def test_check_metrics_written():
         check_metrics_written(wait=0.1, max_retries=1)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_metrics_written_timeout():
     content = [{"logPath": "/p"}, {"logPath": "/q"}]
@@ -163,6 +174,7 @@ def test_check_metrics_written_timeout():
     assert "check_metrics_written() timed out" == str(excinfo.value)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_get_trials():
     with TemporaryDirectory() as tmp_dir1, TemporaryDirectory() as tmp_dir2:
diff --git a/tests/unit/recommenders/tuning/test_sweep.py b/tests/unit/recommenders/tuning/test_sweep.py
index ae3f67d52a..640eb46e7d 100644
--- a/tests/unit/recommenders/tuning/test_sweep.py
+++ b/tests/unit/recommenders/tuning/test_sweep.py
@@ -9,9 +9,7 @@
 
 @pytest.fixture(scope="module")
 def parameter_dictionary():
-    params = {"param1": [1, 2, 3], "param2": [4, 5, 6], "param3": 1}
-
-    return params
+    return {"param1": [1, 2, 3], "param2": [4, 5, 6], "param3": 1}
 
 
 def test_param_sweep(parameter_dictionary):

From 3cdb4d32af08559b03d682a4ffc5a70e650b52e6 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sun, 17 Sep 2023 09:33:12 +0200
Subject: [PATCH 35/43] unit utils

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py         | 31 ++++++++++++++++---
 .../recommenders/utils/test_general_utils.py  |  1 +
 .../unit/recommenders/utils/test_gpu_utils.py | 15 +++++----
 .../recommenders/utils/test_notebook_utils.py | 18 +++++------
 tests/unit/recommenders/utils/test_plot.py    |  1 +
 .../recommenders/utils/test_python_utils.py   |  3 +-
 .../unit/recommenders/utils/test_tf_utils.py  |  6 ++--
 tests/unit/recommenders/utils/test_timer.py   |  3 +-
 8 files changed, 53 insertions(+), 25 deletions(-)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index 5849cbb41a..b56847693a 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -218,7 +218,6 @@
         "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_item_serendipity_item_feature_vector",
         "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_serendipity_item_feature_vector",
         "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_serendipity_item_feature_vector",
-        "tests/unit/recommenders/utils/test_timer.py::test_timer",
         "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem",
         "tests/unit/recommenders/models/test_tfidf_utils.py::test_init",
         "tests/unit/recommenders/models/test_tfidf_utils.py::test_clean_dataframe",
@@ -277,6 +276,20 @@
         "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_metrics_written_timeout",
         "tests/unit/recommenders/tuning/test_nni_utils.py::test_get_trials",
         "tests/unit/recommenders/tuning/test_sweep.py::test_param_sweep",
+        "tests/unit/recommenders/utils/test_general_utils.py::test_invert_dictionary",
+        "tests/unit/recommenders/utils/test_general_utils.py::test_get_number_processors",
+        "tests/unit/recommenders/utils/test_plot.py::test_line_graph",
+        "tests/unit/recommenders/utils/test_python_utils.py::test_python_jaccard",
+        "tests/unit/recommenders/utils/test_python_utils.py::test_python_lift",
+        "tests/unit/recommenders/utils/test_python_utils.py::test_exponential_decay",
+        "tests/unit/recommenders/utils/test_python_utils.py::test_get_top_k_scored_items",
+        "tests/unit/recommenders/utils/test_python_utils.py::test_binarize",
+        "tests/unit/recommenders/utils/test_python_utils.py::test_rescale",
+        "tests/unit/recommenders/utils/test_timer.py::test_no_time",
+        "tests/unit/recommenders/utils/test_timer.py::test_stop_before_start",
+        "tests/unit/recommenders/utils/test_timer.py::test_interval_before_stop",
+        "tests/unit/recommenders/utils/test_timer.py::test_timer",
+        "tests/unit/recommenders/utils/test_timer.py::test_timer_format",
         "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_rating",
         "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_ranking",
         "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_rmse",
@@ -348,6 +361,7 @@
         "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_distributional_coverage",
     ],
     "group_notebooks_spark_001": [  # Total group time: 794s
+        "tests/unit/recommenders/utils/test_notebook_utils.py::test_is_databricks",
         "tests/unit/examples/test_notebooks_pyspark.py::test_als_deep_dive_runs",  # 287.70s
         "tests/unit/examples/test_notebooks_pyspark.py::test_als_pyspark_runs",  # 374.15s
         "tests/unit/examples/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_runs",  # 132.09s
@@ -378,9 +392,6 @@
         "tests/unit/recommenders/models/test_ncf_singlenode.py::test_neumf_save_load",
         "tests/unit/recommenders/models/test_ncf_singlenode.py::test_regular_save_load",
         "tests/unit/recommenders/models/test_ncf_singlenode.py::test_predict",
-        "tests/unit/recommenders/utils/test_tf_utils.py::test_evaluation_log_hook",
-        "tests/unit/recommenders/utils/test_tf_utils.py::test_pandas_input_fn_for_saved_model",
-        "tests/unit/recommenders/utils/test_tf_utils.py::test_pandas_input_fn",
         "tests/unit/recommenders/models/test_wide_deep_utils.py::test_wide_model",
         "tests/unit/recommenders/models/test_wide_deep_utils.py::test_deep_model",
         "tests/unit/recommenders/models/test_wide_deep_utils.py::test_wide_deep_model",
@@ -394,6 +405,18 @@
         # "tests/unit/recommenders/models/test_sasrec_model.py::test_sampler", # FIXME: it takes too long to run
         # "tests/unit/recommenders/models/test_sasrec_model.py::test_sasrec", # FIXME: it takes too long to run
         # "tests/unit/recommenders/models/test_sasrec_model.py::test_ssept", # FIXME: it takes too long to run
+        "tests/unit/recommenders/utils/test_gpu_utils.py::test_get_gpu_info",
+        "tests/unit/recommenders/utils/test_gpu_utils.py::test_get_number_gpus",
+        "tests/unit/recommenders/utils/test_gpu_utils.py::test_clear_memory_all_gpus",
+        "tests/unit/recommenders/utils/test_gpu_utils.py::test_get_cuda_version",
+        "tests/unit/recommenders/utils/test_gpu_utils.py::test_get_cudnn_version",
+        "tests/unit/recommenders/utils/test_gpu_utils.py::test_cudnn_enabled",
+        "tests/unit/recommenders/utils/test_gpu_utils.py::test_tensorflow_gpu",
+        "tests/unit/recommenders/utils/test_gpu_utils.py::test_pytorch_gpu",
+        "tests/unit/recommenders/utils/test_tf_utils.py::test_evaluation_log_hook",
+        "tests/unit/recommenders/utils/test_tf_utils.py::test_pandas_input_fn",
+        "tests/unit/recommenders/utils/test_tf_utils.py::test_pandas_input_fn_for_saved_model",
+        "tests/unit/recommenders/utils/test_tf_utils.py::test_build_optimizer",
     ],
     "group_gpu_002": [  # Total group time:
         "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",  # 0.76s (Always the first test to check the GPU works)
diff --git a/tests/unit/recommenders/utils/test_general_utils.py b/tests/unit/recommenders/utils/test_general_utils.py
index 9bca7eac97..c8ca548744 100644
--- a/tests/unit/recommenders/utils/test_general_utils.py
+++ b/tests/unit/recommenders/utils/test_general_utils.py
@@ -1,6 +1,7 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 from recommenders.utils.general_utils import invert_dictionary, get_number_processors
 
 
diff --git a/tests/unit/recommenders/utils/test_gpu_utils.py b/tests/unit/recommenders/utils/test_gpu_utils.py
index 4bbbac54fc..230a4ba860 100644
--- a/tests/unit/recommenders/utils/test_gpu_utils.py
+++ b/tests/unit/recommenders/utils/test_gpu_utils.py
@@ -1,24 +1,23 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import sys
 import pytest
 
 try:
     import tensorflow as tf
     import torch
+    from recommenders.utils.gpu_utils import (
+        get_cuda_version,
+        get_cudnn_version,
+        get_gpu_info,
+        get_number_gpus,
+    )
 except ImportError:
     pass  # skip this import if we are in cpu environment
 
 
-from recommenders.utils.gpu_utils import (
-    get_cuda_version,
-    get_cudnn_version,
-    get_gpu_info,
-    get_number_gpus,
-)
-
-
 @pytest.mark.gpu
 def test_get_gpu_info():
     assert len(get_gpu_info()) >= 1
diff --git a/tests/unit/recommenders/utils/test_notebook_utils.py b/tests/unit/recommenders/utils/test_notebook_utils.py
index 4a0484d4fa..24223b703f 100644
--- a/tests/unit/recommenders/utils/test_notebook_utils.py
+++ b/tests/unit/recommenders/utils/test_notebook_utils.py
@@ -1,14 +1,12 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-from pathlib import Path
+
 import pytest
+import papermill as pm
+import scrapbook as sb
+from pathlib import Path
 
-try:
-    import papermill as pm
-    import scrapbook as sb
-except ImportError:
-    pass  # disable error while collecting tests for non-notebook environments
 from recommenders.utils.notebook_utils import is_jupyter, is_databricks
 
 
@@ -33,6 +31,8 @@ def test_is_jupyter(output_notebook, kernel_name):
     assert not result_is_databricks
 
 
-# @pytest.mark.notebooks
-# def test_is_databricks():
-#     TODO Currently, we cannot pytest modules on Databricks
+@pytest.mark.spark
+@pytest.mark.notebooks
+@pytest.mark.skip(reason="TODO: Implement this")
+def test_is_databricks():
+    pass
diff --git a/tests/unit/recommenders/utils/test_plot.py b/tests/unit/recommenders/utils/test_plot.py
index e2f2653307..358093c81a 100644
--- a/tests/unit/recommenders/utils/test_plot.py
+++ b/tests/unit/recommenders/utils/test_plot.py
@@ -1,6 +1,7 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import matplotlib
 import matplotlib.pyplot as plt
 from recommenders.utils.plot import line_graph
diff --git a/tests/unit/recommenders/utils/test_python_utils.py b/tests/unit/recommenders/utils/test_python_utils.py
index e29eb81c76..0ee806a1e1 100644
--- a/tests/unit/recommenders/utils/test_python_utils.py
+++ b/tests/unit/recommenders/utils/test_python_utils.py
@@ -2,8 +2,8 @@
 # Licensed under the MIT License.
 
 
-import numpy as np
 import pytest
+import numpy as np
 
 from recommenders.utils.python_utils import (
     exponential_decay,
@@ -14,6 +14,7 @@
     rescale,
 )
 
+
 TOL = 0.0001
 
 
diff --git a/tests/unit/recommenders/utils/test_tf_utils.py b/tests/unit/recommenders/utils/test_tf_utils.py
index 07dae225c3..f064797f54 100644
--- a/tests/unit/recommenders/utils/test_tf_utils.py
+++ b/tests/unit/recommenders/utils/test_tf_utils.py
@@ -1,11 +1,13 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-import itertools
+
 import os
+import pytest
 import numpy as np
 import pandas as pd
-import pytest
+import itertools
+
 from recommenders.utils.constants import (
     DEFAULT_USER_COL,
     DEFAULT_ITEM_COL,
diff --git a/tests/unit/recommenders/utils/test_timer.py b/tests/unit/recommenders/utils/test_timer.py
index 65bb3ae70a..35d174f9ab 100644
--- a/tests/unit/recommenders/utils/test_timer.py
+++ b/tests/unit/recommenders/utils/test_timer.py
@@ -2,8 +2,9 @@
 # Licensed under the MIT License.
 
 
-import pytest
 import time
+import pytest
+
 from recommenders.utils.timer import Timer
 
 

From 0fbbec1628926b173b07eb7da8374abf896c3f07 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sun, 17 Sep 2023 09:48:43 +0200
Subject: [PATCH 36/43] :memo:

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/unit/recommenders/utils/test_plot.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/unit/recommenders/utils/test_plot.py b/tests/unit/recommenders/utils/test_plot.py
index 358093c81a..471889ef2d 100644
--- a/tests/unit/recommenders/utils/test_plot.py
+++ b/tests/unit/recommenders/utils/test_plot.py
@@ -20,6 +20,8 @@ def test_line_graph():
         y_name="Accuracy",
         legend_loc="best",
     )
+    assert plt.gca().get_xlabel() == "Epoch"
+    assert plt.gca().get_ylabel() == "Accuracy"
     plt.close()
 
     # Single graph as a subplot

From c40ffce5c6e49e7b09224a8d4912572fdb4e1220 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sun, 17 Sep 2023 10:53:48 +0200
Subject: [PATCH 37/43] :bug:

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 recommenders/utils/gpu_utils.py                 | 8 ++++----
 tests/unit/recommenders/utils/test_gpu_utils.py | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/recommenders/utils/gpu_utils.py b/recommenders/utils/gpu_utils.py
index 882952f16a..ec8cd0341a 100644
--- a/recommenders/utils/gpu_utils.py
+++ b/recommenders/utils/gpu_utils.py
@@ -97,7 +97,7 @@ def get_cuda_version():
                 data = f.read().replace("\n", "")
             return data
         else:
-            return "Cannot find CUDA in this machine"
+            return None
 
 
 def get_cudnn_version():
@@ -125,14 +125,14 @@ def find_cudnn_in_headers(candiates):
             if version:
                 return version
             else:
-                return "Cannot find CUDNN version"
+                return None
         else:
-            return "Cannot find CUDNN version"
+            return None
 
     try:
         import torch
 
-        return torch.backends.cudnn.version()
+        return str(torch.backends.cudnn.version())
     except (ImportError, ModuleNotFoundError):
         if sys.platform == "win32":
             candidates = [r"C:\NVIDIA\cuda\include\cudnn.h"]
diff --git a/tests/unit/recommenders/utils/test_gpu_utils.py b/tests/unit/recommenders/utils/test_gpu_utils.py
index 230a4ba860..deff1d6f91 100644
--- a/tests/unit/recommenders/utils/test_gpu_utils.py
+++ b/tests/unit/recommenders/utils/test_gpu_utils.py
@@ -37,12 +37,12 @@ def test_clear_memory_all_gpus():
 @pytest.mark.gpu
 @pytest.mark.skipif(sys.platform == "win32", reason="Not implemented on Windows")
 def test_get_cuda_version():
-    assert get_cuda_version() > "9.0.0"
+    assert int(get_cuda_version().split(".")[0]) > 9
 
 
 @pytest.mark.gpu
 def test_get_cudnn_version():
-    assert get_cudnn_version() > "7.0.0"
+    assert int(get_cudnn_version()[0]) > 7
 
 
 @pytest.mark.gpu
@@ -52,7 +52,7 @@ def test_cudnn_enabled():
 
 @pytest.mark.gpu
 def test_tensorflow_gpu():
-    assert tf.test.is_gpu_available()
+    assert len(tf.config.list_physical_devices("GPU")) > 0
 
 
 @pytest.mark.gpu

From de11824bffc730dffe83565305eb7ad8ea857de7 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sun, 17 Sep 2023 11:36:43 +0200
Subject: [PATCH 38/43] :bug:

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/unit/recommenders/utils/test_gpu_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/unit/recommenders/utils/test_gpu_utils.py b/tests/unit/recommenders/utils/test_gpu_utils.py
index deff1d6f91..8d361b61fa 100644
--- a/tests/unit/recommenders/utils/test_gpu_utils.py
+++ b/tests/unit/recommenders/utils/test_gpu_utils.py
@@ -51,6 +51,7 @@ def test_cudnn_enabled():
 
 
 @pytest.mark.gpu
+@pytest.mark.skip(reason="This function in TF is flaky")
 def test_tensorflow_gpu():
     assert len(tf.config.list_physical_devices("GPU")) > 0
 

From e26f4ff938634435acb813efb9e78c80aa754a7e Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sun, 17 Sep 2023 12:17:49 +0200
Subject: [PATCH 39/43] :bug:

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/unit/recommenders/datasets/test_download_utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/unit/recommenders/datasets/test_download_utils.py b/tests/unit/recommenders/datasets/test_download_utils.py
index 9a8e8857aa..de3e650875 100644
--- a/tests/unit/recommenders/datasets/test_download_utils.py
+++ b/tests/unit/recommenders/datasets/test_download_utils.py
@@ -12,7 +12,9 @@
 
 @pytest.fixture
 def files_fixtures():
-    file_url = "https://raw.githubusercontent.com/Microsoft/Recommenders/main/LICENSE"
+    file_url = (
+        "https://raw.githubusercontent.com/recommenders-team/recommenders/main/LICENSE"
+    )
     filepath = "license.txt"
     return file_url, filepath
 

From fdee57913af525046e892fec29badf1b7686cb29 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sun, 17 Sep 2023 21:47:54 +0200
Subject: [PATCH 40/43] Update readme tests

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/README.md | 140 ++++++++++--------------------------------------
 1 file changed, 27 insertions(+), 113 deletions(-)

diff --git a/tests/README.md b/tests/README.md
index cb868ea93a..fde5ee1dd9 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -63,7 +63,9 @@ In this section we show how to create tests and add them to the test pipeline. T
 1. Create your code in the library and/or notebooks.
 1. Design the unit tests for the code.
 1. If you have written a notebook, design the notebook tests and check that the metrics they return is what you expect.
-1. Add the tests to the AzureML pipeline in the corresponding [test group](./ci/azureml_tests/test_groups.py). **Please note that if you don't add your tests to the pipeline, they will not be executed.**
+1. Add the tests to the AzureML pipeline in the corresponding [test group](./ci/azureml_tests/test_groups.py). 
+
+**Please note that if you don't add your tests to the pipeline, they will not be executed.**
 
 ### How to create tests for the Recommenders library
 
@@ -74,8 +76,6 @@ You want to make sure that all your code works before you submit it to the repos
 * Use the mark `@pytest.mark.gpu` if you want the test to be executed
   in a GPU environment. Use `@pytest.mark.spark` if you want the test
   to be executed in a Spark environment.
-* Use `@pytest.mark.smoke` and `@pytest.mark.integration` to mark the
-  tests as smoke tests and integration tests.
 * Use `@pytest.mark.notebooks` if you are testing a notebook.
 * Avoid using `is` in the asserts, instead use the operator `==`.
 * Follow the pattern `assert computation == value`, for example:
@@ -113,7 +113,7 @@ For executing this test, first make sure you are in the correct environment as d
 *Notice that the next instruction executes the tests from the root folder.*
 
 ```bash
-pytest tests/unit/test_notebooks_python.py::test_sar_single_node_runs
+pytest tests/unit/examples/test_notebooks_python.py::test_sar_single_node_runs
 ```
 
 #### Developing nightly tests with Papermill and Scrapbook
@@ -159,14 +159,14 @@ For executing this test, first make sure you are in the correct environment as d
 *Notice that the next instructions execute the tests from the root folder.*
 
 ```
-pytest tests/smoke/test_notebooks_python.py::test_sar_single_node_smoke
+pytest tests/smoke/examples/test_notebooks_python.py::test_sar_single_node_smoke
 ```
 
 More details on how to integrate Papermill with notebooks can be found in their [repo](https://github.com/nteract/papermill). Also, you can check the [Scrapbook repo](https://github.com/nteract/scrapbook).
 
 ### How to add tests to the AzureML pipeline
 
-To add a new test to the AzureML pipeline, add the test path to an appropriate test group listed in [test_groups.py](https://github.com/microsoft/recommenders/blob/main/tests/ci/azureml_tests/test_groups.py). 
+To add a new test to the AzureML pipeline, add the test path to an appropriate test group listed in [test_groups.py](./ci/azureml_tests/test_groups.py). 
 
 Tests in `group_cpu_xxx` groups are executed on a CPU-only AzureML compute cluster node. Tests in `group_gpu_xxx` groups are executed on a GPU-enabled AzureML compute cluster node with GPU related dependencies added to the AzureML run environment. Tests in `group_pyspark_xxx` groups are executed on a CPU-only AzureML compute cluster node, with the PySpark related dependencies added to the AzureML run environment. 
 
@@ -177,15 +177,13 @@ Example of adding a new test:
 1. In the environment that you are running your code, first see if there is a group whose total runtime is less than the threshold.
 ```python
 "group_spark_001": [  # Total group time: 271.13s
-    "tests/smoke/recommenders/dataset/test_movielens.py::test_load_spark_df",  # 4.33s
-    "tests/integration/recommenders/datasets/test_movielens.py::test_load_spark_df",  # 25.58s + 101.99s + 139.23s
+    "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_spark_df",  # 4.33s+ 25.58s + 101.99s + 139.23s
 ],
 ```
 2. Add the test to the group, add the time it takes to compute, and update the total group time.
 ```python
 "group_spark_001": [  # Total group time: 571.13s
-    "tests/smoke/recommenders/dataset/test_movielens.py::test_load_spark_df",  # 4.33s
-    "tests/integration/recommenders/datasets/test_movielens.py::test_load_spark_df",  # 25.58s + 101.99s + 139.23s
+    "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_spark_df",  # 4.33s+ 25.58s + 101.99s + 139.23s
     #
     "tests/path/to/test_new.py::test_new_function", # 300s
 ],
@@ -217,133 +215,50 @@ Then, follow the steps below to create the AzureML infrastructure:
 
 To manually execute the tests in the CPU, GPU or Spark environments, first **make sure you are in the correct environment as described in the [SETUP.md](../SETUP.md)**.
 
-*Click on the following menus* to see more details on how to execute the unit, smoke and integration tests:
-
-<details>
-<summary><strong><em>Unit tests</em></strong></summary>
-
-Unit tests ensure that each class or function behaves as it should. Every time a developer makes a pull request to staging or main branch, a battery of unit tests is executed.
-
-*Note that the next instructions execute the tests from the root folder.*
-
-For executing the Python unit tests for the utilities:
-
-    pytest tests/unit -m "not notebooks and not spark and not gpu" --durations 0
-
-For executing the Python unit tests for the notebooks:
-
-    pytest tests/unit -m "notebooks and not spark and not gpu" --durations 0
-
-For executing the Python GPU unit tests for the utilities:
-
-    pytest tests/unit -m "not notebooks and not spark and gpu" --durations 0
-
-For executing the Python GPU unit tests for the notebooks:
-
-    pytest tests/unit -m "notebooks and not spark and gpu" --durations 0
-
-For executing the PySpark unit tests for the utilities:
-
-    pytest tests/unit -m "not notebooks and spark and not gpu" --durations 0
-
-For executing the PySpark unit tests for the notebooks:
-
-    pytest tests/unit -m "notebooks and spark and not gpu" --durations 0
-
-*NOTE: Adding `--durations 0` shows the computation time of all tests.*
-
-*NOTE: Adding `--disable-warnings` will disable the warning messages.*
-
-</details>
-
-<details>
-<summary><strong><em>Smoke tests</em></strong></summary>
-
-Smoke tests make sure that the system works and are executed just before the integration tests every night.
+### CPU tests
 
 *Note that the next instructions execute the tests from the root folder.*
 
-For executing the Python smoke tests:
+For executing the CPU tests for the utilities:
 
-    pytest tests/smoke -m "smoke and not spark and not gpu" --durations 0
+    pytest tests -m "not notebooks and not spark and not gpu" --durations 0 --disable-warnings
 
-For executing the Python GPU smoke tests:
+For executing the CPU tests for the notebooks:
 
-    pytest tests/smoke -m "smoke and not spark and gpu" --durations 0
+    pytest tests -m "notebooks and not spark and not gpu" --durations 0 --disable-warnings
 
-For executing the PySpark smoke tests:
+If you want to execute a specific test, you can use the following command:
 
-    pytest tests/smoke -m "smoke and spark and not gpu" --durations 0
+    pytest tests/data_validation/recommenders/datasets/test_mind.py::test_mind_url --durations 0 --disable-warnings
 
-*NOTE: Adding `--durations 0` shows the computation time of all tests.*
+If you want to execute any of the tests types (data_validation, unit, smoke, functional, etc.) you can use the following command:
 
-*NOTE: Adding `--disable-warnings` will disable the warning messages.*
+    pytest tests/data_validation -m "not notebooks and not spark and not gpu" --durations 0 --disable-warnings
 
-</details>
+### GPU tests
 
-<details>
-<summary><strong><em>Integration tests</em></strong></summary>
+For executing the GPU tests for the utilities:
 
-Integration tests make sure that the program results are acceptable.
+    pytest tests -m "not notebooks and not spark and gpu" --durations 0 --disable-warnings
 
-*Note that the next instructions execute the tests from the root folder.*
+For executing the GPU tests for the notebooks:
 
-For executing the Python integration tests:
+    pytest tests -m "notebooks and not spark and gpu" --durations 0 --disable-warnings
 
-    pytest tests/integration -m "integration and not spark and not gpu" --durations 0
+### Spark tests
 
-For executing the Python GPU integration tests:
+For executing the PySpark tests for the utilities:
 
-    pytest tests/integration -m "integration and not spark and gpu" --durations 0
+    pytest tests -m "not notebooks and spark and not gpu" --durations 0 --disable-warnings
 
-For executing the PySpark integration tests:
+For executing the PySpark tests for the notebooks:
 
-    pytest tests/integration -m "integration and spark and not gpu" --durations 0
+    pytest tests -m "notebooks and spark and not gpu" --durations 0 --disable-warnings
 
 *NOTE: Adding `--durations 0` shows the computation time of all tests.*
 
 *NOTE: Adding `--disable-warnings` will disable the warning messages.*
 
-</details>
-
-<details>
-<summary><strong><em>Current Skipped Tests</em></strong></summary>
-
-Several of the tests are skipped for various reasons which are noted below.
-
-<table>
-<tr>
-<td>Test Module</td>
-<td>Test</td>
-<td>Test Environment</td>
-<td>Reason</td>
-</tr>
-<tr>
-<td>unit/recommenders/datasets/test_wikidata</td>
-<td>*</td>
-<td>Linux</td>
-<td>Wikidata API is unstable</td>
-</tr>
-<tr>
-<td>integration/recommenders/datasets/test_notebooks_python</td>
-<td>test_wikidata</td>
-<td>Linux</td>
-<td>Wikidata API is unstable</td>
-</tr>
-<tr>
-<td>*/test_notebooks_python</td>
-<td>test_vw*</td>
-<td>Linux</td>
-<td>VW pip package has installation incompatibilities</td>
-</tr>
-<tr>
-<td>*/test_notebooks_python</td>
-<td>test_nni*</td>
-<td>Linux</td>
-<td>NNI pip package has installation incompatibilities</td>
-</tr>
-</table>
-
 In order to skip a test because there is an OS or upstream issue which cannot be resolved you can use pytest [annotations](https://docs.pytest.org/en/latest/skipping.html).
 
 Example:
@@ -353,4 +268,3 @@ Example:
     def test_to_skip():
         assert False
 
-</details>

From 54b171bdb1709043eca769d1f5463d940a723d29 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sun, 17 Sep 2023 21:53:04 +0200
Subject: [PATCH 41/43] License file changed so the maybe download tests had to
 be updated

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/ci/azureml_tests/test_groups.py                   | 4 ++--
 tests/unit/recommenders/datasets/test_download_utils.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index b56847693a..c21b8348ad 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -8,8 +8,8 @@
 # Standard_A8m_v2 with 8 vCPUs and 64 GiB memory.
 
 # IMPORTANT NOTE:
-# FOR INTEGRATION, NO GROUP SHOULD SURPASS 45MIN = 2700s !!!
-# FOR UNIT, NO GROUP SHOULD SURPASS 15MIN = 900s !!!
+# FOR NIGHTLY, NO GROUP SHOULD SURPASS 45MIN = 2700s !!!
+# FOR PR GATE, NO GROUP SHOULD SURPASS 15MIN = 900s !!!
 
 global nightly_test_groups, pr_gate_test_groups
 
diff --git a/tests/unit/recommenders/datasets/test_download_utils.py b/tests/unit/recommenders/datasets/test_download_utils.py
index de3e650875..c7d649796e 100644
--- a/tests/unit/recommenders/datasets/test_download_utils.py
+++ b/tests/unit/recommenders/datasets/test_download_utils.py
@@ -24,7 +24,7 @@ def test_maybe_download(files_fixtures):
     if os.path.exists(filepath):
         os.remove(filepath)
 
-    downloaded_filepath = maybe_download(file_url, "license.txt", expected_bytes=1162)
+    downloaded_filepath = maybe_download(file_url, "license.txt", expected_bytes=1212)
     assert os.path.exists(downloaded_filepath)
     assert os.path.basename(downloaded_filepath) == "license.txt"
 

From 1c1e1e45f8b9c73a8a3a6119393b937ce5658a1f Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sun, 17 Sep 2023 22:08:12 +0200
Subject: [PATCH 42/43] :memo:

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 README.md                                     |  4 +-
 SETUP.md                                      |  4 +-
 pyproject.toml                                |  3 -
 tests/README.md                               |  3 +-
 .../recommender/test_deeprec_model.py         | 14 ----
 .../recommender/test_deeprec_utils.py         |  2 -
 .../recommender/test_newsrec_model.py         | 68 +++++++++----------
 .../recommender/test_newsrec_utils.py         | 35 +++++-----
 8 files changed, 54 insertions(+), 79 deletions(-)

diff --git a/README.md b/README.md
index b1d20bdebc..a555de8777 100644
--- a/README.md
+++ b/README.md
@@ -135,13 +135,13 @@ This project adheres to [Microsoft's Open Source Code of Conduct](CODE_OF_CONDUC
 
 ## Build Status
 
-These tests are the nightly builds, which compute the smoke and integration tests. `main` is our principal branch and `staging` is our development branch. We use [pytest](https://docs.pytest.org/) for testing python utilities in [recommenders](recommenders) and [Papermill](https://github.com/nteract/papermill) and [Scrapbook](https://nteract-scrapbook.readthedocs.io/en/latest/) for the [notebooks](examples). 
+These tests are the nightly builds, which compute the asynchronous tests. `main` is our principal branch and `staging` is our development branch. We use [pytest](https://docs.pytest.org/) for testing python utilities in [recommenders](recommenders) and [Papermill](https://github.com/nteract/papermill) and [Scrapbook](https://nteract-scrapbook.readthedocs.io/en/latest/) for the [notebooks](examples). 
 
 For more information about the testing pipelines, please see the [test documentation](tests/README.md).
 
 ### AzureML Nightly Build Status
 
-Smoke and integration tests are run daily on AzureML.
+The nightly build tests are run daily on AzureML.
 
 | Build Type | Branch | Status |  | Branch | Status |
 | --- | --- | --- | --- | --- | --- |
diff --git a/SETUP.md b/SETUP.md
index 64f5f20903..2334de0963 100644
--- a/SETUP.md
+++ b/SETUP.md
@@ -156,9 +156,9 @@ First make sure that the tag that you want to add, e.g. `0.6.0`, is added in [`r
 1. Make sure that the code in main passes all the tests (unit and nightly tests).
 1. Create a tag with the version number: e.g. `git tag -a 0.6.0 -m "Recommenders 0.6.0"`.
 1. Push the tag to the remote server: `git push origin 0.6.0`.
-1. When the new tag is pushed, a release pipeline is executed. This pipeline runs all the tests again (unit, smoke and integration), generates a wheel and a tar.gz which are uploaded to a [GitHub draft release](https://github.com/microsoft/recommenders/releases).
+1. When the new tag is pushed, a release pipeline is executed. This pipeline runs all the tests again (PR gate and nightly builds), generates a wheel and a tar.gz which are uploaded to a [GitHub draft release](https://github.com/microsoft/recommenders/releases).
 1. Fill up the draft release with all the recent changes in the code.
 1. Download the wheel and tar.gz locally, these files shouldn't have any bug, since they passed all the tests.
 1. Install twine: `pip install twine`
-1. Publish the wheel and tar.gz to pypi: `twine upload recommenders*`
+1. Publish the wheel and tar.gz to PyPI: `twine upload recommenders*`
 
diff --git a/pyproject.toml b/pyproject.toml
index 385f44302a..0ff4c8d962 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,10 +14,7 @@ build-backend = "setuptools.build_meta"
 [tool.pytest.ini_options]
 markers = [
     "experimental: tests that will not be executed and may need extra dependencies",
-    "flaky: flaky tests that can fail unexpectedly",
     "gpu: tests running on GPU",
-    "integration: integration tests",
     "notebooks: tests for notebooks",
-    "smoke: smoke tests",
     "spark: tests that requires Spark",
 ]
\ No newline at end of file
diff --git a/tests/README.md b/tests/README.md
index fde5ee1dd9..014fa84ebd 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -124,7 +124,7 @@ The first step is to tag the parameters that we are going to inject. For it we n
 
 The way papermill works to inject parameters is very simple, it generates a copy of the notebook (in our code we call it `OUTPUT_NOTEBOOK`), and creates a new cell with the injected variables.
 
-The second modification that we need to do to the notebook is to record the metrics we want to test using `sb.glue("output_variable", python_variable_name)`. We normally use the last cell of the notebook to record all the metrics. These are the metrics that we are going to control in the smoke and integration tests.
+The second modification that we need to do to the notebook is to record the metrics we want to test using `sb.glue("output_variable", python_variable_name)`. We normally use the last cell of the notebook to record all the metrics. These are the metrics that we are going to control in the smoke and functional tests.
 
 This is an example on how we do a smoke test. The complete code can be found in [smoke/examples/test_notebooks_python.py](./smoke/examples/test_notebooks_python.py):
 
@@ -136,7 +136,6 @@ import scrapbook as sb
 TOL = 0.05
 ABS_TOL = 0.05
 
-@pytest.mark.smoke
 def test_sar_single_node_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["sar_single_node"]
     pm.execute_notebook(
diff --git a/tests/smoke/recommenders/recommender/test_deeprec_model.py b/tests/smoke/recommenders/recommender/test_deeprec_model.py
index 357e0aa814..ead368485e 100644
--- a/tests/smoke/recommenders/recommender/test_deeprec_model.py
+++ b/tests/smoke/recommenders/recommender/test_deeprec_model.py
@@ -30,9 +30,7 @@
     pass  # disable error while collecting tests for non-gpu environments
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
-@pytest.mark.deeprec
 def test_FFM_iterator(deeprec_resource_path):
     data_path = os.path.join(deeprec_resource_path, "xdeepfm")
     yaml_file = os.path.join(data_path, "xDeepFM.yaml")
@@ -52,9 +50,7 @@ def test_FFM_iterator(deeprec_resource_path):
         assert isinstance(res, tuple)
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
-@pytest.mark.deeprec
 def test_model_xdeepfm(deeprec_resource_path):
     data_path = os.path.join(deeprec_resource_path, "xdeepfm")
     yaml_file = os.path.join(data_path, "xDeepFM.yaml")
@@ -79,9 +75,7 @@ def test_model_xdeepfm(deeprec_resource_path):
     assert model.predict(data_file, output_file) is not None
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
-@pytest.mark.deeprec
 def test_model_dkn(deeprec_resource_path):
     data_path = os.path.join(deeprec_resource_path, "dkn")
     yaml_file = os.path.join(data_path, r"dkn.yaml")
@@ -116,10 +110,7 @@ def test_model_dkn(deeprec_resource_path):
     assert model.run_eval(valid_file) is not None
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
-@pytest.mark.deeprec
-@pytest.mark.sequential
 def test_model_slirec(deeprec_resource_path, deeprec_config_path):
     data_path = os.path.join(deeprec_resource_path, "slirec")
     yaml_file = os.path.join(deeprec_config_path, "sli_rec.yaml")
@@ -182,10 +173,7 @@ def test_model_slirec(deeprec_resource_path, deeprec_config_path):
     assert model.predict(test_file, output_file) is not None
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
-@pytest.mark.deeprec
-@pytest.mark.sequential
 def test_model_sum(deeprec_resource_path, deeprec_config_path):
     data_path = os.path.join(deeprec_resource_path, "slirec")
     yaml_file = os.path.join(deeprec_config_path, "sum.yaml")
@@ -248,9 +236,7 @@ def test_model_sum(deeprec_resource_path, deeprec_config_path):
     assert model.predict(valid_file, output_file) is not None
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
-@pytest.mark.deeprec
 def test_model_lightgcn(deeprec_resource_path, deeprec_config_path):
     data_path = os.path.join(deeprec_resource_path, "dkn")
     yaml_file = os.path.join(deeprec_config_path, "lightgcn.yaml")
diff --git a/tests/smoke/recommenders/recommender/test_deeprec_utils.py b/tests/smoke/recommenders/recommender/test_deeprec_utils.py
index ae9f924264..cd02871f2a 100644
--- a/tests/smoke/recommenders/recommender/test_deeprec_utils.py
+++ b/tests/smoke/recommenders/recommender/test_deeprec_utils.py
@@ -23,7 +23,6 @@
     pass  # disable error while collecting tests for non-gpu environments
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_DKN_iterator(deeprec_resource_path):
     data_path = os.path.join(deeprec_resource_path, "dkn")
@@ -82,7 +81,6 @@ def test_DKN_iterator(deeprec_resource_path):
             break
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_Sequential_Iterator(deeprec_resource_path, deeprec_config_path):
     data_path = os.path.join(deeprec_resource_path, "slirec")
diff --git a/tests/smoke/recommenders/recommender/test_newsrec_model.py b/tests/smoke/recommenders/recommender/test_newsrec_model.py
index 968df738e2..7cad05ba35 100644
--- a/tests/smoke/recommenders/recommender/test_newsrec_model.py
+++ b/tests/smoke/recommenders/recommender/test_newsrec_model.py
@@ -17,33 +17,32 @@
     pass  # disable error while collecting tests for non-gpu environments
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_model_nrms(mind_resource_path):
-    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
-    train_behaviors_file = os.path.join(mind_resource_path, "train", r"behaviors.tsv")
-    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
-    valid_behaviors_file = os.path.join(mind_resource_path, "valid", r"behaviors.tsv")
+    train_news_file = os.path.join(mind_resource_path, "train", "news.tsv")
+    train_behaviors_file = os.path.join(mind_resource_path, "train", "behaviors.tsv")
+    valid_news_file = os.path.join(mind_resource_path, "valid", "news.tsv")
+    valid_behaviors_file = os.path.join(mind_resource_path, "valid", "behaviors.tsv")
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"nrms.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "nrms.yaml")
 
     if not os.path.exists(train_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "train"),
             "MINDdemo_train.zip",
         )
     if not os.path.exists(valid_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "valid"),
             "MINDdemo_dev.zip",
         )
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
@@ -69,35 +68,34 @@ def test_model_nrms(mind_resource_path):
     )
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_model_naml(mind_resource_path):
-    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
-    train_behaviors_file = os.path.join(mind_resource_path, "train", r"behaviors.tsv")
-    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
-    valid_behaviors_file = os.path.join(mind_resource_path, "valid", r"behaviors.tsv")
+    train_news_file = os.path.join(mind_resource_path, "train", "news.tsv")
+    train_behaviors_file = os.path.join(mind_resource_path, "train", "behaviors.tsv")
+    valid_news_file = os.path.join(mind_resource_path, "valid", "news.tsv")
+    valid_behaviors_file = os.path.join(mind_resource_path, "valid", "behaviors.tsv")
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding_all.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict_all.pkl")
     vertDict_file = os.path.join(mind_resource_path, "utils", "vert_dict.pkl")
     subvertDict_file = os.path.join(mind_resource_path, "utils", "subvert_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"naml.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "naml.yaml")
 
     if not os.path.exists(train_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "train"),
             "MINDdemo_train.zip",
         )
     if not os.path.exists(valid_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "valid"),
             "MINDdemo_dev.zip",
         )
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
@@ -123,33 +121,32 @@ def test_model_naml(mind_resource_path):
     )
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_model_lstur(mind_resource_path):
-    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
-    train_behaviors_file = os.path.join(mind_resource_path, "train", r"behaviors.tsv")
-    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
-    valid_behaviors_file = os.path.join(mind_resource_path, "valid", r"behaviors.tsv")
+    train_news_file = os.path.join(mind_resource_path, "train", "news.tsv")
+    train_behaviors_file = os.path.join(mind_resource_path, "train", "behaviors.tsv")
+    valid_news_file = os.path.join(mind_resource_path, "valid", "news.tsv")
+    valid_behaviors_file = os.path.join(mind_resource_path, "valid", "behaviors.tsv")
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"lstur.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "lstur.yaml")
 
     if not os.path.exists(train_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "train"),
             "MINDdemo_train.zip",
         )
     if not os.path.exists(valid_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "valid"),
             "MINDdemo_dev.zip",
         )
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
@@ -175,33 +172,32 @@ def test_model_lstur(mind_resource_path):
     )
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_model_npa(mind_resource_path):
-    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
-    train_behaviors_file = os.path.join(mind_resource_path, "train", r"behaviors.tsv")
-    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
-    valid_behaviors_file = os.path.join(mind_resource_path, "valid", r"behaviors.tsv")
+    train_news_file = os.path.join(mind_resource_path, "train", "news.tsv")
+    train_behaviors_file = os.path.join(mind_resource_path, "train", "behaviors.tsv")
+    valid_news_file = os.path.join(mind_resource_path, "valid", "news.tsv")
+    valid_behaviors_file = os.path.join(mind_resource_path, "valid", "behaviors.tsv")
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"lstur.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "lstur.yaml")
 
     if not os.path.exists(train_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "train"),
             "MINDdemo_train.zip",
         )
     if not os.path.exists(valid_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "valid"),
             "MINDdemo_dev.zip",
         )
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
diff --git a/tests/smoke/recommenders/recommender/test_newsrec_utils.py b/tests/smoke/recommenders/recommender/test_newsrec_utils.py
index 6c65a6c9b5..08825e828c 100644
--- a/tests/smoke/recommenders/recommender/test_newsrec_utils.py
+++ b/tests/smoke/recommenders/recommender/test_newsrec_utils.py
@@ -1,6 +1,7 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import os
 import pytest
 
@@ -13,33 +14,32 @@
     pass  # disable error while collecting tests for non-gpu environments
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_news_iterator(mind_resource_path):
-    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
-    train_behaviors_file = os.path.join(mind_resource_path, "train", r"behaviors.tsv")
-    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
-    valid_behaviors_file = os.path.join(mind_resource_path, "valid", r"behaviors.tsv")
+    train_news_file = os.path.join(mind_resource_path, "train", "news.tsv")
+    train_behaviors_file = os.path.join(mind_resource_path, "train", "behaviors.tsv")
+    valid_news_file = os.path.join(mind_resource_path, "valid", "news.tsv")
+    valid_behaviors_file = os.path.join(mind_resource_path, "valid", "behaviors.tsv")
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"nrms.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "nrms.yaml")
 
     if not os.path.exists(train_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "train"),
             "MINDdemo_train.zip",
         )
     if not os.path.exists(valid_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "valid"),
             "MINDdemo_dev.zip",
         )
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
@@ -69,35 +69,34 @@ def test_news_iterator(mind_resource_path):
         break
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_naml_iterator(mind_resource_path):
-    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
-    train_behaviors_file = os.path.join(mind_resource_path, "train", r"behaviors.tsv")
-    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
-    valid_behaviors_file = os.path.join(mind_resource_path, "valid", r"behaviors.tsv")
+    train_news_file = os.path.join(mind_resource_path, "train", "news.tsv")
+    train_behaviors_file = os.path.join(mind_resource_path, "train", "behaviors.tsv")
+    valid_news_file = os.path.join(mind_resource_path, "valid", "news.tsv")
+    valid_behaviors_file = os.path.join(mind_resource_path, "valid", "behaviors.tsv")
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding_all.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict_all.pkl")
     vertDict_file = os.path.join(mind_resource_path, "utils", "vert_dict.pkl")
     subvertDict_file = os.path.join(mind_resource_path, "utils", "subvert_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"naml.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "naml.yaml")
 
     if not os.path.exists(train_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "train"),
             "MINDdemo_train.zip",
         )
     if not os.path.exists(valid_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "valid"),
             "MINDdemo_dev.zip",
         )
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )

From 0d17767f441f8563d9dcfdca951ceaa243b1df27 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sun, 17 Sep 2023 23:30:42 +0200
Subject: [PATCH 43/43] ignoring one of the lightfm for a weird error

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/unit/recommenders/models/test_lightfm_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/unit/recommenders/models/test_lightfm_utils.py b/tests/unit/recommenders/models/test_lightfm_utils.py
index 52f743fa65..2155fb6559 100644
--- a/tests/unit/recommenders/models/test_lightfm_utils.py
+++ b/tests/unit/recommenders/models/test_lightfm_utils.py
@@ -136,6 +136,7 @@ def test_interactions(interactions):
     assert user_features.shape == (10, 17)
 
 
+@pytest.mark.skip(reason="Flaky test")
 def test_fitting(fitting):
     output, _ = fitting
     assert output.shape == (4, 4)