diff --git a/.github/actions/get-test-groups/action.yml b/.github/actions/get-test-groups/action.yml
index 3e803c800e..39364fa81f 100644
--- a/.github/actions/get-test-groups/action.yml
+++ b/.github/actions/get-test-groups/action.yml
@@ -29,6 +29,6 @@ runs:
      if [[ ${{ inputs.TEST_KIND }} == "nightly" ]]; then
        test_groups_str=$(python -c 'from tests.ci.azureml_tests.test_groups import nightly_test_groups; print([t for t in nightly_test_groups.keys() if "${{inputs.TEST_ENV}}" in t])')
      else
-        test_groups_str=$(python -c 'from tests.ci.azureml_tests.test_groups import unit_test_groups; print(list(unit_test_groups.keys()))')
+        test_groups_str=$(python -c 'from tests.ci.azureml_tests.test_groups import pr_gate_test_groups; print(list(pr_gate_test_groups.keys()))')
      fi
      echo "test_groups=$test_groups_str" >> $GITHUB_OUTPUT
diff --git a/README.md b/README.md
index b1d20bdebc..a555de8777 100644
--- a/README.md
+++ b/README.md
@@ -135,13 +135,13 @@ This project adheres to [Microsoft's Open Source Code of Conduct](CODE_OF_CONDUC

## Build Status

-These tests are the nightly builds, which compute the smoke and integration tests. `main` is our principal branch and `staging` is our development branch. We use [pytest](https://docs.pytest.org/) for testing python utilities in [recommenders](recommenders) and [Papermill](https://github.com/nteract/papermill) and [Scrapbook](https://nteract-scrapbook.readthedocs.io/en/latest/) for the [notebooks](examples).
+These tests are the nightly builds, which run the asynchronous tests. `main` is our principal branch and `staging` is our development branch. We use [pytest](https://docs.pytest.org/) for testing Python utilities in [recommenders](recommenders), and [Papermill](https://github.com/nteract/papermill) and [Scrapbook](https://nteract-scrapbook.readthedocs.io/en/latest/) for the [notebooks](examples).

For more information about the testing pipelines, please see the [test documentation](tests/README.md).

### AzureML Nightly Build Status

-Smoke and integration tests are run daily on AzureML.
+The nightly build tests are run daily on AzureML.

| Build Type | Branch | Status |  | Branch | Status |
| --- | --- | --- | --- | --- | --- |
diff --git a/SETUP.md b/SETUP.md
index 64f5f20903..2334de0963 100644
--- a/SETUP.md
+++ b/SETUP.md
@@ -156,9 +156,9 @@ First make sure that the tag that you want to add, e.g. `0.6.0`, is added in [`r
1. Make sure that the code in main passes all the tests (unit and nightly tests).
1. Create a tag with the version number: e.g. `git tag -a 0.6.0 -m "Recommenders 0.6.0"`.
1. Push the tag to the remote server: `git push origin 0.6.0`.
-1. When the new tag is pushed, a release pipeline is executed. This pipeline runs all the tests again (unit, smoke and integration), generates a wheel and a tar.gz which are uploaded to a [GitHub draft release](https://github.com/microsoft/recommenders/releases).
+1. When the new tag is pushed, a release pipeline is executed. This pipeline runs all the tests again (PR gate and nightly builds), and generates a wheel and a tar.gz, which are uploaded to a [GitHub draft release](https://github.com/microsoft/recommenders/releases).
1. Fill in the draft release with all the recent changes in the code.
1. Download the wheel and tar.gz locally; these files shouldn't have any bugs, since they passed all the tests.
1. Install twine: `pip install twine`
-1. Publish the wheel and tar.gz to pypi: `twine upload recommenders*`
+1. Publish the wheel and tar.gz to PyPI: `twine upload recommenders*`
diff --git a/pyproject.toml b/pyproject.toml
index 385f44302a..0ff4c8d962 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,10 +14,7 @@ build-backend = "setuptools.build_meta"
[tool.pytest.ini_options]
markers = [
    "experimental: tests that will not be executed and may need extra dependencies",
-    "flaky: flaky tests that can fail unexpectedly",
    "gpu: tests running on GPU",
-    "integration: integration tests",
    "notebooks: tests for notebooks",
-    "smoke: smoke tests",
    "spark: tests that require Spark",
]
\ No newline at end of file
diff --git a/recommenders/utils/gpu_utils.py b/recommenders/utils/gpu_utils.py
index 882952f16a..ec8cd0341a 100644
--- a/recommenders/utils/gpu_utils.py
+++ b/recommenders/utils/gpu_utils.py
@@ -97,7 +97,7 @@ def get_cuda_version():
            data = f.read().replace("\n", "")
        return data
    else:
-        return "Cannot find CUDA in this machine"
+        return None


def get_cudnn_version():
@@ -125,14 +125,14 @@ def find_cudnn_in_headers(candiates):
            if version:
                return version
            else:
-                return "Cannot find CUDNN version"
+                return None
        else:
-            return "Cannot find CUDNN version"
+            return None

    try:
        import torch

-        return torch.backends.cudnn.version()
+        return str(torch.backends.cudnn.version())
    except (ImportError, ModuleNotFoundError):
        if sys.platform == "win32":
            candidates = [r"C:\NVIDIA\cuda\include\cudnn.h"]
diff --git a/setup.py b/setup.py
index 5930d3be23..679252462b 100644
--- a/setup.py
+++ b/setup.py
@@ -42,7 +42,7 @@
    "transformers>=2.5.0,<5",
    "category_encoders>=1.3.0,<2",
    "jinja2>=2,<3.1",
-    "requests>=2.0.0,<3",
+    "requests>=2.31.0,<3",
    "cornac>=1.1.2,<1.15.2;python_version<='3.7'",
    "cornac>=1.15.2,<2;python_version>='3.8'",  # After 1.15.2, Cornac requires python 3.8
    "retrying>=1.3.3",
@@ -64,7 +64,7 @@
    "tensorflow~=2.6.1;python_version=='3.6'",
    "tensorflow~=2.7.0;python_version>='3.7'",
    "tf-slim>=1.1.0",
-    "torch>=1.8",  # for CUDA 11 support
+    "torch>=1.13.1",  # for CUDA 11 support
    "fastai>=1.0.46,<2",
],
"spark": [
@@ -89,6 +89,7 @@
    "vowpalwabbit>=8.9.0,<9",
    # nni needs to be upgraded
    "nni==1.5",
+    "pymanopt>=0.2.5",
]

# The following dependency can be installed as below; however, PyPI does not allow direct URLs.
diff --git a/tests/README.md b/tests/README.md
index cb868ea93a..014fa84ebd 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -63,7 +63,9 @@ In this section we show how to create tests and add them to the test pipeline. T
1. Create your code in the library and/or notebooks.
1. Design the unit tests for the code.
1. If you have written a notebook, design the notebook tests and check that the metrics they return are what you expect.
-1. Add the tests to the AzureML pipeline in the corresponding [test group](./ci/azureml_tests/test_groups.py). **Please note that if you don't add your tests to the pipeline, they will not be executed.**
+1. Add the tests to the AzureML pipeline in the corresponding [test group](./ci/azureml_tests/test_groups.py).
+
+**Please note that if you don't add your tests to the pipeline, they will not be executed.**

### How to create tests for the Recommenders library

@@ -74,8 +76,6 @@ You want to make sure that all your code works before you submit it to the repos
* Use the mark `@pytest.mark.gpu` if you want the test to be executed
in a GPU environment. Use `@pytest.mark.spark` if you want the test
to be executed in a Spark environment.
-* Use `@pytest.mark.smoke` and `@pytest.mark.integration` to mark the
-tests as smoke tests and integration tests.
* Use `@pytest.mark.notebooks` if you are testing a notebook.
* Avoid using `is` in the asserts, instead use the operator `==`.
* Follow the pattern `assert computation == value`, for example:
@@ -113,7 +113,7 @@ For executing this test, first make sure you are in the correct environment as d

*Notice that the next instruction executes the tests from the root folder.*

```bash
-pytest tests/unit/test_notebooks_python.py::test_sar_single_node_runs
+pytest tests/unit/examples/test_notebooks_python.py::test_sar_single_node_runs
```

#### Developing nightly tests with Papermill and Scrapbook

@@ -124,7 +124,7 @@ The first step is to tag the parameters that we are going to inject. For it we n

The way papermill works to inject parameters is very simple: it generates a copy of the notebook (in our code we call it `OUTPUT_NOTEBOOK`), and creates a new cell with the injected variables.

The second modification that we need to do to the notebook is to record the metrics we want to test using `sb.glue("output_variable", python_variable_name)`. We normally use the last cell of the notebook to record all the metrics. These are the metrics that we are going to control in the smoke and functional tests.

This is an example of how we do a smoke test. The complete code can be found in [smoke/examples/test_notebooks_python.py](./smoke/examples/test_notebooks_python.py):

```python
import papermill as pm
import scrapbook as sb

TOL = 0.05
ABS_TOL = 0.05

-@pytest.mark.smoke
def test_sar_single_node_smoke(notebooks, output_notebook, kernel_name):
    notebook_path = notebooks["sar_single_node"]
    pm.execute_notebook(
@@ -159,14 +158,14 @@ For executing this test, first make sure you are in the correct environment as d

*Notice that the next instructions execute the tests from the root folder.*

```
-pytest tests/smoke/test_notebooks_python.py::test_sar_single_node_smoke
+pytest tests/smoke/examples/test_notebooks_python.py::test_sar_single_node_smoke
```

More details on how to integrate Papermill with notebooks can be found in their [repo](https://github.com/nteract/papermill). Also, you can check the [Scrapbook repo](https://github.com/nteract/scrapbook).
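For reference, the notebook side of this contract is just a final cell that glues the metrics (a minimal sketch; the metric names and placeholder values are illustrative, not taken from a specific notebook):

```python
import scrapbook as sb

# Final cell of the notebook: persist the metrics computed above so the test
# can read them back from the executed copy of the notebook.
eval_map, eval_ndcg = 0.105, 0.382  # placeholders standing in for metrics computed earlier

sb.glue("map", eval_map)
sb.glue("ndcg", eval_ndcg)
```

The test then recovers these values from the executed notebook with `sb.read_notebook(OUTPUT_NOTEBOOK).scraps`, as in the snippet above.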
### How to add tests to the AzureML pipeline

-To add a new test to the AzureML pipeline, add the test path to an appropriate test group listed in [test_groups.py](https://github.com/microsoft/recommenders/blob/main/tests/ci/azureml_tests/test_groups.py).
+To add a new test to the AzureML pipeline, add the test path to an appropriate test group listed in [test_groups.py](./ci/azureml_tests/test_groups.py).

Tests in `group_cpu_xxx` groups are executed on a CPU-only AzureML compute cluster node. Tests in `group_gpu_xxx` groups are executed on a GPU-enabled AzureML compute cluster node with GPU related dependencies added to the AzureML run environment. Tests in `group_pyspark_xxx` groups are executed on a CPU-only AzureML compute cluster node, with the PySpark related dependencies added to the AzureML run environment.

@@ -177,15 +176,13 @@ Example of adding a new test:

1. In the environment that you are running your code, first see if there is a group whose total runtime is less than the threshold.
```python
"group_spark_001": [ # Total group time: 271.13s
-    "tests/smoke/recommenders/dataset/test_movielens.py::test_load_spark_df", # 4.33s
-    "tests/integration/recommenders/datasets/test_movielens.py::test_load_spark_df", # 25.58s + 101.99s + 139.23s
+    "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_spark_df", # 4.33s + 25.58s + 101.99s + 139.23s
],
```
2. Add the test to the group, add the time it takes to compute, and update the total group time.
```python
"group_spark_001": [ # Total group time: 571.13s
-    "tests/smoke/recommenders/dataset/test_movielens.py::test_load_spark_df", # 4.33s
-    "tests/integration/recommenders/datasets/test_movielens.py::test_load_spark_df", # 25.58s + 101.99s + 139.23s
+    "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_spark_df", # 4.33s + 25.58s + 101.99s + 139.23s
    #
    "tests/path/to/test_new.py::test_new_function", # 300s
],
```
@@ -217,133 +214,50 @@ Then, follow the steps below to create the AzureML infrastructure:

To manually execute the tests in the CPU, GPU or Spark environments, first **make sure you are in the correct environment as described in the [SETUP.md](../SETUP.md)**.

-*Click on the following menus* to see more details on how to execute the unit, smoke and integration tests:
-
-<details>
-<summary><strong><em>Unit tests</em></strong></summary>
-
-Unit tests ensure that each class or function behaves as it should. Every time a developer makes a pull request to staging or main branch, a battery of unit tests is executed.
-
-*Note that the next instructions execute the tests from the root folder.*
-
-For executing the Python unit tests for the utilities:
-
-    pytest tests/unit -m "not notebooks and not spark and not gpu" --durations 0
-
-For executing the Python unit tests for the notebooks:
-
-    pytest tests/unit -m "notebooks and not spark and not gpu" --durations 0
-
-For executing the Python GPU unit tests for the utilities:
-
-    pytest tests/unit -m "not notebooks and not spark and gpu" --durations 0
-
-For executing the Python GPU unit tests for the notebooks:
-
-    pytest tests/unit -m "notebooks and not spark and gpu" --durations 0
-
-For executing the PySpark unit tests for the utilities:
-
-    pytest tests/unit -m "not notebooks and spark and not gpu" --durations 0
-
-For executing the PySpark unit tests for the notebooks:
-
-    pytest tests/unit -m "notebooks and spark and not gpu" --durations 0
-
-*NOTE: Adding `--durations 0` shows the computation time of all tests.*
-
-*NOTE: Adding `--disable-warnings` will disable the warning messages.*
-
-</details>
-
-<details>
-<summary><strong><em>Smoke tests</em></strong></summary>
-
-Smoke tests make sure that the system works and are executed just before the integration tests every night.
+### CPU tests

*Note that the next instructions execute the tests from the root folder.*

-For executing the Python smoke tests:
+For executing the CPU tests for the utilities:

-    pytest tests/smoke -m "smoke and not spark and not gpu" --durations 0
+    pytest tests -m "not notebooks and not spark and not gpu" --durations 0 --disable-warnings

-For executing the Python GPU smoke tests:
+For executing the CPU tests for the notebooks:

-    pytest tests/smoke -m "smoke and not spark and gpu" --durations 0
+    pytest tests -m "notebooks and not spark and not gpu" --durations 0 --disable-warnings

-For executing the PySpark smoke tests:
+If you want to execute a specific test, you can use the following command:

-    pytest tests/smoke -m "smoke and spark and not gpu" --durations 0
+    pytest tests/data_validation/recommenders/datasets/test_mind.py::test_mind_url --durations 0 --disable-warnings

-*NOTE: Adding `--durations 0` shows the computation time of all tests.*
+If you want to execute any of the test types (data_validation, unit, smoke, functional, etc.) you can use the following command:

-*NOTE: Adding `--disable-warnings` will disable the warning messages.*
+    pytest tests/data_validation -m "not notebooks and not spark and not gpu" --durations 0 --disable-warnings

-</details>
+### GPU tests

-<details>
-<summary><strong><em>Integration tests</em></strong></summary>
+For executing the GPU tests for the utilities:

-Integration tests make sure that the program results are acceptable.
+    pytest tests -m "not notebooks and not spark and gpu" --durations 0 --disable-warnings

-*Note that the next instructions execute the tests from the root folder.*
+For executing the GPU tests for the notebooks:

-For executing the Python integration tests:
+    pytest tests -m "notebooks and not spark and gpu" --durations 0 --disable-warnings

-    pytest tests/integration -m "integration and not spark and not gpu" --durations 0
+### Spark tests

-For executing the Python GPU integration tests:
+For executing the PySpark tests for the utilities:

-    pytest tests/integration -m "integration and not spark and gpu" --durations 0
+    pytest tests -m "not notebooks and spark and not gpu" --durations 0 --disable-warnings

-For executing the PySpark integration tests:
+For executing the PySpark tests for the notebooks:

-    pytest tests/integration -m "integration and spark and not gpu" --durations 0
+    pytest tests -m "notebooks and spark and not gpu" --durations 0 --disable-warnings

*NOTE: Adding `--durations 0` shows the computation time of all tests.*

*NOTE: Adding `--disable-warnings` will disable the warning messages.*
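The `-m` expressions above select tests by the markers declared in [pyproject.toml](../pyproject.toml). For reference, a test opts into an environment simply by carrying the corresponding marks (a minimal sketch; the test itself is hypothetical):

```python
import pytest


# Selected by `-m "notebooks and spark"`; excluded by `-m "not spark"`.
@pytest.mark.notebooks
@pytest.mark.spark
def test_example_spark_notebook_runs():
    assert True
```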
-
-</details>
-
-<details>
-<summary><strong><em>Current Skipped Tests</em></strong></summary>
-
-Several of the tests are skipped for various reasons which are noted below.
-
-<table>
-<tr>
-<td>Test Module</td>
-<td>Test</td>
-<td>Test Environment</td>
-<td>Reason</td>
-</tr>
-<tr>
-<td>unit/recommenders/datasets/test_wikidata</td>
-<td>*</td>
-<td>Linux</td>
-<td>Wikidata API is unstable</td>
-</tr>
-<tr>
-<td>integration/recommenders/datasets/test_notebooks_python</td>
-<td>test_wikidata</td>
-<td>Linux</td>
-<td>Wikidata API is unstable</td>
-</tr>
-<tr>
-<td>*/test_notebooks_python</td>
-<td>test_vw*</td>
-<td>Linux</td>
-<td>VW pip package has installation incompatibilities</td>
-</tr>
-<tr>
-<td>*/test_notebooks_python</td>
-<td>test_nni*</td>
-<td>Linux</td>
-<td>NNI pip package has installation incompatibilities</td>
-</tr>
-</table>
-
In order to skip a test because there is an OS or upstream issue which cannot be resolved you can use pytest [annotations](https://docs.pytest.org/en/latest/skipping.html).

Example:

@@ -353,4 +267,3 @@
def test_to_skip():
    assert False

-</details>
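In addition to an unconditional `@pytest.mark.skip`, pytest provides `@pytest.mark.skipif` for skips that should apply only in certain situations, such as a particular OS or a missing optional dependency (a minimal sketch; the condition and reason are illustrative):

```python
import sys

import pytest


# Skipped on Windows, executed on every other platform.
@pytest.mark.skipif(sys.platform == "win32", reason="not supported on Windows")
def test_runs_everywhere_but_windows():
    assert True
```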
diff --git a/tests/ci/azureml_tests/run_groupwise_pytest.py b/tests/ci/azureml_tests/run_groupwise_pytest.py index 64c9895809..f038567be8 100644 --- a/tests/ci/azureml_tests/run_groupwise_pytest.py +++ b/tests/ci/azureml_tests/run_groupwise_pytest.py @@ -13,7 +13,7 @@ import argparse import glob from azureml.core import Run -from test_groups import nightly_test_groups, unit_test_groups +from test_groups import nightly_test_groups, pr_gate_test_groups if __name__ == "__main__": @@ -46,7 +46,7 @@ if args.testkind == "nightly": test_group = nightly_test_groups[args.testgroup] else: - test_group = unit_test_groups[args.testgroup] + test_group = pr_gate_test_groups[args.testgroup] logger.info("Tests to be executed") logger.info(str(test_group)) diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py index ec3bbea622..c21b8348ad 100644 --- a/tests/ci/azureml_tests/test_groups.py +++ b/tests/ci/azureml_tests/test_groups.py @@ -8,55 +8,60 @@ # Standard_A8m_v2 with 8 vCPUs and 64 GiB memory. # IMPORTANT NOTE: -# FOR INTEGRATION, NO GROUP SHOULD SURPASS 45MIN = 2700s !!! -# FOR UNIT, NO GROUP SHOULD SURPASS 15MIN = 900s !!! +# FOR NIGHTLY, NO GROUP SHOULD SURPASS 45MIN = 2700s !!! +# FOR PR GATE, NO GROUP SHOULD SURPASS 15MIN = 900s !!! -global nightly_test_groups, unit_test_groups +global nightly_test_groups, pr_gate_test_groups nightly_test_groups = { "group_cpu_001": [ # Total group time: 1883s - "tests/smoke/recommenders/dataset/test_movielens.py::test_download_and_extract_movielens", # 0.45s - "tests/smoke/recommenders/dataset/test_movielens.py::test_load_item_df", # 0.47s - "tests/smoke/recommenders/dataset/test_movielens.py::test_load_pandas_df", # 2.45s + "tests/data_validation/recommenders/datasets/test_movielens.py::test_download_and_extract_movielens", # 0.45s + 0.61s + 3.47s + 8.28s + "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_item_df", # 0.47s + 0.59s + 3.59s + 8.44s + "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df", # 16.87s + 37.33s + 352.99s + 673.61s # - "tests/integration/recommenders/datasets/test_movielens.py::test_load_pandas_df", # 16.87s - "tests/integration/recommenders/datasets/test_movielens.py::test_download_and_extract_movielens", # 0.61s + 3.47s + 8.28s - "tests/integration/recommenders/datasets/test_movielens.py::test_load_item_df", # 0.59s + 3.59s + 8.44s - "tests/integration/recommenders/datasets/test_movielens.py::test_load_pandas_df", # 37.33s + 352.99s + 673.61s + "tests/data_validation/recommenders/datasets/test_mind.py::test_mind_url", # 0.38s + "tests/data_validation/recommenders/datasets/test_mind.py::test_download_mind_demo", + "tests/data_validation/recommenders/datasets/test_mind.py::test_extract_mind_demo", + "tests/data_validation/recommenders/datasets/test_mind.py::test_download_mind_small", + "tests/data_validation/recommenders/datasets/test_mind.py::test_extract_mind_small", + "tests/data_validation/recommenders/datasets/test_mind.py::test_download_mind_large", + "tests/data_validation/recommenders/datasets/test_mind.py::test_extract_mind_large", + "tests/data_validation/examples/test_mind.py::test_mind_utils_runs", # 219.77s + "tests/data_validation/examples/test_mind.py::test_mind_utils_values", # 219.26s # - "tests/smoke/recommenders/dataset/test_mind.py::test_mind_url", # 0.38s - "tests/smoke/recommenders/dataset/test_mind.py::test_extract_mind", # 10.23s - "tests/smoke/examples/test_notebooks_python.py::test_mind_utils", # 219.77s - 
"tests/integration/recommenders/datasets/test_mind.py::test_download_mind", # 37.63s - "tests/integration/recommenders/datasets/test_mind.py::test_extract_mind", # 56.30s - "tests/integration/recommenders/datasets/test_mind.py::test_mind_utils_integration", # 219.26s + "tests/data_validation/examples/test_wikidata.py::test_wikidata_runs", + "tests/data_validation/examples/test_wikidata.py::test_wikidata_values", # "tests/smoke/examples/test_notebooks_python.py::test_lightgbm_quickstart_smoke", # 46.42s # "tests/smoke/examples/test_notebooks_python.py::test_cornac_bpr_smoke", # 16.62s - "tests/integration/examples/test_notebooks_python.py::test_cornac_bpr_integration", # 165.72s + "tests/functional/examples/test_notebooks_python.py::test_cornac_bpr_functional", # 165.72s ], "group_cpu_002": [ # Total group time: 1801s "tests/smoke/examples/test_notebooks_python.py::test_baseline_deep_dive_smoke", # 15.98s - "tests/integration/examples/test_notebooks_python.py::test_baseline_deep_dive_integration", # 170.73s + "tests/functional/examples/test_notebooks_python.py::test_baseline_deep_dive_functional", # 170.73s # "tests/smoke/examples/test_notebooks_python.py::test_surprise_svd_smoke", # 45.88s - "tests/integration/examples/test_notebooks_python.py::test_surprise_svd_integration", # 503.54s + "tests/functional/examples/test_notebooks_python.py::test_surprise_svd_functional", # 503.54s # - "tests/integration/examples/test_notebooks_python.py::test_geoimc_integration", # 1006.19s + "tests/functional/examples/test_notebooks_python.py::test_geoimc_functional", # 1006.19s # - "tests/integration/examples/test_notebooks_python.py::test_benchmark_movielens_cpu", # 58s + "tests/functional/examples/test_notebooks_python.py::test_benchmark_movielens_cpu", # 58s + # + "tests/functional/examples/test_notebooks_python.py::test_lightfm_functional", ], "group_cpu_003": [ # Total group time: 2253s - "tests/smoke/recommenders/dataset/test_criteo.py::test_download_criteo", # 1.05s - "tests/smoke/recommenders/dataset/test_criteo.py::test_extract_criteo", # 1.22s - "tests/smoke/recommenders/dataset/test_criteo.py::test_criteo_load_pandas_df", # 1.73s - "tests/integration/recommenders/datasets/test_criteo.py::test_criteo_load_pandas_df", # 1368.63s + "tests/data_validation/recommenders/dataset/test_criteo.py::test_download_criteo_sample", # 1.05s + "tests/data_validation/recommenders/dataset/test_criteo.py::test_extract_criteo_sample", # 1.22s + "tests/data_validation/recommenders/dataset/test_criteo.py::test_criteo_load_pandas_df_sample", # 1.73s + "tests/data_validation/recommenders/datasets/test_criteo.py::test_criteo_load_pandas_df_full", # 1368.63s # "tests/smoke/examples/test_notebooks_python.py::test_sar_single_node_smoke", # 12.58s - "tests/integration/examples/test_notebooks_python.py::test_sar_single_node_integration", # 57.67s + 808.83s - # FIXME: Add experimental tests in a later iteration - # "tests/integration/examples/test_notebooks_python.py::test_xlearn_fm_integration", # 255.73s + "tests/functional/examples/test_notebooks_python.py::test_sar_single_node_functional", # 57.67s + 808.83s + "tests/functional/examples/test_notebooks_python.py::test_xlearn_fm_functional", # 255.73s + "tests/smoke/examples/test_notebooks_python.py::test_vw_deep_dive_smoke", + "tests/functional/examples/test_notebooks_python.py::test_vw_deep_dive_functional", + "tests/functional/examples/test_notebooks_python.py::test_nni_tuning_svd", ], "group_gpu_001": [ # Total group time: 1937.01s 
"tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works) @@ -65,14 +70,14 @@ "tests/smoke/recommenders/recommender/test_newsrec_utils.py::test_news_iterator", # 3.04s # "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_lightgcn", # 6.03s - "tests/integration/examples/test_notebooks_gpu.py::test_lightgcn_deep_dive_integration", # 19.45s + "tests/functional/examples/test_notebooks_gpu.py::test_lightgcn_deep_dive_functional", # 19.45s # "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_sum", # 27.23s # "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_dkn", # 187.20s - "tests/integration/examples/test_notebooks_gpu.py::test_dkn_quickstart_integration", # 1167.93s + "tests/functional/examples/test_notebooks_gpu.py::test_dkn_quickstart_functional", # 1167.93s # - "tests/integration/examples/test_notebooks_gpu.py::test_slirec_quickstart_integration", # 175.00s + "tests/functional/examples/test_notebooks_gpu.py::test_slirec_quickstart_functional", # 175.00s "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_slirec", # 346.72s ], "group_gpu_002": [ # Total group time: 1896.76s @@ -80,22 +85,22 @@ "tests/smoke/recommenders/recommender/test_deeprec_model.py::test_model_xdeepfm", # 3.10s # FIXME: https://github.com/microsoft/recommenders/issues/1883 # "tests/smoke/examples/test_notebooks_gpu.py::test_xdeepfm_smoke", # 77.93s - "tests/integration/examples/test_notebooks_gpu.py::test_xdeepfm_integration", # 470.11s + "tests/functional/examples/test_notebooks_gpu.py::test_xdeepfm_functional", # 470.11s # "tests/smoke/examples/test_notebooks_gpu.py::test_cornac_bivae_smoke", # 67.84s - "tests/integration/examples/test_notebooks_gpu.py::test_cornac_bivae_integration", # 453.21s + "tests/functional/examples/test_notebooks_gpu.py::test_cornac_bivae_functional", # 453.21s # "tests/smoke/examples/test_notebooks_gpu.py::test_wide_deep_smoke", # 122.71s # "tests/smoke/examples/test_notebooks_gpu.py::test_fastai_smoke", # 33.22s - "tests/integration/examples/test_notebooks_gpu.py::test_fastai_integration", # 667.88s + "tests/functional/examples/test_notebooks_gpu.py::test_fastai_functional", # 667.88s ], "group_gpu_003": [ # Total group time: 2072.15s "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works) "tests/smoke/examples/test_notebooks_gpu.py::test_ncf_smoke", # 114.39s - "tests/integration/examples/test_notebooks_gpu.py::test_ncf_integration", # 1046.97s + "tests/functional/examples/test_notebooks_gpu.py::test_ncf_functional", # 1046.97s "tests/smoke/examples/test_notebooks_gpu.py::test_ncf_deep_dive_smoke", # 102.71s - "tests/integration/examples/test_notebooks_gpu.py::test_ncf_deep_dive_integration", # 351.17s + "tests/functional/examples/test_notebooks_gpu.py::test_ncf_deep_dive_functional", # 351.17s # "tests/smoke/recommenders/recommender/test_newsrec_utils.py::test_naml_iterator", # 5.50s # FIXME: https://github.com/microsoft/recommenders/issues/1883 @@ -105,20 +110,20 @@ "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works) "tests/smoke/examples/test_notebooks_gpu.py::test_nrms_smoke", # 232.55s # FIXME: https://github.com/microsoft/recommenders/issues/1883 - # "tests/integration/examples/test_notebooks_gpu.py::test_nrms_quickstart_integration", # 857.05s + # "tests/functional/examples/test_notebooks_gpu.py::test_nrms_quickstart_functional", # 
857.05s
        #
        "tests/smoke/examples/test_notebooks_gpu.py::test_lstur_smoke", # 246.46s
        # FIXME: https://github.com/microsoft/recommenders/issues/1883
-        # "tests/integration/examples/test_notebooks_gpu.py::test_lstur_quickstart_integration", # 766.52s
+        # "tests/functional/examples/test_notebooks_gpu.py::test_lstur_quickstart_functional", # 766.52s
    ],
    "group_gpu_005": [ # Total group time: 1844.05s
        "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works)
        # FIXME: https://github.com/microsoft/recommenders/issues/1883
-        # "tests/integration/examples/test_notebooks_gpu.py::test_wide_deep_integration", # 1843.29s
+        # "tests/functional/examples/test_notebooks_gpu.py::test_wide_deep_functional", # 1843.29s
        #
        "tests/smoke/examples/test_notebooks_gpu.py::test_npa_smoke", # 366.22s
        # FIXME: https://github.com/microsoft/recommenders/issues/1883
-        # "tests/integration/examples/test_notebooks_gpu.py::test_npa_quickstart_integration", # 810.92s
+        # "tests/functional/examples/test_notebooks_gpu.py::test_npa_quickstart_functional", # 810.92s
    ],
    "group_gpu_006": [ # Total group time: 1763.99s
        "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works)
@@ -130,36 +135,204 @@
        # FIXME: https://github.com/microsoft/recommenders/issues/1883
        # "tests/smoke/examples/test_notebooks_gpu.py::test_naml_smoke", # 620.13s
        #
-        "tests/integration/examples/test_notebooks_gpu.py::test_benchmark_movielens_gpu", # 226s
+        "tests/functional/examples/test_notebooks_gpu.py::test_benchmark_movielens_gpu", # 226s
        # FIXME: Reduce test time https://github.com/microsoft/recommenders/issues/1731
-        # "tests/integration/examples/test_notebooks_gpu.py::test_naml_quickstart_integration", # 2033.85s
+        # "tests/functional/examples/test_notebooks_gpu.py::test_naml_quickstart_functional", # 2033.85s
        # FIXME: https://github.com/microsoft/recommenders/issues/1716
-        # "tests/integration/examples/test_notebooks_gpu.py::test_sasrec_quickstart_integration", # 448.06s + 614.69s
+        # "tests/functional/examples/test_notebooks_gpu.py::test_sasrec_quickstart_functional", # 448.06s + 614.69s
        "tests/smoke/recommenders/recommender/test_newsrec_model.py::test_model_lstur", # 194.88s
    ],
    "group_spark_001": [ # Total group time: 987.16s
-        "tests/smoke/recommenders/dataset/test_movielens.py::test_load_spark_df", # 4.33s
-        "tests/integration/recommenders/datasets/test_movielens.py::test_load_spark_df", # 25.58s + 101.99s + 139.23s
+        "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_spark_df", # 4.33s + 25.58s + 101.99s + 139.23s
+        #
+        "tests/data_validation/recommenders/datasets/test_criteo.py::test_criteo_load_spark_df_sample", # 6.83s
+        "tests/data_validation/recommenders/datasets/test_criteo.py::test_criteo_load_spark_df_full", # 374.64s
        #
-        "tests/smoke/recommenders/dataset/test_criteo.py::test_criteo_load_spark_df", # 6.83s
        "tests/smoke/examples/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_smoke", # 32.45s
-        "tests/integration/recommenders/datasets/test_criteo.py::test_criteo_load_spark_df", # 374.64s
+        "tests/functional/examples/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_functional",
        #
        "tests/smoke/examples/test_notebooks_pyspark.py::test_als_pyspark_smoke", # 49.53s
-        "tests/integration/examples/test_notebooks_pyspark.py::test_als_pyspark_integration", # 110.58s
-        "tests/integration/examples/test_notebooks_pyspark.py::test_benchmark_movielens_pyspark", # 142s
+
"tests/functional/examples/test_notebooks_pyspark.py::test_als_pyspark_functional", # 110.58s + # + "tests/functional/examples/test_notebooks_pyspark.py::test_benchmark_movielens_pyspark", # 142s ], } -unit_test_groups = { +pr_gate_test_groups = { + "group_cpu_001": [ # Total group time: 525.96s + "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__has_default_col_names", + "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df_remove_default_col__return_success", + "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df_invalid_param__return_failure", + "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df__return_success", + "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_data__no_name_collision", + "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_default_param__succeed", + "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_custom_param__succeed", + "tests/data_validation/recommenders/datasets/test_wikidata.py::test_find_wikidata_id", + "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download", + "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_wrong_bytes", + "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_maybe", + "tests/unit/recommenders/datasets/test_download_utils.py::test_maybe_download_retry", + "tests/unit/recommenders/datasets/test_download_utils.py::test_download_path", + "tests/unit/recommenders/datasets/test_pandas_df_utils.py::test_negative_feedback_sampler", + "tests/unit/recommenders/datasets/test_pandas_df_utils.py::test_filter_by", + "tests/unit/recommenders/datasets/test_pandas_df_utils.py::test_csv_to_libffm", + "tests/unit/recommenders/datasets/test_pandas_df_utils.py::test_has_columns", + "tests/unit/recommenders/datasets/test_pandas_df_utils.py::test_has_same_base_dtype", + "tests/unit/recommenders/datasets/test_pandas_df_utils.py::test_lru_cache_df", + "tests/unit/recommenders/datasets/test_python_splitter.py::test_split_pandas_data", + "tests/unit/recommenders/datasets/test_python_splitter.py::test_min_rating_filter", + "tests/unit/recommenders/datasets/test_python_splitter.py::test_random_splitter", + "tests/unit/recommenders/datasets/test_python_splitter.py::test_chrono_splitter", + "tests/unit/recommenders/datasets/test_python_splitter.py::test_stratified_splitter", + "tests/unit/recommenders/datasets/test_python_splitter.py::test_int_numpy_stratified_splitter", + "tests/unit/recommenders/datasets/test_python_splitter.py::test_float_numpy_stratified_splitter", + "tests/unit/recommenders/datasets/test_sparse.py::test_df_to_sparse", + "tests/unit/recommenders/datasets/test_sparse.py::test_sparse_to_df", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_column_dtypes_match", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_merge_rating", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_merge_ranking", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_rmse", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_mae", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_rsquared", + 
"tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_exp_var", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_get_top_k_items", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_get_top_k_items_largek", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_ndcg_at_k", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_map_at_k", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_precision", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_recall", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_auc", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_logloss", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_python_errors", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_catalog_coverage", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_distributional_coverage", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_item_novelty", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_novelty", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_diversity", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_diversity", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_item_serendipity", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_serendipity", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_serendipity", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_diversity_item_feature_vector", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_diversity_item_feature_vector", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_item_serendipity_item_feature_vector", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_serendipity_item_feature_vector", + "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_serendipity_item_feature_vector", + "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem", + "tests/unit/recommenders/models/test_tfidf_utils.py::test_init", + "tests/unit/recommenders/models/test_tfidf_utils.py::test_clean_dataframe", + "tests/unit/recommenders/models/test_tfidf_utils.py::test_fit", + "tests/unit/recommenders/models/test_tfidf_utils.py::test_tokenize_text", + "tests/unit/recommenders/models/test_tfidf_utils.py::test_get_tokens", + "tests/unit/recommenders/models/test_tfidf_utils.py::test_get_stop_words", + "tests/unit/recommenders/models/test_tfidf_utils.py::test_recommend_top_k_items", + "tests/unit/recommenders/models/test_tfidf_utils.py::test_get_top_k_recommendations", + "tests/unit/recommenders/models/test_cornac_utils.py::test_predict", + "tests/unit/recommenders/models/test_cornac_utils.py::test_recommend_k_items", + "tests/unit/recommenders/models/test_geoimc.py::test_dataptr", + "tests/unit/recommenders/models/test_geoimc.py::test_length_normalize", + "tests/unit/recommenders/models/test_geoimc.py::test_mean_center", + "tests/unit/recommenders/models/test_geoimc.py::test_reduce_dims", + "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem", + "tests/unit/recommenders/models/test_geoimc.py::test_inferer_init", + "tests/unit/recommenders/models/test_geoimc.py::test_inferer_infer", + 
"tests/unit/recommenders/models/test_lightfm_utils.py::test_interactions", + "tests/unit/recommenders/models/test_lightfm_utils.py::test_fitting", + "tests/unit/recommenders/models/test_lightfm_utils.py::test_sim_users", + "tests/unit/recommenders/models/test_lightfm_utils.py::test_sim_items", + "tests/unit/recommenders/models/test_sar_singlenode.py::test_init", + "tests/unit/recommenders/models/test_sar_singlenode.py::test_fit", + "tests/unit/recommenders/models/test_sar_singlenode.py::test_predict", + "tests/unit/recommenders/models/test_sar_singlenode.py::test_predict_all_items", + "tests/unit/recommenders/models/test_sar_singlenode.py::test_sar_item_similarity", + "tests/unit/recommenders/models/test_sar_singlenode.py::test_user_affinity", + "tests/unit/recommenders/models/test_sar_singlenode.py::test_recommend_k_items", + "tests/unit/recommenders/models/test_sar_singlenode.py::test_get_item_based_topk", + "tests/unit/recommenders/models/test_sar_singlenode.py::test_get_popularity_based_topk", + "tests/unit/recommenders/models/test_sar_singlenode.py::test_get_normalized_scores", + "tests/unit/recommenders/models/test_sar_singlenode.py::test_match_similarity_type_from_json_file", + "tests/unit/recommenders/models/test_sar_singlenode.py::test_dataset_with_duplicates", + "tests/unit/recommenders/models/test_sar_singlenode.py::test_get_topk_most_similar_users", + "tests/unit/recommenders/models/test_sar_singlenode.py::test_item_frequencies", + "tests/unit/recommenders/models/test_sar_singlenode.py::test_user_frequencies", + "tests/unit/recommenders/models/test_surprise_utils.py::test_predict", + "tests/unit/recommenders/models/test_surprise_utils.py::test_recommend_k_items", + "tests/unit/recommenders/models/test_vowpal_wabbit.py::test_vw_init_del", + "tests/unit/recommenders/models/test_vowpal_wabbit.py::test_to_vw_cmd", + "tests/unit/recommenders/models/test_vowpal_wabbit.py::test_parse_train_cmd", + "tests/unit/recommenders/models/test_vowpal_wabbit.py::test_parse_test_cmd", + "tests/unit/recommenders/models/test_vowpal_wabbit.py::test_to_vw_file", + "tests/unit/recommenders/models/test_vowpal_wabbit.py::test_fit_and_predict", + "tests/unit/recommenders/tuning/test_ncf_utils.py::test_compute_test_results__return_success", + "tests/unit/recommenders/tuning/test_nni_utils.py::test_get_experiment_status", + "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_experiment_status_done", + "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_experiment_status_tuner_no_more_trial", + "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_experiment_status_running", + "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_experiment_status_no_more_trial", + "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_experiment_status_failed", + "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_stopped_timeout", + "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_stopped", + "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_metrics_written", + "tests/unit/recommenders/tuning/test_nni_utils.py::test_check_metrics_written_timeout", + "tests/unit/recommenders/tuning/test_nni_utils.py::test_get_trials", + "tests/unit/recommenders/tuning/test_sweep.py::test_param_sweep", + "tests/unit/recommenders/utils/test_general_utils.py::test_invert_dictionary", + "tests/unit/recommenders/utils/test_general_utils.py::test_get_number_processors", + "tests/unit/recommenders/utils/test_plot.py::test_line_graph", + 
"tests/unit/recommenders/utils/test_python_utils.py::test_python_jaccard", + "tests/unit/recommenders/utils/test_python_utils.py::test_python_lift", + "tests/unit/recommenders/utils/test_python_utils.py::test_exponential_decay", + "tests/unit/recommenders/utils/test_python_utils.py::test_get_top_k_scored_items", + "tests/unit/recommenders/utils/test_python_utils.py::test_binarize", + "tests/unit/recommenders/utils/test_python_utils.py::test_rescale", + "tests/unit/recommenders/utils/test_timer.py::test_no_time", + "tests/unit/recommenders/utils/test_timer.py::test_stop_before_start", + "tests/unit/recommenders/utils/test_timer.py::test_interval_before_stop", + "tests/unit/recommenders/utils/test_timer.py::test_timer", + "tests/unit/recommenders/utils/test_timer.py::test_timer_format", + "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_rating", + "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_merge_ranking", + "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_rmse", + "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_mae", + "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_rsquared", + "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_exp_var", + "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_get_top_k_items", + "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_get_top_k_items_largek", + "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_ndcg_at_k", + "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_map_at_k", + "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_precision", + "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_recall", + "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_auc", + "tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py::test_python_logloss", + "tests/security/test_dependency_security.py::test_requests", + "tests/security/test_dependency_security.py::test_numpy", + "tests/security/test_dependency_security.py::test_pandas", + "tests/responsible_ai/recommenders/datasets/test_criteo_privacy.py", + "tests/responsible_ai/recommenders/datasets/test_movielens_privacy.py", + "tests/integration/recommenders/utils/test_k8s_utils.py", + ], + "group_notebooks_cpu_001": [ # Total group time: 226.42s + "tests/unit/examples/test_notebooks_python.py::test_rlrmc_quickstart_runs", + "tests/unit/examples/test_notebooks_python.py::test_sar_deep_dive_runs", + "tests/unit/examples/test_notebooks_python.py::test_baseline_deep_dive_runs", + "tests/unit/examples/test_notebooks_python.py::test_template_runs", + "tests/unit/recommenders/utils/test_notebook_utils.py::test_is_jupyter", + "tests/unit/examples/test_notebooks_python.py::test_surprise_deep_dive_runs", + "tests/unit/examples/test_notebooks_python.py::test_lightgbm", + "tests/unit/examples/test_notebooks_python.py::test_cornac_deep_dive_runs", + "tests/unit/examples/test_notebooks_python.py::test_sar_single_node_runs", + "tests/unit/examples/test_notebooks_python.py::test_vw_deep_dive_runs", 
+ ], "group_spark_001": [ # Total group time: 270.41s - "tests/unit/recommenders/datasets/test_movielens.py::test_load_spark_df_mock_100__with_custom_param__succeed", - "tests/unit/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__return_success", + "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_init_spark", + "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__return_success", + "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__store_tmp_file", + "tests/data_validation/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__data_serialization_default_param", + "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_spark_df_mock_100__with_default_param__succeed", + "tests/data_validation/recommenders/datasets/test_movielens.py::test_load_spark_df_mock_100__with_custom_param__succeed", "tests/unit/recommenders/datasets/test_spark_splitter.py::test_stratified_splitter", "tests/unit/recommenders/datasets/test_spark_splitter.py::test_chrono_splitter", - "tests/unit/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__data_serialization_default_param", "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_user_diversity_item_feature_vector", - "tests/unit/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_spark_df__store_tmp_file", "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_spark_python_match", "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_spark_precision", "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_spark_exp_var", @@ -170,7 +343,6 @@ "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_user_serendipity", "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_diversity_item_feature_vector", "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_diversity", - "tests/unit/recommenders/datasets/test_movielens.py::test_load_spark_df_mock_100__with_default_param__succeed", "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_user_serendipity_item_feature_vector", "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_serendipity_item_feature_vector", "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_user_item_serendipity_item_feature_vector", @@ -187,9 +359,9 @@ "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_init_spark_rating_eval", "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_catalog_coverage", "tests/unit/recommenders/evaluation/test_spark_evaluation.py::test_distributional_coverage", - "tests/unit/recommenders/datasets/test_spark_splitter.py::test_min_rating_filter", ], "group_notebooks_spark_001": [ # Total group time: 794s + "tests/unit/recommenders/utils/test_notebook_utils.py::test_is_databricks", "tests/unit/examples/test_notebooks_pyspark.py::test_als_deep_dive_runs", # 287.70s "tests/unit/examples/test_notebooks_pyspark.py::test_als_pyspark_runs", # 374.15s "tests/unit/examples/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_runs", # 132.09s @@ -204,34 +376,47 @@ ], "group_gpu_001": [ # Total group time: 492.62s "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works) + "tests/unit/recommenders/models/test_rbm.py::test_class_init", 
"tests/unit/recommenders/models/test_rbm.py::test_sampling_funct", "tests/unit/recommenders/models/test_rbm.py::test_train_param_init", "tests/unit/recommenders/models/test_rbm.py::test_save_load", - "tests/unit/recommenders/models/test_wide_deep_utils.py::test_wide_model", + "tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_init", + "tests/unit/recommenders/models/test_ncf_dataset.py::test_train_loader", + "tests/unit/recommenders/models/test_ncf_dataset.py::test_test_loader", + "tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_init_unsorted", + "tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_init_empty", + "tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_missing_column", + "tests/unit/recommenders/models/test_ncf_dataset.py::test_negative_sampler", + "tests/unit/recommenders/models/test_ncf_singlenode.py::test_init", + "tests/unit/recommenders/models/test_ncf_singlenode.py::test_fit", "tests/unit/recommenders/models/test_ncf_singlenode.py::test_neumf_save_load", "tests/unit/recommenders/models/test_ncf_singlenode.py::test_regular_save_load", - "tests/unit/recommenders/utils/test_tf_utils.py::test_evaluation_log_hook", - "tests/unit/recommenders/utils/test_tf_utils.py::test_pandas_input_fn_for_saved_model", + "tests/unit/recommenders/models/test_ncf_singlenode.py::test_predict", + "tests/unit/recommenders/models/test_wide_deep_utils.py::test_wide_model", + "tests/unit/recommenders/models/test_wide_deep_utils.py::test_deep_model", "tests/unit/recommenders/models/test_wide_deep_utils.py::test_wide_deep_model", "tests/unit/recommenders/models/test_newsrec_model.py::test_naml_component_definition", "tests/unit/recommenders/models/test_newsrec_model.py::test_lstur_component_definition", "tests/unit/recommenders/models/test_newsrec_model.py::test_nrms_component_definition", - "tests/unit/recommenders/models/test_wide_deep_utils.py::test_deep_model", "tests/unit/recommenders/models/test_newsrec_model.py::test_npa_component_definition", - "tests/unit/recommenders/models/test_ncf_singlenode.py::test_fit", - "tests/unit/recommenders/models/test_ncf_singlenode.py::test_init", - "tests/unit/recommenders/models/test_ncf_dataset.py::test_test_loader", - "tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_init", - "tests/unit/recommenders/models/test_ncf_dataset.py::test_train_loader", - "tests/unit/recommenders/models/test_rbm.py::test_class_init", - "tests/unit/recommenders/utils/test_tf_utils.py::test_pandas_input_fn", - "tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_init_unsorted", - "tests/unit/recommenders/models/test_ncf_singlenode.py::test_predict", - "tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_missing_column", + "tests/unit/recommenders/models/test_newsrec_utils.py::test_prepare_hparams", + "tests/unit/recommenders/models/test_newsrec_utils.py::test_load_yaml_file", # "tests/unit/recommenders/models/test_sasrec_model.py::test_prepare_data", # FIXME: it takes too long to run # "tests/unit/recommenders/models/test_sasrec_model.py::test_sampler", # FIXME: it takes too long to run # "tests/unit/recommenders/models/test_sasrec_model.py::test_sasrec", # FIXME: it takes too long to run # "tests/unit/recommenders/models/test_sasrec_model.py::test_ssept", # FIXME: it takes too long to run + "tests/unit/recommenders/utils/test_gpu_utils.py::test_get_gpu_info", + "tests/unit/recommenders/utils/test_gpu_utils.py::test_get_number_gpus", + 
"tests/unit/recommenders/utils/test_gpu_utils.py::test_clear_memory_all_gpus", + "tests/unit/recommenders/utils/test_gpu_utils.py::test_get_cuda_version", + "tests/unit/recommenders/utils/test_gpu_utils.py::test_get_cudnn_version", + "tests/unit/recommenders/utils/test_gpu_utils.py::test_cudnn_enabled", + "tests/unit/recommenders/utils/test_gpu_utils.py::test_tensorflow_gpu", + "tests/unit/recommenders/utils/test_gpu_utils.py::test_pytorch_gpu", + "tests/unit/recommenders/utils/test_tf_utils.py::test_evaluation_log_hook", + "tests/unit/recommenders/utils/test_tf_utils.py::test_pandas_input_fn", + "tests/unit/recommenders/utils/test_tf_utils.py::test_pandas_input_fn_for_saved_model", + "tests/unit/recommenders/utils/test_tf_utils.py::test_build_optimizer", ], "group_gpu_002": [ # Total group time: "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works) @@ -242,6 +427,11 @@ "tests/unit/recommenders/models/test_deeprec_model.py::test_nextitnet_component_definition", "tests/unit/recommenders/models/test_deeprec_model.py::test_sum_component_definition", "tests/unit/recommenders/models/test_deeprec_model.py::test_lightgcn_component_definition", + "tests/unit/recommenders/models/test_deeprec_utils.py::test_prepare_hparams", + "tests/unit/recommenders/models/test_deeprec_utils.py::test_load_yaml_file", + "tests/security/test_dependency_security.py::test_tensorflow", + "tests/security/test_dependency_security.py::test_torch", + "tests/regression/test_compatibility_tf.py", ], "group_notebooks_gpu_001": [ # Total group time: 563.35s "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works) @@ -256,33 +446,4 @@ "tests/unit/examples/test_notebooks_gpu.py::test_xdeepfm", "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", ], - "group_cpu_001": [ # Total group time: 525.96s - "tests/unit/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_default_param__succeed", - "tests/unit/recommenders/datasets/test_dataset.py::test_maybe_download_wrong_bytes", - "tests/unit/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__has_default_col_names", - "tests/unit/recommenders/datasets/test_movielens.py::test_load_pandas_df_mock_100__with_custom_param__succeed", - "tests/unit/recommenders/datasets/test_dataset.py::test_maybe_download_retry", - "tests/unit/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df__return_success", - "tests/unit/recommenders/utils/test_timer.py::test_timer", - "tests/unit/recommenders/tuning/test_ncf_utils.py::test_compute_test_results__return_success", - "tests/unit/recommenders/datasets/test_movielens.py::test_mock_movielens_schema__get_df_remove_default_col__return_success", - "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem", - "tests/unit/recommenders/datasets/test_wikidata.py::test_find_wikidata_id", - "tests/unit/recommenders/models/test_sar_singlenode.py::test_sar_item_similarity", - "tests/unit/recommenders/models/test_tfidf_utils.py::test_tokenize_text", - "tests/unit/recommenders/models/test_tfidf_utils.py::test_get_tokens", - "tests/unit/recommenders/models/test_cornac_utils.py::test_recommend_k_items", - "tests/unit/recommenders/evaluation/test_python_evaluation_time_performance.py", # 297.91s - ], - "group_notebooks_cpu_001": [ # Total group time: 226.42s - "tests/unit/examples/test_notebooks_python.py::test_rlrmc_quickstart_runs", - 
"tests/unit/examples/test_notebooks_python.py::test_sar_deep_dive_runs", - "tests/unit/examples/test_notebooks_python.py::test_baseline_deep_dive_runs", - "tests/unit/examples/test_notebooks_python.py::test_template_runs", - "tests/unit/recommenders/utils/test_notebook_utils.py::test_is_jupyter", - "tests/unit/examples/test_notebooks_python.py::test_surprise_deep_dive_runs", - "tests/unit/examples/test_notebooks_python.py::test_lightgbm", - "tests/unit/examples/test_notebooks_python.py::test_cornac_deep_dive_runs", - "tests/unit/examples/test_notebooks_python.py::test_sar_single_node_runs", - ], } diff --git a/tests/data_validation/examples/test_mind.py b/tests/data_validation/examples/test_mind.py new file mode 100644 index 0000000000..e03162bf9b --- /dev/null +++ b/tests/data_validation/examples/test_mind.py @@ -0,0 +1,37 @@ +# Copyright (c) Recommenders contributors. +# Licensed under the MIT License. + +import pytest +import papermill as pm +import scrapbook as sb + + +def test_mind_utils_runs(notebooks, output_notebook, kernel_name, tmp): + notebook_path = notebooks["mind_utils"] + pm.execute_notebook( + notebook_path, + output_notebook, + kernel_name=kernel_name, + parameters=dict(mind_type="small", word_embedding_dim=300), + ) + + +def test_mind_utils_values(notebooks, output_notebook, kernel_name, tmp): + notebook_path = notebooks["mind_utils"] + pm.execute_notebook( + notebook_path, + output_notebook, + kernel_name=kernel_name, + parameters=dict(mind_type="small", word_embedding_dim=300), + ) + results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ + "data" + ] + + assert results["utils_state"]["vert_num"] == 17 + assert results["utils_state"]["subvert_num"] == 17 + assert results["utils_state"]["word_num"] == 23404 + assert results["utils_state"]["word_num_all"] == 41074 + assert results["utils_state"]["embedding_exist_num"] == 22408 + assert results["utils_state"]["embedding_exist_num_all"] == 37634 + assert results["utils_state"]["uid2index"] == 5000 diff --git a/tests/data_validation/examples/test_wikidata.py b/tests/data_validation/examples/test_wikidata.py new file mode 100644 index 0000000000..cdee1699b9 --- /dev/null +++ b/tests/data_validation/examples/test_wikidata.py @@ -0,0 +1,44 @@ +# Copyright (c) Recommenders contributors. +# Licensed under the MIT License. 
+
+
+import pytest
+import papermill as pm
+import scrapbook as sb
+
+
+@pytest.mark.notebooks
+@pytest.mark.skip(reason="Wikidata API is unstable")
+def test_wikidata_runs(notebooks, output_notebook, kernel_name, tmp):
+    notebook_path = notebooks["wikidata_knowledge_graph"]
+    MOVIELENS_SAMPLE_SIZE = 5
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(
+            MOVIELENS_DATA_SIZE="100k",
+            MOVIELENS_SAMPLE=True,
+            MOVIELENS_SAMPLE_SIZE=MOVIELENS_SAMPLE_SIZE,
+        ),
+    )
+
+
+@pytest.mark.notebooks
+@pytest.mark.skip(reason="Wikidata API is unstable")
+def test_wikidata_values(notebooks, output_notebook, kernel_name):
+    notebook_path = notebooks["wikidata_knowledge_graph"]
+    pm.execute_notebook(
+        notebook_path,
+        output_notebook,
+        kernel_name=kernel_name,
+        parameters=dict(
+            MOVIELENS_DATA_SIZE="100k", MOVIELENS_SAMPLE=True, MOVIELENS_SAMPLE_SIZE=5
+        ),
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+
+    # NOTE: The returned number should always be 5, but we sometimes get fewer because the Wikidata API is unstable
+    assert results["length_result"] >= 1
diff --git a/tests/unit/recommenders/datasets/test_covid_utils.py b/tests/data_validation/recommenders/datasets/test_covid_utils.py
similarity index 100%
rename from tests/unit/recommenders/datasets/test_covid_utils.py
rename to tests/data_validation/recommenders/datasets/test_covid_utils.py
diff --git a/tests/smoke/recommenders/dataset/test_criteo.py b/tests/data_validation/recommenders/datasets/test_criteo.py
similarity index 54%
rename from tests/smoke/recommenders/dataset/test_criteo.py
rename to tests/data_validation/recommenders/datasets/test_criteo.py
index ad92e52f45..11666c4983 100644
--- a/tests/smoke/recommenders/dataset/test_criteo.py
+++ b/tests/data_validation/recommenders/datasets/test_criteo.py
@@ -2,22 +2,44 @@
 # Licensed under the MIT License.
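+# A minimal sketch of the download/extract flow validated below, assuming only
+# a scratch directory; the keyword arguments mirror the calls in these tests:
+if __name__ == "__main__":
+    import tempfile
+
+    from recommenders.datasets import criteo
+
+    with tempfile.TemporaryDirectory() as work_dir:
+        zip_file = criteo.download_criteo(size="sample", work_directory=work_dir)
+        raw_file = criteo.extract_criteo(size="sample", compressed_file=zip_file)
+        print(zip_file, raw_file)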
import os +import gc import pytest import pandas as pd + from recommenders.datasets import criteo -@pytest.mark.smoke -def test_criteo_load_pandas_df(criteo_first_row): +def test_download_criteo_sample(tmp_path): + filepath = criteo.download_criteo(size="sample", work_directory=tmp_path) + statinfo = os.stat(filepath) + assert statinfo.st_size == 8787154 + + +def test_extract_criteo_sample(tmp_path): + filepath = criteo.download_criteo(size="sample", work_directory=tmp_path) + filename = criteo.extract_criteo(size="sample", compressed_file=filepath) + statinfo = os.stat(filename) + assert statinfo.st_size == 24328072 + + +def test_criteo_load_pandas_df_sample(criteo_first_row): df = criteo.load_pandas_df(size="sample") assert df.shape[0] == 100000 assert df.shape[1] == 40 assert df.loc[0].equals(pd.Series(criteo_first_row)) -@pytest.mark.smoke +def test_criteo_load_pandas_df_full(criteo_first_row): + df = criteo.load_pandas_df(size="full") + assert df.shape[0] == 45840617 + assert df.shape[1] == 40 + assert df.loc[0].equals(pd.Series(criteo_first_row)) + del df + gc.collect() + + @pytest.mark.spark -def test_criteo_load_spark_df(spark, criteo_first_row): +def test_criteo_load_spark_df_sample(spark, criteo_first_row): df = criteo.load_spark_df(spark, size="sample") assert df.count() == 100000 assert len(df.columns) == 40 @@ -25,16 +47,12 @@ def test_criteo_load_spark_df(spark, criteo_first_row): assert first_row == criteo_first_row -@pytest.mark.smoke -def test_download_criteo(tmp_path): - filepath = criteo.download_criteo(size="sample", work_directory=tmp_path) - statinfo = os.stat(filepath) - assert statinfo.st_size == 8787154 - - -@pytest.mark.smoke -def test_extract_criteo(tmp_path): - filepath = criteo.download_criteo(size="sample", work_directory=tmp_path) - filename = criteo.extract_criteo(size="sample", compressed_file=filepath) - statinfo = os.stat(filename) - assert statinfo.st_size == 24328072 +@pytest.mark.spark +def test_criteo_load_spark_df_full(spark, criteo_first_row): + df = criteo.load_spark_df(spark, size="full") + assert df.count() == 45840617 + assert len(df.columns) == 40 + first_row = df.limit(1).collect()[0].asDict() + assert first_row == criteo_first_row + del df + gc.collect() diff --git a/tests/data_validation/recommenders/datasets/test_mind.py b/tests/data_validation/recommenders/datasets/test_mind.py new file mode 100644 index 0000000000..d4f5f8c1f4 --- /dev/null +++ b/tests/data_validation/recommenders/datasets/test_mind.py @@ -0,0 +1,155 @@ +# Copyright (c) Recommenders contributors. +# Licensed under the MIT License. 
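+# A minimal standalone sketch of the integrity check applied below: an HTTP
+# HEAD request exposes the advertised Content-Length and ETag without
+# downloading the archive (URL and expected size are copied from the
+# parametrized cases):
+if __name__ == "__main__":
+    import requests
+
+    headers = requests.head(
+        "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip"
+    ).headers
+    assert headers["Content-Length"] == "17372879"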
+ +import os +import pytest +import requests + +from recommenders.datasets.mind import download_mind, extract_mind + + +@pytest.mark.parametrize( + "url, content_length, etag", + [ + ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip", + "17372879", + '"0x8D8B8AD5B233930"', + ), # NOTE: the z20 blob returns the etag with "" + ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_dev.zip", + "10080022", + '"0x8D8B8AD5B188839"', + ), + ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_utils.zip", + "97292694", + '"0x8D8B8AD5B126C3B"', + ), + ( + "https://mind201910small.blob.core.windows.net/release/MINDsmall_train.zip", + "52952752", + "0x8D834F2EB31BDEC", + ), + ( + "https://mind201910small.blob.core.windows.net/release/MINDsmall_dev.zip", + "30945572", + "0x8D834F2EBA8D865", + ), + ( + "https://mind201910small.blob.core.windows.net/release/MINDsmall_utils.zip", + "155178106", + "0x8D87F67F4AEB960", + ), + ( + "https://mind201910small.blob.core.windows.net/release/MINDlarge_train.zip", + "530196631", + "0x8D8244E90C15C07", + ), + ( + "https://mind201910small.blob.core.windows.net/release/MINDlarge_dev.zip", + "103456245", + "0x8D8244E92005849", + ), + ( + "https://mind201910small.blob.core.windows.net/release/MINDlarge_utils.zip", + "150359301", + "0x8D87F67E6CA4364", + ), + ], +) +def test_mind_url(url, content_length, etag): + url_headers = requests.head(url).headers + assert url_headers["Content-Length"] == content_length + assert url_headers["ETag"] == etag + + +def test_download_mind_demo(tmp): + train_path, valid_path = download_mind(size="demo", dest_path=tmp) + statinfo = os.stat(train_path) + assert statinfo.st_size == 17372879 + statinfo = os.stat(valid_path) + assert statinfo.st_size == 10080022 + + +def test_download_mind_small(tmp): + train_path, valid_path = download_mind(size="small", dest_path=tmp) + statinfo = os.stat(train_path) + assert statinfo.st_size == 52952752 + statinfo = os.stat(valid_path) + assert statinfo.st_size == 30945572 + + +def test_extract_mind_demo(tmp): + train_zip, valid_zip = download_mind(size="demo", dest_path=tmp) + train_path, valid_path = extract_mind(train_zip, valid_zip, clean_zip_file=False) + + statinfo = os.stat(os.path.join(train_path, "behaviors.tsv")) + assert statinfo.st_size == 14707247 + statinfo = os.stat(os.path.join(train_path, "entity_embedding.vec")) + assert statinfo.st_size == 16077470 + statinfo = os.stat(os.path.join(train_path, "news.tsv")) + assert statinfo.st_size == 23120370 + statinfo = os.stat(os.path.join(train_path, "relation_embedding.vec")) + assert statinfo.st_size == 1044588 + statinfo = os.stat(os.path.join(valid_path, "behaviors.tsv")) + assert statinfo.st_size == 4434762 + statinfo = os.stat(os.path.join(valid_path, "entity_embedding.vec")) + assert statinfo.st_size == 11591565 + statinfo = os.stat(os.path.join(valid_path, "news.tsv")) + assert statinfo.st_size == 15624320 + statinfo = os.stat(os.path.join(valid_path, "relation_embedding.vec")) + assert statinfo.st_size == 1044588 + + +def test_extract_mind_small(tmp): + train_zip, valid_zip = download_mind(size="small", dest_path=tmp) + train_path, valid_path = extract_mind(train_zip, valid_zip, clean_zip_file=False) + + statinfo = os.stat(os.path.join(train_path, "behaviors.tsv")) + assert statinfo.st_size == 92019716 + statinfo = os.stat(os.path.join(train_path, "entity_embedding.vec")) + assert statinfo.st_size == 25811015 + statinfo = os.stat(os.path.join(train_path, "news.tsv")) + assert 
statinfo.st_size == 41202121 + statinfo = os.stat(os.path.join(train_path, "relation_embedding.vec")) + assert statinfo.st_size == 1044588 + statinfo = os.stat(os.path.join(valid_path, "behaviors.tsv")) + assert statinfo.st_size == 42838544 + statinfo = os.stat(os.path.join(valid_path, "entity_embedding.vec")) + assert statinfo.st_size == 21960998 + statinfo = os.stat(os.path.join(valid_path, "news.tsv")) + assert statinfo.st_size == 33519092 + statinfo = os.stat(os.path.join(valid_path, "relation_embedding.vec")) + assert statinfo.st_size == 1044588 + + +def test_download_mind_large(tmp_path): + train_path, valid_path = download_mind(size="large", dest_path=tmp_path) + statinfo = os.stat(train_path) + assert statinfo.st_size == 530196631 + statinfo = os.stat(valid_path) + assert statinfo.st_size == 103456245 + + +def test_extract_mind_large(tmp): + train_zip, valid_zip = download_mind(size="large", dest_path=tmp) + train_path, valid_path = extract_mind(train_zip, valid_zip) + + statinfo = os.stat(os.path.join(train_path, "behaviors.tsv")) + assert statinfo.st_size == 1373844151 + statinfo = os.stat(os.path.join(train_path, "entity_embedding.vec")) + assert statinfo.st_size == 40305151 + statinfo = os.stat(os.path.join(train_path, "news.tsv")) + assert statinfo.st_size == 84881998 + statinfo = os.stat(os.path.join(train_path, "relation_embedding.vec")) + assert statinfo.st_size == 1044588 + + statinfo = os.stat(os.path.join(valid_path, "behaviors.tsv")) + assert statinfo.st_size == 230662527 + statinfo = os.stat(os.path.join(valid_path, "entity_embedding.vec")) + assert statinfo.st_size == 31958202 + statinfo = os.stat(os.path.join(valid_path, "news.tsv")) + assert statinfo.st_size == 59055351 + statinfo = os.stat(os.path.join(valid_path, "relation_embedding.vec")) + assert statinfo.st_size == 1044588 diff --git a/tests/integration/recommenders/datasets/test_movielens.py b/tests/data_validation/recommenders/datasets/test_movielens.py similarity index 59% rename from tests/integration/recommenders/datasets/test_movielens.py rename to tests/data_validation/recommenders/datasets/test_movielens.py index 886418ee23..5af7e9673f 100644 --- a/tests/integration/recommenders/datasets/test_movielens.py +++ b/tests/data_validation/recommenders/datasets/test_movielens.py @@ -1,16 +1,13 @@ # Copyright (c) Recommenders contributors. # Licensed under the MIT License. 
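+# The mock MovieLens data exercised below can be generated without any
+# download; a minimal sketch, grounded in the calls these tests make:
+if __name__ == "__main__":
+    from recommenders.datasets.movielens import MockMovielensSchema, load_pandas_df
+
+    print(MockMovielensSchema.example(size=5))  # mock ratings, default columns
+    print(load_pandas_df("mock100").head())  # 100-row mock via the standard loader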
+
 import os
-import pytest
-from recommenders.datasets.movielens import (
-    load_pandas_df,
-    load_spark_df,
-    load_item_df,
-    download_movielens,
-    extract_movielens,
-)
 import gc
+import pandas
+from pandas.core.series import Series
+import pytest
+from pytest_mock import MockerFixture
 
 try:
     from pyspark.sql.types import (
@@ -22,11 +19,129 @@
 except ImportError:
     pass  # skip this import if we are in pure python environment
 
+from recommenders.utils.constants import DEFAULT_GENRE_COL, DEFAULT_TITLE_COL
+from recommenders.datasets.movielens import MockMovielensSchema
+from recommenders.datasets.movielens import (
+    load_pandas_df,
+    load_spark_df,
+    load_item_df,
+    download_movielens,
+    extract_movielens,
+)
+from recommenders.datasets.movielens import (
+    DATA_FORMAT,
+    MOCK_DATA_FORMAT,
+    DEFAULT_HEADER,
+    DEFAULT_ITEM_COL,
+    DEFAULT_USER_COL,
+)
+
+
+@pytest.mark.parametrize("size", [10, 100])
+def test_mock_movielens_schema__has_default_col_names(size):
+    df = MockMovielensSchema.example(size=size)
+    for col_name in DEFAULT_HEADER:
+        assert col_name in df.columns
+
+
+@pytest.mark.parametrize("keep_first_n_cols", [2, 3, 4])
+def test_mock_movielens_schema__get_df_remove_default_col__return_success(
+    keep_first_n_cols,
+):
+    df = MockMovielensSchema.get_df(size=3, keep_first_n_cols=keep_first_n_cols)
+    assert len(df) > 0
+    assert len(df.columns) == keep_first_n_cols
+
+
+@pytest.mark.parametrize("keep_first_n_cols", [-1, 0, 100])
+def test_mock_movielens_schema__get_df_invalid_param__return_failure(keep_first_n_cols):
+    with pytest.raises(ValueError, match=r"Invalid value.*"):
+        MockMovielensSchema.get_df(size=3, keep_first_n_cols=keep_first_n_cols)
+
+
+@pytest.mark.parametrize("keep_genre_col", [True, False])
+@pytest.mark.parametrize("keep_title_col", [True, False])
+@pytest.mark.parametrize("keep_first_n_cols", [None, 2])
+@pytest.mark.parametrize("seed", [-1])  # seed for pseudo-random generation
+@pytest.mark.parametrize("size", [0, 3, 10])
+def test_mock_movielens_schema__get_df__return_success(
+    size, seed, keep_first_n_cols, keep_title_col, keep_genre_col
+):
+    df = MockMovielensSchema.get_df(
+        size=size,
+        seed=seed,
+        keep_first_n_cols=keep_first_n_cols,
+        keep_title_col=keep_title_col,
+        keep_genre_col=keep_genre_col,
+    )
+    assert type(df) == pandas.DataFrame
+    assert len(df) == size
+
+    if keep_title_col:
+        assert len(df[DEFAULT_TITLE_COL]) == size
+    if keep_genre_col:
+        assert len(df[DEFAULT_GENRE_COL]) == size
+
+
+def test_mock_movielens_data__no_name_collision():
+    """
+    Making sure that no common names are shared between the mock and real dataset sizes
+    """
+    dataset_name = set(DATA_FORMAT.keys())
+    dataset_name_mock = set(MOCK_DATA_FORMAT.keys())
+    collision = dataset_name.intersection(dataset_name_mock)
+    assert not collision
+
+
+def test_load_pandas_df_mock_100__with_default_param__succeed():
+    df = load_pandas_df("mock100")
+    assert type(df) == pandas.DataFrame
+    assert len(df) == 100
+    assert not df[[DEFAULT_USER_COL, DEFAULT_ITEM_COL]].duplicated().any()
+
+
+def test_load_pandas_df_mock_100__with_custom_param__succeed():
+    df = load_pandas_df(
+        "mock100", title_col=DEFAULT_TITLE_COL, genres_col=DEFAULT_GENRE_COL
+    )
+    assert type(df[DEFAULT_TITLE_COL]) == Series
+    assert type(df[DEFAULT_GENRE_COL]) == Series
+    assert len(df) == 100
+    assert "|" in df.loc[0, DEFAULT_GENRE_COL]
+    assert df.loc[0, DEFAULT_TITLE_COL] == "foo"
+
+
+@pytest.mark.parametrize("size", ["100k", "1m", "10m", "20m"])
+def test_download_and_extract_movielens(size, tmp):
+    """Test movielens data download and extract"""
+    zip_path = os.path.join(tmp, "ml.zip")
+    download_movielens(size, dest_path=zip_path)
+    assert len(os.listdir(tmp)) == 1
+    assert os.path.exists(zip_path)
+
+    rating_path = os.path.join(tmp, "rating.dat")
+    item_path = os.path.join(tmp, "item.dat")
+    extract_movielens(
+        size, rating_path=rating_path, item_path=item_path, zip_path=zip_path
+    )
+    # Test if raw-zip file, rating file, and item file are cached
+    assert len(os.listdir(tmp)) == 3
+    assert os.path.exists(rating_path)
+    assert os.path.exists(item_path)
+
+
-@pytest.mark.integration
 @pytest.mark.parametrize(
     "size, num_samples, num_movies, movie_example, title_example, genres_example, year_example",
     [
+        (
+            "100k",
+            100000,
+            1682,
+            1,
+            "Toy Story (1995)",
+            "Animation|Children's|Comedy",
+            "1995",
+        ),
         (
             "1m",
             1000209,
@@ -112,10 +227,10 @@ def test_load_pandas_df(
     gc.collect()
 
 
-@pytest.mark.integration
 @pytest.mark.parametrize(
     "size, num_movies, movie_example, title_example, genres_example, year_example",
     [
+        ("100k", 1682, 1, "Toy Story (1995)", "Animation|Children's|Comedy", "1995"),
         ("1m", 3883, 1, "Toy Story (1995)", "Animation|Children's|Comedy", "1995"),
         (
             "10m",
@@ -170,11 +285,80 @@ def test_load_item_df(
     gc.collect()
 
 
-@pytest.mark.integration
+@pytest.mark.spark
+@pytest.mark.parametrize("keep_genre_col", [True, False])
+@pytest.mark.parametrize("keep_title_col", [True, False])
+@pytest.mark.parametrize("seed", [101])  # seed for pseudo-random generation
+@pytest.mark.parametrize("size", [0, 3, 10])
+def test_mock_movielens_schema__get_spark_df__return_success(
+    spark, size, seed, keep_title_col, keep_genre_col
+):
+    df = MockMovielensSchema.get_spark_df(
+        spark,
+        size=size,
+        seed=seed,
+        keep_title_col=keep_title_col,
+        keep_genre_col=keep_genre_col,
+    )
+    assert df.count() == size
+
+    if keep_title_col:
+        assert df.schema[DEFAULT_TITLE_COL]
+    if keep_genre_col:
+        assert df.schema[DEFAULT_GENRE_COL]
+
+
+@pytest.mark.spark
+def test_mock_movielens_schema__get_spark_df__store_tmp_file(spark, tmp_path):
+    data_size = 3
+    MockMovielensSchema.get_spark_df(spark, size=data_size, tmp_path=tmp_path)
+    assert os.path.exists(os.path.join(tmp_path, f"mock_movielens_{data_size}.csv"))
+
+
+@pytest.mark.spark
+def test_mock_movielens_schema__get_spark_df__data_serialization_default_param(
+    spark, mocker: MockerFixture
+):
+    data_size = 3
+    to_csv_spy = mocker.spy(pandas.DataFrame, "to_csv")
+
+    df = MockMovielensSchema.get_spark_df(spark, size=data_size)
+    # assertions
+    to_csv_spy.assert_called_once()
+    assert df.count() == data_size
+
+
+@pytest.mark.spark
+def test_load_spark_df_mock_100__with_default_param__succeed(spark):
+    df = load_spark_df(spark, "mock100")
+    assert df.count() == 100
+
+
+@pytest.mark.spark
+def test_load_spark_df_mock_100__with_custom_param__succeed(spark):
+    df = load_spark_df(
+        spark, "mock100", title_col=DEFAULT_TITLE_COL, genres_col=DEFAULT_GENRE_COL
+    )
+    assert df.schema[DEFAULT_TITLE_COL]
+    assert df.schema[DEFAULT_GENRE_COL]
+    assert df.count() == 100
+    assert "|" in df.take(1)[0][DEFAULT_GENRE_COL]
+    assert df.take(1)[0][DEFAULT_TITLE_COL] == "foo"
+
+
 @pytest.mark.spark
 @pytest.mark.parametrize(
     "size, num_samples, num_movies, movie_example, title_example, genres_example, year_example",
     [
+        (
+            "100k",
+            100000,
+            1682,
+            1,
+            "Toy Story (1995)",
+            "Animation|Children's|Comedy",
+            "1995",
+        ),
         (
             "1m",
             1000209,
@@ -271,23 +455,3 @@ def test_load_spark_df(
     assert len(df.columns) == 4
     del df
     gc.collect()
-
-
-@pytest.mark.integration
-@pytest.mark.parametrize("size", ["1m", "10m", "20m"]) -def test_download_and_extract_movielens(size, tmp): - """Test movielens data download and extract""" - zip_path = os.path.join(tmp, "ml.zip") - download_movielens(size, dest_path=zip_path) - assert len(os.listdir(tmp)) == 1 - assert os.path.exists(zip_path) - - rating_path = os.path.join(tmp, "rating.dat") - item_path = os.path.join(tmp, "item.dat") - extract_movielens( - size, rating_path=rating_path, item_path=item_path, zip_path=zip_path - ) - # Test if raw-zip file, rating file, and item file are cached - assert len(os.listdir(tmp)) == 3 - assert os.path.exists(rating_path) - assert os.path.exists(item_path) diff --git a/tests/unit/recommenders/datasets/test_wikidata.py b/tests/data_validation/recommenders/datasets/test_wikidata.py similarity index 100% rename from tests/unit/recommenders/datasets/test_wikidata.py rename to tests/data_validation/recommenders/datasets/test_wikidata.py index 2162e7a495..1df27671ff 100644 --- a/tests/unit/recommenders/datasets/test_wikidata.py +++ b/tests/data_validation/recommenders/datasets/test_wikidata.py @@ -1,8 +1,8 @@ # Copyright (c) Recommenders contributors. # Licensed under the MIT License. -import pytest +import pytest from recommenders.datasets.wikidata import ( find_wikidata_id, diff --git a/tests/integration/__init__.py b/tests/functional/__init__.py similarity index 100% rename from tests/integration/__init__.py rename to tests/functional/__init__.py diff --git a/tests/integration/examples/__init__.py b/tests/functional/examples/__init__.py similarity index 100% rename from tests/integration/examples/__init__.py rename to tests/functional/examples/__init__.py diff --git a/tests/integration/examples/test_notebooks_gpu.py b/tests/functional/examples/test_notebooks_gpu.py similarity index 94% rename from tests/integration/examples/test_notebooks_gpu.py rename to tests/functional/examples/test_notebooks_gpu.py index 9b63315b5f..2d8c6b0a08 100644 --- a/tests/integration/examples/test_notebooks_gpu.py +++ b/tests/functional/examples/test_notebooks_gpu.py @@ -3,13 +3,8 @@ import os import pytest - -try: - import papermill as pm - import scrapbook as sb -except ImportError: - pass # disable error while collecting tests for non-notebook environments - +import papermill as pm +import scrapbook as sb from recommenders.utils.gpu_utils import get_number_gpus @@ -19,14 +14,12 @@ @pytest.mark.gpu -@pytest.mark.integration def test_gpu_vm(): assert get_number_gpus() >= 1 @pytest.mark.gpu @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "size, epochs, expected_values, seed", [ @@ -44,7 +37,7 @@ def test_gpu_vm(): # ("10m", 5, {"map": 0.024821, "ndcg": 0.153396, "precision": 0.143046, "recall": 0.056590})# takes too long ], ) -def test_ncf_integration( +def test_ncf_functional( notebooks, output_notebook, kernel_name, size, epochs, expected_values, seed ): notebook_path = notebooks["ncf"] @@ -66,7 +59,6 @@ def test_ncf_integration( @pytest.mark.gpu @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "size, epochs, batch_size, expected_values, seed", [ @@ -88,7 +80,7 @@ def test_ncf_integration( ) ], ) -def test_ncf_deep_dive_integration( +def test_ncf_deep_dive_functional( notebooks, output_notebook, kernel_name, @@ -121,7 +113,6 @@ def test_ncf_deep_dive_integration( @pytest.mark.gpu @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "size, epochs, expected_values", [ @@ -142,7 +133,7 @@ def test_ncf_deep_dive_integration( # 
("10m", 5, ), # it gets an OOM on pred = learner.model.forward(u, m) ], ) -def test_fastai_integration( +def test_fastai_functional( notebooks, output_notebook, kernel_name, size, epochs, expected_values ): notebook_path = notebooks["fastai"] @@ -162,7 +153,6 @@ def test_fastai_integration( @pytest.mark.gpu @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "epochs, expected_values, seed", [ @@ -173,7 +163,7 @@ def test_fastai_integration( ) ], ) -def test_xdeepfm_integration( +def test_xdeepfm_functional( notebooks, output_notebook, kernel_name, @@ -202,7 +192,6 @@ def test_xdeepfm_integration( @pytest.mark.gpu @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "size, steps, batch_size, expected_values, seed", [ @@ -224,7 +213,7 @@ def test_xdeepfm_integration( ) ], ) -def test_wide_deep_integration( +def test_wide_deep_functional( notebooks, output_notebook, kernel_name, @@ -261,7 +250,6 @@ def test_wide_deep_integration( @pytest.mark.gpu @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "yaml_file, data_path, epochs, batch_size, expected_values, seed", [ @@ -275,7 +263,7 @@ def test_wide_deep_integration( ) ], ) -def test_slirec_quickstart_integration( +def test_slirec_quickstart_functional( notebooks, output_notebook, kernel_name, @@ -313,7 +301,6 @@ def test_slirec_quickstart_integration( @pytest.mark.gpu @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "epochs, batch_size, seed, MIND_type, expected_values", [ @@ -333,7 +320,7 @@ def test_slirec_quickstart_integration( ) ], ) -def test_nrms_quickstart_integration( +def test_nrms_quickstart_functional( notebooks, output_notebook, kernel_name, @@ -375,7 +362,6 @@ def test_nrms_quickstart_integration( @pytest.mark.gpu @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "epochs, batch_size, seed, MIND_type, expected_values", [ @@ -395,7 +381,7 @@ def test_nrms_quickstart_integration( ) ], ) -def test_naml_quickstart_integration( +def test_naml_quickstart_functional( notebooks, output_notebook, kernel_name, @@ -437,7 +423,6 @@ def test_naml_quickstart_integration( @pytest.mark.gpu @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "epochs, batch_size, seed, MIND_type, expected_values", [ @@ -457,7 +442,7 @@ def test_naml_quickstart_integration( ) ], ) -def test_lstur_quickstart_integration( +def test_lstur_quickstart_functional( notebooks, output_notebook, kernel_name, @@ -499,7 +484,6 @@ def test_lstur_quickstart_integration( @pytest.mark.gpu @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "epochs, batch_size, seed, MIND_type, expected_values", [ @@ -519,7 +503,7 @@ def test_lstur_quickstart_integration( ) ], ) -def test_npa_quickstart_integration( +def test_npa_quickstart_functional( notebooks, output_notebook, kernel_name, @@ -561,7 +545,6 @@ def test_npa_quickstart_integration( @pytest.mark.gpu @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "yaml_file, data_path, size, epochs, batch_size, expected_values, seed", [ @@ -581,7 +564,7 @@ def test_npa_quickstart_integration( ) ], ) -def test_lightgcn_deep_dive_integration( +def test_lightgcn_deep_dive_functional( notebooks, output_notebook, kernel_name, @@ -619,8 +602,7 @@ def test_lightgcn_deep_dive_integration( @pytest.mark.gpu @pytest.mark.notebooks -@pytest.mark.integration -def test_dkn_quickstart_integration(notebooks, output_notebook, kernel_name): +def 
test_dkn_quickstart_functional(notebooks, output_notebook, kernel_name): notebook_path = notebooks["dkn_quickstart"] pm.execute_notebook( notebook_path, @@ -640,7 +622,6 @@ def test_dkn_quickstart_integration(notebooks, output_notebook, kernel_name): @pytest.mark.gpu @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "size, expected_values", [ @@ -648,7 +629,7 @@ def test_dkn_quickstart_integration(notebooks, output_notebook, kernel_name): # 10m works but takes too long ], ) -def test_cornac_bivae_integration( +def test_cornac_bivae_functional( notebooks, output_notebook, kernel_name, size, expected_values ): notebook_path = notebooks["cornac_bivae_deep_dive"] @@ -668,7 +649,6 @@ def test_cornac_bivae_integration( @pytest.mark.gpu @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "data_dir, num_epochs, batch_size, model_name, expected_values, seed", [ @@ -690,7 +670,7 @@ def test_cornac_bivae_integration( ), ], ) -def test_sasrec_quickstart_integration( +def test_sasrec_quickstart_functional( notebooks, output_notebook, kernel_name, @@ -725,7 +705,6 @@ def test_sasrec_quickstart_integration( @pytest.mark.gpu @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "size, algos, expected_values_ndcg", [ diff --git a/tests/integration/examples/test_notebooks_pyspark.py b/tests/functional/examples/test_notebooks_pyspark.py similarity index 87% rename from tests/integration/examples/test_notebooks_pyspark.py rename to tests/functional/examples/test_notebooks_pyspark.py index 1dbc2f1399..57bd879283 100644 --- a/tests/integration/examples/test_notebooks_pyspark.py +++ b/tests/functional/examples/test_notebooks_pyspark.py @@ -4,13 +4,8 @@ import os import sys import pytest - -try: - import papermill as pm - import scrapbook as sb -except ImportError: - pass # disable error while collecting tests for non-notebook environments - +import papermill as pm +import scrapbook as sb TOL = 0.05 ABS_TOL = 0.05 @@ -20,8 +15,7 @@ @pytest.mark.flaky(reruns=5, reruns_delay=2) @pytest.mark.spark @pytest.mark.notebooks -@pytest.mark.integration -def test_als_pyspark_integration(notebooks, output_notebook, kernel_name): +def test_als_pyspark_functional(notebooks, output_notebook, kernel_name): notebook_path = notebooks["als_pyspark"] pm.execute_notebook( notebook_path, @@ -47,10 +41,9 @@ def test_als_pyspark_integration(notebooks, output_notebook, kernel_name): @pytest.mark.flaky(reruns=5, reruns_delay=2) @pytest.mark.spark @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.skip(reason="It takes too long in the current test machine") @pytest.mark.skipif(sys.platform == "win32", reason="Not implemented on Windows") -def test_mmlspark_lightgbm_criteo_integration(notebooks, output_notebook, kernel_name): +def test_mmlspark_lightgbm_criteo_functional(notebooks, output_notebook, kernel_name): notebook_path = notebooks["mmlspark_lightgbm_criteo"] pm.execute_notebook( notebook_path, @@ -67,7 +60,6 @@ def test_mmlspark_lightgbm_criteo_integration(notebooks, output_notebook, kernel @pytest.mark.spark @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "size, algos, expected_values_ndcg", [ diff --git a/tests/integration/examples/test_notebooks_python.py b/tests/functional/examples/test_notebooks_python.py similarity index 84% rename from tests/integration/examples/test_notebooks_python.py rename to tests/functional/examples/test_notebooks_python.py index 870c7fc0c0..f232576271 100644 --- 
a/tests/integration/examples/test_notebooks_python.py +++ b/tests/functional/examples/test_notebooks_python.py @@ -2,12 +2,8 @@ # Licensed under the MIT License. import pytest - -try: - import papermill as pm - import scrapbook as sb -except ImportError: - pass # disable error while collecting tests for non-notebook environments +import papermill as pm +import scrapbook as sb TOL = 0.05 @@ -15,7 +11,6 @@ @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "size, expected_values", [ @@ -39,7 +34,7 @@ ), ], ) -def test_sar_single_node_integration( +def test_sar_single_node_functional( notebooks, output_notebook, kernel_name, size, expected_values ): notebook_path = notebooks["sar_single_node"] @@ -58,7 +53,6 @@ def test_sar_single_node_integration( @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "size, expected_values", [ @@ -74,7 +68,7 @@ def test_sar_single_node_integration( # ("10m", {"map": , "ndcg": , "precision": , "recall": }), # OOM on test machine ], ) -def test_baseline_deep_dive_integration( +def test_baseline_deep_dive_functional( notebooks, output_notebook, kernel_name, size, expected_values ): notebook_path = notebooks["baseline_deep_dive"] @@ -93,7 +87,6 @@ def test_baseline_deep_dive_integration( @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "size, expected_values", [ @@ -113,7 +106,7 @@ def test_baseline_deep_dive_integration( # 10m works but takes too long ], ) -def test_surprise_svd_integration( +def test_surprise_svd_functional( notebooks, output_notebook, kernel_name, size, expected_values ): notebook_path = notebooks["surprise_svd_deep_dive"] @@ -132,7 +125,6 @@ def test_surprise_svd_integration( @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "size, expected_values", [ @@ -152,7 +144,7 @@ def test_surprise_svd_integration( ], ) @pytest.mark.skip(reason="VW pip package has installation incompatibilities") -def test_vw_deep_dive_integration( +def test_vw_deep_dive_functional( notebooks, output_notebook, kernel_name, size, expected_values ): notebook_path = notebooks["vowpal_wabbit_deep_dive"] @@ -171,7 +163,6 @@ def test_vw_deep_dive_integration( @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.skip(reason="NNI pip package has installation incompatibilities") def test_nni_tuning_svd(notebooks, output_notebook, kernel_name, tmp): notebook_path = notebooks["nni_tuning_svd"] @@ -192,28 +183,6 @@ def test_nni_tuning_svd(notebooks, output_notebook, kernel_name, tmp): @pytest.mark.notebooks -@pytest.mark.integration -@pytest.mark.skip(reason="Wikidata API is unstable") -def test_wikidata_integration(notebooks, output_notebook, kernel_name, tmp): - notebook_path = notebooks["wikidata_knowledge_graph"] - pm.execute_notebook( - notebook_path, - output_notebook, - kernel_name=kernel_name, - parameters=dict( - MOVIELENS_DATA_SIZE="100k", MOVIELENS_SAMPLE=True, MOVIELENS_SAMPLE_SIZE=5 - ), - ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] - - # NOTE: The return number should be always 5, but sometimes we get less because wikidata is unstable - assert results["length_result"] >= 1 - - -@pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "size, expected_values", [ @@ -221,7 +190,7 @@ def test_wikidata_integration(notebooks, output_notebook, kernel_name, tmp): # 10m works but takes too long ], ) -def test_cornac_bpr_integration( +def test_cornac_bpr_functional( notebooks, output_notebook, 
kernel_name, size, expected_values ): notebook_path = notebooks["cornac_bpr_deep_dive"] @@ -240,7 +209,6 @@ def test_cornac_bpr_integration( @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "size, epochs, expected_values", [ @@ -256,7 +224,7 @@ def test_cornac_bpr_integration( ), ], ) -def test_lightfm_integration( +def test_lightfm_functional( notebooks, output_notebook, kernel_name, size, epochs, expected_values ): notebook_path = notebooks["lightfm_deep_dive"] @@ -275,13 +243,12 @@ def test_lightfm_integration( @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.experimental @pytest.mark.parametrize( "expected_values", [({"rmse": 0.4969, "mae": 0.4761})], ) -def test_geoimc_integration(notebooks, output_notebook, kernel_name, expected_values): +def test_geoimc_functional(notebooks, output_notebook, kernel_name, expected_values): notebook_path = notebooks["geoimc_quickstart"] pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ @@ -293,9 +260,8 @@ def test_geoimc_integration(notebooks, output_notebook, kernel_name, expected_va @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.experimental -def test_xlearn_fm_integration(notebooks, output_notebook, kernel_name): +def test_xlearn_fm_functional(notebooks, output_notebook, kernel_name): notebook_path = notebooks["xlearn_fm_deep_dive"] pm.execute_notebook( notebook_path, @@ -311,7 +277,6 @@ def test_xlearn_fm_integration(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks -@pytest.mark.integration @pytest.mark.parametrize( "size, algos, expected_values_ndcg", [ diff --git a/tests/integration/recommenders/__init__.py b/tests/integration/recommenders/__init__.py deleted file mode 100644 index 25dc11fb93..0000000000 --- a/tests/integration/recommenders/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Needed to execute the AzureML tests \ No newline at end of file diff --git a/tests/integration/recommenders/datasets/__init__.py b/tests/integration/recommenders/datasets/__init__.py deleted file mode 100644 index 25dc11fb93..0000000000 --- a/tests/integration/recommenders/datasets/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Needed to execute the AzureML tests \ No newline at end of file diff --git a/tests/integration/recommenders/datasets/test_criteo.py b/tests/integration/recommenders/datasets/test_criteo.py deleted file mode 100644 index bba0f1f1e6..0000000000 --- a/tests/integration/recommenders/datasets/test_criteo.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) Recommenders contributors. -# Licensed under the MIT License. 
- -import pytest -import pandas as pd -from recommenders.datasets import criteo -import gc - - -@pytest.mark.integration -def test_criteo_load_pandas_df(criteo_first_row): - df = criteo.load_pandas_df(size="full") - assert df.shape[0] == 45840617 - assert df.shape[1] == 40 - assert df.loc[0].equals(pd.Series(criteo_first_row)) - del df - gc.collect() - - -@pytest.mark.spark -@pytest.mark.integration -def test_criteo_load_spark_df(spark, criteo_first_row): - df = criteo.load_spark_df(spark, size="full") - assert df.count() == 45840617 - assert len(df.columns) == 40 - first_row = df.limit(1).collect()[0].asDict() - assert first_row == criteo_first_row - del df - gc.collect() diff --git a/tests/integration/recommenders/datasets/test_mind.py b/tests/integration/recommenders/datasets/test_mind.py deleted file mode 100644 index 17953c4678..0000000000 --- a/tests/integration/recommenders/datasets/test_mind.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) Recommenders contributors. -# Licensed under the MIT License. - -import os -import pytest -import papermill as pm -import scrapbook as sb - -from recommenders.datasets.mind import download_mind, extract_mind - - -@pytest.mark.integration -def test_download_mind(tmp_path): - train_path, valid_path = download_mind(size="large", dest_path=tmp_path) - statinfo = os.stat(train_path) - assert statinfo.st_size == 530196631 - statinfo = os.stat(valid_path) - assert statinfo.st_size == 103456245 - - -@pytest.mark.integration -def test_extract_mind(tmp): - train_zip, valid_zip = download_mind(size="large", dest_path=tmp) - train_path, valid_path = extract_mind(train_zip, valid_zip) - - statinfo = os.stat(os.path.join(train_path, "behaviors.tsv")) - assert statinfo.st_size == 1373844151 - statinfo = os.stat(os.path.join(train_path, "entity_embedding.vec")) - assert statinfo.st_size == 40305151 - statinfo = os.stat(os.path.join(train_path, "news.tsv")) - assert statinfo.st_size == 84881998 - statinfo = os.stat(os.path.join(train_path, "relation_embedding.vec")) - assert statinfo.st_size == 1044588 - - statinfo = os.stat(os.path.join(valid_path, "behaviors.tsv")) - assert statinfo.st_size == 230662527 - statinfo = os.stat(os.path.join(valid_path, "entity_embedding.vec")) - assert statinfo.st_size == 31958202 - statinfo = os.stat(os.path.join(valid_path, "news.tsv")) - assert statinfo.st_size == 59055351 - statinfo = os.stat(os.path.join(valid_path, "relation_embedding.vec")) - assert statinfo.st_size == 1044588 - - -@pytest.mark.notebooks -@pytest.mark.integration -def test_mind_utils_integration(notebooks, output_notebook, kernel_name, tmp): - notebook_path = notebooks["mind_utils"] - pm.execute_notebook( - notebook_path, - output_notebook, - kernel_name=kernel_name, - parameters=dict(mind_type="small", word_embedding_dim=300), - ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] - - assert results["utils_state"]["vert_num"] == 17 - assert results["utils_state"]["subvert_num"] == 17 - assert results["utils_state"]["word_num"] == 23404 - assert results["utils_state"]["word_num_all"] == 41074 - assert results["utils_state"]["embedding_exist_num"] == 22408 - assert results["utils_state"]["embedding_exist_num_all"] == 37634 - assert results["utils_state"]["uid2index"] == 5000 diff --git a/tests/unit/recommenders/utils/test_k8s_utils.py b/tests/integration/recommenders/utils/test_k8s_utils.py similarity index 100% rename from tests/unit/recommenders/utils/test_k8s_utils.py rename to 
tests/integration/recommenders/utils/test_k8s_utils.py
diff --git a/tests/unit/recommenders/evaluation/test_python_evaluation_time_performance.py b/tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py
similarity index 99%
rename from tests/unit/recommenders/evaluation/test_python_evaluation_time_performance.py
rename to tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py
index d4dea22086..7c9070c5f3 100644
--- a/tests/unit/recommenders/evaluation/test_python_evaluation_time_performance.py
+++ b/tests/performance/recommenders/evaluation/test_python_evaluation_time_performance.py
@@ -4,8 +4,10 @@
 import numpy as np
 import pandas as pd
 import pytest
+import random
 from sklearn.preprocessing import minmax_scale
 
+from recommenders.utils.timer import Timer
 from recommenders.utils.constants import (
     DEFAULT_USER_COL,
     DEFAULT_ITEM_COL,
@@ -28,15 +30,13 @@
     auc,
     logloss,
 )
-import random
-from recommenders.utils.timer import Timer
+
 
 random.seed(SEED)
 
 DATA_USER_NUM = 5000
 DATA_ITEM_NUM = DATA_USER_NUM * 2
 DATA_SAMPLE_NUM = DATA_USER_NUM * 1000
 DATA_RATING_MAX = 5
-
 TOL = 0.1
@@ -84,8 +84,6 @@ def rating_pred_binary(rating_pred):
 
 # The following time thresholds are benchmarked on Azure
 # Standard_A8m_v2 with 8 vCPUs and 64 GiB memory.
-
-
 def test_merge_rating(rating_true, rating_pred):
     with Timer() as t:
         merge_rating_true_pred(
diff --git a/tests/regression/test_compatibility_tf.py b/tests/regression/test_compatibility_tf.py
new file mode 100644
index 0000000000..c949f42312
--- /dev/null
+++ b/tests/regression/test_compatibility_tf.py
@@ -0,0 +1,14 @@
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+
+import pytest
+
+
+@pytest.mark.gpu
+def test_compatibility_tf():
+    """Some of our code uses TF1 and some uses TF2. Here we just check that we
+    can import both versions.
+    """
+    import tensorflow as tf  # noqa: F401
+    from tensorflow.compat.v1 import placeholder  # noqa: F401
diff --git a/tests/responsible_ai/recommenders/datasets/test_criteo_privacy.py b/tests/responsible_ai/recommenders/datasets/test_criteo_privacy.py
new file mode 100644
index 0000000000..61dfc22956
--- /dev/null
+++ b/tests/responsible_ai/recommenders/datasets/test_criteo_privacy.py
@@ -0,0 +1,15 @@
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+
+import pandas as pd
+
+from recommenders.datasets import criteo
+
+
+def test_criteo_privacy(criteo_first_row):
+    """Check that there are no privacy concerns. In Criteo, we check that the
+    data is anonymized.
+    """
+    df = criteo.load_pandas_df(size="sample")
+    assert df.loc[0].equals(pd.Series(criteo_first_row))
diff --git a/tests/responsible_ai/recommenders/datasets/test_movielens_privacy.py b/tests/responsible_ai/recommenders/datasets/test_movielens_privacy.py
new file mode 100644
index 0000000000..dd6a16ccc6
--- /dev/null
+++ b/tests/responsible_ai/recommenders/datasets/test_movielens_privacy.py
@@ -0,0 +1,14 @@
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+
+from recommenders.datasets import movielens
+
+
+def test_movielens_privacy():
+    """Check that there are no privacy concerns. In Movielens, we check that all the
+    userIDs are numbers.
+    """
+    df = movielens.load_pandas_df(size="100k")
+    users = df["userID"].values.tolist()
+    assert all(isinstance(x, int) for x in users)
diff --git a/tests/security/test_dependency_security.py b/tests/security/test_dependency_security.py
new file mode 100644
index 0000000000..d706f94718
--- /dev/null
+++ b/tests/security/test_dependency_security.py
@@ -0,0 +1,43 @@
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+
+import pytest
+import requests
+import numpy as np
+import pandas as pd
+from packaging.version import Version  # PEP 440-aware compare; plain string comparison misorders e.g. "2.9" vs "2.31"
+
+try:
+    import tensorflow as tf
+    import torch
+except ImportError:
+    pass  # skip this import if we are in a CPU environment
+
+
+def test_requests():
+    # Security issue: https://github.com/psf/requests/releases/tag/v2.31.0
+    assert Version(requests.__version__) >= Version("2.31.0")
+
+
+def test_numpy():
+    # Security issue: https://github.com/advisories/GHSA-frgw-fgh6-9g52
+    assert Version(np.__version__) >= Version("1.13.3")
+
+
+def test_pandas():
+    # Security issue: https://github.com/advisories/GHSA-cmm9-mgm5-9r42
+    assert Version(pd.__version__) >= Version("1.0.3")
+
+
+@pytest.mark.gpu
+def test_tensorflow():
+    # Security issue: https://github.com/advisories/GHSA-w5gh-2wr2-pm6g
+    # Security issue: https://github.com/advisories/GHSA-r6jx-9g48-2r5r
+    assert Version(tf.__version__) >= Version("2.5.1")
+
+
+@pytest.mark.gpu
+def test_torch():
+    # Security issue: https://github.com/advisories/GHSA-47fc-vmwq-366v
+    assert Version(torch.__version__) >= Version("1.13.1")
diff --git a/tests/smoke/examples/test_notebooks_gpu.py b/tests/smoke/examples/test_notebooks_gpu.py
index 9f9b7ef4f3..082b8664bb 100644
--- a/tests/smoke/examples/test_notebooks_gpu.py
+++ b/tests/smoke/examples/test_notebooks_gpu.py
@@ -3,12 +3,8 @@
 
 import pytest
-
-try:
-    import papermill as pm
-    import scrapbook as sb
-except ImportError:
-    pass  # disable error while collecting tests for non-notebook environments
+import papermill as pm
+import scrapbook as sb
 
 from recommenders.utils.gpu_utils import get_number_gpus
 
@@ -17,14 +13,12 @@
 ABS_TOL = 0.05
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_gpu_vm():
     assert get_number_gpus() >= 1
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_ncf_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["ncf"]
@@ -45,7 +39,6 @@ def test_ncf_smoke(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_ncf_deep_dive_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["ncf_deep_dive"]
@@ -73,7 +66,6 @@ def test_ncf_deep_dive_smoke(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_fastai_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["fastai"]
@@ -98,7 +90,6 @@ def test_fastai_smoke(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_xdeepfm_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["xdeepfm_quickstart"]
@@ -125,7 +116,6 @@ def test_xdeepfm_smoke(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_wide_deep_smoke(notebooks, output_notebook, kernel_name, tmp):
     notebook_path = notebooks["wide_deep"]
@@ -154,7 +144,6 @@ def test_wide_deep_smoke(notebooks, output_notebook, kernel_name, tmp):
 
 
 @pytest.mark.notebooks
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_naml_smoke(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["naml_quickstart"]
@@ -175,7 +164,6 @@ def test_naml_smoke(notebooks, 
output_notebook, kernel_name): @pytest.mark.notebooks -@pytest.mark.smoke @pytest.mark.gpu def test_nrms_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["nrms_quickstart"] @@ -196,7 +184,6 @@ def test_nrms_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks -@pytest.mark.smoke @pytest.mark.gpu def test_npa_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["npa_quickstart"] @@ -217,7 +204,6 @@ def test_npa_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks -@pytest.mark.smoke @pytest.mark.gpu def test_lstur_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["lstur_quickstart"] @@ -238,7 +224,6 @@ def test_lstur_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks -@pytest.mark.smoke @pytest.mark.gpu def test_cornac_bivae_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["cornac_bivae_deep_dive"] diff --git a/tests/smoke/examples/test_notebooks_pyspark.py b/tests/smoke/examples/test_notebooks_pyspark.py index 561b45b68f..2e521104a6 100644 --- a/tests/smoke/examples/test_notebooks_pyspark.py +++ b/tests/smoke/examples/test_notebooks_pyspark.py @@ -1,14 +1,11 @@ # Copyright (c) Recommenders contributors. # Licensed under the MIT License. + import sys import pytest - -try: - import papermill as pm - import scrapbook as sb -except ImportError: - pass # disable error while collecting tests for non-notebook environments +import papermill as pm +import scrapbook as sb TOL = 0.05 @@ -17,7 +14,6 @@ # This is a flaky test that can fail unexpectedly @pytest.mark.flaky(reruns=5, reruns_delay=2) -@pytest.mark.smoke @pytest.mark.spark @pytest.mark.notebooks def test_als_pyspark_smoke(notebooks, output_notebook, kernel_name): @@ -45,7 +41,6 @@ def test_als_pyspark_smoke(notebooks, output_notebook, kernel_name): # This is a flaky test that can fail unexpectedly @pytest.mark.flaky(reruns=5, reruns_delay=2) -@pytest.mark.smoke @pytest.mark.spark @pytest.mark.notebooks @pytest.mark.skipif(sys.platform == "win32", reason="Not implemented on Windows") diff --git a/tests/smoke/examples/test_notebooks_python.py b/tests/smoke/examples/test_notebooks_python.py index 4673d71c8b..0bd359ce39 100644 --- a/tests/smoke/examples/test_notebooks_python.py +++ b/tests/smoke/examples/test_notebooks_python.py @@ -1,20 +1,16 @@ # Copyright (c) Recommenders contributors. # Licensed under the MIT License. 
-import pytest -try: - import papermill as pm - import scrapbook as sb -except ImportError: - pass # disable error while collecting tests for non-notebook environments +import pytest +import papermill as pm +import scrapbook as sb TOL = 0.05 ABS_TOL = 0.05 -@pytest.mark.smoke @pytest.mark.notebooks def test_sar_single_node_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["sar_single_node"] @@ -34,7 +30,6 @@ def test_sar_single_node_smoke(notebooks, output_notebook, kernel_name): assert results["recall"] == pytest.approx(0.176385, rel=TOL, abs=ABS_TOL) -@pytest.mark.smoke @pytest.mark.notebooks def test_baseline_deep_dive_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["baseline_deep_dive"] @@ -58,7 +53,6 @@ def test_baseline_deep_dive_smoke(notebooks, output_notebook, kernel_name): assert results["recall"] == pytest.approx(0.108826, rel=TOL, abs=ABS_TOL) -@pytest.mark.smoke @pytest.mark.notebooks def test_surprise_svd_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["surprise_svd_deep_dive"] @@ -82,7 +76,6 @@ def test_surprise_svd_smoke(notebooks, output_notebook, kernel_name): assert results["recall"] == pytest.approx(0.032, rel=TOL, abs=ABS_TOL) -@pytest.mark.smoke @pytest.mark.notebooks @pytest.mark.skip(reason="VW pip package has installation incompatibilities") def test_vw_deep_dive_smoke(notebooks, output_notebook, kernel_name): @@ -107,7 +100,6 @@ def test_vw_deep_dive_smoke(notebooks, output_notebook, kernel_name): assert results["recall"] == pytest.approx(0.037612, rel=TOL, abs=ABS_TOL) -@pytest.mark.smoke @pytest.mark.notebooks def test_lightgbm_quickstart_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["lightgbm_quickstart"] @@ -138,7 +130,6 @@ def test_lightgbm_quickstart_smoke(notebooks, output_notebook, kernel_name): ) -@pytest.mark.smoke @pytest.mark.notebooks def test_cornac_bpr_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["cornac_bpr_deep_dive"] @@ -156,15 +147,3 @@ def test_cornac_bpr_smoke(notebooks, output_notebook, kernel_name): assert results["ndcg"] == pytest.approx(0.4034, rel=TOL, abs=ABS_TOL) assert results["precision"] == pytest.approx(0.3550, rel=TOL, abs=ABS_TOL) assert results["recall"] == pytest.approx(0.1802, rel=TOL, abs=ABS_TOL) - - -@pytest.mark.smoke -@pytest.mark.notebooks -def test_mind_utils(notebooks, output_notebook, kernel_name, tmp): - notebook_path = notebooks["mind_utils"] - pm.execute_notebook( - notebook_path, - output_notebook, - kernel_name=kernel_name, - parameters=dict(mind_type="small", word_embedding_dim=300), - ) diff --git a/tests/smoke/recommenders/dataset/__init__.py b/tests/smoke/recommenders/dataset/__init__.py deleted file mode 100644 index 25dc11fb93..0000000000 --- a/tests/smoke/recommenders/dataset/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Needed to execute the AzureML tests \ No newline at end of file diff --git a/tests/smoke/recommenders/dataset/test_mind.py b/tests/smoke/recommenders/dataset/test_mind.py deleted file mode 100644 index 332815128a..0000000000 --- a/tests/smoke/recommenders/dataset/test_mind.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) Recommenders contributors. -# Licensed under the MIT License. 
- -import pytest -import os -import requests -from recommenders.datasets.mind import download_mind, extract_mind - - -@pytest.mark.smoke -@pytest.mark.parametrize( - "url, content_length, etag", - [ - ( - "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip", - "17372879", - '"0x8D8B8AD5B233930"', - ), # NOTE: the z20 blob returns the etag with "" - ( - "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_dev.zip", - "10080022", - '"0x8D8B8AD5B188839"', - ), - ( - "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_utils.zip", - "97292694", - '"0x8D8B8AD5B126C3B"', - ), - ( - "https://mind201910small.blob.core.windows.net/release/MINDsmall_train.zip", - "52952752", - "0x8D834F2EB31BDEC", - ), - ( - "https://mind201910small.blob.core.windows.net/release/MINDsmall_dev.zip", - "30945572", - "0x8D834F2EBA8D865", - ), - ( - "https://mind201910small.blob.core.windows.net/release/MINDsmall_utils.zip", - "155178106", - "0x8D87F67F4AEB960", - ), - ( - "https://mind201910small.blob.core.windows.net/release/MINDlarge_train.zip", - "530196631", - "0x8D8244E90C15C07", - ), - ( - "https://mind201910small.blob.core.windows.net/release/MINDlarge_dev.zip", - "103456245", - "0x8D8244E92005849", - ), - ( - "https://mind201910small.blob.core.windows.net/release/MINDlarge_utils.zip", - "150359301", - "0x8D87F67E6CA4364", - ), - ], -) -def test_mind_url(url, content_length, etag): - url_headers = requests.head(url).headers - assert url_headers["Content-Length"] == content_length - assert url_headers["ETag"] == etag - - -@pytest.mark.smoke -@pytest.mark.parametrize("size", [("demo"), ("small")]) -def test_extract_mind(size, tmp): - train_zip, valid_zip = download_mind(size, dest_path=tmp) - train_path, valid_path = extract_mind(train_zip, valid_zip, clean_zip_file=False) - - if size == "demo": - statinfo = os.stat(os.path.join(train_path, "behaviors.tsv")) - assert statinfo.st_size == 14707247 - statinfo = os.stat(os.path.join(train_path, "entity_embedding.vec")) - assert statinfo.st_size == 16077470 - statinfo = os.stat(os.path.join(train_path, "news.tsv")) - assert statinfo.st_size == 23120370 - statinfo = os.stat(os.path.join(train_path, "relation_embedding.vec")) - assert statinfo.st_size == 1044588 - statinfo = os.stat(os.path.join(valid_path, "behaviors.tsv")) - assert statinfo.st_size == 4434762 - statinfo = os.stat(os.path.join(valid_path, "entity_embedding.vec")) - assert statinfo.st_size == 11591565 - statinfo = os.stat(os.path.join(valid_path, "news.tsv")) - assert statinfo.st_size == 15624320 - statinfo = os.stat(os.path.join(valid_path, "relation_embedding.vec")) - assert statinfo.st_size == 1044588 - elif size == "small": - statinfo = os.stat(os.path.join(train_path, "behaviors.tsv")) - assert statinfo.st_size == 92019716 - statinfo = os.stat(os.path.join(train_path, "entity_embedding.vec")) - assert statinfo.st_size == 25811015 - statinfo = os.stat(os.path.join(train_path, "news.tsv")) - assert statinfo.st_size == 41202121 - statinfo = os.stat(os.path.join(train_path, "relation_embedding.vec")) - assert statinfo.st_size == 1044588 - statinfo = os.stat(os.path.join(valid_path, "behaviors.tsv")) - assert statinfo.st_size == 42838544 - statinfo = os.stat(os.path.join(valid_path, "entity_embedding.vec")) - assert statinfo.st_size == 21960998 - statinfo = os.stat(os.path.join(valid_path, "news.tsv")) - assert statinfo.st_size == 33519092 - statinfo = os.stat(os.path.join(valid_path, "relation_embedding.vec")) - assert statinfo.st_size == 1044588 - else: - 
assert False diff --git a/tests/smoke/recommenders/dataset/test_movielens.py b/tests/smoke/recommenders/dataset/test_movielens.py deleted file mode 100644 index 0649c2ab25..0000000000 --- a/tests/smoke/recommenders/dataset/test_movielens.py +++ /dev/null @@ -1,232 +0,0 @@ -# Copyright (c) Recommenders contributors. -# Licensed under the MIT License. - -import os -import pytest -from recommenders.datasets.movielens import ( - load_pandas_df, - load_spark_df, - load_item_df, - download_movielens, - extract_movielens, -) - -try: - from pyspark.sql.types import ( - StructType, - StructField, - IntegerType, - ) - from pyspark.sql.functions import col -except ImportError: - pass # skip this import if we are in pure python environment - - -@pytest.mark.smoke -@pytest.mark.parametrize( - "size, num_samples, num_movies, movie_example, title_example, genres_example, year_example", - [ - ( - "100k", - 100000, - 1682, - 1, - "Toy Story (1995)", - "Animation|Children's|Comedy", - "1995", - ) - ], -) -def test_load_pandas_df( - size, - num_samples, - num_movies, - movie_example, - title_example, - genres_example, - year_example, - tmp, -): - """Test MovieLens dataset load as pd.DataFrame""" - # Test if correct data are loaded - header = ["a", "b", "c"] - df = load_pandas_df(size=size, local_cache_path=tmp, header=header) - assert len(df) == num_samples - assert len(df.columns) == len(header) - # Test if raw-zip file, rating file, and item file are cached - assert len(os.listdir(tmp)) == 3 - - # Test title, genres, and released year load - header = ["a", "b", "c", "d", "e"] - with pytest.warns(Warning): - df = load_pandas_df( - size=size, - header=header, - local_cache_path=tmp, - title_col="Title", - genres_col="Genres", - year_col="Year", - ) - assert len(df) == num_samples - assert ( - len(df.columns) == 7 - ) # 4 header columns (user, item, rating, timestamp) and 3 feature columns - assert "e" not in df.columns # only the first 4 header columns are used - # Get two records of the same items and check if the item-features are the same. 
- head = df.loc[df["b"] == movie_example][:2] - title = head["Title"].values - assert title[0] == title[1] - assert title[0] == title_example - genres = head["Genres"].values - assert genres[0] == genres[1] - assert genres[0] == genres_example - year = head["Year"].values - assert year[0] == year[1] - assert year[0] == year_example - - # Test default arguments - df = load_pandas_df(size) - assert len(df) == num_samples - # user, item, rating and timestamp - assert len(df.columns) == 4 - - -@pytest.mark.smoke -@pytest.mark.parametrize( - "size, num_movies, movie_example, title_example, genres_example, year_example", - [("100k", 1682, 1, "Toy Story (1995)", "Animation|Children's|Comedy", "1995")], -) -def test_load_item_df( - size, - num_movies, - movie_example, - title_example, - genres_example, - year_example, - tmp, -): - """Test movielens item data load (not rating data)""" - df = load_item_df(size, local_cache_path=tmp, title_col="title") - assert len(df) == num_movies - # movie_col and title_col should be loaded - assert len(df.columns) == 2 - assert df["title"][0] == title_example - - # Test title and genres - df = load_item_df( - size, - local_cache_path=tmp, - movie_col="item", - genres_col="genres", - year_col="year", - ) - assert len(df) == num_movies - # movile_col, genres_col and year_col - assert len(df.columns) == 3 - - assert df["item"][0] == movie_example - assert df["genres"][0] == genres_example - assert df["year"][0] == year_example - - -@pytest.mark.smoke -@pytest.mark.spark -@pytest.mark.parametrize( - "size, num_samples, num_movies, movie_example, title_example, genres_example, year_example", - [ - ( - "100k", - 100000, - 1682, - 1, - "Toy Story (1995)", - "Animation|Children's|Comedy", - "1995", - ) - ], -) -def test_load_spark_df( - size, - num_samples, - num_movies, - movie_example, - title_example, - genres_example, - year_example, - tmp, - spark, -): - """Test MovieLens dataset load into pySpark.DataFrame""" - - # Test if correct data are loaded - header = ["1", "2", "3"] - schema = StructType( - [ - StructField("u", IntegerType()), - StructField("m", IntegerType()), - ] - ) - with pytest.warns(Warning): - df = load_spark_df( - spark, size=size, local_cache_path=tmp, header=header, schema=schema - ) - assert df.count() == num_samples - # Test if schema is used when both schema and header are provided - assert len(df.columns) == len(schema) - # Test if raw-zip file, rating file, and item file are cached - assert len(os.listdir(tmp)) == 3 - - # Test title, genres, and released year load - header = ["a", "b", "c", "d", "e"] - with pytest.warns(Warning): - df = load_spark_df( - spark, - size=size, - local_cache_path=tmp, - header=header, - title_col="Title", - genres_col="Genres", - year_col="Year", - ) - assert df.count() == num_samples - assert ( - len(df.columns) == 7 - ) # 4 header columns (user, item, rating, timestamp) and 3 feature columns - assert "e" not in df.columns # only the first 4 header columns are used - # Get two records of the same items and check if the item-features are the same. 
-    head = df.filter(col("b") == movie_example).limit(2)
-    title = head.select("Title").collect()
-    assert title[0][0] == title[1][0]
-    assert title[0][0] == title_example
-    genres = head.select("Genres").collect()
-    assert genres[0][0] == genres[1][0]
-    assert genres[0][0] == genres_example
-    year = head.select("Year").collect()
-    assert year[0][0] == year[1][0]
-    assert year[0][0] == year_example
-
-    # Test default arguments
-    df = load_spark_df(spark, size)
-    assert df.count() == num_samples
-    # user, item, rating and timestamp
-    assert len(df.columns) == 4
-
-
-@pytest.mark.smoke
-@pytest.mark.parametrize("size", ["100k"])
-def test_download_and_extract_movielens(size, tmp):
-    """Test movielens data download and extract"""
-    zip_path = os.path.join(tmp, "ml.zip")
-    download_movielens(size, dest_path=zip_path)
-    assert len(os.listdir(tmp)) == 1
-    assert os.path.exists(zip_path)
-
-    rating_path = os.path.join(tmp, "rating.dat")
-    item_path = os.path.join(tmp, "item.dat")
-    extract_movielens(
-        size, rating_path=rating_path, item_path=item_path, zip_path=zip_path
-    )
-    # Test if raw-zip file, rating file, and item file are cached
-    assert len(os.listdir(tmp)) == 3
-    assert os.path.exists(rating_path)
-    assert os.path.exists(item_path)
diff --git a/tests/smoke/recommenders/recommender/test_deeprec_model.py b/tests/smoke/recommenders/recommender/test_deeprec_model.py
index 357e0aa814..ead368485e 100644
--- a/tests/smoke/recommenders/recommender/test_deeprec_model.py
+++ b/tests/smoke/recommenders/recommender/test_deeprec_model.py
@@ -30,9 +30,7 @@
     pass  # disable error while collecting tests for non-gpu environments
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
-@pytest.mark.deeprec
 def test_FFM_iterator(deeprec_resource_path):
     data_path = os.path.join(deeprec_resource_path, "xdeepfm")
     yaml_file = os.path.join(data_path, "xDeepFM.yaml")
@@ -52,9 +50,7 @@ def test_FFM_iterator(deeprec_resource_path):
     assert isinstance(res, tuple)
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
-@pytest.mark.deeprec
 def test_model_xdeepfm(deeprec_resource_path):
     data_path = os.path.join(deeprec_resource_path, "xdeepfm")
     yaml_file = os.path.join(data_path, "xDeepFM.yaml")
@@ -79,9 +75,7 @@ def test_model_xdeepfm(deeprec_resource_path):
     assert model.predict(data_file, output_file) is not None
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
-@pytest.mark.deeprec
 def test_model_dkn(deeprec_resource_path):
     data_path = os.path.join(deeprec_resource_path, "dkn")
     yaml_file = os.path.join(data_path, r"dkn.yaml")
@@ -116,10 +110,7 @@ def test_model_dkn(deeprec_resource_path):
     assert model.run_eval(valid_file) is not None
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
-@pytest.mark.deeprec
-@pytest.mark.sequential
 def test_model_slirec(deeprec_resource_path, deeprec_config_path):
     data_path = os.path.join(deeprec_resource_path, "slirec")
     yaml_file = os.path.join(deeprec_config_path, "sli_rec.yaml")
@@ -182,10 +173,7 @@ def test_model_slirec(deeprec_resource_path, deeprec_config_path):
     assert model.predict(test_file, output_file) is not None
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
-@pytest.mark.deeprec
-@pytest.mark.sequential
 def test_model_sum(deeprec_resource_path, deeprec_config_path):
     data_path = os.path.join(deeprec_resource_path, "slirec")
     yaml_file = os.path.join(deeprec_config_path, "sum.yaml")
@@ -248,9 +236,7 @@ def test_model_sum(deeprec_resource_path, deeprec_config_path):
     assert model.predict(valid_file, output_file) is not None
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
-@pytest.mark.deeprec
 def test_model_lightgcn(deeprec_resource_path, deeprec_config_path):
     data_path = os.path.join(deeprec_resource_path, "dkn")
     yaml_file = os.path.join(deeprec_config_path, "lightgcn.yaml")
diff --git a/tests/smoke/recommenders/recommender/test_deeprec_utils.py b/tests/smoke/recommenders/recommender/test_deeprec_utils.py
index ae9f924264..cd02871f2a 100644
--- a/tests/smoke/recommenders/recommender/test_deeprec_utils.py
+++ b/tests/smoke/recommenders/recommender/test_deeprec_utils.py
@@ -23,7 +23,6 @@
     pass  # disable error while collecting tests for non-gpu environments
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_DKN_iterator(deeprec_resource_path):
     data_path = os.path.join(deeprec_resource_path, "dkn")
@@ -82,7 +81,6 @@ def test_DKN_iterator(deeprec_resource_path):
         break
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_Sequential_Iterator(deeprec_resource_path, deeprec_config_path):
     data_path = os.path.join(deeprec_resource_path, "slirec")
diff --git a/tests/smoke/recommenders/recommender/test_newsrec_model.py b/tests/smoke/recommenders/recommender/test_newsrec_model.py
index 968df738e2..7cad05ba35 100644
--- a/tests/smoke/recommenders/recommender/test_newsrec_model.py
+++ b/tests/smoke/recommenders/recommender/test_newsrec_model.py
@@ -17,33 +17,32 @@
     pass  # disable error while collecting tests for non-gpu environments
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_model_nrms(mind_resource_path):
-    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
-    train_behaviors_file = os.path.join(mind_resource_path, "train", r"behaviors.tsv")
-    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
-    valid_behaviors_file = os.path.join(mind_resource_path, "valid", r"behaviors.tsv")
+    train_news_file = os.path.join(mind_resource_path, "train", "news.tsv")
+    train_behaviors_file = os.path.join(mind_resource_path, "train", "behaviors.tsv")
+    valid_news_file = os.path.join(mind_resource_path, "valid", "news.tsv")
+    valid_behaviors_file = os.path.join(mind_resource_path, "valid", "behaviors.tsv")
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"nrms.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "nrms.yaml")
 
     if not os.path.exists(train_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "train"),
             "MINDdemo_train.zip",
         )
     if not os.path.exists(valid_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "valid"),
             "MINDdemo_dev.zip",
         )
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
@@ -69,35 +68,34 @@ def test_model_nrms(mind_resource_path):
     )
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_model_naml(mind_resource_path):
-    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
-    train_behaviors_file = os.path.join(mind_resource_path, "train", r"behaviors.tsv")
-    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
-    valid_behaviors_file = os.path.join(mind_resource_path, "valid", r"behaviors.tsv")
+    train_news_file = os.path.join(mind_resource_path, "train", "news.tsv")
+    train_behaviors_file = os.path.join(mind_resource_path, "train", "behaviors.tsv")
+    valid_news_file = os.path.join(mind_resource_path, "valid", "news.tsv")
+    valid_behaviors_file = os.path.join(mind_resource_path, "valid", "behaviors.tsv")
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding_all.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict_all.pkl")
     vertDict_file = os.path.join(mind_resource_path, "utils", "vert_dict.pkl")
     subvertDict_file = os.path.join(mind_resource_path, "utils", "subvert_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"naml.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "naml.yaml")
 
     if not os.path.exists(train_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
            os.path.join(mind_resource_path, "train"),
             "MINDdemo_train.zip",
         )
     if not os.path.exists(valid_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "valid"),
             "MINDdemo_dev.zip",
         )
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
@@ -123,33 +121,32 @@ def test_model_naml(mind_resource_path):
     )
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_model_lstur(mind_resource_path):
-    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
-    train_behaviors_file = os.path.join(mind_resource_path, "train", r"behaviors.tsv")
-    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
-    valid_behaviors_file = os.path.join(mind_resource_path, "valid", r"behaviors.tsv")
+    train_news_file = os.path.join(mind_resource_path, "train", "news.tsv")
+    train_behaviors_file = os.path.join(mind_resource_path, "train", "behaviors.tsv")
+    valid_news_file = os.path.join(mind_resource_path, "valid", "news.tsv")
+    valid_behaviors_file = os.path.join(mind_resource_path, "valid", "behaviors.tsv")
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"lstur.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "lstur.yaml")
 
     if not os.path.exists(train_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "train"),
             "MINDdemo_train.zip",
         )
     if not os.path.exists(valid_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "valid"),
             "MINDdemo_dev.zip",
         )
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
@@ -175,33 +172,32 @@ def test_model_lstur(mind_resource_path):
     )
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_model_npa(mind_resource_path):
-    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
-    train_behaviors_file = os.path.join(mind_resource_path, "train", r"behaviors.tsv")
-    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
-    valid_behaviors_file = os.path.join(mind_resource_path, "valid", r"behaviors.tsv")
+    train_news_file = os.path.join(mind_resource_path, "train", "news.tsv")
+    train_behaviors_file = os.path.join(mind_resource_path, "train", "behaviors.tsv")
+    valid_news_file = os.path.join(mind_resource_path, "valid", "news.tsv")
+    valid_behaviors_file = os.path.join(mind_resource_path, "valid", "behaviors.tsv")
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"lstur.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "lstur.yaml")
 
     if not os.path.exists(train_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "train"),
             "MINDdemo_train.zip",
         )
     if not os.path.exists(valid_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "valid"),
             "MINDdemo_dev.zip",
         )
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
diff --git a/tests/smoke/recommenders/recommender/test_newsrec_utils.py b/tests/smoke/recommenders/recommender/test_newsrec_utils.py
index 6c65a6c9b5..08825e828c 100644
--- a/tests/smoke/recommenders/recommender/test_newsrec_utils.py
+++ b/tests/smoke/recommenders/recommender/test_newsrec_utils.py
@@ -1,6 +1,7 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
+
 import os
 import pytest
 
@@ -13,33 +14,32 @@
     pass  # disable error while collecting tests for non-gpu environments
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_news_iterator(mind_resource_path):
-    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
-    train_behaviors_file = os.path.join(mind_resource_path, "train", r"behaviors.tsv")
-    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
-    valid_behaviors_file = os.path.join(mind_resource_path, "valid", r"behaviors.tsv")
+    train_news_file = os.path.join(mind_resource_path, "train", "news.tsv")
+    train_behaviors_file = os.path.join(mind_resource_path, "train", "behaviors.tsv")
+    valid_news_file = os.path.join(mind_resource_path, "valid", "news.tsv")
+    valid_behaviors_file = os.path.join(mind_resource_path, "valid", "behaviors.tsv")
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"nrms.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "nrms.yaml")
 
     if not os.path.exists(train_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "train"),
             "MINDdemo_train.zip",
         )
     if not os.path.exists(valid_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "valid"),
             "MINDdemo_dev.zip",
         )
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
@@ -69,35 +69,34 @@ def test_news_iterator(mind_resource_path):
         break
 
 
-@pytest.mark.smoke
 @pytest.mark.gpu
 def test_naml_iterator(mind_resource_path):
-    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
-    train_behaviors_file = os.path.join(mind_resource_path, "train", r"behaviors.tsv")
-    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
-    valid_behaviors_file = os.path.join(mind_resource_path, "valid", r"behaviors.tsv")
+    train_news_file = os.path.join(mind_resource_path, "train", "news.tsv")
+    train_behaviors_file = os.path.join(mind_resource_path, "train", "behaviors.tsv")
+    valid_news_file = os.path.join(mind_resource_path, "valid", "news.tsv")
+    valid_behaviors_file = os.path.join(mind_resource_path, "valid", "behaviors.tsv")
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding_all.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict_all.pkl")
     vertDict_file = os.path.join(mind_resource_path, "utils", "vert_dict.pkl")
     subvertDict_file = os.path.join(mind_resource_path, "utils", "subvert_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"naml.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "naml.yaml")
 
     if not os.path.exists(train_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "train"),
             "MINDdemo_train.zip",
         )
     if not os.path.exists(valid_news_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "valid"),
             "MINDdemo_dev.zip",
         )
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
diff --git a/tests/unit/examples/test_notebooks_gpu.py b/tests/unit/examples/test_notebooks_gpu.py
index 251ef44eb3..45073daf5f 100644
--- a/tests/unit/examples/test_notebooks_gpu.py
+++ b/tests/unit/examples/test_notebooks_gpu.py
@@ -1,13 +1,10 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
+
 import os
 import pytest
-
-try:
-    import papermill as pm
-except ImportError:
-    pass  # disable error while collecting tests for non-notebook environments
+import papermill as pm
 
 from recommenders.utils.gpu_utils import get_number_gpus
diff --git a/tests/unit/examples/test_notebooks_pyspark.py b/tests/unit/examples/test_notebooks_pyspark.py
index 156aae7dc7..372fe6f238 100644
--- a/tests/unit/examples/test_notebooks_pyspark.py
+++ b/tests/unit/examples/test_notebooks_pyspark.py
@@ -1,13 +1,10 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
+
 import sys
 import pytest
-
-try:
-    import papermill as pm
-except ImportError:
-    pass  # disable error while collecting tests for non-notebook environments
+import papermill as pm
 
 from recommenders.utils.constants import (
     DEFAULT_RATING_COL,
diff --git a/tests/unit/examples/test_notebooks_python.py b/tests/unit/examples/test_notebooks_python.py
index 37b71d591d..ed3d494fdf 100644
--- a/tests/unit/examples/test_notebooks_python.py
+++ b/tests/unit/examples/test_notebooks_python.py
@@ -1,15 +1,11 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
+
 import sys
 import pytest
-
-try:
-    import papermill as pm
-    import scrapbook as sb
-except ImportError:
-    pass  # disable error while collecting tests for non-notebook environments
-
+import papermill as pm
+import scrapbook as sb
 
 TOL = 0.05
 ABS_TOL = 0.05
@@ -60,13 +56,6 @@ def test_surprise_deep_dive_runs(notebooks, output_notebook, kernel_name):
     )
 
 
-@pytest.mark.notebooks
-@pytest.mark.skip(reason="VW pip package has installation incompatibilities")
-def test_vw_deep_dive_runs(notebooks, output_notebook, kernel_name):
-    notebook_path = notebooks["vowpal_wabbit_deep_dive"]
-    pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name)
-
-
 @pytest.mark.notebooks
 def test_lightgbm(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["lightgbm_quickstart"]
@@ -86,24 +75,13 @@ def test_lightgbm(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-@pytest.mark.skip(reason="Wikidata API is unstable")
-def test_wikidata_runs(notebooks, output_notebook, kernel_name, tmp):
-    notebook_path = notebooks["wikidata_knowledge_graph"]
-    MOVIELENS_SAMPLE_SIZE = 5
-    pm.execute_notebook(
-        notebook_path,
-        output_notebook,
-        kernel_name=kernel_name,
-        parameters=dict(
-            MOVIELENS_DATA_SIZE="100k",
-            MOVIELENS_SAMPLE=True,
-            MOVIELENS_SAMPLE_SIZE=MOVIELENS_SAMPLE_SIZE,
-        ),
-    )
+def test_cornac_deep_dive_runs(notebooks, output_notebook, kernel_name):
+    notebook_path = notebooks["cornac_bpr_deep_dive"]
+    pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name)
 
 
-@pytest.mark.experimental
 @pytest.mark.notebooks
+@pytest.mark.experimental
 def test_rlrmc_quickstart_runs(notebooks, output_notebook, kernel_name):
     notebook_path = notebooks["rlrmc_quickstart"]
     pm.execute_notebook(
@@ -115,6 +93,8 @@ def test_rlrmc_quickstart_runs(notebooks, output_notebook, kernel_name):
 
 
 @pytest.mark.notebooks
-def test_cornac_deep_dive_runs(notebooks, output_notebook, kernel_name):
-    notebook_path = notebooks["cornac_bpr_deep_dive"]
+@pytest.mark.experimental
+@pytest.mark.skip(reason="VW pip package has installation incompatibilities")
+def test_vw_deep_dive_runs(notebooks, output_notebook, kernel_name):
+    notebook_path = notebooks["vowpal_wabbit_deep_dive"]
     pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name)
diff --git a/tests/unit/recommenders/datasets/test_dataset.py b/tests/unit/recommenders/datasets/test_download_utils.py
similarity index 94%
rename from tests/unit/recommenders/datasets/test_dataset.py
rename to tests/unit/recommenders/datasets/test_download_utils.py
index 942ac1d35f..c7d649796e 100644
--- a/tests/unit/recommenders/datasets/test_dataset.py
+++ b/tests/unit/recommenders/datasets/test_download_utils.py
@@ -4,14 +4,17 @@
 import os
 import pytest
 import requests
-from tempfile import TemporaryDirectory
 import logging
+from tempfile import TemporaryDirectory
+
 from recommenders.datasets.download_utils import maybe_download, download_path
 
 
 @pytest.fixture
 def files_fixtures():
-    file_url = "https://raw.githubusercontent.com/Microsoft/Recommenders/main/LICENSE"
+    file_url = (
+        "https://raw.githubusercontent.com/recommenders-team/recommenders/main/LICENSE"
+    )
     filepath = "license.txt"
     return file_url, filepath
 
@@ -21,7 +24,7 @@ def test_maybe_download(files_fixtures):
     if os.path.exists(filepath):
         os.remove(filepath)
 
-    downloaded_filepath = maybe_download(file_url, "license.txt", expected_bytes=1162)
+    downloaded_filepath = maybe_download(file_url, "license.txt", expected_bytes=1212)
     assert os.path.exists(downloaded_filepath)
     assert os.path.basename(downloaded_filepath) == "license.txt"
diff --git a/tests/unit/recommenders/datasets/test_movielens.py b/tests/unit/recommenders/datasets/test_movielens.py
deleted file mode 100644
index 81ba7e02ca..0000000000
--- a/tests/unit/recommenders/datasets/test_movielens.py
+++ /dev/null
@@ -1,152 +0,0 @@
-import os
-import pandas
-import pytest
-
-from recommenders.datasets.movielens import MockMovielensSchema
-from recommenders.datasets.movielens import load_pandas_df, load_spark_df
-from recommenders.datasets.movielens import (
-    DATA_FORMAT,
-    MOCK_DATA_FORMAT,
-    DEFAULT_HEADER,
-    DEFAULT_ITEM_COL,
-    DEFAULT_USER_COL
-)
-from recommenders.utils.constants import DEFAULT_GENRE_COL, DEFAULT_TITLE_COL
-
-from pandas.core.series import Series
-from pytest_mock import MockerFixture
-
-
-@pytest.mark.parametrize("size", [10, 100])
-def test_mock_movielens_schema__has_default_col_names(size):
-    df = MockMovielensSchema.example(size=size)
-    for col_name in DEFAULT_HEADER:
-        assert col_name in df.columns
-
-
-@pytest.mark.parametrize("keep_first_n_cols", [2, 3, 4])
-def test_mock_movielens_schema__get_df_remove_default_col__return_success(
-    keep_first_n_cols,
-):
-    df = MockMovielensSchema.get_df(size=3, keep_first_n_cols=keep_first_n_cols)
-    assert len(df) > 0
-    assert len(df.columns) == keep_first_n_cols
-
-
-@pytest.mark.parametrize("keep_first_n_cols", [-1, 0, 100])
-def test_mock_movielens_schema__get_df_invalid_param__return_failure(keep_first_n_cols):
-    with pytest.raises(ValueError, match=r"Invalid value.*"):
-        MockMovielensSchema.get_df(size=3, keep_first_n_cols=keep_first_n_cols)
-
-
-@pytest.mark.parametrize("keep_genre_col", [True, False])
-@pytest.mark.parametrize("keep_title_col", [True, False])
-@pytest.mark.parametrize("keep_first_n_cols", [None, 2])
-@pytest.mark.parametrize("seed", [-1])  # seed for pseudo-random # generation
-@pytest.mark.parametrize("size", [0, 3, 10])
-def test_mock_movielens_schema__get_df__return_success(
-    size, seed, keep_first_n_cols, keep_title_col, keep_genre_col
-):
-    df = MockMovielensSchema.get_df(
-        size=size,
-        seed=seed,
-        keep_first_n_cols=keep_first_n_cols,
-        keep_title_col=keep_title_col,
-        keep_genre_col=keep_genre_col,
-    )
-    assert type(df) == pandas.DataFrame
-    assert len(df) == size
-
-    if keep_title_col:
-        assert len(df[DEFAULT_TITLE_COL]) == size
-    if keep_genre_col:
-        assert len(df[DEFAULT_GENRE_COL]) == size
-
-
-@pytest.mark.spark
-@pytest.mark.parametrize("keep_genre_col", [True, False])
-@pytest.mark.parametrize("keep_title_col", [True, False])
-@pytest.mark.parametrize("seed", [101])  # seed for pseudo-random # generation
-@pytest.mark.parametrize("size", [0, 3, 10])
-def test_mock_movielens_schema__get_spark_df__return_success(
-    spark, size, seed, keep_title_col, keep_genre_col
-):
-    df = MockMovielensSchema.get_spark_df(
-        spark,
-        size=size,
-        seed=seed,
-        keep_title_col=keep_title_col,
-        keep_genre_col=keep_genre_col,
-    )
-    assert df.count() == size
-
-    if keep_title_col:
-        assert df.schema[DEFAULT_TITLE_COL]
-    if keep_genre_col:
-        assert df.schema[DEFAULT_GENRE_COL]
-
-
-@pytest.mark.spark
-def test_mock_movielens_schema__get_spark_df__store_tmp_file(spark, tmp_path):
-    data_size = 3
-    MockMovielensSchema.get_spark_df(spark, size=data_size, tmp_path=tmp_path)
-    assert os.path.exists(os.path.join(tmp_path, f"mock_movielens_{data_size}.csv"))
-
-
-@pytest.mark.spark
-def test_mock_movielens_schema__get_spark_df__data_serialization_default_param(
-    spark, mocker: MockerFixture
-):
-    data_size = 3
-    to_csv_spy = mocker.spy(pandas.DataFrame, "to_csv")
-
-    df = MockMovielensSchema.get_spark_df(spark, size=data_size)
-    # assertions
-    to_csv_spy.assert_called_once()
-    assert df.count() == data_size
-
-
-def test_mock_movielens_data__no_name_collision():
-    """
-    Making sure that no common names are shared between the mock and real dataset sizes
-    """
-    dataset_name = set(DATA_FORMAT.keys())
-    dataset_name_mock = set(MOCK_DATA_FORMAT.keys())
-    collision = dataset_name.intersection(dataset_name_mock)
-    assert not collision
-
-
-@pytest.mark.spark
-def test_load_spark_df_mock_100__with_default_param__succeed(spark):
-    df = load_spark_df(spark, "mock100")
-    assert df.count() == 100
-
-
-def test_load_pandas_df_mock_100__with_default_param__succeed():
-    df = load_pandas_df("mock100")
-    assert type(df) == pandas.DataFrame
-    assert len(df) == 100
-    assert not df[[DEFAULT_USER_COL, DEFAULT_ITEM_COL]].duplicated().any()
-
-
-@pytest.mark.spark
-def test_load_spark_df_mock_100__with_custom_param__succeed(spark):
-    df = load_spark_df(
-        spark, "mock100", title_col=DEFAULT_TITLE_COL, genres_col=DEFAULT_GENRE_COL
-    )
-    assert df.schema[DEFAULT_TITLE_COL]
-    assert df.schema[DEFAULT_GENRE_COL]
-    assert df.count() == 100
-    assert "|" in df.take(1)[0][DEFAULT_GENRE_COL]
-    assert df.take(1)[0][DEFAULT_TITLE_COL] == "foo"
-
-
-def test_load_pandas_df_mock_100__with_custom_param__succeed():
-    df = load_pandas_df(
-        "mock100", title_col=DEFAULT_TITLE_COL, genres_col=DEFAULT_GENRE_COL
-    )
-    assert type(df[DEFAULT_TITLE_COL]) == Series
-    assert type(df[DEFAULT_GENRE_COL]) == Series
-    assert len(df) == 100
-    assert "|" in df.loc[0, DEFAULT_GENRE_COL]
-    assert df.loc[0, DEFAULT_TITLE_COL] == "foo"
diff --git a/tests/unit/recommenders/datasets/test_pandas_df_utils.py b/tests/unit/recommenders/datasets/test_pandas_df_utils.py
index d414936989..7fe502d188 100644
--- a/tests/unit/recommenders/datasets/test_pandas_df_utils.py
+++ b/tests/unit/recommenders/datasets/test_pandas_df_utils.py
@@ -1,11 +1,11 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+import os
 import numpy as np
 import pandas as pd
 import pytest
 from tempfile import TemporaryDirectory
-import os
 
 from recommenders.datasets.pandas_df_utils import (
     filter_by,
diff --git a/tests/unit/recommenders/datasets/test_python_splitter.py b/tests/unit/recommenders/datasets/test_python_splitter.py
index 16f4fd3b21..39e622ea97 100644
--- a/tests/unit/recommenders/datasets/test_python_splitter.py
+++ b/tests/unit/recommenders/datasets/test_python_splitter.py
@@ -1,15 +1,21 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
+
+import pytest
 import pandas as pd
 import numpy as np
-import pytest
+from recommenders.utils.constants import (
+    DEFAULT_USER_COL,
+    DEFAULT_ITEM_COL,
+    DEFAULT_RATING_COL,
+    DEFAULT_TIMESTAMP_COL,
+)
 
 from recommenders.datasets.split_utils import (
     min_rating_filter_pandas,
     split_pandas_data_with_ratios,
 )
-
 from recommenders.datasets.python_splitters import (
     python_chrono_split,
     python_random_split,
@@ -17,13 +23,6 @@
     numpy_stratified_split,
 )
 
-from recommenders.utils.constants import (
-    DEFAULT_USER_COL,
-    DEFAULT_ITEM_COL,
-    DEFAULT_RATING_COL,
-    DEFAULT_TIMESTAMP_COL,
-)
-
 
 @pytest.fixture(scope="module")
 def test_specs():
diff --git a/tests/unit/recommenders/datasets/test_spark_splitter.py b/tests/unit/recommenders/datasets/test_spark_splitter.py
index 9051d84440..9f6d402544 100644
--- a/tests/unit/recommenders/datasets/test_spark_splitter.py
+++ b/tests/unit/recommenders/datasets/test_spark_splitter.py
@@ -1,9 +1,10 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
+import pytest
 import numpy as np
 import pandas as pd
-import pytest
 from recommenders.utils.constants import (
     DEFAULT_USER_COL,
     DEFAULT_ITEM_COL,
@@ -48,6 +49,25 @@ def spark_dataset(spark):
     )
 
 
+def _if_later(data1, data2):
+    """Helper function to test if records in data1 are earlier than those in data2.
+    Returns:
+        bool: True if all records in data1 are earlier than those in data2.
+    """
+
+    max_times = data1.groupBy(DEFAULT_USER_COL).agg(
+        F.max(DEFAULT_TIMESTAMP_COL).alias("max")
+    )
+    min_times = data2.groupBy(DEFAULT_USER_COL).agg(
+        F.min(DEFAULT_TIMESTAMP_COL).alias("min")
+    )
+    all_times = max_times.join(min_times, on=DEFAULT_USER_COL).select(
+        (F.col("max") <= F.col("min"))
+    )
+
+    return all([x[0] for x in all_times.collect()])
+
+
 @pytest.mark.spark
 def test_min_rating_filter(spark_dataset):
     dfs_user = min_rating_filter_spark(spark_dataset, min_rating=5, filter_by="user")
@@ -190,22 +210,3 @@ def test_timestamp_splitter(spark_dataset):
     max_split1 = splits[1].agg(F.max(DEFAULT_TIMESTAMP_COL)).first()[0]
     min_split2 = splits[2].agg(F.min(DEFAULT_TIMESTAMP_COL)).first()[0]
     assert max_split1 <= min_split2
-
-
-def _if_later(data1, data2):
-    """Helper function to test if records in data1 are earlier than that in data2.
-    Returns:
-        bool: True or False indicating if data1 is earlier than data2.
- """ - - max_times = data1.groupBy(DEFAULT_USER_COL).agg( - F.max(DEFAULT_TIMESTAMP_COL).alias("max") - ) - min_times = data2.groupBy(DEFAULT_USER_COL).agg( - F.min(DEFAULT_TIMESTAMP_COL).alias("min") - ) - all_times = max_times.join(min_times, on=DEFAULT_USER_COL).select( - (F.col("max") <= F.col("min")) - ) - - return all([x[0] for x in all_times.collect()]) diff --git a/tests/unit/recommenders/datasets/test_sparse.py b/tests/unit/recommenders/datasets/test_sparse.py index 47ae4c5d5f..e4df1b9be0 100644 --- a/tests/unit/recommenders/datasets/test_sparse.py +++ b/tests/unit/recommenders/datasets/test_sparse.py @@ -80,7 +80,7 @@ def random_date_generator(start_date, range_in_days): return results -def test_df_to_sparse(test_specs, python_dataset): +def test_df_to_sparse(python_dataset): # initialize the splitter header = { "col_user": DEFAULT_USER_COL, @@ -100,7 +100,7 @@ def test_df_to_sparse(test_specs, python_dataset): ) -def test_sparse_to_df(test_specs, python_dataset): +def test_sparse_to_df(python_dataset): # initialize the splitter header = { "col_user": DEFAULT_USER_COL, @@ -115,20 +115,20 @@ def test_sparse_to_df(test_specs, python_dataset): X, _, _ = am.gen_affinity_matrix() # use the inverse function to generate a pandas df from a sparse matrix ordered by userID - DF = am.map_back_sparse(X, kind="ratings") + df = am.map_back_sparse(X, kind="ratings") # tests: check that the two dataframes have the same elements in the same positions. assert ( - DF.userID.values.all() + df.userID.values.all() == python_dataset.sort_values(by=["userID"]).userID.values.all() ) assert ( - DF.itemID.values.all() + df.itemID.values.all() == python_dataset.sort_values(by=["userID"]).itemID.values.all() ) assert ( - DF.rating.values.all() + df.rating.values.all() == python_dataset.sort_values(by=["userID"]).rating.values.all() ) diff --git a/tests/unit/recommenders/evaluation/test_python_evaluation.py b/tests/unit/recommenders/evaluation/test_python_evaluation.py index e5837fc663..72ac95ead4 100644 --- a/tests/unit/recommenders/evaluation/test_python_evaluation.py +++ b/tests/unit/recommenders/evaluation/test_python_evaluation.py @@ -1,6 +1,7 @@ # Copyright (c) Recommenders contributors. # Licensed under the MIT License. + import numpy as np import pandas as pd import pytest @@ -40,6 +41,7 @@ distributional_coverage, ) + TOL = 0.0001 diff --git a/tests/unit/recommenders/evaluation/test_spark_evaluation.py b/tests/unit/recommenders/evaluation/test_spark_evaluation.py index 9cf35ee3ec..ada8c02e9e 100644 --- a/tests/unit/recommenders/evaluation/test_spark_evaluation.py +++ b/tests/unit/recommenders/evaluation/test_spark_evaluation.py @@ -1,9 +1,10 @@ # Copyright (c) Recommenders contributors. # Licensed under the MIT License. + +import pytest import numpy as np import pandas as pd -import pytest from pandas.util.testing import assert_frame_equal from recommenders.evaluation.python_evaluation import ( diff --git a/tests/unit/recommenders/models/test_cornac_utils.py b/tests/unit/recommenders/models/test_cornac_utils.py index dad7f3e446..51dde4a8fd 100644 --- a/tests/unit/recommenders/models/test_cornac_utils.py +++ b/tests/unit/recommenders/models/test_cornac_utils.py @@ -2,8 +2,8 @@ # Licensed under the MIT License. 
 
-import pandas as pd
 import pytest
+import pandas as pd
 import cornac
 
 from recommenders.utils.constants import (
@@ -14,6 +14,7 @@
 from recommenders.models.cornac.cornac_utils import predict, predict_ranking
 from recommenders.evaluation.python_evaluation import mae, rmse, ndcg_at_k, recall_at_k
 
+
 TOL = 0.001
diff --git a/tests/unit/recommenders/models/test_deeprec_model.py b/tests/unit/recommenders/models/test_deeprec_model.py
index 6bd672f417..8207e0bf16 100644
--- a/tests/unit/recommenders/models/test_deeprec_model.py
+++ b/tests/unit/recommenders/models/test_deeprec_model.py
@@ -1,6 +1,7 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import os
 import pytest
diff --git a/tests/unit/recommenders/models/test_deeprec_utils.py b/tests/unit/recommenders/models/test_deeprec_utils.py
index d54e470871..310e4ef3a3 100644
--- a/tests/unit/recommenders/models/test_deeprec_utils.py
+++ b/tests/unit/recommenders/models/test_deeprec_utils.py
@@ -1,6 +1,7 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import os
 import pytest
diff --git a/tests/unit/recommenders/models/test_geoimc.py b/tests/unit/recommenders/models/test_geoimc.py
index fd4dfc4da1..0eabc339d9 100644
--- a/tests/unit/recommenders/models/test_geoimc.py
+++ b/tests/unit/recommenders/models/test_geoimc.py
@@ -1,12 +1,13 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-try:
-    import collections
-    import pytest
-    import numpy as np
-    from scipy.sparse import csr_matrix
+import collections
+import pytest
+import numpy as np
+from scipy.sparse import csr_matrix
 
+try:
     from recommenders.models.geoimc.geoimc_data import DataPtr
     from recommenders.models.geoimc.geoimc_predict import Inferer
     from recommenders.models.geoimc.geoimc_algorithm import IMCProblem
@@ -19,6 +20,7 @@
 except:
     pass  # skip if pymanopt not installed
 
+
 _IMC_TEST_DATA = [
     (
         csr_matrix(np.array([[1, 5, 3], [7, 2, 1]])),
diff --git a/tests/unit/recommenders/models/test_lightfm_utils.py b/tests/unit/recommenders/models/test_lightfm_utils.py
index c0990f2055..2155fb6559 100644
--- a/tests/unit/recommenders/models/test_lightfm_utils.py
+++ b/tests/unit/recommenders/models/test_lightfm_utils.py
@@ -1,18 +1,21 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import pytest
 import itertools
 import numpy as np
 import pandas as pd
-from lightfm import LightFM, cross_validation
 from lightfm.data import Dataset
+from lightfm import LightFM, cross_validation
+
 from recommenders.models.lightfm.lightfm_utils import (
     track_model_metrics,
     similar_users,
     similar_items,
 )
 
+
 SEEDNO = 42
 TEST_PERCENTAGE = 0.25
 TEST_USER_ID = 2
@@ -133,6 +136,7 @@ def test_interactions(interactions):
     assert user_features.shape == (10, 17)
 
 
+@pytest.mark.skip(reason="Flaky test")
 def test_fitting(fitting):
     output, _ = fitting
     assert output.shape == (4, 4)
diff --git a/tests/unit/recommenders/models/test_ncf_dataset.py b/tests/unit/recommenders/models/test_ncf_dataset.py
index 4a148a13d3..b48554ae84 100644
--- a/tests/unit/recommenders/models/test_ncf_dataset.py
+++ b/tests/unit/recommenders/models/test_ncf_dataset.py
@@ -1,9 +1,11 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
+
 import os
 import pytest
 import pandas as pd
+
 from recommenders.utils.constants import (
     DEFAULT_USER_COL,
     DEFAULT_ITEM_COL,
diff --git a/tests/unit/recommenders/models/test_ncf_singlenode.py b/tests/unit/recommenders/models/test_ncf_singlenode.py
index 80531d2939..918bd368d9 100644
--- a/tests/unit/recommenders/models/test_ncf_singlenode.py
+++ b/tests/unit/recommenders/models/test_ncf_singlenode.py
@@ -1,11 +1,12 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import os
 import shutil
+import pytest
 import numpy as np
 import pandas as pd
-import pytest
 
 try:
     from recommenders.models.ncf.ncf_singlenode import NCF
diff --git a/tests/unit/recommenders/models/test_newsrec_model.py b/tests/unit/recommenders/models/test_newsrec_model.py
index bb2dab7b5b..ac3c751a44 100644
--- a/tests/unit/recommenders/models/test_newsrec_model.py
+++ b/tests/unit/recommenders/models/test_newsrec_model.py
@@ -22,11 +22,11 @@ def test_nrms_component_definition(mind_resource_path):
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"nrms.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "nrms.yaml")
 
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
@@ -54,11 +54,11 @@ def test_naml_component_definition(mind_resource_path):
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict_all.pkl")
     vertDict_file = os.path.join(mind_resource_path, "utils", "vert_dict.pkl")
     subvertDict_file = os.path.join(mind_resource_path, "utils", "subvert_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"naml.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "naml.yaml")
 
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
@@ -86,11 +86,11 @@ def test_npa_component_definition(mind_resource_path):
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"npa.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "npa.yaml")
 
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "utils"),
             "MINDdemo_utils.zip",
         )
@@ -116,11 +116,11 @@ def test_lstur_component_definition(mind_resource_path):
     wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding.npy")
     userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
     wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
-    yaml_file = os.path.join(mind_resource_path, "utils", r"lstur.yaml")
+    yaml_file = os.path.join(mind_resource_path, "utils", "lstur.yaml")
 
     if not os.path.exists(yaml_file):
         download_deeprec_resources(
-            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
+            "https://recodatasets.z20.web.core.windows.net/newsrec/",
             os.path.join(mind_resource_path, "mind", "utils"),
             "MINDdemo_utils.zip",
         )
diff --git a/tests/unit/recommenders/models/test_rbm.py b/tests/unit/recommenders/models/test_rbm.py
index d9430d8cfd..7f0174e472 100644
--- a/tests/unit/recommenders/models/test_rbm.py
+++ b/tests/unit/recommenders/models/test_rbm.py
@@ -1,8 +1,9 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-import numpy as np
+
 import pytest
+import numpy as np
 
 try:
     from recommenders.models.rbm.rbm import RBM
diff --git a/tests/unit/recommenders/models/test_sar_singlenode.py b/tests/unit/recommenders/models/test_sar_singlenode.py
index c5fe4ab1c6..19e79b2337 100644
--- a/tests/unit/recommenders/models/test_sar_singlenode.py
+++ b/tests/unit/recommenders/models/test_sar_singlenode.py
@@ -1,9 +1,10 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-import itertools
+
 import json
 import pytest
+import itertools
 import numpy as np
 import pandas as pd
 from pandas.testing import assert_frame_equal
@@ -250,7 +251,6 @@ def test_recommend_k_items(
 
 
 def test_get_item_based_topk(header, pandas_dummy):
-
     sar = SAR(**header)
     sar.fit(pandas_dummy)
@@ -299,7 +299,6 @@ def test_get_item_based_topk(header, pandas_dummy):
 
 
 def test_get_popularity_based_topk(header):
-
     train_df = pd.DataFrame(
         {
             header["col_user"]: [1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4],
diff --git a/tests/unit/recommenders/models/test_sasrec_model.py b/tests/unit/recommenders/models/test_sasrec_model.py
index 7f3cde7673..0e85d5f836 100644
--- a/tests/unit/recommenders/models/test_sasrec_model.py
+++ b/tests/unit/recommenders/models/test_sasrec_model.py
@@ -1,8 +1,9 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-import pytest
+
 import os
+import pytest
 from collections import defaultdict
 
 try:
@@ -15,7 +16,6 @@
         download_and_extract,
         _reviews_preprocessing,
     )
-
 except ImportError:
     pass  # skip if in cpu environment
 
@@ -180,7 +180,6 @@ def test_sampler():
 
 @pytest.mark.gpu
 def test_sasrec(model_parameters):
-
     params = model_parameters
 
     model = SASREC(
@@ -202,7 +201,6 @@ def test_sasrec(model_parameters):
 
 @pytest.mark.gpu
 def test_ssept(model_parameters):
-
     params = model_parameters
 
     model = SSEPT(
diff --git a/tests/unit/recommenders/models/test_surprise_utils.py b/tests/unit/recommenders/models/test_surprise_utils.py
index 2568fc4acc..c80e63818d 100644
--- a/tests/unit/recommenders/models/test_surprise_utils.py
+++ b/tests/unit/recommenders/models/test_surprise_utils.py
@@ -2,8 +2,8 @@
 # Licensed under the MIT License.
 
-try:
-    import pandas as pd
-    import pytest
-    import surprise
-
-    from recommenders.utils.constants import (
-        DEFAULT_USER_COL,
-        DEFAULT_ITEM_COL,
-        DEFAULT_RATING_COL,
-    )
-    from recommenders.models.surprise.surprise_utils import (
-        predict,
-        compute_ranking_predictions,
-    )
-except:
-    pass  # skip if surprise not installed
+
+import pytest
+import pandas as pd
+import surprise
+
+from recommenders.utils.constants import (
+    DEFAULT_USER_COL,
+    DEFAULT_ITEM_COL,
+    DEFAULT_RATING_COL,
+)
+from recommenders.models.surprise.surprise_utils import (
+    predict,
+    compute_ranking_predictions,
+)
+
 
 TOL = 0.001
diff --git a/tests/unit/recommenders/models/test_tfidf_utils.py b/tests/unit/recommenders/models/test_tfidf_utils.py
index 58450b138d..a52047950f 100644
--- a/tests/unit/recommenders/models/test_tfidf_utils.py
+++ b/tests/unit/recommenders/models/test_tfidf_utils.py
@@ -1,10 +1,13 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import pytest
-from recommenders.models.tfidf.tfidf_utils import TfidfRecommender
-import pandas as pd
 import scipy
+import pandas as pd
+
+from recommenders.models.tfidf.tfidf_utils import TfidfRecommender
+
 
 CLEAN_COL = "cleaned_text"
 K = 2
@@ -51,6 +54,20 @@ def model():
     return TfidfRecommender(id_col="cord_uid", tokenization_method="scibert")
 
 
+@pytest.fixture(scope="module")
+def df_clean(model, df):
+    return model.clean_dataframe(df, ["abstract", "full_text"], new_col_name=CLEAN_COL)
+
+
+@pytest.fixture(scope="module")
+def model_fit(model, df_clean):
+    model_fit = TfidfRecommender(id_col="cord_uid", tokenization_method="scibert")
+    tf, vectors_tokenized = model_fit.tokenize_text(df_clean)
+    model_fit.fit(tf, vectors_tokenized)
+
+    return model_fit
+
+
 def test_init(model):
     assert model.id_col == "cord_uid"
     assert model.tokenization_method == "scibert"
@@ -69,11 +86,6 @@ def test_clean_dataframe(model, df):
     assert False not in isalphanumeric
 
 
-@pytest.fixture(scope="module")
-def df_clean(model, df):
-    return model.clean_dataframe(df, ["abstract", "full_text"], new_col_name=CLEAN_COL)
-
-
 def test_tokenize_text(model, df_clean):
     _, vectors_tokenized = model.tokenize_text(df_clean)
     assert True not in list(df_clean[CLEAN_COL] == vectors_tokenized)
@@ -85,15 +97,6 @@ def test_fit(model, df_clean):
     assert type(model.tfidf_matrix) == scipy.sparse.csr.csr_matrix
 
 
-@pytest.fixture(scope="module")
-def model_fit(model, df_clean):
-    model_fit = TfidfRecommender(id_col="cord_uid", tokenization_method="scibert")
-    tf, vectors_tokenized = model_fit.tokenize_text(df_clean)
-    model_fit.fit(tf, vectors_tokenized)
-
-    return model_fit
-
-
 def test_get_tokens(model_fit):
     tokens = model_fit.get_tokens()
     assert type(tokens) == dict
diff --git a/tests/unit/recommenders/models/test_vowpal_wabbit.py b/tests/unit/recommenders/models/test_vowpal_wabbit.py
index b8c07c1867..0db5bdd704 100644
--- a/tests/unit/recommenders/models/test_vowpal_wabbit.py
+++ b/tests/unit/recommenders/models/test_vowpal_wabbit.py
@@ -1,6 +1,7 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
+
 import os
 import pytest
 from unittest import mock
@@ -23,7 +24,7 @@ def model():
     del model
 
 
-@pytest.mark.vw
+@pytest.mark.experimental
 def test_vw_init_del():
     model = VW()
     tempdir = model.tempdir.name
@@ -33,7 +34,7 @@ def test_vw_init_del():
     assert not os.path.exists(tempdir)
 
 
-@pytest.mark.vw
+@pytest.mark.experimental
 def test_to_vw_cmd():
     expected = [
         "vw",
@@ -60,7 +61,7 @@ def test_to_vw_cmd():
     assert VW.to_vw_cmd(params=params) == expected
 
 
-@pytest.mark.vw
+@pytest.mark.experimental
 def test_parse_train_cmd(model):
     expected = [
         "vw",
@@ -77,7 +78,7 @@ def test_parse_train_cmd(model):
     assert model.parse_train_params(params=params) == expected
 
 
-@pytest.mark.vw
+@pytest.mark.experimental
 def test_parse_test_cmd(model):
     expected = [
         "vw",
@@ -98,7 +99,7 @@ def test_parse_test_cmd(model):
     assert model.parse_test_params(params=params) == expected
 
 
-@pytest.mark.vw
+@pytest.mark.experimental
 def test_to_vw_file(model, df):
     expected = ["1 0|user 1 |item 8", "5 1|user 3 |item 7", "3 2|user 2 |item 7"]
     model.to_vw_file(df, train=True)
@@ -107,7 +108,7 @@ def test_to_vw_file(model, df):
     del model
 
 
-@pytest.mark.vw
+@pytest.mark.experimental
 def test_fit_and_predict(model, df):
     # generate fake predictions
     with open(model.prediction_file, "w") as f:
diff --git a/tests/unit/recommenders/models/test_wide_deep_utils.py b/tests/unit/recommenders/models/test_wide_deep_utils.py
index 85522f5e4f..2d4168e99f 100644
--- a/tests/unit/recommenders/models/test_wide_deep_utils.py
+++ b/tests/unit/recommenders/models/test_wide_deep_utils.py
@@ -1,9 +1,11 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import os
 import pytest
 import pandas as pd
+
 from recommenders.utils.constants import (
     DEFAULT_USER_COL,
     DEFAULT_ITEM_COL,
diff --git a/tests/unit/recommenders/tuning/test_ncf_utils.py b/tests/unit/recommenders/tuning/test_ncf_utils.py
index 1cfb334cd2..3f2039bc53 100644
--- a/tests/unit/recommenders/tuning/test_ncf_utils.py
+++ b/tests/unit/recommenders/tuning/test_ncf_utils.py
@@ -1,17 +1,21 @@
-import pytest
+# Copyright (c) Recommenders contributors.
+# Licensed under the MIT License.
+
+import pytest
 from unittest.mock import Mock
 
 from recommenders.tuning.nni.ncf_utils import compute_test_results
 from recommenders.datasets.movielens import MockMovielensSchema
 
+
 DATA_SIZE = 1  # setting to 1 so all IDs are unique
 
 
 @pytest.fixture(scope="module")
 def mock_model():
     def mock_predict(*args, is_list=False):
-        """ Mock model predict method"""
+        """Mock model predict method"""
         if is_list:
             return [0] * DATA_SIZE
         else:
diff --git a/tests/unit/recommenders/tuning/test_nni_utils.py b/tests/unit/recommenders/tuning/test_nni_utils.py
index ca7f2e4886..bdd5fb166c 100644
--- a/tests/unit/recommenders/tuning/test_nni_utils.py
+++ b/tests/unit/recommenders/tuning/test_nni_utils.py
@@ -1,12 +1,13 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-import json
+
 import os
 import sys
-from tempfile import TemporaryDirectory
-from unittest.mock import patch
+import json
 import pytest
+from unittest.mock import patch
+from tempfile import TemporaryDirectory
 
 from recommenders.tuning.nni.nni_utils import (
     get_experiment_status,
@@ -52,6 +53,7 @@ def mock_exception():
     raise Exception()
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_get_experiment_status():
     content = "some_status"
@@ -64,6 +66,7 @@ def test_get_experiment_status():
     assert nni_status["errors"] == [""]
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_experiment_status_done():
     content = "DONE"
@@ -74,6 +77,7 @@ def test_check_experiment_status_done():
     check_experiment_status(wait=0.1, max_retries=1)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_experiment_status_tuner_no_more_trial():
     content = "TUNER_NO_MORE_TRIAL"
@@ -84,6 +88,7 @@ def test_check_experiment_status_tuner_no_more_trial():
     check_experiment_status(wait=0.1, max_retries=1)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_experiment_status_running():
     content = "RUNNING"
@@ -97,6 +102,7 @@ def test_check_experiment_status_running():
     assert "check_experiment_status() timed out" == str(excinfo.value)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_experiment_status_no_more_trial():
     content = "NO_MORE_TRIAL"
@@ -110,6 +116,7 @@ def test_check_experiment_status_no_more_trial():
     assert "check_experiment_status() timed out" == str(excinfo.value)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_experiment_status_failed():
     content = "some_failed_status"
@@ -126,6 +133,7 @@ def test_check_experiment_status_failed():
     )
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_stopped_timeout():
     content = "some_status"
@@ -139,12 +147,14 @@ def test_check_stopped_timeout():
     assert "check_stopped() timed out" == str(excinfo.value)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_stopped():
     with patch("requests.get", side_effect=mock_exception):
         check_stopped(wait=0.1, max_retries=1)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_metrics_written():
     content = [{"finalMetricData": None}, {"finalMetricData": None}]
@@ -152,6 +162,7 @@ def test_check_metrics_written():
         check_metrics_written(wait=0.1, max_retries=1)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_check_metrics_written_timeout():
     content = [{"logPath": "/p"}, {"logPath": "/q"}]
@@ -163,6 +174,7 @@ def test_check_metrics_written_timeout():
     assert "check_metrics_written() timed out" == str(excinfo.value)
 
 
+@pytest.mark.experimental
 @pytest.mark.skipif(sys.platform == "win32", reason="nni not installable on windows")
 def test_get_trials():
     with TemporaryDirectory() as tmp_dir1, TemporaryDirectory() as tmp_dir2:
diff --git a/tests/unit/recommenders/tuning/test_sweep.py b/tests/unit/recommenders/tuning/test_sweep.py
index ae3f67d52a..640eb46e7d 100644
--- a/tests/unit/recommenders/tuning/test_sweep.py
+++ b/tests/unit/recommenders/tuning/test_sweep.py
@@ -9,9 +9,7 @@
 
 @pytest.fixture(scope="module")
 def parameter_dictionary():
-    params = {"param1": [1, 2, 3], "param2": [4, 5, 6], "param3": 1}
-
-    return params
+    return {"param1": [1, 2, 3], "param2": [4, 5, 6], "param3": 1}
 
 
 def test_param_sweep(parameter_dictionary):
diff --git a/tests/unit/recommenders/utils/test_general_utils.py b/tests/unit/recommenders/utils/test_general_utils.py
index 9bca7eac97..c8ca548744 100644
--- a/tests/unit/recommenders/utils/test_general_utils.py
+++ b/tests/unit/recommenders/utils/test_general_utils.py
@@ -1,6 +1,7 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 from recommenders.utils.general_utils import invert_dictionary, get_number_processors
diff --git a/tests/unit/recommenders/utils/test_gpu_utils.py b/tests/unit/recommenders/utils/test_gpu_utils.py
index 4bbbac54fc..8d361b61fa 100644
--- a/tests/unit/recommenders/utils/test_gpu_utils.py
+++ b/tests/unit/recommenders/utils/test_gpu_utils.py
@@ -1,24 +1,23 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import sys
 import pytest
 
 try:
     import tensorflow as tf
     import torch
+    from recommenders.utils.gpu_utils import (
+        get_cuda_version,
+        get_cudnn_version,
+        get_gpu_info,
+        get_number_gpus,
+    )
 except ImportError:
     pass  # skip this import if we are in cpu environment
 
-from recommenders.utils.gpu_utils import (
-    get_cuda_version,
-    get_cudnn_version,
-    get_gpu_info,
-    get_number_gpus,
-)
-
-
 @pytest.mark.gpu
 def test_get_gpu_info():
     assert len(get_gpu_info()) >= 1
@@ -38,12 +37,12 @@ def test_clear_memory_all_gpus():
 @pytest.mark.gpu
 @pytest.mark.skipif(sys.platform == "win32", reason="Not implemented on Windows")
 def test_get_cuda_version():
-    assert get_cuda_version() > "9.0.0"
+    assert int(get_cuda_version().split(".")[0]) > 9
 
 
 @pytest.mark.gpu
 def test_get_cudnn_version():
-    assert get_cudnn_version() > "7.0.0"
+    assert int(get_cudnn_version()[0]) > 7
 
 
 @pytest.mark.gpu
@@ -52,8 +51,9 @@ def test_cudnn_enabled():
 
 
 @pytest.mark.gpu
+@pytest.mark.skip(reason="This function in TF is flaky")
 def test_tensorflow_gpu():
-    assert tf.test.is_gpu_available()
+    assert len(tf.config.list_physical_devices("GPU")) > 0
 
 
 @pytest.mark.gpu
diff --git a/tests/unit/recommenders/utils/test_notebook_utils.py b/tests/unit/recommenders/utils/test_notebook_utils.py
index 4a0484d4fa..24223b703f 100644
--- a/tests/unit/recommenders/utils/test_notebook_utils.py
+++ b/tests/unit/recommenders/utils/test_notebook_utils.py
@@ -1,14 +1,12 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-from pathlib import Path
+
 import pytest
+import papermill as pm
+import scrapbook as sb
+from pathlib import Path
 
-try:
-    import papermill as pm
-    import scrapbook as sb
-except ImportError:
-    pass  # disable error while collecting tests for non-notebook environments
 
 from recommenders.utils.notebook_utils import is_jupyter, is_databricks
 
@@ -33,6 +31,8 @@ def test_is_jupyter(output_notebook, kernel_name):
     assert not result_is_databricks
 
 
-# @pytest.mark.notebooks
-# def test_is_databricks():
-#     TODO Currently, we cannot pytest modules on Databricks
+@pytest.mark.spark
+@pytest.mark.notebooks
+@pytest.mark.skip(reason="TODO: Implement this")
+def test_is_databricks():
+    pass
diff --git a/tests/unit/recommenders/utils/test_plot.py b/tests/unit/recommenders/utils/test_plot.py
index e2f2653307..471889ef2d 100644
--- a/tests/unit/recommenders/utils/test_plot.py
+++ b/tests/unit/recommenders/utils/test_plot.py
@@ -1,6 +1,7 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
+
 import matplotlib
 import matplotlib.pyplot as plt
 from recommenders.utils.plot import line_graph
@@ -19,6 +20,8 @@ def test_line_graph():
         y_name="Accuracy",
         legend_loc="best",
     )
+    assert plt.gca().get_xlabel() == "Epoch"
+    assert plt.gca().get_ylabel() == "Accuracy"
     plt.close()
 
     # Single graph as a subplot
diff --git a/tests/unit/recommenders/utils/test_python_utils.py b/tests/unit/recommenders/utils/test_python_utils.py
index e29eb81c76..0ee806a1e1 100644
--- a/tests/unit/recommenders/utils/test_python_utils.py
+++ b/tests/unit/recommenders/utils/test_python_utils.py
@@ -2,8 +2,8 @@
 # Licensed under the MIT License.
 
 
-import numpy as np
 import pytest
+import numpy as np
 
 from recommenders.utils.python_utils import (
     exponential_decay,
@@ -14,6 +14,7 @@
     rescale,
 )
 
+
 TOL = 0.0001
diff --git a/tests/unit/recommenders/utils/test_tf_utils.py b/tests/unit/recommenders/utils/test_tf_utils.py
index 07dae225c3..f064797f54 100644
--- a/tests/unit/recommenders/utils/test_tf_utils.py
+++ b/tests/unit/recommenders/utils/test_tf_utils.py
@@ -1,11 +1,13 @@
 # Copyright (c) Recommenders contributors.
 # Licensed under the MIT License.
 
-import itertools
+
 import os
+import pytest
 import numpy as np
 import pandas as pd
-import pytest
+import itertools
+
 from recommenders.utils.constants import (
     DEFAULT_USER_COL,
     DEFAULT_ITEM_COL,
diff --git a/tests/unit/recommenders/utils/test_timer.py b/tests/unit/recommenders/utils/test_timer.py
index 65bb3ae70a..35d174f9ab 100644
--- a/tests/unit/recommenders/utils/test_timer.py
+++ b/tests/unit/recommenders/utils/test_timer.py
@@ -2,8 +2,9 @@
 # Licensed under the MIT License.
 
 
-import pytest
 import time
+import pytest
+
 from recommenders.utils.timer import Timer
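For reference, the `_if_later` helper moved to the top of tests/unit/recommenders/datasets/test_spark_splitter.py checks, per user, that every record in the first split precedes every record in the second. Below is a minimal pandas sketch of the same check, illustrative only and not part of the patch; the `_if_later_pandas` name and the sample data are hypothetical.

import pandas as pd


def _if_later_pandas(data1, data2, col_user="userID", col_timestamp="timestamp"):
    """Return True if, per user, all records in data1 are earlier than those in data2."""
    # Latest timestamp per user in the first split...
    max_times = data1.groupby(col_user)[col_timestamp].max().rename("max")
    # ...must not exceed the earliest timestamp per user in the second split.
    min_times = data2.groupby(col_user)[col_timestamp].min().rename("min")
    # Align the two per-user series on the users they have in common.
    joined = pd.concat([max_times, min_times], axis=1, join="inner")
    return bool((joined["max"] <= joined["min"]).all())


# Hypothetical usage: user 1's train records all precede the test records.
train = pd.DataFrame({"userID": [1, 1], "timestamp": [100, 110]})
test = pd.DataFrame({"userID": [1], "timestamp": [120]})
assert _if_later_pandas(train, test)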