diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py
index e2e122835f..edfa7832e9 100644
--- a/tests/ci/azureml_tests/test_groups.py
+++ b/tests/ci/azureml_tests/test_groups.py
@@ -204,13 +204,6 @@
     ],
     "group_gpu_001": [  # Total group time: 492.62s
         "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",  # 0.76s (Always the first test to check the GPU works)
-        "tests/unit/recommenders/models/test_deeprec_model.py::test_xdeepfm_component_definition",
-        "tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_component_definition",
-        "tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_item2item_component_definition",
-        # "tests/unit/recommenders/models/test_deeprec_model.py::test_slirec_component_definition",  # FIXME: Issue #1953
-        # "tests/unit/recommenders/models/test_deeprec_model.py::test_nextitnet_component_definition",  # FIXME: Issue #1953
-        # "tests/unit/recommenders/models/test_deeprec_model.py::test_sum_component_definition",  # FIXME: Issue #1953
-        "tests/unit/recommenders/models/test_deeprec_model.py::test_lightgcn_component_definition",
         "tests/unit/recommenders/models/test_rbm.py::test_sampling_funct",
         "tests/unit/recommenders/models/test_rbm.py::test_train_param_init",
         "tests/unit/recommenders/models/test_rbm.py::test_save_load",
@@ -237,9 +230,19 @@
         "tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_missing_column",
         # "tests/unit/recommenders/models/test_sasrec_model.py::test_prepare_data",  # FIXME: it takes too long to run
         # "tests/unit/recommenders/models/test_sasrec_model.py::test_sampler",  # FIXME: it takes too long to run
-        #"tests/unit/recommenders/models/test_sasrec_model.py::test_sasrec",  # FIXME: it takes too long to run
+        # "tests/unit/recommenders/models/test_sasrec_model.py::test_sasrec",  # FIXME: it takes too long to run
         # "tests/unit/recommenders/models/test_sasrec_model.py::test_ssept",  # FIXME: it takes too long to run
     ],
+    "group_gpu_002": [  # Total group time:
+        "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",  # 0.76s (Always the first test to check the GPU works)
+        "tests/unit/recommenders/models/test_deeprec_model.py::test_xdeepfm_component_definition",
+        "tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_component_definition",
+        "tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_item2item_component_definition",
+        "tests/unit/recommenders/models/test_deeprec_model.py::test_slirec_component_definition",
+        "tests/unit/recommenders/models/test_deeprec_model.py::test_nextitnet_component_definition",
+        "tests/unit/recommenders/models/test_deeprec_model.py::test_sum_component_definition",
+        "tests/unit/recommenders/models/test_deeprec_model.py::test_lightgcn_component_definition",
+    ],
     "group_notebooks_gpu_001": [  # Total group time: 563.35s
         "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm",  # 0.76s (Always the first test to check the GPU works)
         "tests/unit/examples/test_notebooks_gpu.py::test_dkn_quickstart",
diff --git a/tests/unit/recommenders/models/test_deeprec_model.py b/tests/unit/recommenders/models/test_deeprec_model.py
index 127acd1f09..434d629854 100644
--- a/tests/unit/recommenders/models/test_deeprec_model.py
+++ b/tests/unit/recommenders/models/test_deeprec_model.py
@@ -1,8 +1,9 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
-import pytest
 import os
+import pytest
+
 from recommenders.datasets import movielens
 from recommenders.datasets.amazon_reviews import (
     download_and_extract,
@@ -36,36 +37,16 @@
     pass  # skip this import if we are in cpu environment
 
 
-@pytest.mark.gpu
-def test_xdeepfm_component_definition(deeprec_resource_path):
-    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
-    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
-
-    if not os.path.exists(yaml_file):
-        download_deeprec_resources(
-            "https://recodatasets.z20.web.core.windows.net/deeprec/",
-            data_path,
-            "xdeepfmresources.zip",
-        )
-
-    hparams = prepare_hparams(yaml_file)
-    model = XDeepFMModel(hparams, FFMTextIterator)
-
-    assert model.logit is not None
-    assert model.update is not None
-    assert model.iterator is not None
-
-
 @pytest.mark.gpu
 @pytest.fixture(scope="module")
 def dkn_files(deeprec_resource_path):
     data_path = os.path.join(deeprec_resource_path, "dkn")
     yaml_file = os.path.join(data_path, "dkn.yaml")
-    news_feature_file = os.path.join(data_path, r"doc_feature.txt")
-    user_history_file = os.path.join(data_path, r"user_history.txt")
-    wordEmb_file = os.path.join(data_path, r"word_embeddings_100.npy")
-    entityEmb_file = os.path.join(data_path, r"TransE_entity2vec_100.npy")
-    contextEmb_file = os.path.join(data_path, r"TransE_context2vec_100.npy")
+    news_feature_file = os.path.join(data_path, "doc_feature.txt")
+    user_history_file = os.path.join(data_path, "user_history.txt")
+    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
+    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")
+    contextEmb_file = os.path.join(data_path, "TransE_context2vec_100.npy")
 
     download_deeprec_resources(
         "https://recodatasets.z20.web.core.windows.net/deeprec/",
@@ -83,6 +64,83 @@ def dkn_files(deeprec_resource_path):
     )
 
 
+@pytest.mark.gpu
+@pytest.fixture(scope="module")
+def sequential_files(deeprec_resource_path):
+    data_path = os.path.join(deeprec_resource_path, "slirec")
+    train_file = os.path.join(data_path, "train_data")
+    valid_file = os.path.join(data_path, "valid_data")
+    test_file = os.path.join(data_path, "test_data")
+    user_vocab = os.path.join(data_path, "user_vocab.pkl")
+    item_vocab = os.path.join(data_path, "item_vocab.pkl")
+    cate_vocab = os.path.join(data_path, "category_vocab.pkl")
+
+    reviews_name = "reviews_Movies_and_TV_5.json"
+    meta_name = "meta_Movies_and_TV.json"
+    reviews_file = os.path.join(data_path, reviews_name)
+    meta_file = os.path.join(data_path, meta_name)
+
+    # number of negative instances with a positive instance for validation
+    valid_num_ngs = 4
+    # number of negative instances with a positive instance for testing
+    test_num_ngs = 9
+    # sample a small item set for training and testing here for example
+    sample_rate = 0.01
+
+    input_files = [
+        reviews_file,
+        meta_file,
+        train_file,
+        valid_file,
+        test_file,
+        user_vocab,
+        item_vocab,
+        cate_vocab,
+    ]
+    download_and_extract(reviews_name, reviews_file)
+    download_and_extract(meta_name, meta_file)
+    data_preprocessing(
+        *input_files,
+        sample_rate=sample_rate,
+        valid_num_ngs=valid_num_ngs,
+        test_num_ngs=test_num_ngs
+    )
+
+    return (
+        data_path,
+        user_vocab,
+        item_vocab,
+        cate_vocab,
+    )
+
+
+@pytest.mark.gpu
+def test_xdeepfm_component_definition(deeprec_resource_path):
+    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
+    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
+
+    if not os.path.exists(yaml_file):
+        download_deeprec_resources(
+            "https://recodatasets.z20.web.core.windows.net/deeprec/",
+            data_path,
+            "xdeepfmresources.zip",
+        )
+
+    hparams = prepare_hparams(yaml_file)
+    model = XDeepFMModel(hparams, FFMTextIterator)
+
+    assert model.logit is not None
+    assert model.update is not None
+    assert model.iterator is not None
+    assert model.hparams is not None
+    assert model.hparams.model_type == "xDeepFM"
+    assert model.hparams.epochs == 50
+    assert model.hparams.batch_size == 128
+    assert model.hparams.learning_rate == 0.0005
+    assert model.hparams.loss == "log_loss"
+    assert model.hparams.optimizer == "adam"
+
+
 @pytest.mark.gpu
 def test_dkn_component_definition(dkn_files):
     # Load params from fixture
@@ -107,12 +165,18 @@ def test_dkn_component_definition(dkn_files):
         epochs=1,
         learning_rate=0.0001,
     )
-    assert hparams is not None
 
     model = DKN(hparams, DKNTextIterator)
     assert model.logit is not None
     assert model.update is not None
     assert model.iterator is not None
+    assert model.hparams is not None
+    assert model.hparams.model_type == "dkn"
+    assert model.hparams.epochs == 1
+    assert model.hparams.batch_size == 100
+    assert model.hparams.learning_rate == 0.0001
+    assert model.hparams.loss == "log_loss"
+    assert model.hparams.optimizer == "adam"
 
 
 @pytest.mark.gpu
@@ -143,65 +207,21 @@ def test_dkn_item2item_component_definition(dkn_files):
         use_entity=True,
         use_context=True,
     )
-    assert hparams is not None
     hparams.neg_num = 9
 
     model_item2item = DKNItem2Item(hparams, DKNItem2itemTextIterator)
     assert model_item2item.pred_logits is not None
     assert model_item2item.update is not None
     assert model_item2item.iterator is not None
-
-
-@pytest.mark.gpu
-@pytest.fixture(scope="module")
-def sequential_files(deeprec_resource_path):
-    data_path = os.path.join(deeprec_resource_path, "slirec")
-    train_file = os.path.join(data_path, r"train_data")
-    valid_file = os.path.join(data_path, r"valid_data")
-    test_file = os.path.join(data_path, r"test_data")
-    user_vocab = os.path.join(data_path, r"user_vocab.pkl")
-    item_vocab = os.path.join(data_path, r"item_vocab.pkl")
-    cate_vocab = os.path.join(data_path, r"category_vocab.pkl")
-
-    reviews_name = "reviews_Movies_and_TV_5.json"
-    meta_name = "meta_Movies_and_TV.json"
-    reviews_file = os.path.join(data_path, reviews_name)
-    meta_file = os.path.join(data_path, meta_name)
-    valid_num_ngs = (
-        4  # number of negative instances with a positive instance for validation
-    )
-    test_num_ngs = (
-        9  # number of negative instances with a positive instance for testing
-    )
-    sample_rate = (
-        0.01  # sample a small item set for training and testing here for example
-    )
-
-    input_files = [
-        reviews_file,
-        meta_file,
-        train_file,
-        valid_file,
-        test_file,
-        user_vocab,
-        item_vocab,
-        cate_vocab,
-    ]
-    download_and_extract(reviews_name, reviews_file)
-    download_and_extract(meta_name, meta_file)
-    data_preprocessing(
-        *input_files,
-        sample_rate=sample_rate,
-        valid_num_ngs=valid_num_ngs,
-        test_num_ngs=test_num_ngs
-    )
-
-    return (
-        data_path,
-        user_vocab,
-        item_vocab,
-        cate_vocab,
-    )
+    assert model_item2item.hparams is not None
+    assert model_item2item.hparams.model_type == "dkn"
+    assert model_item2item.hparams.epochs == 1
+    assert model_item2item.hparams.batch_size == 100
+    assert model_item2item.hparams.learning_rate == 0.0005
+    assert model_item2item.hparams.loss == "log_loss"
+    assert model_item2item.hparams.optimizer == "adam"
+    assert model_item2item.hparams.max_grad_norm == 0.5
+    assert model_item2item.hparams.his_size == 20
 
 
 @pytest.mark.gpu
@@ -223,12 +243,22 @@ def test_slirec_component_definition(sequential_files, deeprec_config_path):
         cate_vocab=cate_vocab,
         need_sample=True,
     )
-    assert hparams is not None
 
     model = SLI_RECModel(hparams, SequentialIterator)
     assert model.logit is not None
     assert model.update is not None
     assert model.iterator is not None
+    assert model.hparams is not None
+    assert model.hparams.model_type == "sli_rec"
+    assert model.hparams.epochs == 1
+    assert model.hparams.batch_size == 400
+    assert model.hparams.learning_rate == 0.001
+    assert model.hparams.loss == "softmax"
+    assert model.hparams.optimizer == "adam"
+    assert model.hparams.train_num_ngs == 4
+    assert model.hparams.embed_l2 == 0.0
+    assert model.hparams.layer_l2 == 0.0
+    assert model.hparams.need_sample is True
 
 
 @pytest.mark.gpu
@@ -251,12 +281,22 @@ def test_nextitnet_component_definition(sequential_files, deeprec_config_path):
         cate_vocab=cate_vocab,
         need_sample=True,
     )
-    assert hparams_nextitnet is not None
 
     model_nextitnet = NextItNetModel(hparams_nextitnet, NextItNetIterator)
     assert model_nextitnet.logit is not None
     assert model_nextitnet.update is not None
     assert model_nextitnet.iterator is not None
+    assert model_nextitnet.hparams is not None
+    assert model_nextitnet.hparams.model_type == "NextItNet"
+    assert model_nextitnet.hparams.epochs == 1
+    assert model_nextitnet.hparams.batch_size == 400
+    assert model_nextitnet.hparams.learning_rate == 0.001
+    assert model_nextitnet.hparams.loss == "softmax"
+    assert model_nextitnet.hparams.optimizer == "adam"
+    assert model_nextitnet.hparams.train_num_ngs == 4
+    assert model_nextitnet.hparams.embed_l2 == 0.0
+    assert model_nextitnet.hparams.layer_l2 == 0.0
+    assert model_nextitnet.hparams.need_sample is True
 
 
 @pytest.mark.gpu
@@ -279,12 +319,22 @@ def test_sum_component_definition(sequential_files, deeprec_config_path):
         cate_vocab=cate_vocab,
         need_sample=True,
     )
-    assert hparams_sum is not None
 
     model_sum = SUMModel(hparams_sum, SequentialIterator)
     assert model_sum.logit is not None
     assert model_sum.update is not None
     assert model_sum.iterator is not None
+    assert model_sum.hparams is not None
+    assert model_sum.hparams.model_type == "SUM"
+    assert model_sum.hparams.epochs == 1
+    assert model_sum.hparams.batch_size == 400
+    assert model_sum.hparams.learning_rate == 0.001
+    assert model_sum.hparams.loss == "softmax"
+    assert model_sum.hparams.optimizer == "adam"
+    assert model_sum.hparams.train_num_ngs == 4
+    assert model_sum.hparams.embed_l2 == 0.0
+    assert model_sum.hparams.layer_l2 == 0.0
+    assert model_sum.hparams.need_sample is True
 
 
 @pytest.mark.gpu
@@ -296,16 +346,17 @@ def test_lightgcn_component_definition(deeprec_config_path):
 
     data = ImplicitCF(train=train, test=test)
 
-    embed_size = 64
-    hparams = prepare_hparams(yaml_file, embed_size=embed_size)
+    hparams = prepare_hparams(yaml_file, embed_size=64)
     model = LightGCN(hparams, data)
 
     assert model.norm_adj is not None
-    assert model.ua_embeddings.shape == [data.n_users, embed_size]
-    assert model.ia_embeddings.shape == [data.n_items, embed_size]
+    assert model.ua_embeddings.shape == [943, 64]
+    assert model.ia_embeddings.shape == [1682, 64]
    assert model.u_g_embeddings is not None
     assert model.pos_i_g_embeddings is not None
     assert model.neg_i_g_embeddings is not None
     assert model.batch_ratings is not None
     assert model.loss is not None
     assert model.opt is not None
+    assert model.batch_size == 1024
+    assert model.epochs == 1000