Skip to content

Commit

Permalink
Merge pull request #1957 from microsoft/bug/deeprec_file
Browse files Browse the repository at this point in the history
Fix a bug in the DeepRec tests and add more tests
  • Loading branch information
wutaomsft committed Jul 17, 2023
2 parents af53046 + bc10081 commit 60e06dc
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 95 deletions.
19 changes: 11 additions & 8 deletions tests/ci/azureml_tests/test_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,13 +204,6 @@
],
"group_gpu_001": [ # Total group time: 492.62s
"tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works)
"tests/unit/recommenders/models/test_deeprec_model.py::test_xdeepfm_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_item2item_component_definition",
# "tests/unit/recommenders/models/test_deeprec_model.py::test_slirec_component_definition", # FIXME: Issue #1953
# "tests/unit/recommenders/models/test_deeprec_model.py::test_nextitnet_component_definition", # FIXME: Issue #1953
# "tests/unit/recommenders/models/test_deeprec_model.py::test_sum_component_definition", # FIXME: Issue #1953
"tests/unit/recommenders/models/test_deeprec_model.py::test_lightgcn_component_definition",
"tests/unit/recommenders/models/test_rbm.py::test_sampling_funct",
"tests/unit/recommenders/models/test_rbm.py::test_train_param_init",
"tests/unit/recommenders/models/test_rbm.py::test_save_load",
Expand All @@ -237,9 +230,19 @@
"tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_missing_column",
# "tests/unit/recommenders/models/test_sasrec_model.py::test_prepare_data", # FIXME: it takes too long to run
# "tests/unit/recommenders/models/test_sasrec_model.py::test_sampler", # FIXME: it takes too long to run
#"tests/unit/recommenders/models/test_sasrec_model.py::test_sasrec", # FIXME: it takes too long to run
# "tests/unit/recommenders/models/test_sasrec_model.py::test_sasrec", # FIXME: it takes too long to run
# "tests/unit/recommenders/models/test_sasrec_model.py::test_ssept", # FIXME: it takes too long to run
],
"group_gpu_002": [ # Total group time:
"tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works)
"tests/unit/recommenders/models/test_deeprec_model.py::test_xdeepfm_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_item2item_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_slirec_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_nextitnet_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_sum_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_lightgcn_component_definition",
],
"group_notebooks_gpu_001": [ # Total group time: 563.35s
"tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works)
"tests/unit/examples/test_notebooks_gpu.py::test_dkn_quickstart",
Expand Down
225 changes: 138 additions & 87 deletions tests/unit/recommenders/models/test_deeprec_model.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import pytest
import os
import pytest

from recommenders.datasets import movielens
from recommenders.datasets.amazon_reviews import (
download_and_extract,
Expand Down Expand Up @@ -36,36 +37,16 @@
pass # skip this import if we are in cpu environment


@pytest.mark.gpu
def test_xdeepfm_component_definition(deeprec_resource_path):
data_path = os.path.join(deeprec_resource_path, "xdeepfm")
yaml_file = os.path.join(data_path, "xDeepFM.yaml")

if not os.path.exists(yaml_file):
download_deeprec_resources(
"https://recodatasets.z20.web.core.windows.net/deeprec/",
data_path,
"xdeepfmresources.zip",
)

hparams = prepare_hparams(yaml_file)
model = XDeepFMModel(hparams, FFMTextIterator)

assert model.logit is not None
assert model.update is not None
assert model.iterator is not None


@pytest.mark.gpu
@pytest.fixture(scope="module")
def dkn_files(deeprec_resource_path):
data_path = os.path.join(deeprec_resource_path, "dkn")
yaml_file = os.path.join(data_path, "dkn.yaml")
news_feature_file = os.path.join(data_path, r"doc_feature.txt")
user_history_file = os.path.join(data_path, r"user_history.txt")
wordEmb_file = os.path.join(data_path, r"word_embeddings_100.npy")
entityEmb_file = os.path.join(data_path, r"TransE_entity2vec_100.npy")
contextEmb_file = os.path.join(data_path, r"TransE_context2vec_100.npy")
news_feature_file = os.path.join(data_path, "doc_feature.txt")
user_history_file = os.path.join(data_path, "user_history.txt")
wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")
contextEmb_file = os.path.join(data_path, "TransE_context2vec_100.npy")

download_deeprec_resources(
"https://recodatasets.z20.web.core.windows.net/deeprec/",
Expand All @@ -83,6 +64,83 @@ def dkn_files(deeprec_resource_path):
)


# NOTE: the original code applied @pytest.mark.gpu to this fixture. Marks
# applied to fixtures have no effect in pytest (and recent pytest versions
# flag them), so the mark is removed here; the tests that consume this
# fixture carry the gpu mark themselves.
@pytest.fixture(scope="module")
def sequential_files(deeprec_resource_path):
    """Download and preprocess a small Movies_and_TV sample for sequential models.

    Downloads the Amazon reviews/metadata files into the ``slirec`` data
    directory, runs ``data_preprocessing`` to build train/valid/test splits
    and the user/item/category vocabularies, and returns the paths the
    sequential-model tests need.

    Args:
        deeprec_resource_path: Base resource directory (provided by a
            sibling fixture defined elsewhere in this file).

    Returns:
        tuple: ``(data_path, user_vocab, item_vocab, cate_vocab)`` file paths.
    """
    data_path = os.path.join(deeprec_resource_path, "slirec")
    train_file = os.path.join(data_path, "train_data")
    valid_file = os.path.join(data_path, "valid_data")
    test_file = os.path.join(data_path, "test_data")
    user_vocab = os.path.join(data_path, "user_vocab.pkl")
    item_vocab = os.path.join(data_path, "item_vocab.pkl")
    cate_vocab = os.path.join(data_path, "category_vocab.pkl")

    reviews_name = "reviews_Movies_and_TV_5.json"
    meta_name = "meta_Movies_and_TV.json"
    reviews_file = os.path.join(data_path, reviews_name)
    meta_file = os.path.join(data_path, meta_name)

    # number of negative instances with a positive instance for validation
    valid_num_ngs = 4
    # number of negative instances with a positive instance for testing
    test_num_ngs = 9
    # sample a small item set for training and testing here for example
    sample_rate = 0.01

    input_files = [
        reviews_file,
        meta_file,
        train_file,
        valid_file,
        test_file,
        user_vocab,
        item_vocab,
        cate_vocab,
    ]
    download_and_extract(reviews_name, reviews_file)
    download_and_extract(meta_name, meta_file)
    data_preprocessing(
        *input_files,
        sample_rate=sample_rate,
        valid_num_ngs=valid_num_ngs,
        test_num_ngs=test_num_ngs
    )

    return (
        data_path,
        user_vocab,
        item_vocab,
        cate_vocab,
    )


@pytest.mark.gpu
def test_xdeepfm_component_definition(deeprec_resource_path):
    """Build an xDeepFM model and verify its graph components and hparams."""
    resource_dir = os.path.join(deeprec_resource_path, "xdeepfm")
    config_file = os.path.join(resource_dir, "xDeepFM.yaml")

    # Fetch the model resources only when they are not already cached locally.
    if not os.path.exists(config_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            resource_dir,
            "xdeepfmresources.zip",
        )

    model = XDeepFMModel(prepare_hparams(config_file), FFMTextIterator)

    # Core graph components must have been constructed.
    for component in (model.logit, model.update, model.iterator, model.hparams):
        assert component is not None

    # Hyperparameters loaded from the YAML config must match the expected values.
    expected = {
        "model_type": "xDeepFM",
        "epochs": 50,
        "batch_size": 128,
        "learning_rate": 0.0005,
        "loss": "log_loss",
        "optimizer": "adam",
    }
    for name, value in expected.items():
        assert getattr(model.hparams, name) == value


@pytest.mark.gpu
def test_dkn_component_definition(dkn_files):
# Load params from fixture
Expand All @@ -107,12 +165,18 @@ def test_dkn_component_definition(dkn_files):
epochs=1,
learning_rate=0.0001,
)
assert hparams is not None

model = DKN(hparams, DKNTextIterator)
assert model.logit is not None
assert model.update is not None
assert model.iterator is not None
assert model.hparams is not None
assert model.hparams.model_type == "dkn"
assert model.hparams.epochs == 1
assert model.hparams.batch_size == 100
assert model.hparams.learning_rate == 0.0001
assert model.hparams.loss == "log_loss"
assert model.hparams.optimizer == "adam"


@pytest.mark.gpu
Expand Down Expand Up @@ -143,65 +207,21 @@ def test_dkn_item2item_component_definition(dkn_files):
use_entity=True,
use_context=True,
)
assert hparams is not None

hparams.neg_num = 9
model_item2item = DKNItem2Item(hparams, DKNItem2itemTextIterator)
assert model_item2item.pred_logits is not None
assert model_item2item.update is not None
assert model_item2item.iterator is not None


@pytest.mark.gpu
@pytest.fixture(scope="module")
def sequential_files(deeprec_resource_path):
data_path = os.path.join(deeprec_resource_path, "slirec")
train_file = os.path.join(data_path, r"train_data")
valid_file = os.path.join(data_path, r"valid_data")
test_file = os.path.join(data_path, r"test_data")
user_vocab = os.path.join(data_path, r"user_vocab.pkl")
item_vocab = os.path.join(data_path, r"item_vocab.pkl")
cate_vocab = os.path.join(data_path, r"category_vocab.pkl")

reviews_name = "reviews_Movies_and_TV_5.json"
meta_name = "meta_Movies_and_TV.json"
reviews_file = os.path.join(data_path, reviews_name)
meta_file = os.path.join(data_path, meta_name)
valid_num_ngs = (
4 # number of negative instances with a positive instance for validation
)
test_num_ngs = (
9 # number of negative instances with a positive instance for testing
)
sample_rate = (
0.01 # sample a small item set for training and testing here for example
)

input_files = [
reviews_file,
meta_file,
train_file,
valid_file,
test_file,
user_vocab,
item_vocab,
cate_vocab,
]
download_and_extract(reviews_name, reviews_file)
download_and_extract(meta_name, meta_file)
data_preprocessing(
*input_files,
sample_rate=sample_rate,
valid_num_ngs=valid_num_ngs,
test_num_ngs=test_num_ngs
)

return (
data_path,
user_vocab,
item_vocab,
cate_vocab,
)
assert model_item2item.hparams is not None
assert model_item2item.hparams.model_type == "dkn"
assert model_item2item.hparams.epochs == 1
assert model_item2item.hparams.batch_size == 100
assert model_item2item.hparams.learning_rate == 0.0005
assert model_item2item.hparams.loss == "log_loss"
assert model_item2item.hparams.optimizer == "adam"
assert model_item2item.hparams.max_grad_norm == 0.5
assert model_item2item.hparams.his_size == 20


@pytest.mark.gpu
Expand All @@ -223,12 +243,22 @@ def test_slirec_component_definition(sequential_files, deeprec_config_path):
cate_vocab=cate_vocab,
need_sample=True,
)
assert hparams is not None

model = SLI_RECModel(hparams, SequentialIterator)
assert model.logit is not None
assert model.update is not None
assert model.iterator is not None
assert model.hparams is not None
assert model.hparams.model_type == "sli_rec"
assert model.hparams.epochs == 1
assert model.hparams.batch_size == 400
assert model.hparams.learning_rate == 0.001
assert model.hparams.loss == "softmax"
assert model.hparams.optimizer == "adam"
assert model.hparams.train_num_ngs == 4
assert model.hparams.embed_l2 == 0.0
assert model.hparams.layer_l2 == 0.0
assert model.hparams.need_sample is True


@pytest.mark.gpu
Expand All @@ -251,12 +281,22 @@ def test_nextitnet_component_definition(sequential_files, deeprec_config_path):
cate_vocab=cate_vocab,
need_sample=True,
)
assert hparams_nextitnet is not None

model_nextitnet = NextItNetModel(hparams_nextitnet, NextItNetIterator)
assert model_nextitnet.logit is not None
assert model_nextitnet.update is not None
assert model_nextitnet.iterator is not None
assert model_nextitnet.hparams is not None
assert model_nextitnet.hparams.model_type == "NextItNet"
assert model_nextitnet.hparams.epochs == 1
assert model_nextitnet.hparams.batch_size == 400
assert model_nextitnet.hparams.learning_rate == 0.001
assert model_nextitnet.hparams.loss == "softmax"
assert model_nextitnet.hparams.optimizer == "adam"
assert model_nextitnet.hparams.train_num_ngs == 4
assert model_nextitnet.hparams.embed_l2 == 0.0
assert model_nextitnet.hparams.layer_l2 == 0.0
assert model_nextitnet.hparams.need_sample is True


@pytest.mark.gpu
Expand All @@ -279,12 +319,22 @@ def test_sum_component_definition(sequential_files, deeprec_config_path):
cate_vocab=cate_vocab,
need_sample=True,
)
assert hparams_sum is not None

model_sum = SUMModel(hparams_sum, SequentialIterator)
assert model_sum.logit is not None
assert model_sum.update is not None
assert model_sum.iterator is not None
assert model_sum.hparams is not None
assert model_sum.hparams.model_type == "SUM"
assert model_sum.hparams.epochs == 1
assert model_sum.hparams.batch_size == 400
assert model_sum.hparams.learning_rate == 0.001
assert model_sum.hparams.loss == "softmax"
assert model_sum.hparams.optimizer == "adam"
assert model_sum.hparams.train_num_ngs == 4
assert model_sum.hparams.embed_l2 == 0.0
assert model_sum.hparams.layer_l2 == 0.0
assert model_sum.hparams.need_sample is True


@pytest.mark.gpu
Expand All @@ -296,16 +346,17 @@ def test_lightgcn_component_definition(deeprec_config_path):

data = ImplicitCF(train=train, test=test)

embed_size = 64
hparams = prepare_hparams(yaml_file, embed_size=embed_size)
hparams = prepare_hparams(yaml_file, embed_size=64)
model = LightGCN(hparams, data)

assert model.norm_adj is not None
assert model.ua_embeddings.shape == [data.n_users, embed_size]
assert model.ia_embeddings.shape == [data.n_items, embed_size]
assert model.ua_embeddings.shape == [943, 64]
assert model.ia_embeddings.shape == [1682, 64]
assert model.u_g_embeddings is not None
assert model.pos_i_g_embeddings is not None
assert model.neg_i_g_embeddings is not None
assert model.batch_ratings is not None
assert model.loss is not None
assert model.opt is not None
assert model.batch_size == 1024
assert model.epochs == 1000

0 comments on commit 60e06dc

Please sign in to comment.