Skip to content

Commit

Permalink
Merge pull request #40 from biocore/two-time-bug
Browse files Browse the repository at this point in the history
Fixes two subject / state error
  • Loading branch information
gwarmstrong authored Mar 11, 2021
2 parents c419b03 + 0db8e22 commit 0fbe6ac
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 24 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
v0.0.7 (2020-12-12)

### Bug fixes

* Fixes in `optspace.py` function `rank_estimate` and `factorization.py` function `tenals`
* Fixes the error raised for tensors with only two subjects or two states
* Adds a test in `test_method.py` for tensors with only two subjects or two states

v0.0.6 (2020-09-27)

### Features
Expand Down
2 changes: 1 addition & 1 deletion gemelli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------

__version__ = "0.0.6"
__version__ = "0.0.7"
3 changes: 3 additions & 0 deletions gemelli/factorization.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,9 @@ def tenals(tensor,
total_nonzeros = np.count_nonzero(mask[i, :, :].copy())
n_, m_ = obs_tmp.shape
eps_tmp = total_nonzeros / np.sqrt(n_ * m_)
if min(obs_tmp.shape) <= 2:
# two-subjects/time is already low-rank
continue
if rank_estimate(obs_tmp, eps_tmp) >= (min(obs_tmp.shape) - 1):
warnings.warn('A component of your data may be high-rank.',
RuntimeWarning)
Expand Down
5 changes: 3 additions & 2 deletions gemelli/optspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,6 @@ def grassmann_manifold_two(U, step_size, n_components):

def rank_estimate(obs, eps, k=20, lam=0.05,
min_rank=3, max_iter=5000):

"""
This function estimates the rank of a
sparse matrix (i.e. with missing values).
Expand Down Expand Up @@ -440,9 +439,11 @@ def rank_estimate(obs, eps, k=20, lam=0.05,
Conference on Communication, Control,
and Computing (Allerton) 1216–1222 (2009).
"""

# dim. of the data
n, m = obs.shape
# ensure rank worth estimating
if min(n, m) <= 2:
return min_rank
# get N-singular values
s = svds(obs, min(k, n, m) - 1, which='LM',
return_singular_vectors=False)[::-1]
Expand Down
55 changes: 34 additions & 21 deletions gemelli/q2/tests/test_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,41 +33,54 @@ def setUp(self):
self.in_table, self.in_meta = create_test_table()
self.subj = 'host_subject_id'
self.state = 'context'
self.biom_table = load_table(self.in_table)
self.meta_table = read_csv(self.in_meta, sep='\t', index_col=0)
# make metadata subjects and two-time
self.meta_table_two_time = self.meta_table.iloc[:22, :].copy()
self.meta_table_two_time[self.state] = [0] * 11 + [1] * 11
self.meta_table_two_time[self.subj] = [i for i in range(11)] * 2

def test_ctf(self):
    """Tests the basic validity of the actual ctf() method's outputs.

    ctf() is run once per metadata set (the full metadata and the
    two-time metadata) and the same checks are applied to each result.
    The table and both metadata frames are taken from the instance
    attributes rather than being reloaded here.
    """
    for meta_classes in [self.meta_table, self.meta_table_two_time]:
        res_tmp = ctf(table=self.biom_table,
                      sample_metadata=meta_classes,
                      individual_id_column=self.subj,
                      state_column=self.state)
        ord1, ord2, disttst, stst, ftst = res_tmp
        # Validate types of the ctf outputs
        self.assertIsInstance(ord1, OrdinationResults)
        self.assertIsInstance(ord2, OrdinationResults)
        self.assertIsInstance(disttst, DistanceMatrix)
        self.assertIsInstance(stst, pd.DataFrame)
        self.assertIsInstance(ftst, pd.DataFrame)
        # Ensure that no NaNs are in the OrdinationResults
        # NOTE that we have to use the DataFrame
        # .any() functions instead of
        # python's built-in any() functions --
        # see #29 for details on this
        self.assertFalse(np.isnan(ord1.features).any(axis=None))
        self.assertFalse(np.isnan(ord1.samples).any(axis=None))
        self.assertFalse(np.isnan(ord2.features).any(axis=None))
        self.assertFalse(np.isnan(ord2.samples).any(axis=None))


class Test_qiime2_ctf(unittest.TestCase):

def setUp(self):
    """Build the table and metadata fixtures used by this test class.

    Loads the example table and metadata from create_test_table(),
    wraps them as QIIME 2 objects, and derives a second metadata set
    from the first 22 rows with two states (0/1) and 11 subject ids.
    """
    self.subj = 'host_subject_id'
    self.state = 'context'
    self.in_table, self.in_meta = create_test_table()
    self.biom_table = load_table(self.in_table)
    self.q2table = Artifact.import_data("FeatureTable[Frequency]",
                                        self.biom_table)
    self.meta_table = read_csv(self.in_meta, sep='\t', index_col=0)
    # make two-time metadata: 11 subjects, each at state 0 and state 1
    self.meta_table_two_time = self.meta_table.iloc[:22, :].copy()
    self.meta_table_two_time[self.state] = [0] * 11 + [1] * 11
    self.meta_table_two_time[self.subj] = list(range(11)) * 2
    self.q2meta = Metadata(self.meta_table)
    # Wrap the two-time frame itself; wrapping self.meta_table here
    # would make this fixture a duplicate of self.q2meta.
    # NOTE(review): confirm the tests that consume q2meta_two_time
    # expect the 22-row, numeric subject/state metadata.
    self.q2meta_two_time = Metadata(self.meta_table_two_time)
    self.out_ = os_path_sep.join(self.in_table.split(os_path_sep)[:-1])

def test_qiime2_ctf(self):
Expand Down

0 comments on commit 0fbe6ac

Please sign in to comment.