Skip to content

Commit

Permalink
Merge pull request #15 from cameronmartino/update_tensor_building
Browse files Browse the repository at this point in the history
Update tensor building
  • Loading branch information
cameronmartino committed Jun 13, 2019
2 parents 2b8edb5 + 2fc83d3 commit 770b7f0
Show file tree
Hide file tree
Showing 13 changed files with 646 additions and 631 deletions.
3 changes: 2 additions & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ branch = True
include = */gemelli/*

[report]
show_missing = True
exclude_lines =
pragma: no cover
raise NotImplementedError
if __name__ == .__main__.:
omit =
*/tests*
*/__init__.py

*/base.py
43 changes: 43 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,46 @@
(2019-06-12)

### Features

* Tensor Building and RCLR transformation in `preprocessing.rclr` and `preprocessing.build`
* N-mode tensor building and transformation
* Mean of counts for subject-conditional pairs with several samples

### Backward-incompatible changes [stable]

* In `preprocessing.build`:
* previous -> current
* build().sample_order -> build().subject_order
* build().temporal_order -> build().condition_orders
* as a list for N possible condition(s)
* build().tensor -> build().counts

### Backward-incompatible changes [experimental]

### Performance enhancements

* tensor building and transformation

### Bug fixes

* line 369-360 in `factorization.tenals` causes np.nan(s) in solution
* fixed by adding a pseudocount if any nan in solution

* line 178-179 in `factorization.TenAls`
* was previously checking whether all values were missing/zero, not whether there were no missing/zero values as intended

### Deprecated functionality [stable]

* In `preprocessing.rclr` and `preprocessing.build`:
* build().transform() -> `preprocessing.rclr` as standalone function

### Deprecated functionality [experimental]

### Miscellaneous

* line 175 in `factorization.TenAls` to send ValueError if input is not numpy array


(2019-05-17)

### Features
Expand Down
28 changes: 28 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,34 @@

# gemelli

## usage

```python
import numpy as np
import pandas as pd
from gemelli.factorization import TenAls
from gemelli.preprocessing import build, rclr

# construct and transform the tensor
tensor = Build()
tensor.construct(table, metadata, subjects,
[condition_1, condition_2, ..., condition_n])
tensor_rclr = rclr(tensor.counts)
# factorize
TF = TenAls().fit(tensor_rclr)
# write loading files
PC = ['PC'+str(i+1) for i in range(rank)]
# loadings as dataframe
sample_loading = pd.DataFrame(abs(TF.sample_loading),
tensor.subject_order)
feature_loading = pd.DataFrame(TF.feature_loading,
tensor.feature_order)
temporal_loading = pd.DataFrame(TF.conditional_loading,
tensor.condition_orders[0])
```

## resources

Named after gemelli by Alighiero Boetti and also the pasta.

[TenAls translated from Sewoong Oh](http://swoh.web.engr.illinois.edu/software/optspace/code.html)
1 change: 0 additions & 1 deletion ci/pip_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
coveralls
gneiss
25 changes: 15 additions & 10 deletions gemelli/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,30 @@ def fit(self):
should be implemented by sub-method"""

def transform(self):
""" TODO
""" return loadings
"""
return self.sample_loading, \
self.feature_loading, \
self.conditional_loading


class _BaseTransform(object):
class _BaseConstruct(object):

"""Base class for transformation/norm methods.
Warning: This class should not be used directly.
Use derived classes instead.
"""
@abstractmethod
def fit(self):
""" Placeholder for fit this
should be implemented by sub-method"""

def transform(self):
""" return transformed
"""
return self.TRCLR
def construct(self):
"""
conditional_loading : array-like or list of array-like
The conditional loading vectors
of shape (conditions, r) if there is 1 type
of condition, and a list of such matrices if
there are more than 1 type of condition
feature_loading : array-like
The feature loading vectors
of shape (features, r)
sample_loading : array-like
The sample loading vectors
of shape (samples, r) """
48 changes: 33 additions & 15 deletions gemelli/factorization.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,14 @@

class TenAls(_BaseImpute):

def __init__(self, rank=3, iteration=50, ninit=50, tol=1e-8):
def __init__(
self,
rank=3,
iteration=50,
ninit=50,
nitr_RTPM=50,
tol=1e-8,
pseudocount=1.0):
"""
This class performs a low-rank 3rd order
Expand Down Expand Up @@ -129,6 +136,8 @@ def __init__(self, rank=3, iteration=50, ninit=50, tol=1e-8):
self.iteration = iteration
self.ninit = ninit
self.tol = tol
self.pseudocount = pseudocount
self.nitr_RTPM = nitr_RTPM

def fit(self, Tensor):
"""
Expand Down Expand Up @@ -164,15 +173,10 @@ def _fit(self):
sparse_tensor = self.sparse_tensor

if not isinstance(sparse_tensor, np.ndarray):
sparse_tensor = np.array(sparse_tensor)
if not isinstance(sparse_tensor, np.ndarray):
raise ValueError('Input data is should be type numpy.ndarray')
if len(sparse_tensor.shape) < 3 or len(sparse_tensor.shape) > 3:
raise ValueError('Input data is should be 3rd-order tensor',
' with shape (samples, features, time)')

if (np.count_nonzero(sparse_tensor) == 0 and
np.count_nonzero(~np.isnan(sparse_tensor)) == 0):
raise ValueError('Input data is should be type numpy.ndarray')

if (np.count_nonzero(sparse_tensor) == np.product(sparse_tensor.shape) and
np.count_nonzero(~np.isnan(sparse_tensor)) == np.product(sparse_tensor.shape)):
raise ValueError('No missing data in the format np.nan or 0')

if np.count_nonzero(np.isinf(sparse_tensor)) != 0:
Expand All @@ -189,7 +193,9 @@ def _fit(self):
r=self.rank,
ninit=self.ninit,
nitr=self.iteration,
tol=self.tol)
nitr_RTPM=self.nitr_RTPM,
tol=self.tol,
pseudocount=self.pseudocount)

self.loadings = loadings
self.eigenvalues = np.diag(s_)
Expand All @@ -206,7 +212,15 @@ def _fit(self):
self.dist = dist


def tenals(TE, E, r=3, ninit=50, nitr=50, tol=1e-8):
def tenals(
TE,
E,
r=3,
ninit=50,
nitr=50,
nitr_RTPM=50,
tol=1e-8,
pseudocount=1.0):
"""
A low-rank 3rd order tensor factorization
for partially observed non-symmetric
Expand Down Expand Up @@ -341,6 +355,8 @@ def tenals(TE, E, r=3, ninit=50, nitr=50, tol=1e-8):
0, 0))

v_alt[dim] = V_alt[dim][:, q] + v_dim.flatten()
# add pseudocount to prevent division by zero causing nan.
den[dim][den[dim] == 0] = pseudocount
v_alt[dim] = v_alt[dim] / den[dim]

if dim == len(dims) - 1:
Expand Down Expand Up @@ -453,7 +469,8 @@ def RTPM(TE, r, ninit, nitr):
tS[init] = TenProjAlt(TE - CPcomp(S0, U),
[tUn[:, [init]] for tUn in tU])

idx = np.argmax(tS, axis=0)[0]
idx = np.argmin(tS, axis=0)[0]

for tUn, Un in zip(tU, U):
Un[:, i] = tUn[:, idx] / norm(tUn[:, idx])

Expand Down Expand Up @@ -563,7 +580,7 @@ def CPcomp(S, U):
"""

output_shape = tuple(u.shape[0] for u in U)
to_multiply = [S.T*u if i == 0 else u for i, u in enumerate(U)]
to_multiply = [S.T * u if i == 0 else u for i, u in enumerate(U)]
product = khatri_rao(to_multiply)
T = product.sum(1).reshape(output_shape)

Expand Down Expand Up @@ -598,7 +615,8 @@ def TenProjAlt(D, U_list):


def khatri_rao(matrices):
"""Returns the Khatri Rao product of a list of matrices
"""
Returns the Khatri Rao product of a list of matrices
Modified from TensorLy
Expand Down
Loading

0 comments on commit 770b7f0

Please sign in to comment.