codycarroll
diff --git a/‎GetCEScores.py‎
Lines changed: 74 additions & 0 deletions b/‎GetCEScores.py‎
Lines changed: 74 additions & 0 deletions
diff --git a/‎GetCovDense.py‎
Lines changed: 74 additions & 0 deletions b/‎GetCovDense.py‎
Lines changed: 74 additions & 0 deletions
diff --git a/‎GetEigenAnalysisResults.py‎
Lines changed: 97 additions & 0 deletions b/‎GetEigenAnalysisResults.py‎
Lines changed: 97 additions & 0 deletions
diff --git a/‎GetRawCov.py‎
Lines changed: 116 additions & 0 deletions b/‎GetRawCov.py‎
Lines changed: 116 additions & 0 deletions
diff --git a/‎PC_CovE.py‎ b/‎PC_CovE.py‎
diff --git a/‎__pycache__/GetRawCov.cpython-311.pyc‎
7.12 KB b/‎__pycache__/GetRawCov.cpython-311.pyc‎
7.12 KB
diff --git a/‎__pycache__/SetOptions.cpython-311.pyc‎
166 Bytes b/‎__pycache__/SetOptions.cpython-311.pyc‎
166 Bytes
diff --git a/‎__pycache__/Sparsify.cpython-311.pyc‎
5.53 KB b/‎__pycache__/Sparsify.cpython-311.pyc‎
5.53 KB
diff --git a/‎__pycache__/Wiener.cpython-311.pyc‎
1.77 KB b/‎__pycache__/Wiener.cpython-311.pyc‎
1.77 KB
diff --git a/‎__pycache__/isRegular.cpython-311.pyc‎
35 Bytes b/‎__pycache__/isRegular.cpython-311.pyc‎
35 Bytes
@@ -0,0 +1,74 @@
+import numpy as np
+from scipy.interpolate import interp1d
+from ConvertSupport import convert_support
+from typing import List, Dict, Any
+
+def get_ce_scores(y: List[np.ndarray], t: List[np.ndarray], optns: Dict[str, Any],
+                  mu: np.ndarray, obs_grid: np.ndarray, fitted_cov: np.ndarray,
+                  lambda_: np.ndarray, phi: np.ndarray, sigma2: float = 0.0) -> List[Dict[str, Any]]:
+    """Compute conditional-expectation (CE) scores for every subject.
+
+    Parameters:
+    - y: list of per-subject observation vectors.
+    - t: list of per-subject time-point vectors, paired with ``y``.
+    - optns: options dict; only ``'verbose'`` is read here (default False).
+    - mu: mean function evaluated on ``obs_grid``.
+    - obs_grid: grid on which ``mu``, ``phi`` and ``fitted_cov`` are given.
+    - fitted_cov: fitted covariance matrix; a scaled identity of size
+      ``phi.shape[0]`` is added to it.
+    - lambda_: eigenvalues, one per column of ``phi``.
+    - phi: eigenfunctions, one per column.
+    - sigma2: measurement-error variance added to the diagonal of
+      ``fitted_cov``. ``None`` is treated as 0.
+
+    Returns:
+    - list with one result dict per subject, as produced by
+      ``get_ind_ce_scores``.
+
+    Raises:
+    - ValueError: if the number of eigenvalues differs from the number of
+      eigenfunction columns.
+    """
+    if lambda_.shape[0] != phi.shape[1]:
+        raise ValueError("Number of eigenvalues does not match number of eigenfunctions.")
+
+    # Covariance estimators in this package report sigma2 as None when no
+    # measurement error is modelled; multiplying by None would raise TypeError.
+    if sigma2 is None:
+        sigma2 = 0.0
+
+    sigma_y = fitted_cov + np.eye(phi.shape[0]) * sigma2
+    mu_phi_sig = get_mu_phi_sig(t, obs_grid, mu, phi, sigma_y)
+
+    verbose = optns.get('verbose', False)
+    return [
+        get_ind_ce_scores(y_vec, mps['mu_vec'], lambda_, mps['phi_mat'], mps['sigma_yi'],
+                          verbose=verbose)
+        for y_vec, mps in zip(y, mu_phi_sig)
+    ]
+
+def get_mu_phi_sig(t: List[np.ndarray], obs_grid: np.ndarray, mu: np.ndarray,
+                   phi: np.ndarray, sigma_y: np.ndarray) -> List[Dict[str, Any]]:
+    """Evaluate the mean, eigenfunctions and covariance at each subject's time points.
+
+    ``mu`` and every column of ``phi`` are linearly interpolated from
+    ``obs_grid`` (extrapolating outside it); ``sigma_y`` is mapped onto the
+    subject's time points by ``convert_support``.
+
+    Returns one dict per entry of ``t`` with keys ``'mu_vec'``, ``'phi_mat'``
+    and ``'sigma_yi'``. A subject with no time points gets three empty arrays.
+    """
+    mean_fn = interp1d(obs_grid, mu, kind='linear', fill_value="extrapolate")
+    eigen_fns = []
+    for k in range(phi.shape[1]):
+        eigen_fns.append(interp1d(obs_grid, phi[:, k], kind='linear', fill_value="extrapolate"))
+
+    def _for_subject(tvec):
+        # Nothing observed for this subject: return empty placeholders.
+        if len(tvec) == 0:
+            return {'mu_vec': np.array([]), 'phi_mat': np.array([]), 'sigma_yi': np.array([])}
+        return {
+            'mu_vec': mean_fn(tvec),
+            'phi_mat': np.column_stack([fn(tvec) for fn in eigen_fns]),
+            # NOTE(review): the covariance matrix is passed through the keyword
+            # named `mu`; confirm against convert_support's signature that this
+            # is the intended parameter for a 2-D input.
+            'sigma_yi': convert_support(obs_grid, tvec, mu=sigma_y),
+        }
+
+    return [_for_subject(tvec) for tvec in t]
+
+
+
+def get_ind_ce_scores(y_vec: np.ndarray, mu_vec: np.ndarray, lam_vec: np.ndarray,
+                      phi_mat: np.ndarray, sigma_yi: np.ndarray,
+                      newy_ind: int = None, verbose: bool = False) -> Dict[str, Any]:
+    """Compute the conditional-expectation scores for a single subject.
+
+    With ``Lam = diag(lam_vec)`` the estimates are
+    ``xi_est = Lam @ phi_mat.T @ inv(sigma_yi) @ (y_vec - mu_vec)`` and
+    ``xi_var = Lam - Lam @ phi_mat.T @ inv(sigma_yi) @ phi_mat @ Lam``.
+    The linear systems are solved directly instead of forming the inverse.
+
+    Parameters:
+    - y_vec: observations of the subject.
+    - mu_vec: mean at the subject's time points.
+    - lam_vec: eigenvalues.
+    - phi_mat: eigenfunctions at the subject's time points, one column per
+      component.
+    - sigma_yi: covariance of the observations at the subject's time points.
+    - newy_ind: optional index of an observation to hold out. When given and
+      more than one observation exists, that observation is removed before
+      the scores are estimated and ``fitted_y`` is the prediction at the
+      held-out point.
+    - verbose: print a message when the subject has no observations.
+
+    Returns:
+    - dict with keys ``'xi_est'``, ``'xi_var'`` and ``'fitted_y'``. For an
+      empty ``y_vec`` the first two are NaN-filled and ``fitted_y`` is an
+      empty (0, 0) array. When ``newy_ind`` is given but only one observation
+      exists, ``fitted_y`` is NaN.
+    """
+    if len(y_vec) == 0:
+        if verbose:
+            print("Empty observation found, possibly due to truncation.")
+        return {
+            'xi_est': np.full((len(lam_vec),), np.nan),
+            'xi_var': np.full((len(lam_vec), len(lam_vec)), np.nan),
+            'fitted_y': np.full((0, 0), np.nan)
+        }
+
+    hold_out = newy_ind is not None and len(y_vec) != 1
+    new_phi = None
+    new_mu = None
+    if hold_out:
+        # Remember the held-out point, then drop it from every input.
+        new_phi = phi_mat[newy_ind, :].reshape(1, -1)
+        new_mu = mu_vec[newy_ind]
+        y_vec = np.delete(y_vec, newy_ind)
+        mu_vec = np.delete(mu_vec, newy_ind)
+        phi_mat = np.delete(phi_mat, newy_ind, axis=0)
+        sigma_yi = np.delete(np.delete(sigma_yi, newy_ind, axis=0), newy_ind, axis=1)
+
+    # The compiled helpers GetIndCEScoresCPP / GetIndCEScoresCPPnewInd are not
+    # defined or imported in this module, so the estimates are computed here
+    # with NumPy using the same formulas as the single-observation branch.
+    lam_mat = np.diag(lam_vec)
+    lam_phi = lam_mat @ phi_mat.T
+    xi_est = lam_phi @ np.linalg.solve(sigma_yi, y_vec - mu_vec)
+    xi_var = lam_mat - lam_phi @ np.linalg.solve(sigma_yi, lam_phi.T)
+
+    if newy_ind is None:
+        fitted_y = mu_vec + phi_mat @ xi_est
+    elif hold_out:
+        fitted_y = new_mu + new_phi @ xi_est
+    else:
+        # Only one observation and it is the held-out one: nothing to predict from.
+        fitted_y = np.nan
+
+    return {'xi_est': xi_est, 'xi_var': xi_var, 'fitted_y': fitted_y}
@@ -0,0 +1,74 @@
+import numpy as np
+import pandas as pd
+
+def get_cov_dense(ymat, mu, optns):
+    """
+    Calculate the sample covariance matrix for dense, regular functional data.
+
+    Parameters:
+    - ymat: np.ndarray, shape (n, p) - matrix of dense regular functional data;
+      missing values are encoded as NaN.
+    - mu: np.ndarray, shape (p,) - estimated cross-sectional mean vector.
+    - optns: dict - options containing:
+        * 'dataType' (str): Must be "Dense" or "DenseWithMV".
+        * 'userMu' (optional): If not None, ymat is centred with `mu` and the
+          covariance is built from the centred cross-products.
+        * 'error' (bool): If True, a noise variance sigma2 is reported.
+        * 'userSigma2' (optional, float): User-provided noise variance. When it
+          is given the diagonal of the covariance is left unchanged.
+
+    Returns:
+    - dict with keys:
+        * 'rawCov': None (since it's not computed in this function)
+        * 'smoothCov': np.ndarray - sample covariance matrix on observed grid.
+        * 'bwCov': None
+        * 'sigma2': float - noise variance if 'error' is True; None otherwise.
+        * 'outGrid': None
+
+    Raises:
+    - ValueError: if 'dataType' is not dense, or if some pair of grid points
+      has too few jointly observed curves to yield a finite covariance.
+    """
+    import math  # np.math was removed from NumPy; use the stdlib directly
+
+    if optns['dataType'] not in ['Dense', 'DenseWithMV']:
+        raise ValueError("Sample Covariance is only applicable for dataType='Dense' or 'DenseWithMV'.")
+
+    ymat = np.asarray(ymat, dtype=float)
+
+    if optns.get('userMu') is not None:
+        # Centre with the supplied mean, then average cross-products over the
+        # rows where both columns are observed (pairwise-complete).
+        ymat = ymat - np.asarray(mu, dtype=float)
+        observed = ~np.isnan(ymat)
+        filled = np.where(observed, ymat, 0.0)
+        n_pairs = observed.T.astype(float) @ observed.astype(float)
+        with np.errstate(divide='ignore', invalid='ignore'):
+            K = (filled.T @ filled) / (n_pairs - 1)
+    else:
+        # DataFrame.cov excludes NaN pairwise, unlike np.cov which propagates it.
+        K = pd.DataFrame(ymat).cov().to_numpy()
+
+    # Ensure symmetry of K (also yields a fresh, writable array)
+    K = 0.5 * (K + K.T)
+
+    # A non-finite entry means some pair of grid points had too few joint observations
+    if not np.isfinite(K).all():
+        raise ValueError("Data is too sparse to be considered DenseWithMV. Remove sparse observations or specify dataType='Sparse' for FPCA.")
+
+    sigma2 = None
+    if optns.get('error', False):
+        user_sigma2 = optns.get('userSigma2')
+        if user_sigma2 is not None:
+            sigma2 = user_sigma2
+        else:
+            # 2nd order difference method: squared differences along each
+            # curve, ignoring any difference that touches a missing value.
+            ord_diff = 2
+            sq_diffs = np.diff(ymat, n=ord_diff, axis=1) ** 2
+            sigma2 = np.nanmean(sq_diffs) / math.comb(2 * ord_diff, ord_diff)
+            np.fill_diagonal(K, np.diag(K) - sigma2)
+
+    # Create return dictionary with similar structure to SmoothCov object in R
+    return {
+        'rawCov': None,
+        'smoothCov': K,
+        'bwCov': None,
+        'sigma2': sigma2,
+        'outGrid': None
+    }
@@ -0,0 +1,97 @@
+import numpy as np
+import sys
+import os
+sys.path.append(os.path.abspath('src'))
+from trapzRcpp import trapz
+
+
+def get_eigen_analysis_results(smoothCov, regGrid, optns, muWork=None):
+    """
+    Perform eigenanalysis on the covariance matrix and select components
+    based on specified variance explained threshold.
+
+    Parameters:
+    - smoothCov: np.ndarray - symmetric covariance matrix
+    - regGrid: np.ndarray - regular grid for integration; its first spacing
+      is used to scale the eigenvalues
+    - optns: dict - options containing:
+        * 'maxK': int, maximum number of principal components
+        * 'FVEthreshold': float, fraction-of-variance-explained threshold
+        * 'FVEfittedCov': float or absent, threshold for the user fitted covariance
+        * 'verbose': bool, whether to print messages (default False)
+    - muWork: np.ndarray or None, vector used to fix the sign of each
+      eigenfunction (default: 1..number of grid points)
+
+    Returns:
+    - dict with keys:
+        * 'lambda': np.ndarray - eigenvalues selected
+        * 'phi': np.ndarray - selected eigenfunctions, one per column
+        * 'cumFVE': np.ndarray - cumulative FVE over the retained components
+        * 'kChoosen': int - number of components chosen
+        * 'fittedCov': np.ndarray - covariance rebuilt from all retained components
+        * 'fittedCovUser': np.ndarray - covariance rebuilt from the components
+          needed to reach 'FVEfittedCov' (all retained ones if not given)
+        * 'fittedCorrUser': np.ndarray or None - matching correlation matrix,
+          None when 'fittedCovUser' has a zero on its diagonal
+
+    Raises:
+    - ValueError: if no eigenvalue is non-negative.
+    """
+    maxK = optns['maxK']
+    FVEthreshold = optns['FVEthreshold']
+    FVEfittedCov = optns.get('FVEfittedCov', None)
+    verbose = optns.get('verbose', False)
+
+    gridSize = regGrid[1] - regGrid[0]
+
+    # Eigen decomposition (eigh returns eigenvalues in ascending order)
+    eig_values, eig_vectors = np.linalg.eigh(smoothCov)
+
+    # Keep non-negative eigenvalues only
+    positive_ind = eig_values >= 0
+    if not positive_ind.any():
+        raise ValueError("All eigenvalues are negative. The covariance estimate is incorrect.")
+
+    d = eig_values[positive_ind][::-1]  # Descending order
+    eigenV = eig_vectors[:, positive_ind][:, ::-1]  # Match ordering with eigenvalues
+
+    # Threshold based on maxK
+    if maxK < len(d):
+        if verbose:
+            print(f"At most {len(d)} number of PCs can be selected, thresholded by `maxK` = {maxK}.")
+
+        d = d[:maxK]
+        eigenV = eigenV[:, :maxK]
+
+    # Calculate cumulative FVE
+    FVE = np.cumsum(d) / np.sum(d)
+
+    def components_needed(threshold):
+        """Smallest number of leading components whose cumulative FVE reaches threshold."""
+        reached = np.flatnonzero(FVE >= threshold)
+        # Rounding can leave the last cumulative value just below a threshold
+        # such as 1.0; in that case use every retained component.
+        return int(reached[0]) + 1 if reached.size else len(FVE)
+
+    no_opt = components_needed(FVEthreshold)
+
+    # Normalization of eigenvectors
+    if muWork is None:
+        muWork = np.arange(eigenV.shape[0]) + 1  # Default mean work
+
+    def normalize_vector(x):
+        """Scale x to unit norm under trapezoidal integration; orient it along muWork."""
+        x = x / np.sqrt(trapz(regGrid, x**2))
+        return x if np.sum(x * muWork) >= 0 else -x
+
+    phi = np.apply_along_axis(normalize_vector, 0, eigenV)
+    lambda_ = gridSize * d
+
+    # Covariance matrix construction
+    no_fittedCov = components_needed(FVEfittedCov) if FVEfittedCov is not None else phi.shape[1]
+    fittedCovUser = phi[:, :no_fittedCov] @ np.diag(lambda_[:no_fittedCov]) @ phi[:, :no_fittedCov].T
+    fittedCov = phi @ np.diag(lambda_) @ phi.T
+
+    # Fitted correlation matrix (undefined when a variance is exactly zero)
+    if np.any(np.diag(fittedCovUser) == 0):
+        fittedCorrUser = None
+    else:
+        diag_sqrt_inv = np.diag(1 / np.sqrt(np.diag(fittedCovUser)))
+        fittedCorrUser = diag_sqrt_inv @ fittedCovUser @ diag_sqrt_inv
+        np.fill_diagonal(fittedCorrUser, 1)
+
+    return {
+        'lambda': lambda_[:no_opt],
+        'phi': phi[:, :no_opt],
+        'cumFVE': FVE,
+        'kChoosen': no_opt,
+        'fittedCov': fittedCov,
+        'fittedCovUser': fittedCovUser,
+        'fittedCorrUser': fittedCorrUser
+    }
@@ -0,0 +1,116 @@
+import numpy as np
+from scipy import linalg
+
+def uniqueM(x):
+    """ Label each element of x with the 1-based rank of its value among the sorted unique values """
+    levels = np.unique(x)
+    labels = np.zeros(len(x), dtype=int)
+    for rank, level in enumerate(levels, start=1):
+        labels[x == level] = rank
+    return labels
+
+def meshgrid(x, y):
+    """ Wrap np.meshgrid(x, y), returning the two coordinate matrices under keys 'X' and 'Y' """
+    grid_x, grid_y = np.meshgrid(x, y)
+    return dict(X=grid_x, Y=grid_y)
+
+def GetRawCov(y, t, obsGridnew, mu, dataType, error):
+    """
+    Obtain raw covariance.
+
+    Parameters:
+    - y: list of n arrays (repeated measurements for n subjects)
+    - t: list of n arrays (time points for n subjects)
+    - obsGridnew: array of m time points corresponding to mu
+    - mu: array of fitted mean values on obsGridnew
+    - dataType: one of 'Sparse', 'DenseWithMV', 'Dense', 'RegularWithMV'
+    - error: boolean flag (True if measurement error assumption is applied, False otherwise)
+
+    Returns:
+    A dictionary containing:
+    - tPairs: (N, 2) matrix of pairs of time points; diagonal pairs are
+      excluded when error is True
+    - cxxn: 1D array of raw covariance corresponding to tPairs
+    - indx: 1D array with the subject index of each row of tPairs
+      (None for dataType 'Dense')
+    - cyy: 1D array of raw covariance for all pairs of time points
+    - diag: 2-column matrix (time point, raw covariance) along the diagonal
+      if error is True, otherwise None
+    - count: always None
+    - error, dataType: the corresponding inputs, echoed back
+
+    Raises:
+    - ValueError: for dataType 'RegularWithMV' (not implemented) or any
+      unrecognised dataType.
+    """
+    ncohort = len(y)
+    obsGrid = np.unique(np.concatenate(t))  # sorted pooled time points
+    mu_interpolated = np.interp(obsGrid, obsGridnew, mu)  # mean on obsGrid
+    count = None
+    indx = None
+    diag = None
+
+    if dataType in ('Sparse', 'DenseWithMV'):
+        # Per subject, all ordered pairs of time points and of observations.
+        # Both grids are built from a vector paired with itself so that entry
+        # k of the flattened y-grids lines up with entry k of the t-grids.
+        t_grids = [np.meshgrid(ti, ti) for ti in t]
+        y_grids = [np.meshgrid(yi, yi) for yi in y]
+
+        xx1 = np.concatenate([g[0].ravel() for g in t_grids])
+        xx2 = np.concatenate([g[1].ravel() for g in t_grids])
+        yy1 = np.concatenate([g[0].ravel() for g in y_grids])
+        yy2 = np.concatenate([g[1].ravel() for g in y_grids])
+
+        # 0-based position of every time point within obsGrid; every value is
+        # present there because obsGrid pools all of t.
+        id1 = np.searchsorted(obsGrid, xx1)
+        id2 = np.searchsorted(obsGrid, xx2)
+        cyy = (yy1 - mu_interpolated[id1]) * (yy2 - mu_interpolated[id2])
+
+        # Index for subject i
+        indx = np.repeat(np.arange(ncohort), [len(yi) ** 2 for yi in y])
+
+        tPairs = np.column_stack([xx1, xx2])
+
+        if error:
+            # Diagonal pairs carry the noise variance; report them separately.
+            off_diag = xx1 != xx2
+            on_diag = ~off_diag
+            indx = indx[off_diag]
+            diag = np.column_stack([tPairs[on_diag, 0], cyy[on_diag]])
+            tPairs = tPairs[off_diag]
+            cxxn = cyy[off_diag]
+        else:
+            cxxn = cyy
+
+    elif dataType == 'Dense':
+        # Rows are subjects, columns are grid points.
+        yy = np.array([np.ravel(yi) for yi in y]) - np.asarray(mu)
+        t1 = t[0]
+
+        # (m, m) cross-product over subjects, flattened row by row.
+        cyy = (yy.T @ yy / ncohort).flatten()
+
+        grid_a, grid_b = np.meshgrid(t1, t1)
+        tPairs = np.column_stack([grid_a.flatten(), grid_b.flatten()])
+
+        if error:
+            off_diag = tPairs[:, 0] != tPairs[:, 1]
+            on_diag = ~off_diag
+            diag = np.column_stack([tPairs[on_diag, 0], cyy[on_diag]])
+            tPairs = tPairs[off_diag]
+            cxxn = cyy[off_diag]
+        else:
+            cxxn = cyy
+
+    elif dataType == 'RegularWithMV':
+        raise ValueError("This is not implemented yet. Contact Pantelis!")
+
+    else:
+        raise ValueError("Invalid 'dataType' argument type")
+
+    return {
+        'tPairs': tPairs,
+        'cxxn': cxxn,
+        'indx': indx,
+        'cyy': cyy,
+        'diag': diag,
+        'count': count,
+        'error': error,
+        'dataType': dataType
+    }