16 revisit comments and remove tqdm (#18)

* cleaning code
* cleaning code
* cleaning code
* scipy
* scipy
* regularization
* regularization
* test extensions
* todo: in iterated ewma
* remove thomas.py and introduce center and clip functionality
* fixing tests
* fixing tests
* documentation
* documentation
* black on experiments
* updated experiments
* Add files via upload
* demo notebook
* demo notebook fixed

---------

Co-authored-by: Kasper Johansson <[email protected]>
cvxgrp · May 29, 2023 · a1fb0eb · a1fb0eb
1 parent 8e17e3c
commit a1fb0eb
Show file tree

Hide file tree

Showing 29 changed files with 2,694 additions and 2,299 deletions.
diff --git a/book/docs/notebooks/demo.ipynb b/book/docs/notebooks/demo.ipynb
diff --git a/cvx/covariance/__init__.py b/cvx/covariance/__init__.py
@@ -0,0 +1 @@
+from cvx.covariance.main import covariance_estimator
diff --git a/cvx/covariance/covariance_combination.py b/cvx/covariance/covariance_combination.py
@@ -14,15 +14,18 @@ def _cholesky_precision(cov):
 
     param cov: dictionary of covariance matrices {time: Sigma}
     """
-    return {time: np.linalg.cholesky(np.linalg.inv(item.values)) for time, item in cov.items()}
+    return {
+        time: np.linalg.cholesky(np.linalg.inv(item.values))
+        for time, item in cov.items()
+    }
 
 
 def _B_t_col(Ls, nus, returns):
     """
     Computes L.T @ return for each L factor in Ls and corresponding
     return in returns
     """
-    return {time:  L.T @ returns.loc[time].values - nus[time] for time, L in Ls.items()}
+    return {time: L.T @ returns.loc[time].values - nus[time] for time, L in Ls.items()}
 
 
 def _diag_part(cholesky):
@@ -37,143 +40,172 @@ def _A(diags_interval, K):
 
     returns: nNxK matrix where each column is a vector of stacked diagonals
     """
-    return np.column_stack([np.vstack(diags_interval.iloc[:,i]).flatten() for i in range(K)])
+    return np.column_stack(
+        [np.vstack(diags_interval.iloc[:, i]).flatten() for i in range(K)]
+    )
 
 
 def _nu(Ls, means):
     """
     Computes L.T @ mu for each L factor in Ls and corresponding
     mu in means
     """
-    return {time:  L.T @ means[time] for time, L in Ls.items()}  
+    return {time: L.T @ means[time] for time, L in Ls.items()}
+
 
 # Declaring namedtuple()
-Result = namedtuple('Result', ['time', 'window', 'mean', 'covariance', 'weights'])
+Result = namedtuple("Result", ["time", "mean", "covariance", "weights"])
 
 
 class _CombinationProblem:
     def __init__(self, keys, n, window):
         self.keys = keys
-        self.K = len(keys)
-        self._weight = cvx.Variable(self.K, name="weights")
-        self.A_param = cvx.Parameter((n * window, self.K))
-        self.P_chol_param = cvx.Parameter((self.K, self.K))
+        K = len(keys)
+        self._weight = cvx.Variable(len(keys), name="weights")
+        self.A_param = cvx.Parameter((n * window, K))
+        self.P_chol_param = cvx.Parameter((K, K))
 
     @property
-    def constraints(self):
+    def _constraints(self):
         return [cvx.sum(self._weight) == 1, self._weight >= 0]
 
     @property
-    def objective(self):
-        return cvx.sum(cvx.log(self.A_param @ self._weight)) - 0.5 * cvx.sum_squares(self.P_chol_param.T @ self._weight)
+    def _objective(self):
+        return cvx.sum(cvx.log(self.A_param @ self._weight)) - 0.5 * cvx.sum_squares(
+            self.P_chol_param.T @ self._weight
+        )
 
     @property
-    def problem(self):
-        return cvx.Problem(cvx.Maximize(self.objective), self.constraints)
+    def _problem(self):
+        return cvx.Problem(cvx.Maximize(self._objective), self._constraints)
 
     def solve(self, **kwargs):
-        self.problem.solve(**kwargs)
-        return self.weights
+        return self._problem.solve(**kwargs)
+        # return self.weights
 
     @property
     def weights(self):
         return pd.Series(index=self.keys, data=self._weight.value)
 
 
 class CovarianceCombination:
-    def __init__(self, sigmas, returns, means=None, window=None):
+    def __init__(self, sigmas, returns, means=None):
         """
         Computes the covariance combination of a set of covariance matrices
 
         param sigmas: dictionary of covariance matrices {key: {time: sigma}}
         param returns: pandas DataFrame of returns
         param means: dictionary of means {key: {time: mu}}, optional
-        param window: number of periods to use in covariance estimation, optional
         """
         # Assert Sigmas and means have same keys if means not None
         n = returns.shape[1]
         if means is not None:
             for key, sigma in sigmas.items():
                 # Assert sigmas and means have same keys
-                assert sigma.keys() == means[key].keys(), "sigmas and means must have same keys"
+                assert (
+                    sigma.keys() == means[key].keys()
+                ), "sigmas and means must have same keys"
         else:
             # Set means to zero if not provided
-            means = {k: {time: np.zeros(n) for time in sigma.keys()} for k, sigma in sigmas.items()}
-
+            means = {
+                k: {time: np.zeros(n) for time in sigma.keys()}
+                for k, sigma in sigmas.items()
+            }
+
         self.__means = means
         self.__sigmas = sigmas
         self.__returns = returns
 
-        self.__Ls = pd.DataFrame({k: _cholesky_precision(sigma) for k, sigma in sigmas.items()})
+        # all those quantities don't depend on the window size
+        self.__Ls = pd.DataFrame(
+            {k: _cholesky_precision(sigma) for k, sigma in sigmas.items()}
+        )
         self.__Ls_shifted = self.__Ls.shift(1).dropna()
-        self.__nus = pd.DataFrame({key: _nu(Ls, means[key]) for key, Ls in self.__Ls.items()})
-        nus_shifted = self.__nus.shift(1).dropna()
+        self.__nus = pd.DataFrame(
+            {key: _nu(Ls, means[key]) for key, Ls in self.__Ls.items()}
+        )
+        self.__nus_shifted = self.__nus.shift(1).dropna()
+
+    @property
+    def K(self):
+        """
+        Returns the number of expert predictors
+        """
+        return len(self.__sigmas)
+
+    @property
+    def assets(self):
+        """
+        Returns the assets in the covariance combination problem
+        """
+        return self.__returns.columns
 
+    def solve(self, window=None, **kwargs):
+        """
+        The size of the window is crucial to specify the size of the parameters
+        for the cvxpy problem. Hence those computations are not in the __init__ method
+
+        Solves the covariance combination problem at a given time, i.e.,
+        finds the prediction for the covariance matrix at 'time+1'
+
+        param window: number of previous time steps to use in the covariance combination
+        """
         # If window is None, use all available data; cap window at length of data
         window = window or len(self.__Ls_shifted)
         window = min(window, len(self.__Ls_shifted))
-        self.window = window
 
         # Compute P matrix and its Cholesky factor
-        Lts_at_r = pd.DataFrame({key: _B_t_col(Ls, nus_shifted[key], returns) for key, Ls in self.__Ls_shifted.items()})
+        Lts_at_r = pd.DataFrame(
+            {
+                key: _B_t_col(Ls, self.__nus_shifted[key], self.__returns)
+                for key, Ls in self.__Ls_shifted.items()
+            }
+        )
 
         Bs = {time: np.column_stack(Lts_at_r.loc[time]) for time in Lts_at_r.index}
         prod_Bs = pd.Series({time: B.T @ B for time, B in Bs.items()})
         times = prod_Bs.index
-        P = {times[i]: sum(prod_Bs.loc[times[i - window + 1]:times[i]]) for i in range(window - 1, len(times))}
+        P = {
+            times[i]: sum(prod_Bs.loc[times[i - window + 1] : times[i]])
+            for i in range(window - 1, len(times))
+        }
 
-        self.__P_chol = {time: np.linalg.cholesky(matrix) for time, matrix in P.items()}
+        P_chol = {time: np.linalg.cholesky(matrix) for time, matrix in P.items()}
 
         # Compute A matrix
         Ls_diag = pd.DataFrame({k: _diag_part(L) for k, L in self.__Ls_shifted.items()})
 
-        self.__A = {times[i]: _A(Ls_diag.truncate(before=times[i - window + 1], after=times[i]), self.K) for i in
-             range(window - 1, len(times))}
+        A = {
+            times[i]: _A(
+                Ls_diag.truncate(before=times[i - window + 1], after=times[i]), self.K
+            )
+            for i in range(window - 1, len(times))
+        }
 
-        self.__problem = _CombinationProblem(keys=self.__sigmas.keys(), n=len(self.assets), window=window)
+        problem = _CombinationProblem(
+            keys=self.__sigmas.keys(), n=len(self.assets), window=window
+        )
 
-    @property
-    def K(self):
-        """
-        Returns the number of expert predictors
-        """
-        return len(self.__sigmas)
+        for time in A.keys():
+            problem.A_param.value = A[time]
+            problem.P_chol_param.value = P_chol[time]
 
-    @property
-    def index(self):
-        """
-        Returns the index of the covariance combination problem, e.g. the timestamps
-        """
-        return self.__A.keys()
+            yield self._solve(time=time, problem=problem, **kwargs)
 
-    @property
-    def assets(self):
+    def _solve(self, time, problem, **kwargs):
         """
-        Returns the assets in the covariance combination problem
-        """
-        return self.__returns.columns
-
-    def solve(self, time, **kwargs):
-        """
-        Solves the covariance combination problem at a given time, i.e.,
-        finds the prediction for the covariance matrix at 'time+1'
+        Solves the covariance combination problem at a given time t
         """
-        # Update parameters and solve problem
-        self.__problem.A_param.value = self.__A[time]
-        self.__problem.P_chol_param.value = self.__P_chol[time]
-        weights = self.__problem.solve(**kwargs)
+        # solve problem
+        problem.solve(**kwargs)
+        weights = problem.weights
 
         # Get non-shifted L
-        L = sum(self.__Ls.loc[time] * weights.values)    # prediction for time+1
+        L = sum(self.__Ls.loc[time] * weights.values)  # prediction for time+1
         nu = sum(self.__nus.loc[time] * weights.values)  # prediction for time+1
 
         mean = pd.Series(index=self.assets, data=np.linalg.inv(L.T) @ nu)
-        sigma = pd.DataFrame(index=self.assets, columns=self.assets, data=np.linalg.inv(L @ L.T))
-        return Result(time=time, window=self.window, mean=mean, covariance=sigma, weights=weights)
-
-    def solve_window(self, **kwargs):
-        """
-        Solves the covariance combination problem for all time steps
-        """
-        for time in self.index:
-            yield self.solve(time, **kwargs)
+        sigma = pd.DataFrame(
+            index=self.assets, columns=self.assets, data=np.linalg.inv(L @ L.T)
+        )
+        return Result(time=time, mean=mean, covariance=sigma, weights=weights)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from cvx.covariance.main import covariance_estimator