Skip to content

Commit

Permalink
16 revisit comments and remove tqdm (#18)
Browse files (browse the repository at this point in the history)
* cleaning code

* cleaning code

* cleaning code

* scipy

* scipy

* regularization

* regularization

* test extensions

* todo: in iterated ewma

* remove thomas.py and introduce center and clip functionality

* fixing tests

* fixing tests

* documentation

* documentation

* black on experiments

* updated experiments

* Add files via upload

* demo notebook

* demo notebook fixed

---------

Co-authored-by: Kasper Johansson <[email protected]>
  • Loading branch information
tschm and kasperjo authored May 29, 2023
1 parent 8e17e3c commit a1fb0eb
Show file tree
Hide file tree
Showing 29 changed files with 2,694 additions and 2,299 deletions.
1,846 changes: 901 additions & 945 deletions book/docs/notebooks/demo.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions cvx/covariance/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from cvx.covariance.main import covariance_estimator
166 changes: 99 additions & 67 deletions cvx/covariance/covariance_combination.py
Original file line number Diff line number Diff line change
@@ -14,15 +14,18 @@ def _cholesky_precision(cov):
param cov: dictionary of covariance matrices {time: Sigma}
"""
return {time: np.linalg.cholesky(np.linalg.inv(item.values)) for time, item in cov.items()}
return {
time: np.linalg.cholesky(np.linalg.inv(item.values))
for time, item in cov.items()
}


def _B_t_col(Ls, nus, returns):
"""
Computes L.T @ return - nu for each L factor in Ls, using the corresponding
return in returns and the corresponding nu in nus
"""
return {time: L.T @ returns.loc[time].values - nus[time] for time, L in Ls.items()}
return {time: L.T @ returns.loc[time].values - nus[time] for time, L in Ls.items()}


def _diag_part(cholesky):
@@ -37,143 +40,172 @@ def _A(diags_interval, K):
returns: nNxK matrix where each column is a vector of stacked diagonals
"""
return np.column_stack([np.vstack(diags_interval.iloc[:,i]).flatten() for i in range(K)])
return np.column_stack(
[np.vstack(diags_interval.iloc[:, i]).flatten() for i in range(K)]
)


def _nu(Ls, means):
"""
Computes L.T @ mu for each L factor in Ls and corresponding
mu in means
"""
return {time: L.T @ means[time] for time, L in Ls.items()}
return {time: L.T @ means[time] for time, L in Ls.items()}


# Declaring namedtuple()
Result = namedtuple('Result', ['time', 'window', 'mean', 'covariance', 'weights'])
Result = namedtuple("Result", ["time", "mean", "covariance", "weights"])


class _CombinationProblem:
def __init__(self, keys, n, window):
self.keys = keys
self.K = len(keys)
self._weight = cvx.Variable(self.K, name="weights")
self.A_param = cvx.Parameter((n * window, self.K))
self.P_chol_param = cvx.Parameter((self.K, self.K))
K = len(keys)
self._weight = cvx.Variable(len(keys), name="weights")
self.A_param = cvx.Parameter((n * window, K))
self.P_chol_param = cvx.Parameter((K, K))

@property
def constraints(self):
def _constraints(self):
return [cvx.sum(self._weight) == 1, self._weight >= 0]

@property
def objective(self):
return cvx.sum(cvx.log(self.A_param @ self._weight)) - 0.5 * cvx.sum_squares(self.P_chol_param.T @ self._weight)
def _objective(self):
return cvx.sum(cvx.log(self.A_param @ self._weight)) - 0.5 * cvx.sum_squares(
self.P_chol_param.T @ self._weight
)

@property
def problem(self):
return cvx.Problem(cvx.Maximize(self.objective), self.constraints)
def _problem(self):
return cvx.Problem(cvx.Maximize(self._objective), self._constraints)

def solve(self, **kwargs):
self.problem.solve(**kwargs)
return self.weights
return self._problem.solve(**kwargs)
# return self.weights

@property
def weights(self):
return pd.Series(index=self.keys, data=self._weight.value)


class CovarianceCombination:
def __init__(self, sigmas, returns, means=None, window=None):
def __init__(self, sigmas, returns, means=None):
"""
Computes the covariance combination of a set of covariance matrices
param sigmas: dictionary of covariance matrices {key: {time: sigma}}
param returns: pandas DataFrame of returns
param means: dictionary of means {key: {time: mu}}, optional
param window: number of periods to use in covariance estimation, optional
"""
# Assert Sigmas and means have same keys if means not None
n = returns.shape[1]
if means is not None:
for key, sigma in sigmas.items():
# Assert sigmas and means have same keys
assert sigma.keys() == means[key].keys(), "sigmas and means must have same keys"
assert (
sigma.keys() == means[key].keys()
), "sigmas and means must have same keys"
else:
# Set means to zero if not provided
means = {k: {time: np.zeros(n) for time in sigma.keys()} for k, sigma in sigmas.items()}

means = {
k: {time: np.zeros(n) for time in sigma.keys()}
for k, sigma in sigmas.items()
}

self.__means = means
self.__sigmas = sigmas
self.__returns = returns

self.__Ls = pd.DataFrame({k: _cholesky_precision(sigma) for k, sigma in sigmas.items()})
# all those quantities don't depend on the window size
self.__Ls = pd.DataFrame(
{k: _cholesky_precision(sigma) for k, sigma in sigmas.items()}
)
self.__Ls_shifted = self.__Ls.shift(1).dropna()
self.__nus = pd.DataFrame({key: _nu(Ls, means[key]) for key, Ls in self.__Ls.items()})
nus_shifted = self.__nus.shift(1).dropna()
self.__nus = pd.DataFrame(
{key: _nu(Ls, means[key]) for key, Ls in self.__Ls.items()}
)
self.__nus_shifted = self.__nus.shift(1).dropna()

@property
def K(self):
"""
Returns the number of expert predictors
"""
return len(self.__sigmas)

@property
def assets(self):
"""
Returns the assets in the covariance combination problem
"""
return self.__returns.columns

def solve(self, window=None, **kwargs):
"""
The size of the window is crucial to specify the size of the parameters
for the cvxpy problem. Hence those computations are not in the __init__ method
Solves the covariance combination problem at a given time, i.e.,
finds the prediction for the covariance matrix at 'time+1'
param window: number of previous time steps to use in the covariance combination
"""
# If window is None, use all available data; cap window at length of data
window = window or len(self.__Ls_shifted)
window = min(window, len(self.__Ls_shifted))
self.window = window

# Compute P matrix and its Cholesky factor
Lts_at_r = pd.DataFrame({key: _B_t_col(Ls, nus_shifted[key], returns) for key, Ls in self.__Ls_shifted.items()})
Lts_at_r = pd.DataFrame(
{
key: _B_t_col(Ls, self.__nus_shifted[key], self.__returns)
for key, Ls in self.__Ls_shifted.items()
}
)

Bs = {time: np.column_stack(Lts_at_r.loc[time]) for time in Lts_at_r.index}
prod_Bs = pd.Series({time: B.T @ B for time, B in Bs.items()})
times = prod_Bs.index
P = {times[i]: sum(prod_Bs.loc[times[i - window + 1]:times[i]]) for i in range(window - 1, len(times))}
P = {
times[i]: sum(prod_Bs.loc[times[i - window + 1] : times[i]])
for i in range(window - 1, len(times))
}

self.__P_chol = {time: np.linalg.cholesky(matrix) for time, matrix in P.items()}
P_chol = {time: np.linalg.cholesky(matrix) for time, matrix in P.items()}

# Compute A matrix
Ls_diag = pd.DataFrame({k: _diag_part(L) for k, L in self.__Ls_shifted.items()})

self.__A = {times[i]: _A(Ls_diag.truncate(before=times[i - window + 1], after=times[i]), self.K) for i in
range(window - 1, len(times))}
A = {
times[i]: _A(
Ls_diag.truncate(before=times[i - window + 1], after=times[i]), self.K
)
for i in range(window - 1, len(times))
}

self.__problem = _CombinationProblem(keys=self.__sigmas.keys(), n=len(self.assets), window=window)
problem = _CombinationProblem(
keys=self.__sigmas.keys(), n=len(self.assets), window=window
)

@property
def K(self):
"""
Returns the number of expert predictors
"""
return len(self.__sigmas)
for time in A.keys():
problem.A_param.value = A[time]
problem.P_chol_param.value = P_chol[time]

@property
def index(self):
"""
Returns the index of the covariance combination problem, e.g. the timestamps
"""
return self.__A.keys()
yield self._solve(time=time, problem=problem, **kwargs)

@property
def assets(self):
def _solve(self, time, problem, **kwargs):
"""
Returns the assets in the covariance combination problem
"""
return self.__returns.columns

def solve(self, time, **kwargs):
"""
Solves the covariance combination problem at a given time, i.e.,
finds the prediction for the covariance matrix at 'time+1'
Solves the covariance combination problem at a given time t
"""
# Update parameters and solve problem
self.__problem.A_param.value = self.__A[time]
self.__problem.P_chol_param.value = self.__P_chol[time]
weights = self.__problem.solve(**kwargs)
# solve problem
problem.solve(**kwargs)
weights = problem.weights

# Get non-shifted L
L = sum(self.__Ls.loc[time] * weights.values) # prediction for time+1
L = sum(self.__Ls.loc[time] * weights.values) # prediction for time+1
nu = sum(self.__nus.loc[time] * weights.values) # prediction for time+1

mean = pd.Series(index=self.assets, data=np.linalg.inv(L.T) @ nu)
sigma = pd.DataFrame(index=self.assets, columns=self.assets, data=np.linalg.inv(L @ L.T))
return Result(time=time, window=self.window, mean=mean, covariance=sigma, weights=weights)

def solve_window(self, **kwargs):
"""
Solves the covariance combination problem for all time steps
"""
for time in self.index:
yield self.solve(time, **kwargs)
sigma = pd.DataFrame(
index=self.assets, columns=self.assets, data=np.linalg.inv(L @ L.T)
)
return Result(time=time, mean=mean, covariance=sigma, weights=weights)
Loading

0 comments on commit a1fb0eb

Please sign in to comment.