Update tests

ActurialCapital · Jul 3, 2024 · ae8226a · ae8226a
1 parent 3fba9e5
commit ae8226a
Show file tree

Hide file tree

Showing 8 changed files with 367 additions and 35 deletions.
diff --git a/README.md b/README.md
@@ -43,12 +43,12 @@
 The project latest version incorporates a wide array of models, offering an extensive toolkit for generating synthetic time series data. This version includes features like:
 
 * `GeometricBrownianMotion`
-* `AR` (Auto Regressive)
-* `NARMA` (Non-Linear Auto Regressive Moving Average)
+* `AutoRegressive`
+* `NARMA`
 * `Heston`
-* `CIR` (Cox–Ingersoll–Ross)
+* `CIR`
 * `LevyStable`
-* `MeanReverting` (Ornstein–Uhlenbeck)
+* `MeanReverting`
 * `Merton`
 * `Poisson`
 * `Seasonal`
@@ -109,13 +109,42 @@ In this example, we are using the following parameters for illustration purposes
 >>> model.transform(matrix) # Produces highly positively correlated features
 ```
 
-
-
 <p align="center"><img src="https://github.com/ActurialCapital/synthetica/blob/main/docs/static/gbm_corr_transform.png" alt="chart-2"  width="75%" height="75%"></p>
 
 
 <p align="right">(<a href="#readme-top">back to top</a>)</p>
 
+## Positive Definiteness
+
+### What positive definite means in a covariance matrix
+
+A covariance matrix is considered positive definite if it satisfies the following key properties:
+
+1. It is symmetric, meaning the matrix is equal to its transpose.
+2. For any non-zero vector $x$, $x^T C x > 0$, where $C$ is the covariance matrix and $x^T$ is the transpose of $x$.
+3. All of its eigenvalues are strictly positive.
+
+Positive definiteness in a covariance matrix has important implications:
+
+1. It ensures the matrix is invertible, which is crucial for many [statistical techniques](https://stats.stackexchange.com/questions/52976/is-a-sample-covariance-matrix-always-symmetric-and-positive-definite).
+2. It guarantees that the matrix represents a [valid probability distribution](https://statproofbook.github.io/P/covmat-psd.html).
+3. It allows for unique solutions in [optimization problems](https://gowrishankar.info/blog/why-covariance-matrix-should-be-positive-semi-definite-tests-using-breast-cancer-dataset/) and ensures the stability of certain algorithms.
+4. It indicates that no linear combination of the variables has zero variance, meaning all variables contribute [meaningful information](https://math.stackexchange.com/questions/114072/what-is-the-proof-that-covariance-matrices-are-always-semi-definite).
+
+A covariance matrix that is positive semi-definite (allowing for eigenvalues to be non-negative rather than strictly positive) is still valid, but may indicate linear dependencies among variables.
+
+In practice, sample covariance matrices are often positive definite if the number of observations exceeds the number of variables and there are no perfect linear relationships among the variables.
+
+### Implementation
+
+`synthetica` automatically finds the nearest positive-definite matrix to the input using the `nearest_positive_definite` Python function. The algorithm is directly sourced from [Computing a nearest symmetric positive semidefinite matrix](https://doi.org/10.1016/0024-3795(88)90223-6).
+
+### Other Sources
+
+* [Matlab code](https://www.mathworks.com/matlabcentral/fileexchange/42885-nearestspd)
+* [Stackoverflow](https://stackoverflow.com/.../python-convert-matrix-to-positive-semi-definite)
+* [Gist](https://gist.github.com/fasiha/fdb5cec2054e6f1c6ae35476045a0bbd)
+
 <!-- CONTRIBUTING -->
 ## Contributing
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-synthetica"
-version = "0.1.2"
+version = "0.1.3"
 description = "Generate synthetic time series data."
 authors = ["ActurialCapital"]
 license = "BSD 3-Clause License"
@@ -13,7 +13,6 @@ python = "^3.11"
 numpy = "^1.26.4"
 pandas = "^2.2.2"
 scipy = "^1.13.1"
-statsmodels = "^0.14.2"
 
 [build-system]
 requires = ["poetry-core"]

diff --git a/synthetica/base.py b/synthetica/base.py
@@ -227,7 +227,8 @@ def create_corr_returns(self, matrix) -> pd.Series | pd.DataFrame:
 
         return self.to_pandas(np.array(extracted_paths).T)
 
-    def cholesky_transform(self, rvs: np.array, matrix: np.array) -> np.ndarray:
+    @staticmethod
+    def cholesky_transform(rvs: np.array, matrix: np.array) -> np.ndarray:
         """
         Perform Cholesky transformation on random variables.
 

diff --git a/synthetica/models.py b/synthetica/models.py
@@ -872,7 +872,7 @@ def transform(self, matrix: pd.DataFrame | np.ndarray = None) -> pd.Series | pd.
         return self.to_pandas(paths[1:,])
 
 
-class AR(BaseSynthetic):
+class AutoRegressive(BaseSynthetic):
     """
     A class for generating synthetic autoregressive (AR) model.
 

diff --git a/tests/test_base.py b/tests/test_base.py
@@ -0,0 +1,172 @@
+import pytest 
+import numpy as np
+import pandas as pd
+from pandas.testing import assert_index_equal
+
+from synthetica import GeometricBrownianMotion, nearest_positive_definite
+
+Model = GeometricBrownianMotion
+index = pd.date_range("2020-01-01", "2020-02-01", inclusive="left")
+
+
+def test_random_seed():
+    """Equal seeds give equal ``white_noise``; unseeded models do not."""
+    # Two models built with the same seed must agree element by element
+    seeded_first = Model(seed=123).white_noise
+    seeded_second = Model(seed=123).white_noise
+    assert np.array_equal(seeded_first, seeded_second)
+
+    # Without a seed, two independent models are expected to differ
+    unseeded_first = Model(seed=None).white_noise
+    unseeded_second = Model(seed=None).white_noise
+    assert not np.array_equal(unseeded_first, unseeded_second)
+
+
+def test_repr():
+    """The model's class is named ``GeometricBrownianMotion``."""
+    # Only the class name is checked here, not the output of ``repr()``
+    assert type(Model()).__name__ == 'GeometricBrownianMotion'
+
+
+def test_datetime_index():
+    """A date range given as ``length`` is kept on the model and its output."""
+    gbm = Model(length=index)
+
+    # The index is exposed on the model before anything is simulated
+    assert_index_equal(gbm.index, index)
+
+    # The simulated output carries the same index
+    assert_index_equal(gbm.transform().index, index)
+
+
+def test_cholesky_transform_positive_definite():
+    """``cholesky_transform`` matches ``(L @ rvs.T).T`` for a positive definite input."""
+    rvs = np.array([[1, 2], [3, 4]])
+    matrix = np.array([[2, 1], [1, 2]])  # Positive definite matrix
+
+    # Reference result built directly from the Cholesky factor of `matrix`
+    lower = np.linalg.cholesky(matrix)
+    expected_output = (lower @ rvs.T).T
+
+    result = Model().cholesky_transform(rvs, matrix)
+    assert np.allclose(result, expected_output), \
+        f"Expected {expected_output}, but got {result}"
+
+
+def test_cholesky_transform_non_positive_definite():
+    """
+    For a non-positive definite input, ``cholesky_transform`` must match the
+    result obtained from ``nearest_positive_definite(matrix)``.
+    """
+    rvs = np.array([[1, 2], [3, 4]])
+    matrix = np.array([[1, 2], [2, 1]])  # This is not positive definite
+
+    # Reference result: factorise the nearest positive definite matrix
+    repaired = nearest_positive_definite(matrix)
+    lower = np.linalg.cholesky(repaired)
+    expected_output = (lower @ rvs.T).T
+
+    result = Model().cholesky_transform(rvs, matrix)
+    assert np.allclose(result, expected_output), \
+        f"Expected {expected_output}, but got {result}"
+
+
+def test_create_corr_returns_positive_definite():
+    """``create_corr_returns`` gives a ``(length, num_paths)`` DataFrame."""
+    length, num_paths = 10, 2
+    mean, delta, sigma = 0, 0.01, 0.2
+    seed = 123
+
+    corr = np.array([[2, 1], [1, 2]])  # Positive definite matrix
+    gbm = Model(length, num_paths, mean, delta, sigma, seed=seed)
+    res = gbm.create_corr_returns(corr)
+
+    assert isinstance(res, pd.DataFrame), \
+        "Expected result to be a pandas DataFrame"
+    assert res.shape == (length, num_paths), \
+        f"Expected shape {(length, num_paths)}, but got {res.shape}"
+
+def test_create_corr_returns_non_positive_definite():
+    """
+    A matrix rejected by ``np.linalg.cholesky`` is accepted by
+    ``create_corr_returns`` after ``nearest_positive_definite`` is applied.
+    """
+    length, num_paths = 10, 2
+    mean, delta, sigma = 0, 0.01, 0.2
+    seed = 123
+
+    gbm = Model(length, num_paths, mean, delta, sigma, seed=seed)
+    raw = np.array([[1, -2], [2, 1]])  # Not positive definite matrix
+
+    # Sanity check: the raw matrix cannot be Cholesky-factorised
+    with pytest.raises(np.linalg.LinAlgError):
+        np.linalg.cholesky(raw)
+
+    # Repair the matrix first, then feed the repaired version to the model
+    repaired = nearest_positive_definite(raw)
+    res = gbm.create_corr_returns(repaired)
+
+    assert isinstance(res, pd.DataFrame), \
+        "Expected result to be a pandas DataFrame"
+    assert res.shape == (length, num_paths), \
+        f"Expected shape {(length, num_paths)}, but got {res.shape}"
+
+
+def test_red_noise():
+    """Placeholder for the red-noise checks, which are not written yet."""
+    # Skip explicitly so the report shows a skipped test rather than a
+    # passing one that asserts nothing.
+    pytest.skip("red noise test not implemented yet")
+
+
+def test_white_noise():
+    """
+    Check the type, shape and sample moments of ``white_noise``, and that
+    the ``mean``, ``sigma`` and ``delta`` setters are reflected in it.
+    """
+    num_paths = 1
+    length = 10
+    mean = 0
+    delta = 0.01
+    sigma = 0.2
+    seed = 123
+
+    model = Model(length, num_paths, mean, delta, sigma, seed=seed)
+    res = model.white_noise
+
+    assert isinstance(res, np.ndarray), \
+        "Expected result to be a numpy ndarray"
+    assert res.shape == (length, num_paths), \
+        f"Expected shape {(length, num_paths)}, but got {res.shape}"
+
+    # Validate mean
+    actual_mean = res.mean()
+    assert np.isclose(actual_mean, mean, atol=0.1), \
+        f"Expected mean close to {mean}, but got {actual_mean}"
+
+    # Validate mean setter
+    new_mean = 1
+    model.mean = new_mean
+    actual_mean = model.white_noise.mean()
+    assert np.isclose(actual_mean, new_mean, rtol=0.1), \
+        f"Expected mean close to {new_mean}, but got {actual_mean}"
+
+    # Validate standard deviation: the checks below compare ``.std()``
+    # against ``sqrt(delta) * sigma``, so they are labelled as such.
+    expected_std = np.sqrt(delta) * sigma
+    actual_std = model.white_noise.std()
+    assert np.isclose(actual_std, expected_std, rtol=0.2), \
+        f"Expected standard deviation close to {expected_std}, but got {actual_std}"
+
+    # Validate sigma setter
+    new_sigma = 1
+    model.sigma = new_sigma
+    expected_std = np.sqrt(delta) * new_sigma
+    actual_std = model.white_noise.std()
+    assert np.isclose(actual_std, expected_std, rtol=0.2), \
+        f"Expected standard deviation close to {expected_std}, but got {actual_std}"
+
+    # Validate delta setter
+    new_delta = 1
+    model.delta = new_delta
+    expected_std = np.sqrt(new_delta) * new_sigma
+    actual_std = model.white_noise.std()
+    assert np.isclose(actual_std, expected_std, rtol=0.2), \
+        f"Expected standard deviation close to {expected_std}, but got {actual_std}"
+
+
+def test_transform():
+    """``transform`` returns pandas output shaped ``(length, num_paths)``."""
+    length, num_paths = 10, 2
+
+    simulated = Model(length, num_paths).transform()
+
+    # The output must be one of the two pandas containers
+    assert isinstance(simulated, (pd.Series, pd.DataFrame)), \
+        "Not a pd.Series or pd.DataFrame."
+    # Wrapping in a DataFrame lets a single shape check cover both
+    # container types
+    assert pd.DataFrame(simulated).shape == (length, num_paths), \
+        "Shape does not match settings."
diff --git a/tests/test_decorators.py b/tests/test_decorators.py
@@ -0,0 +1,36 @@
+from synthetica.decorators import callback
+
+
+class Model:
+    """
+    Minimal stand-in used to exercise the ``callback`` decorator.
+
+    ``param`` is a stored value and ``func`` is derived from it as
+    ``param + 1``.
+    """
+
+    def __init__(self, param):
+        self._param = param
+
+    @property
+    def param(self) -> float:
+        """Stored parameter value."""
+        return self._param
+
+    @param.setter
+    @callback('func')
+    def param(self, value) -> None:
+        """
+        Store ``value`` when it differs from the current parameter.
+
+        NOTE(review): ``callback('func')`` presumably re-evaluates ``func``
+        after the update -- confirm against ``synthetica.decorators``.
+        """
+        if value != self._param:
+            self._param = value
+
+    @property
+    def func(self) -> float:
+        """Value derived from ``param``: ``param + 1``."""
+        return self.param + 1
+
+
+def test_callback():
+    """``func`` equals ``param + 1`` before and after ``param`` is reassigned."""
+    instance = Model(1)
+
+    # Initial state as passed to the constructor
+    assert instance.param == 1
+    assert instance.func == 2
+
+    # State after going through the decorated setter
+    instance.param = 2
+    assert instance.param == 2
+    assert instance.func == 3
+
+
+
+