From ea9a9b7bde56dd14f7fad72b3bd3c27836a709b9 Mon Sep 17 00:00:00 2001
From: Joe Jevnik
Date: Tue, 1 May 2018 18:46:27 -0400
Subject: [PATCH] TST: add test for beta where rhs is a dataframe

---
 empyrical/tests/test_stats.py | 80 +++++++++++++++++++++++------------
 1 file changed, 52 insertions(+), 28 deletions(-)

diff --git a/empyrical/tests/test_stats.py b/empyrical/tests/test_stats.py
index 1b9f84b..2a7e088 100644
--- a/empyrical/tests/test_stats.py
+++ b/empyrical/tests/test_stats.py
@@ -2,7 +2,6 @@
 
 from copy import copy
 from operator import attrgetter
-import random
 from unittest import TestCase, SkipTest
 
 from parameterized import parameterized
@@ -25,6 +24,9 @@
 
 DECIMAL_PLACES = 8
 
+rand = np.random.RandomState(1337)
+
+
 class BaseTestCase(TestCase):
     def assert_indexes_match(self, result, expected):
         """
@@ -42,7 +44,6 @@ def assert_indexes_match(self, result, expected):
 
 
 class TestStats(BaseTestCase):
-
     # Simple benchmark, no drawdown
     simple_benchmark = pd.Series(
         np.array([0., 1., 0., 1., 0., 1., 0., 1., 0.]) / 100,
@@ -95,12 +96,12 @@ class TestStats(BaseTestCase):
 
     # Random noise
     noise = pd.Series(
-        [random.gauss(0, 0.001) for i in range(1000)],
+        rand.normal(0, 0.001, 1000),
         index=pd.date_range('2000-1-30', periods=1000, freq='D', tz='UTC')
     )
 
     noise_uniform = pd.Series(
-        [random.uniform(-0.01, 0.01) for i in range(1000)],
+        rand.uniform(-0.01, 0.01, 1000),
        index=pd.date_range('2000-1-30', periods=1000, freq='D', tz='UTC')
     )
 
@@ -131,11 +132,11 @@ class TestStats(BaseTestCase):
     )
 
     # Sparse noise, same as noise but with np.nan sprinkled in
-    replace_nan = random.sample(noise.index.tolist(), random.randint(1, 10))
+    replace_nan = rand.choice(noise.index.tolist(), rand.randint(1, 10))
     sparse_noise = noise.replace(replace_nan, np.nan)
 
     # Sparse flat line at 0.01
-    replace_nan = random.sample(noise.index.tolist(), random.randint(1, 10))
+    replace_nan = rand.choice(noise.index.tolist(), rand.randint(1, 10))
     sparse_flat_line_1_tz = flat_line_1_tz.replace(replace_nan, np.nan)
 
     one = [-0.00171614, 0.01322056, 0.03063862, -0.01422057, -0.00489779,
@@ -432,12 +433,12 @@ def test_sharpe_translation_1(self, returns, required_return, translation):
     def test_sharpe_noise(self, small, large):
         index = pd.date_range('2000-1-30', periods=1000, freq='D')
         smaller_normal = pd.Series(
-            [random.gauss(.01, small) for i in range(1000)],
-            index=index
+            rand.normal(.01, small, 1000),
+            index=index,
         )
         larger_normal = pd.Series(
-            [random.gauss(.01, large) for i in range(1000)],
-            index=index
+            rand.normal(.01, large, 1000),
+            index=index,
         )
         assert self.empyrical.sharpe_ratio(smaller_normal, 0.001) > \
             self.empyrical.sharpe_ratio(larger_normal, 0.001)
@@ -517,12 +518,20 @@ def test_downside_risk_trans(self, returns, required_return):
     ])
     def test_downside_risk_std(self, smaller_std, larger_std):
         less_noise = pd.Series(
-            [random.gauss(0, smaller_std) for i in range(1000)],
-            index=pd.date_range('2000-1-30', periods=1000, freq='D')
+            (
+                rand.normal(0, smaller_std, 1000)
+                if smaller_std != 0
+                else np.full(1000, 0)
+            ),
+            index=pd.date_range('2000-1-30', periods=1000, freq='D'),
         )
         more_noise = pd.Series(
-            [random.gauss(0, larger_std) for i in range(1000)],
-            index=pd.date_range('2000-1-30', periods=1000, freq='D')
+            (
+                rand.normal(0, larger_std, 1000)
+                if larger_std != 0
+                else np.full(1000, 0)
+            ),
+            index=pd.date_range('2000-1-30', periods=1000, freq='D'),
         )
         assert self.empyrical.downside_risk(less_noise) < \
             self.empyrical.downside_risk(more_noise)
@@ -580,7 +589,7 @@ def test_sortino_add_noise(self, returns, required_return):
         sr_1 = self.empyrical.sortino_ratio(returns, required_return)
         upside_values = returns[returns > required_return].index.tolist()
         # Add large losses at random upside locations
-        loss_loc = random.sample(upside_values, 2)
+        loss_loc = rand.choice(upside_values, 2)
         returns[loss_loc[0]] = -0.01
         sr_2 = self.empyrical.sortino_ratio(returns, required_return)
         returns[loss_loc[1]] = -0.01
@@ -600,7 +609,7 @@ def test_sortino_sub_noise(self, returns, required_return):
         sr_1 = self.empyrical.sortino_ratio(returns, required_return)
         downside_values = returns[returns < required_return].index.tolist()
         # Replace some values below the required return to the required return
-        loss_loc = random.sample(downside_values, 2)
+        loss_loc = rand.choice(downside_values, 2)
         returns[loss_loc[0]] = required_return
         sr_2 = self.empyrical.sortino_ratio(returns, required_return)
         returns[loss_loc[1]] = required_return
@@ -770,7 +779,7 @@ def test_alpha_beta_translation(self, mean_returns, translation):
         means = [mean_returns, .001]
         covs = [[std_returns**2, std_returns*std_bench*correlation],
                 [std_returns*std_bench*correlation, std_bench**2]]
-        (ret, bench) = np.random.multivariate_normal(means, covs, 1000).T
+        (ret, bench) = rand.multivariate_normal(means, covs, 1000).T
         returns = pd.Series(
             ret,
             index=pd.date_range('2000-1-30', periods=1000, freq='D'))
@@ -821,7 +830,7 @@ def test_alpha_beta_correlation(self, corr_less, corr_more):
         means_less = [mean_returns, mean_bench]
         covs_less = [[std_returns**2, std_returns*std_bench*corr_less],
                      [std_returns*std_bench*corr_less, std_bench**2]]
-        (ret_less, bench_less) = np.random.multivariate_normal(
+        (ret_less, bench_less) = rand.multivariate_normal(
             means_less, covs_less, 1000).T
         returns_less = pd.Series(ret_less, index=index)
         benchmark_less = pd.Series(bench_less, index=index)
@@ -829,7 +838,7 @@ def test_alpha_beta_correlation(self, corr_less, corr_more):
         means_more = [mean_returns, mean_bench]
         covs_more = [[std_returns**2, std_returns*std_bench*corr_more],
                      [std_returns*std_bench*corr_more, std_bench**2]]
-        (ret_more, bench_more) = np.random.multivariate_normal(
+        (ret_more, bench_more) = rand.multivariate_normal(
             means_more, covs_more, 1000).T
         returns_more = pd.Series(ret_more, index=index)
         benchmark_more = pd.Series(bench_more, index=index)
@@ -865,17 +874,32 @@ def test_alpha_beta_with_nan_inputs(self, returns, benchmark):
         (2 * noise, noise, 2.0),
         (noise, inv_noise, -1.0),
         (2 * noise, inv_noise, -2.0),
-        (sparse_noise*flat_line_1_tz, sparse_flat_line_1_tz, np.nan),
+        (sparse_noise * flat_line_1_tz, sparse_flat_line_1_tz, np.nan),
+        (
+            simple_benchmark + rand.normal(0, 0.001, len(simple_benchmark)),
+            pd.DataFrame({'returns': simple_benchmark}),
+            1.0,
+            2,
+        ),
     ])
-    def test_beta(self, returns, benchmark, expected):
+    def test_beta(self,
+                  returns,
+                  benchmark,
+                  expected,
+                  decimal_places=DECIMAL_PLACES):
         observed = self.empyrical.beta(returns, benchmark)
         assert_almost_equal(
             observed,
             expected,
-            DECIMAL_PLACES)
+            decimal_places,
+        )
 
         if len(returns) == len(benchmark):
             # Compare to scipy linregress
+
+            if isinstance(benchmark, pd.DataFrame):
+                benchmark = benchmark['returns']
+
             returns_arr = returns.values
             benchmark_arr = benchmark.values
             mask = ~np.isnan(returns_arr) & ~np.isnan(benchmark_arr)
@@ -943,7 +967,7 @@ def test_stability_of_timeseries(self, returns, expected):
         (empty_returns, np.nan),
         (one_return, 1.0),
         (mixed_returns, 0.9473684210526313),
-        (pd.Series(np.random.randn(100000)), 1.),
+        (pd.Series(rand.randn(100000)), 1.),
     ])
     def test_tail_ratio(self, returns, expected):
         assert_almost_equal(
@@ -1250,7 +1274,7 @@ def test_value_at_risk(self):
         assert_almost_equal(value_at_risk(returns, cutoff=0.3), 81.5)
 
         # Test a returns stream of 21 data points at different cutoffs.
-        returns = np.random.normal(0, 0.02, 21)
+        returns = rand.normal(0, 0.02, 21)
         for cutoff in (0, 0.0499, 0.05, 0.20, 0.999, 1):
             assert_almost_equal(
                 value_at_risk(returns, cutoff),
@@ -1262,7 +1286,7 @@ def test_conditional_value_at_risk(self):
         conditional_value_at_risk = self.empyrical.conditional_value_at_risk
 
         # A single-valued array will always just have a CVaR of its only value.
-        returns = np.random.normal(0, 0.02, 1)
+        returns = rand.normal(0, 0.02, 1)
         expected_cvar = returns[0]
         assert_almost_equal(
             conditional_value_at_risk(returns, cutoff=0), expected_cvar,
@@ -1272,7 +1296,7 @@ def test_conditional_value_at_risk(self):
         )
 
         # Test a returns stream of 21 data points at different cutoffs.
-        returns = np.random.normal(0, 0.02, 21)
+        returns = rand.normal(0, 0.02, 21)
         for cutoff in (0, 0.0499, 0.05, 0.20, 0.999, 1):
 
             # Find the VaR based on our cutoff, then take the average of all
@@ -1360,11 +1384,11 @@ def setUp(self):
         self.window = 12
 
         self.returns = pd.Series(
-            np.random.randn(1, 120)[0]/100.,
+            rand.randn(1, 120)[0]/100.,
             index=pd.date_range('2000-1-30', periods=120, freq='M'))
 
         self.factor_returns = pd.Series(
-            np.random.randn(1, 120)[0]/100.,
+            rand.randn(1, 120)[0]/100.,
             index=pd.date_range('2000-1-30', periods=120, freq='M'))
 
     def test_roll_pandas(self):
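
Reviewer note: besides the new test case, the patch replaces the stdlib `random` module with a module-level `np.random.RandomState(1337)`, so the generated fixtures are reproducible across runs. The new parameterized case checks that `empyrical.beta` accepts the benchmark as a single-column DataFrame, asserting beta only to 2 decimal places because noise is added to the returns. Below is a minimal standalone sketch of the same check, illustrative rather than part of the patch; it assumes `empyrical` is importable and copies the `simple_benchmark` fixture values from the diff above:

    import numpy as np
    import pandas as pd
    import empyrical

    # Seeded generator, mirroring the module-level `rand` added by the patch.
    rand = np.random.RandomState(1337)

    # The `simple_benchmark` fixture: nine daily returns alternating 0% / 1%.
    benchmark = pd.Series(
        np.array([0., 1., 0., 1., 0., 1., 0., 1., 0.]) / 100,
        index=pd.date_range('2000-1-30', periods=9, freq='D'),
    )

    # Returns are the benchmark plus small noise, so beta should be ~1.0.
    returns = benchmark + rand.normal(0, 0.001, len(benchmark))

    # A single-column DataFrame benchmark should give (nearly) the same
    # beta as the equivalent Series benchmark.
    print(empyrical.beta(returns, pd.DataFrame({'returns': benchmark})))
    print(empyrical.beta(returns, benchmark))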