jasonstrimpel · dstadelman · Jan 7, 2023 · Jan 7, 2023 · Jan 8, 2023 · Jan 8, 2023
diff --git a/.gitignore b/.gitignore
@@ -11,4 +11,7 @@ docs/build
 *.ipynb
 *.idea
 venv/
-debug.py
+debug.py
+
+.venv/
+.vscode/
diff --git a/README.md b/README.md
@@ -14,7 +14,6 @@ to form the data properly.
 ### Volatility estimators include: ###
 
 * Garman Klass
-* Hodges Tompkins
 * Parkinson
 * Rogers Satchell
 * Yang Zhang

diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,7 @@
+matplotlib
+numpy
+pandas
+pytest
+scipy
+statsmodels
+yfinance
diff --git a/tests/MSFT.csv b/tests/MSFT.csv
diff --git a/tests/SPY.csv b/tests/SPY.csv
diff --git a/volatility/data.py b/volatility/data.py
@@ -17,6 +17,11 @@ def yahoo_helper(symbol, data_path, *args):
     """
 
     try:
+
+        # NOTE: one might be tempted to use Adj Close here, but that would be wrong.
+        # Adj Close can fall outside the Low-to-High range, which causes these
+        # volatility estimators to produce absolute nonsense.
+
         data = pandas.read_csv(
             data_path,
             parse_dates=['Date'],
@@ -26,12 +31,14 @@ def yahoo_helper(symbol, data_path, *args):
                 'Open',
                 'High',
                 'Low',
-                'Adj Close',
+                'Close',
+                # 'Adj Close',
             ],
             *args
-        ).rename(columns={
-            'Adj Close': 'Close'
-        })
+        )
+        # .rename(columns={
+        #     'Adj Close': 'Close'
+        # })
 
     except Exception as e:
         raise e

diff --git a/volatility/models/GarmanKlass.py b/volatility/models/GarmanKlass.py
@@ -1,21 +1,35 @@
 import math
 
 import numpy as np
+import pandas as pd
 
+from volatility.overlapping_sample import get_variance_overlapping_adjustment_factor
 
-def get_estimator(price_data, window=30, trading_periods=252, clean=True):
 
-    log_hl = (price_data['High'] / price_data['Low']).apply(np.log)
-    log_co = (price_data['Close'] / price_data['Open']).apply(np.log)
+def get_estimator(price_data, window=30, trading_periods=252, clean=True, use_overlapping_adjustment_factor=True):
+
+    m = get_variance_overlapping_adjustment_factor(window, len(price_data)) if use_overlapping_adjustment_factor else 1
+
+    df = pd.DataFrame({
+        'log_hl': (np.log(price_data['High']     / price_data['Low'])) ** 2
+
+    # NOTE: (close / yesterday's close) is how the function appears in Euan Sinclair's Volatility Trading, 2nd Edition, Equation 2.15
+    # and also in the original Garman-Klass paper (equation 19a) https://www.cmegroup.com/trading/fx/files/a_estimation_of_security_price.pdf
+    # ,   'log_cX': (np.log(price_data['Close']    / price_data['Close'].shift(1))) ** 2
+    # This version of the equation seems to produce "better looking" results using the MSFT example data: https://portfolioslab.com/tools/garman-klass
+    ,   'log_cX': (np.log(price_data['Close']    / price_data['Open'])) ** 2
+    })
+
+    df['term1'] = df['log_hl'].rolling(window=window, center=False).apply(func=lambda v: (v / 2).mean())
+    df['term2'] = df['log_cX'].rolling(window=window, center=False).apply(func=lambda v: (((2 * math.log(2)) - 1) * v).mean())
+
+    # NOTE: `term1` - `term2` is sometimes negative, a case not covered in the literature; when that happens, set the difference to 0.
+    df['term_difference'] = df['term1'] - df['term2']
+    df['term_difference'] = df['term_difference'].apply(lambda x: 0 if x < 0 else x)
+
+    df['gk'] = np.sqrt(df['term_difference']) * math.sqrt(trading_periods) * math.sqrt(m)
 
-    rs = 0.5 * log_hl**2 - (2*math.log(2)-1) * log_co**2
-
-    def f(v):
-        return (trading_periods * v.mean())**0.5
-
-    result = rs.rolling(window=window, center=False).apply(func=f)
-
     if clean:
-        return result.dropna()
+        return df['gk'].dropna()
     else:
-        return result
+        return df['gk']
diff --git a/volatility/models/HodgesTompkins.py b/volatility/models/HodgesTompkins.py
diff --git a/volatility/models/Kurtosis.py b/volatility/models/Kurtosis.py
@@ -1,7 +1,9 @@
 import numpy as np
 
 
-def get_estimator(price_data, window=30, clean=True):
+def get_estimator(price_data, window=30, clean=True, use_overlapping_adjustment_factor=True):
+
+    # use_overlapping_adjustment_factor does not make sense here, but the parameter has to be accepted to keep the API consistent
 
     log_return = (price_data['Close'] / price_data['Close'].shift(1)).apply(np.log)
 

diff --git a/volatility/models/Parkinson.py b/volatility/models/Parkinson.py
@@ -2,18 +2,22 @@
 
 import numpy as np
 
+from volatility.overlapping_sample import get_variance_overlapping_adjustment_factor
 
-def get_estimator(price_data, window=30, trading_periods=252, clean=True):
 
-    rs = (1.0 / (4.0 * math.log(2.0))) * ((price_data['High'] / price_data['Low']).apply(np.log))**2.0
+def get_estimator(price_data, window=30, trading_periods=252, clean=True, use_overlapping_adjustment_factor=True):
 
-    def f(v):
-        return (trading_periods * v.mean())**0.5
+    m = get_variance_overlapping_adjustment_factor(window, len(price_data)) if use_overlapping_adjustment_factor else 1
+
+    log_h_l_sq = np.log(price_data['High'] / price_data['Low'])**2
+
+    def f(d):
+        return math.sqrt(d.mean() / (4 * math.log(2)))
 
-    result = rs.rolling(
+    result = log_h_l_sq.rolling(
         window=window,
         center=False
-    ).apply(func=f)
+    ).apply(func=f) * math.sqrt(trading_periods) * math.sqrt(m)
 
     if clean:
         return result.dropna()

diff --git a/volatility/models/Raw.py b/volatility/models/Raw.py
@@ -2,15 +2,18 @@
 
 import numpy as np
 
+from volatility.overlapping_sample import get_variance_overlapping_adjustment_factor
 
-def get_estimator(price_data, window=30, trading_periods=252, clean=True):
-
-    log_return = (price_data['Close'] / price_data['Close'].shift(1)).apply(np.log)
+def get_estimator(price_data, window=30, trading_periods=252, clean=True, use_overlapping_adjustment_factor=True):
+
+    m = get_variance_overlapping_adjustment_factor(window, len(price_data)) if use_overlapping_adjustment_factor else 1
+
+    log_return = np.log(price_data['Close'] / price_data['Close'].shift(1))
 
     result = log_return.rolling(
         window=window,
         center=False
-    ).std() * math.sqrt(trading_periods)
+    ).std() * math.sqrt(trading_periods) * math.sqrt(m)
 
     if clean:
         return result.dropna()

diff --git a/volatility/models/RogersSatchell.py b/volatility/models/RogersSatchell.py
@@ -2,22 +2,26 @@
 
 import numpy as np
 
+from volatility.overlapping_sample import get_variance_overlapping_adjustment_factor
 
-def get_estimator(price_data, window=30, trading_periods=252, clean=True):
+def get_estimator(price_data, window=30, trading_periods=252, clean=True, use_overlapping_adjustment_factor=True):
+
+    m = get_variance_overlapping_adjustment_factor(window, len(price_data)) if use_overlapping_adjustment_factor else 1
 
-    log_ho = (price_data['High'] / price_data['Open']).apply(np.log)
-    log_lo = (price_data['Low'] / price_data['Open']).apply(np.log)
-    log_co = (price_data['Close'] / price_data['Open']).apply(np.log)
+    log_hc = np.log(price_data['High']  / price_data['Close'])
+    log_ho = np.log(price_data['High']  / price_data['Open'])
+    log_lc = np.log(price_data['Low']   / price_data['Close'])
+    log_lo = np.log(price_data['Low']   / price_data['Open'])
 
-    rs = log_ho * (log_ho - log_co) + log_lo * (log_lo - log_co)
+    rs = (log_hc * log_ho) + (log_lc * log_lo)
 
     def f(v):
-        return (trading_periods * v.mean())**0.5
+        return math.sqrt(v.mean())
 
     result = rs.rolling(
         window=window,
         center=False
-    ).apply(func=f)
+    ).apply(func=f) * math.sqrt(trading_periods) * math.sqrt(m)
 
     if clean:
         return result.dropna()

diff --git a/volatility/models/Skew.py b/volatility/models/Skew.py
@@ -1,7 +1,9 @@
 import numpy as np
 
 
-def get_estimator(price_data, window=30, clean=True):
+def get_estimator(price_data, window=30, clean=True, use_overlapping_adjustment_factor=True):
+
+    # use_overlapping_adjustment_factor does not make sense here, but the parameter has to be accepted to keep the API consistent
 
     log_return = (price_data['Close'] / price_data['Close'].shift(1)).apply(np.log)
 

diff --git a/volatility/models/YangZhang.py b/volatility/models/YangZhang.py
@@ -1,39 +1,54 @@
 import math
 
 import numpy as np
+import pandas as pd
 
+from volatility.overlapping_sample import get_variance_overlapping_adjustment_factor
 
-def get_estimator(price_data, window=30, trading_periods=252, clean=True):
+def get_estimator(price_data, window=30, trading_periods=252, clean=True, use_overlapping_adjustment_factor=True):
 
-    log_ho = (price_data['High'] / price_data['Open']).apply(np.log)
-    log_lo = (price_data['Low'] / price_data['Open']).apply(np.log)
-    log_co = (price_data['Close'] / price_data['Open']).apply(np.log)
-
-    log_oc = (price_data['Open'] / price_data['Close'].shift(1)).apply(np.log)
-    log_oc_sq = log_oc**2
-
-    log_cc = (price_data['Close'] / price_data['Close'].shift(1)).apply(np.log)
-    log_cc_sq = log_cc**2
-
-    rs = log_ho * (log_ho - log_co) + log_lo * (log_lo - log_co)
+    m = get_variance_overlapping_adjustment_factor(window, len(price_data)) if use_overlapping_adjustment_factor else 1
+
+    d = {}
+
+    # open
+    # NOTE: (open / yesterday's open) is how the function appears in Euan Sinclair's Volatility Trading, 2nd Edition, Equation 2.17b
+    # d['log_oo'] = np.log(price_data['Open']     / price_data['Open'].shift(1))
+    # This version of the equation makes more sense: https://portfolioslab.com/tools/yang-zhang
+    d['log_oo'] = np.log(price_data['Open']     / price_data['Close'].shift(1))
+
+    # close
+    # NOTE: (close / yesterday's close) is how the function appears in Euan Sinclair's Volatility Trading, 2nd Edition, Equation 2.17c
+    # d['log_cc'] = np.log(price_data['Close']    / price_data['Close'].shift(1))
+    # This version of the equation makes more sense: https://portfolioslab.com/tools/yang-zhang
+    d['log_cc'] = np.log(price_data['Close']    / price_data['Open'])
+
+    # Rogers and Satchell
+    log_hc = np.log(price_data['High']      / price_data['Close'])
+    log_ho = np.log(price_data['High']      / price_data['Open'])
+    log_lc = np.log(price_data['Low']       / price_data['Close'])
+    log_lo = np.log(price_data['Low']       / price_data['Open'])
 
-    close_vol = log_cc_sq.rolling(
-        window=window,
-        center=False
-    ).sum() * (1.0 / (window - 1.0))
-    open_vol = log_oc_sq.rolling(
-        window=window,
-        center=False
-    ).sum() * (1.0 / (window - 1.0))
-    window_rs = rs.rolling(
-        window=window,
-        center=False
-    ).sum() * (1.0 / (window - 1.0))
-
-    k = 0.34 / (1.34 + (window + 1) / (window - 1))
-    result = (open_vol + k * close_vol + (1 - k) * window_rs).apply(np.sqrt) * math.sqrt(trading_periods)
+    d['rs']     = (log_hc * log_ho) + (log_lc * log_lo)
+
+    df = pd.DataFrame(d)
+
+    df['log_oo_avg']        = df['log_oo'].rolling(window=window, center=False).apply(func=lambda v: v.mean())
+    df['log_oo_avg_sq']     = (df['log_oo'] - df['log_oo_avg']) ** 2
+    df['log_oo_sigma_sq']   = df['log_oo_avg_sq'].rolling(window=window, center=False).apply(func=lambda v: v.sum() / (window - 1))
+
+    df['log_cc_avg']        = df['log_cc'].rolling(window=window, center=False).apply(func=lambda v: v.mean())
+    df['log_cc_avg_sq']     = (df['log_cc'] - df['log_cc_avg']) ** 2
+    df['log_cc_sigma_sq']   = df['log_cc_avg_sq'].rolling(window=window, center=False).apply(func=lambda v: v.sum() / (window - 1))
+
+    df['rs_sigma_sq']       = df['rs'].rolling(window=window, center=False).apply(func=lambda v: v.mean())
+
+    k = 0.34 / (1.34 + ((window + 1) / (window - 1)))
+
+    df['variance']          = df['log_oo_sigma_sq'] + (k * df['log_cc_sigma_sq']) + ((1 - k) * df['rs_sigma_sq'])
+    df['vol']               = np.sqrt(df['variance']) * math.sqrt(trading_periods) * math.sqrt(m)
 
     if clean:
-        return result.dropna()
+        return df['vol'].dropna()
     else:
-        return result
+        return df['vol']
diff --git a/volatility/models/api.py b/volatility/models/api.py
@@ -1,5 +1,4 @@
 from volatility.models import GarmanKlass
-from volatility.models import HodgesTompkins
 from volatility.models import Kurtosis
 from volatility.models import Parkinson
 from volatility.models import Raw
@@ -9,11 +8,11 @@
 
 __all__ = [
     'GarmanKlass',
-    'HodgesTompkins',
     'Kurtosis',
     'Parkinson',
     'Raw',
     'RogersSatchell',
     'Skew',
     'YangZhang',
 ]
+
diff --git a/volatility/overlapping_sample.py b/volatility/overlapping_sample.py
@@ -0,0 +1,15 @@
+
+def get_variance_overlapping_adjustment_factor(window: int, len_price_data: int):
+
+    h = window
+    T = len_price_data
+    n = T - h + 1
+
+    assert h < T/2
+
+    a = (h / n)
+    b = ((h*h)-1) / (3*(n*n))
+
+    m = 1 / (1 - a + b)
+
+    return m