|
| 1 | + |
| 2 | +# coding: utf-8 |
| 3 | + |
| 4 | +# In[3]: |
| 5 | + |
| 6 | + |
| 7 | +import pandas as pd |
| 8 | + |
| 9 | + |
| 10 | +# In[4]: |
| 11 | + |
| 12 | + |
# Enable inline plots when running inside IPython/Jupyter. `get_ipython` only
# exists there, so a plain `python` run skips the magic instead of raising
# NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass


# In[5]:


# Sample record whose keys look like one candle of the chart data fetched
# below (assumption - confirm against the API). Bare expression kept for
# reference only; it has no effect at runtime.
{"date":1483920000,"high":916.41833046,"low":880,"open":916.41832969,"close":902,"volume":1561326.5181909,"quoteVolume":1743.42476903,"weightedAverage":895.55141462}
| 20 | + |
| 21 | + |
| 22 | +# In[8]: |
| 23 | + |
| 24 | + |
def CryptoData(symbol, frequency):
    """Download the chart data of one Poloniex currency pair.

    Params:
        symbol: currency pair as a string, e.g. 'USDT_BTC'.
        frequency: candle period as an int; the original note lists
            300, 900, 1800, 7200, 14400 and 86400.

    Returns:
        DataFrame indexed by 'date', requested with start=0 (i.e. from the
        first available date).
    """
    from urllib.parse import urlencode

    # Build the query with urlencode so every parameter is joined by a real
    # '&'. The hand-concatenated URL had '&currencyPair' collapsed into the
    # '¤' character (HTML entity &curren), which produced a malformed request.
    query = urlencode({
        'command': 'returnChartData',
        'currencyPair': symbol,
        'end': 9999999999,
        'period': frequency,
        'start': 0,
    })
    df = pd.read_json('https://poloniex.com/public?' + query)
    df.set_index('date', inplace=True)
    return df
| 32 | + |
| 33 | + |
| 34 | +# In[10]: |
| 35 | + |
| 36 | + |
# Daily (86400) USDT_BTC chart data; keep only the closing-price series.
df = CryptoData(symbol='USDT_BTC', frequency=86400).loc[:, 'close']


# In[17]:


# First rows of the closing-price series.
df.head(n=5)


# In[15]:


# Closing price over time.
df.plot()


# In[16]:


# Summary statistics of the period-over-period percentage change.
df.pct_change().describe()


# In[18]:


# Distribution of the percentage changes, in 100 bins.
df.pct_change().hist(bins=100)
| 62 | + |
| 63 | + |
| 64 | +# In[19]: |
| 65 | + |
| 66 | + |
def CryptoDataCSV(symbol, frequency):
    """Download one Poloniex currency pair and save it as '<symbol>.csv'.

    Params:
        symbol: currency pair as a string, e.g. 'USDT_BTC'; also used as the
            CSV file name (written to the current working directory).
        frequency: candle period as an int; the original note lists
            300, 900, 1800, 7200, 14400 and 86400.

    Returns:
        None. The data, indexed by 'date', is written to disk and a
        'Processed: <symbol>' line is printed.
    """
    from urllib.parse import urlencode

    # Build the query with urlencode so every parameter is joined by a real
    # '&'. The hand-concatenated URL had '&currencyPair' collapsed into the
    # '¤' character (HTML entity &curren), which produced a malformed request.
    query = urlencode({
        'command': 'returnChartData',
        'currencyPair': symbol,
        'end': 9999999999,
        'period': frequency,
        'start': 0,
    })
    df = pd.read_json('https://poloniex.com/public?' + query)
    df.set_index('date', inplace=True)
    df.to_csv(symbol + '.csv')
    print('Processed: ' + symbol)
| 75 | + |
| 76 | + |
| 77 | +# In[20]: |
| 78 | + |
| 79 | + |
| 80 | +import pandas as pd |
| 81 | +import matplotlib.pyplot as plt |
# Enable inline plots when running inside IPython/Jupyter. `get_ipython` only
# exists there, so a plain `python` run skips the magic instead of raising
# NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass


# In[22]:


# Currency pairs to download.
tickers = ['USDT_BTC','USDT_BCH','USDT_ETC','USDT_XMR','USDT_ETH','USDT_DASH',
           'USDT_XRP','USDT_LTC','USDT_NXT','USDT_STR','USDT_REP','USDT_ZEC']


# In[25]:


# Fetch every pair at the 86400 period and store it as '<ticker>.csv'.
for ticker in tickers:
    CryptoDataCSV(ticker, 86400)
| 98 | + |
| 99 | + |
| 100 | +# In[26]: |
| 101 | + |
| 102 | + |
# Pairs used for the comparison: the download list without USDT_BCH.
tickers = [
    'USDT_BTC', 'USDT_ETC', 'USDT_XMR', 'USDT_ETH', 'USDT_DASH',
    'USDT_XRP', 'USDT_LTC', 'USDT_NXT', 'USDT_STR', 'USDT_REP', 'USDT_ZEC',
]


# In[27]:


# One 'close' column per pair, read back from the CSVs written earlier;
# rows where any pair has no value are then discarded.
crypto_df = pd.DataFrame()
for ticker in tickers:
    crypto_df[ticker] = pd.read_csv(ticker+'.csv', index_col='date').loc[:, 'close']
crypto_df = crypto_df.dropna()


# In[28]:


crypto_df.head(n=5)


# In[30]:


# Rebase every pair to 1.0 at the first remaining row.
crypto_df_norm = crypto_df / crypto_df.iloc[0]


# In[31]:


crypto_df_norm.plot()


# In[32]:


# Row-over-row percentage changes (the first row has none and is dropped).
crypto_df_pct = crypto_df.pct_change().dropna(how='any')


# In[34]:


# Pairwise correlation of the percentage changes.
corr = crypto_df_pct.corr(method='pearson')
| 144 | + |
| 145 | + |
| 146 | +# In[37]: |
| 147 | + |
| 148 | + |
import seaborn as sns
# Heatmap of the return correlations, labelled with the pair names.
sns.heatmap(corr,
            xticklabels=corr.columns.values,
            yticklabels=corr.columns.values)


# In[38]:


# Correlation between the USDT_XMR and USDT_DASH returns
# (row 'USDT_DASH' of column 'USDT_XMR').
corr.loc['USDT_DASH', 'USDT_XMR']


# In[39]:


plt.scatter(crypto_df_pct['USDT_DASH'],crypto_df_pct['USDT_XMR'])
plt.xlabel('USDT_DASH % Return')
plt.ylabel('USDT_XMR % Return')


# In[40]:


import statsmodels.api as sm
# Regress XMR returns on DASH returns. No constant column is added to the
# regressors, so the model has a single coefficient (the slope).
model = sm.OLS(crypto_df_pct['USDT_XMR'],
               crypto_df_pct['USDT_DASH']).fit()
model.summary()


# In[43]:


# Recorded result of the fit above:
# 'XMR % ret = DASH % ret * 0.6451'


# In[42]:


# `model.params` is labelled by regressor name, so take the slope by position
# with .iloc; plain `params[0]` relies on a deprecated positional fallback.
slope = model.params.iloc[0]
# Fitted line, computed for all DASH returns at once.
line = crypto_df_pct['USDT_DASH'] * slope
plt.plot(crypto_df_pct['USDT_DASH'], line, c = 'r')
plt.scatter(crypto_df_pct['USDT_DASH'],crypto_df_pct['USDT_XMR'])
plt.xlabel('USDT_DASH % Return')
plt.ylabel('USDT_XMR % Return')
| 192 | + |
| 193 | + |
| 194 | +# In[44]: |
| 195 | + |
| 196 | + |
def CryptoData(symbol, frequency):
    """Download the chart data of one Poloniex currency pair.

    Params:
        symbol: currency pair as a string, e.g. 'BTC_LTC'.
        frequency: candle period as an int; the original note lists
            300, 900, 1800, 7200, 14400 and 86400.

    Returns:
        DataFrame indexed by 'date', requested with start=0 (i.e. from the
        first available date).
    """
    from urllib.parse import urlencode

    # Build the query with urlencode so every parameter is joined by a real
    # '&'. The hand-concatenated URL had '&currencyPair' collapsed into the
    # '¤' character (HTML entity &curren), which produced a malformed request.
    query = urlencode({
        'command': 'returnChartData',
        'currencyPair': symbol,
        'end': 9999999999,
        'period': frequency,
        'start': 0,
    })
    df = pd.read_json('https://poloniex.com/public?' + query)
    df.set_index('date', inplace=True)
    return df
| 204 | + |
| 205 | + |
| 206 | +# In[45]: |
| 207 | + |
| 208 | + |
| 209 | +import pandas as pd |
| 210 | +import numpy as np |
| 211 | +import matplotlib.pyplot as plt |
| 212 | +import seaborn as sns |
# Enable inline plots when running inside IPython/Jupyter. `get_ipython` only
# exists there, so a plain `python` run skips the magic instead of raising
# NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass


# In[46]:


# BTC_LTC chart data at the 300 period.
df = CryptoData(symbol = 'BTC_LTC', frequency = 300)


# In[47]:


# Simple moving averages of the close over 1000 and 5000 rows.
df['SMA_1000'] = df['close'].rolling(1000).mean()
df['SMA_5000'] = df['close'].rolling(5000).mean()
# Plot from row 270000 onwards; .iloc makes the positional slice explicit.
df[['close','SMA_1000','SMA_5000']].iloc[270000:].plot(figsize = (16,10))
| 228 | + |
| 229 | + |
| 230 | +# In[48]: |
| 231 | + |
| 232 | + |
| 233 | +def test_ma(df, lead, lag, pc_thresh = 0.025): |
| 234 | + ma_df = df.copy() |
| 235 | + ma_df['lead'] = ma_df['close'].rolling(lead).mean() |
| 236 | + ma_df['lag'] = ma_df['close'].rolling(lag).mean() |
| 237 | + ma_df.dropna(inplace = True) |
| 238 | + ma_df['lead-lag'] = ma_df['lead'] - ma_df['lag'] |
| 239 | + ma_df['pc_diff'] = ma_df['lead-lag'] / ma_df['close'] |
| 240 | + ma_df['regime'] = np.where(ma_df['pc_diff'] > pc_thresh, 1, 0) |
| 241 | + ma_df['regime'] = np.where(ma_df['pc_diff'] < -pc_thresh, -1, ma_df['regime']) |
| 242 | + ma_df['Market'] = np.log(ma_df['close'] / ma_df['close'].shift(1)) |
| 243 | + ma_df['Strategy'] = ma_df['regime'].shift(1) * ma_df['Market'] |
| 244 | + ma_df[['Market','Strategy']] = ma_df[['Market','Strategy']].cumsum().apply(np.exp) |
| 245 | + return ma_df |
| 246 | + |
| 247 | + |
| 248 | +# In[49]: |
| 249 | + |
| 250 | + |
# Backtest with a 1000-row lead and a 5000-row lag; drop any remaining rows
# that contain NaN.
ma_df = test_ma(df, lead=1000, lag=5000).dropna()


# In[50]:


# Position taken over time.
ma_df.loc[:, 'regime'].plot(figsize=(16, 5))


# In[ ]:


# Final cumulative return of the market and of the strategy.
ma_df.loc[:, ['Market', 'Strategy']].iloc[-1]


# In[ ]:


'''Market 0.422360
Strategy 10.384434
Name: 2017-10-11 13:10:00, dtype: float64'''


# In[51]:


# Cumulative returns from row 200000 onwards.
ma_df.loc[:, ['Market', 'Strategy']].iloc[200000:].plot(figsize=(16, 10))
| 278 | + |
| 279 | + |
| 280 | +# In[52]: |
| 281 | + |
| 282 | + |
# Grid of look-back windows: leads 100..4000 and lags 4100..8000, step 100.
leads = np.arange(100, 4100, 100)
lags = np.arange(4100, 8100, 100)
lead_lags = [[lead,lag] for lead in leads for lag in lags]
# Result table: one row per lag, one column per lead.
pnls = pd.DataFrame(index=lags,columns = leads)


# In[ ]:


for lead, lag in lead_lags:
    # Write with a single .loc call: chained `pnls[lead][lag] = ...` assigns
    # into a temporary and does not update `pnls` under copy-on-write.
    # .iloc[-1] takes the last row by position; `[-1]` on a date-indexed
    # Series relies on a deprecated positional fallback.
    pnls.loc[lag, lead] = test_ma(df, lead, lag)['Strategy'].iloc[-1]
    print(lead,lag,pnls.loc[lag, lead])


# In[ ]:


PNLs = pnls[pnls.columns].astype(float)
plt.subplots(figsize = (14,10))
# The heatmap call had been turned into a string because its colormap name
# was wrapped in typographic quotes; restored with plain quotes.
sns.heatmap(PNLs, cmap='PiYG')


# In[ ]:


# Best final strategy value for each lead.
PNLs.max()


# In[ ]:


# Final strategy value for lead=900, lag=6600.
PNLs.loc[6600, 900]
| 315 | + |
| 316 | + |
| 317 | +# In[ ]: |
| 318 | + |
| 319 | + |
| 320 | +'''Transaction Costs |
| 321 | +
|
| 322 | +Commissions. We have assumed no transaction costs, even though typical exchanges charge 25 basis point (bps) per dollar transacted. This would have negative impact on PnL. |
| 323 | +
|
| 324 | +Shorting. We assume that we can openly short a cryptocurrency pair and that we pay no fees for holding short positions. In reality, some exchanges do not support shorting and if they do, other fees are associated with such transactions. |
| 325 | +
|
| 326 | +Slippage. Another assumption is that we can always get filled on the close price. Given how ‘thin’ some crypto pairs books are, other things being equal, we will get filled at progressively worse prices as our positions grow in size. In addition, as other traders may use similar signals, it will only increase the chances that the price may “run away” from us as we try and get a fill. |
| 327 | +
|
| 328 | +Market Impact. In our backtest, we assume that our trades have no impact on subsequent market dynamics. In reality, the market can react positively or negatively to a trade. Backtesting market impact creates a never-ending spiral of complexity, as it depends, among other things, on liquidity, the number of market participants and the different states of the market. |
| 329 | +
|
| 330 | +Biases |
| 331 | +
|
| 332 | +Overfitting. When we optimised for the best possible combination of leading and lagging look-back periods, we took the available historical data and threw a bunch of numbers at it to see what sticks. Whilst we did find a pattern that suggested that the best PnLs are the ones whose lead / lag ratio is around 1/8, we ultimately did that on historical data and there is no guarantee that the same results would hold for live performance. In order to overcome this phenomenon, we could split our data into two sets — the one we find the best parameters on and the one we test these parameters on. If the test PnL holds up, it is safe to assume that the parameters are significant. There is a whole field of statistics dedicated primarily to the mitigation of overfitting. |
| 333 | +
|
| 334 | +Exchange Risk |
| 335 | +
|
| 336 | +Last but definitely not least, it is almost impossible to model exchange risk. Historically, a large portion of exchanges get hacked or otherwise compromised. Finding trustworthy exchanges requires further research.''' |
| 337 | + |
0 commit comments