Skip to content

Commit ce53771

Browse files
authored
Merge pull request #183 from Masmar25/Masmar25-Resampling_partial_fix
Resampling: label = left
2 parents e690ae2 + ffd9b28 commit ce53771

File tree

2 files changed

+61
-34
lines changed

2 files changed

+61
-34
lines changed

technical/util.py

Lines changed: 40 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
defines utility functions to be used
33
"""
4-
from pandas import DataFrame, DatetimeIndex, merge, to_datetime
4+
from pandas import DataFrame, DatetimeIndex, merge, to_datetime, to_timedelta
55

66
TICKER_INTERVAL_MINUTES = {
77
"1m": 1,
@@ -45,63 +45,74 @@ def ticker_history_to_dataframe(ticker: list) -> DataFrame:
4545
return frame
4646

4747

48-
def resample_to_interval(dataframe, interval):
49-
if isinstance(interval, str):
50-
interval = TICKER_INTERVAL_MINUTES[interval]
51-
48+
def resample_to_interval(dataframe: DataFrame, interval):
5249
"""
53-
resamples the given dataframe to the desired interval.
54-
Please be aware you need to upscale this to join the results
55-
with the other dataframe
50+
Resamples the given dataframe to the desired interval.
51+
Please be aware you need to use resampled_merge to merge to another dataframe to
52+
avoid lookahead bias
5653
5754
:param dataframe: dataframe containing close/high/low/open/volume
5855
:param interval: to which ticker value in minutes would you like to resample it
5956
:return:
6057
"""
58+
if isinstance(interval, str):
59+
interval = TICKER_INTERVAL_MINUTES[interval]
6160

6261
df = dataframe.copy()
6362
df = df.set_index(DatetimeIndex(df["date"]))
6463
ohlc_dict = {"open": "first", "high": "max", "low": "min", "close": "last", "volume": "sum"}
65-
df = df.resample(str(interval) + "min", label="right").agg(ohlc_dict).dropna()
66-
df["date"] = df.index
64+
# Resample to "left" border as dates are candle open dates
65+
df = df.resample(str(interval) + "min", label="left").agg(ohlc_dict).dropna()
66+
df.reset_index(inplace=True)
6767

6868
return df
6969

7070

71-
def resampled_merge(original, resampled, fill_na=False):
71+
def resampled_merge(original: DataFrame, resampled: DataFrame, fill_na=True):
7272
"""
73-
this method merges a resampled dataset back into the orignal data set
73+
Merges a resampled dataset back into the original data set.
74+
Resampled candle will match OHLC only if full timespan is available in original dataframe.
7475
7576
:param original: the original non resampled dataset
7677
:param resampled: the resampled dataset
7778
:return: the merged dataset
7879
"""
7980

80-
resampled_interval = compute_interval(resampled)
81-
82-
# no point in interpolating these colums
83-
resampled = resampled.drop(columns=["date", "volume"])
84-
85-
# rename all the colums to the correct interval
86-
for header in list(resampled):
87-
# store the resampled columns in it
88-
resampled[f"resample_{resampled_interval}_{header}"] = resampled[header]
89-
90-
# drop columns which should not be joined
91-
resampled = resampled.drop(columns=["open", "high", "low", "close"])
92-
93-
resampled["date"] = resampled.index
94-
resampled.index = range(len(resampled))
95-
dataframe = merge(original, resampled, on="date", how="left")
81+
original_int = compute_interval(original)
82+
resampled_int = compute_interval(resampled)
83+
84+
if original_int < resampled_int:
85+
# Subtract "small" timeframe so merging is not delayed by 1 small candle.
86+
# Detailed explanation in https://github.com/freqtrade/freqtrade/issues/4073
87+
resampled["date_merge"] = (
88+
resampled["date"] + to_timedelta(resampled_int, "m") - to_timedelta(original_int, "m")
89+
)
90+
else:
91+
raise ValueError(
92+
"Tried to merge a faster timeframe to a slower timeframe." "Upsampling is not possible."
93+
)
94+
95+
# rename all the columns to the correct interval
96+
resampled.columns = [f"resample_{resampled_int}_{col}" for col in resampled.columns]
97+
98+
dataframe = merge(
99+
original,
100+
resampled,
101+
how="left",
102+
left_on="date",
103+
right_on=f"resample_{resampled_int}_date_merge",
104+
)
105+
dataframe = dataframe.drop(f"resample_{resampled_int}_date_merge", axis=1)
96106

97107
if fill_na:
98108
dataframe.fillna(method="ffill", inplace=True)
109+
99110
return dataframe
100111

101112

102113
def compute_interval(dataframe: DataFrame, exchange_interval=False):
103114
"""
104-
calculates the interval of the given dataframe for us
115+
Calculates the interval of the given dataframe for us
105116
:param dataframe:
106117
:param exchange_interval: should we convert the result to an exchange interval or just a number
107118
:return:

tests/test_util.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import json
22

3+
import pandas as pd
4+
35
from technical.indicators import chaikin_money_flow
46
from technical.util import resample_to_interval, resampled_merge, ticker_history_to_dataframe
57

@@ -29,13 +31,27 @@ def test_resampled_merge(testdata_1m_btc):
2931
assert "resample_5_low" in merged
3032
assert "resample_5_high" in merged
3133

32-
assert "resample_5_date" not in merged
33-
assert "resample_5_volume" not in merged
34+
assert "resample_5_date" in merged
35+
assert "resample_5_volume" in merged
3436
# Verify the assignment goes to the correct candle
3537
# If resampling to 5m, then the resampled value needs to be on the 5m candle.
36-
assert sum(merged.loc[merged["date"] == "2017-11-14 22:54:00", "resample_5_close"].isna()) == 1
37-
assert sum(merged.loc[merged["date"] == "2017-11-14 22:55:00", "resample_5_close"].isna()) == 0
38-
assert sum(merged.loc[merged["date"] == "2017-11-14 22:56:00", "resample_5_close"].isna()) == 1
38+
date = pd.to_datetime("2017-11-14 22:45:00", utc=True)
39+
assert merged.loc[merged["date"] == "2017-11-14 22:48:00", "resample_5_date"].iloc[0] != date
40+
# The 5m candle for 22:45 is available at 22:50,
41+
# when both :49 1m and :45 5m candles close
42+
assert merged.loc[merged["date"] == "2017-11-14 22:49:00", "resample_5_date"].iloc[0] == date
43+
assert merged.loc[merged["date"] == "2017-11-14 22:50:00", "resample_5_date"].iloc[0] == date
44+
assert merged.loc[merged["date"] == "2017-11-14 22:51:00", "resample_5_date"].iloc[0] == date
45+
assert merged.loc[merged["date"] == "2017-11-14 22:52:00", "resample_5_date"].iloc[0] == date
46+
assert merged.loc[merged["date"] == "2017-11-14 22:53:00", "resample_5_date"].iloc[0] == date
47+
# The 5m candle for 22:50 is available at 22:55,
48+
# when both :54 1m and :50 5m candles close
49+
date = pd.to_datetime("2017-11-14 22:50:00", utc=True)
50+
assert merged.loc[merged["date"] == "2017-11-14 22:54:00", "resample_5_date"].iloc[0] == date
51+
assert merged.loc[merged["date"] == "2017-11-14 22:55:00", "resample_5_date"].iloc[0] == date
52+
assert merged.loc[merged["date"] == "2017-11-14 22:56:00", "resample_5_date"].iloc[0] == date
53+
assert merged.loc[merged["date"] == "2017-11-14 22:57:00", "resample_5_date"].iloc[0] == date
54+
assert merged.loc[merged["date"] == "2017-11-14 22:58:00", "resample_5_date"].iloc[0] == date
3955

4056

4157
def test_resampled_merge_contains_indicator(testdata_1m_btc):

0 commit comments

Comments
 (0)