diff --git a/feature_sources/__init__.py b/feature_sources/__init__.py index face94c..93fec1b 100644 --- a/feature_sources/__init__.py +++ b/feature_sources/__init__.py @@ -38,5 +38,16 @@ KrakenKlineFeatureSource, KrakenKlineField, ) +from .lunarcrush_time_series_feature_source import ( + LunarCrushMetric, + LunarCrushTimeSeriesTopic, + LunarCrushTimeSeriesCategory, + LunarCrushTimeSeriesCoin, + LunarCrushTimeSeriesStock, +) from .temporal_feature_source import TemporalFeatureSource from .test_feature_source import TestFeatureSource +from .yahoo_finance_kline_feature_source import ( + YahooFinanceKlineField, + YahooFinanceKlineFeatureSource, +) diff --git a/feature_sources/binance_kline_feature_source.py b/feature_sources/binance_kline_feature_source.py index d9b5d68..e562e1c 100644 --- a/feature_sources/binance_kline_feature_source.py +++ b/feature_sources/binance_kline_feature_source.py @@ -12,6 +12,7 @@ import statistics import time from time_util import current_interval_ms, time_span_ms +from urllib.parse import urlencode class BinanceKlineField(IntEnum): @@ -36,7 +37,8 @@ class BinanceKlineFeatureSource(FeatureSource): _QUERY_LIMIT = 1000 - # Must be in ascending order + _URL = "https://api.binance.com/api/v3/klines" + _INTERVALS = { time_span_ms(minutes=1): "1m", time_span_ms(minutes=3): "3m", @@ -94,11 +96,16 @@ def __init__( self.VALID_FEATURE_IDS = feature_ids super().__init__(feature_ids, feature_dtypes, default_dtype) - self._symbol = symbol + query_parameters = { + "symbol": symbol, + "interval": query_interval, + "limit": self._QUERY_LIMIT, + } + self._source_interval_ms = source_interval_ms - self._query_interval = query_interval self._feature_mappings = feature_mappings self._fields = list(feature_mappings.values()) + self._query_parameters = query_parameters self._retries = retries self._logger = getLogger(self.__class__.__name__) @@ -112,10 +119,10 @@ def _convert_samples(self, data_rows: list[list]) -> None: for row in data_rows: 
self._convert_sample(row) - # TODO: Examine moving this functionality into the FeatureSource base class + # TODO: Examine moving this into FeatureSource base class def _compact_samples(self, samples: list[list]) -> list: result = samples[-1].copy() - for field in BinanceKlineField: + for field in self._fields: compaction = self._FIELD_COMPACTIONS.get(field, FeatureCompaction.LAST) if compaction == FeatureCompaction.LAST: continue @@ -148,28 +155,37 @@ def get_feature_samples( _OPEN_TIME = BinanceKlineField.OPEN_TIME # Binance uses open time for queries - open_start_time_ms = start_time_ms - interval_ms + query_start_time_ms = start_time_ms - interval_ms # Align on interval so queries for 1 sample include at least 1 sample - open_start_time_ms = current_interval_ms( - open_start_time_ms, self._source_interval_ms + query_start_time_ms = current_interval_ms( + query_start_time_ms, self._source_interval_ms ) - open_end_time_ms = start_time_ms + (interval_ms * (sample_count - 2)) + end_time_ms = start_time_ms + (interval_ms * (sample_count - 1)) + query_parameters = self._query_parameters.copy() data_rows = [] retries = self._retries # Loop for pagination - while True: - url = ( - "https://api.binance.com/api/v3/klines" - f"?symbol={self._symbol}&interval={self._query_interval}" - f"&startTime={open_start_time_ms}&endTime={open_end_time_ms}" - f"&limit={self._QUERY_LIMIT}" + while query_start_time_ms < end_time_ms: + page_sample_count = ( + end_time_ms - query_start_time_ms + ) / self._source_interval_ms + page_sample_count = int(min(page_sample_count, self._QUERY_LIMIT)) + + if page_sample_count < 1: + break + + query_end_time_ms = query_start_time_ms + ( + self._source_interval_ms * (page_sample_count - 1) ) + query_parameters["startTime"] = str(query_start_time_ms) + query_parameters["endTime"] = str(query_end_time_ms) + url = self._URL + "?" 
+ urlencode(query_parameters) + success = False - response_row_count = 0 # Loop for retries while True: try: @@ -189,7 +205,6 @@ def get_feature_samples( ) else: response_rows = response.json() - response_row_count = len(response_rows) data_rows.extend(response_rows) success = True @@ -208,11 +223,9 @@ def get_feature_samples( retries -= 1 time.sleep(self.RETRY_DELAY) - if response_row_count != self._QUERY_LIMIT: - break - - open_start_time_ms = data_rows[-1][_OPEN_TIME] + self._source_interval_ms + query_start_time_ms = query_end_time_ms + self._source_interval_ms + # TODO: Examine moving the rest of this function into FeatureSource base class row_count = len(data_rows) if row_count == 0: raise RuntimeError("No samples received.") diff --git a/feature_sources/bybit_kline_feature_source.py b/feature_sources/bybit_kline_feature_source.py index f303b0e..ff416fb 100644 --- a/feature_sources/bybit_kline_feature_source.py +++ b/feature_sources/bybit_kline_feature_source.py @@ -11,6 +11,7 @@ import statistics import time from time_util import current_interval_ms, time_span_ms +from urllib.parse import urlencode class BybitKlineField(IntEnum): @@ -31,7 +32,8 @@ class BybitKlineFeatureSource(FeatureSource): _QUERY_LIMIT = 1000 - # Must be in ascending order + _URL = "https://api.bybit.com/v5/market/kline" + _INTERVALS = { time_span_ms(minutes=1): "1", time_span_ms(minutes=3): "3", @@ -84,12 +86,17 @@ def __init__( self.VALID_FEATURE_IDS = feature_ids super().__init__(feature_ids, feature_dtypes, default_dtype) - self._category = category - self._symbol = symbol + query_parameters = { + "category": category, + "symbol": symbol, + "interval": query_interval, + "limit": self._QUERY_LIMIT, + } + self._source_interval_ms = source_interval_ms - self._query_interval = query_interval self._feature_mappings = feature_mappings self._fields = list(feature_mappings.values()) + self._query_parameters = query_parameters self._retries = retries self._logger = 
getLogger(self.__class__.__name__) @@ -107,7 +114,7 @@ def _convert_samples(self, data_rows: list[list]) -> None: def _compact_samples(self, samples: list[list]) -> list: result = samples[-1].copy() - for field in BybitKlineField: + for field in self._fields: compaction = self._FIELD_COMPACTIONS.get(field, FeatureCompaction.LAST) if compaction == FeatureCompaction.LAST: continue @@ -140,30 +147,36 @@ def get_feature_samples( _OPEN_TIME = BybitKlineField.OPEN_TIME # Bybit uses open time for queries - open_start_time_ms = start_time_ms - interval_ms + query_start_time_ms = start_time_ms - interval_ms # Align on interval so queries for 1 sample include at least 1 sample - open_start_time_ms = current_interval_ms( - open_start_time_ms, self._source_interval_ms + query_start_time_ms = current_interval_ms( + query_start_time_ms, self._source_interval_ms ) + end_time_ms = start_time_ms + (interval_ms * (sample_count - 1)) + + query_parameters = self._query_parameters.copy() data_rows = [] retries = self._retries - samples_left = sample_count # Loop for pagination - while True: - page_sample_count = min(samples_left, self._QUERY_LIMIT) - open_end_time_ms = open_start_time_ms + ( - interval_ms * (page_sample_count - 1) - ) + while query_start_time_ms < end_time_ms: + page_sample_count = ( + end_time_ms - query_start_time_ms + ) / self._source_interval_ms + page_sample_count = int(min(page_sample_count, self._QUERY_LIMIT)) - url = ( - "https://api.bybit.com/v5/market/kline" - f"?category={self._category}&symbol={self._symbol}" - f"&start={open_start_time_ms}&end={open_end_time_ms}" - f"&interval={self._query_interval}&limit={self._QUERY_LIMIT}" + if page_sample_count < 1: + break + + query_end_time_ms = query_start_time_ms + ( + self._source_interval_ms * (page_sample_count - 1) ) + query_parameters["start"] = str(query_start_time_ms) + query_parameters["end"] = str(query_end_time_ms) + url = self._URL + "?" 
+ urlencode(query_parameters) + success = False # Loop for retries while True: @@ -200,13 +213,7 @@ def get_feature_samples( retries -= 1 time.sleep(self.RETRY_DELAY) - samples_left -= page_sample_count - if samples_left <= 0: - break - - open_start_time_ms = ( - int(data_rows[-1][_OPEN_TIME]) + self._source_interval_ms - ) + query_start_time_ms = query_end_time_ms + self._source_interval_ms row_count = len(data_rows) if row_count == 0: diff --git a/feature_sources/coin_metrics_feature_source.py b/feature_sources/coin_metrics_feature_source.py index 67da835..d833b9c 100644 --- a/feature_sources/coin_metrics_feature_source.py +++ b/feature_sources/coin_metrics_feature_source.py @@ -153,14 +153,13 @@ def _convert_metric(self, metric: str, value): value = float(value) return value - def _convert_sample(self, sample: dict) -> dict: - results = {} + def _convert_sample(self, sample: dict) -> None: for metric in self._convert_metrics: - results[metric] = self._convert_metric(metric, sample[metric]) - return results + sample[metric.value] = self._convert_metric(metric, sample[metric]) - def _convert_samples(self, data_rows: list[dict]) -> list[dict]: - return [self._convert_sample(row) for row in data_rows] + def _convert_samples(self, data_rows: list[dict]) -> None: + for row in data_rows: + self._convert_sample(row) def _compact_samples(self, samples: list[dict]) -> dict: result = samples[-1].copy() @@ -216,19 +215,22 @@ def get_feature_samples( interval_ms: int, sample_count: int, ) -> dict[FeatureID, ndarray]: - query_start_time_ms = start_time_ms + _OPEN_TIME = CoinMetric.TIME + + # Coin Metrics uses open time for queries + query_start_time_ms = start_time_ms - interval_ms # Align on interval so queries for 1 sample include at least 1 sample query_start_time_ms = current_interval_ms( query_start_time_ms, self._source_interval_ms ) + end_time_ms = start_time_ms + (interval_ms * (sample_count - 1)) + # Times must be preformatted because Coin Metrics rejects times with 
# the ISO timezone suffix for UTC ("+00:00") and their Python # library doesn't format it for their preference start_time = datetime.fromtimestamp_ms(query_start_time_ms) - # TODO: Subtract 1 from sample_count? - end_time_ms = start_time_ms + (interval_ms * sample_count) end_time = datetime.fromtimestamp_ms(end_time_ms) start_time_string = start_time.to_iso8601_string() end_time_string = end_time.to_iso8601_string() @@ -240,9 +242,7 @@ def get_feature_samples( if row_count == 0: raise RuntimeError("No samples received.") - # TODO: Change to inplace conversion? - converted_samples = self._convert_samples(data_rows) - + self._convert_samples(data_rows) feature_samples = self._create_feature_samples(sample_count) sample_time_ms = start_time_ms @@ -252,8 +252,8 @@ def get_feature_samples( compact_samples = self._compact_samples for sample_index in range(sample_count): while True: - row = converted_samples[row_index] - row_time_ms = row[CoinMetric.TIME] + row = data_rows[row_index] + row_time_ms = row[_OPEN_TIME] + self._source_interval_ms if row_time_ms > sample_time_ms: break interval_rows.append(row) diff --git a/feature_sources/coinbase_kline_feature_source.py b/feature_sources/coinbase_kline_feature_source.py index 4714188..ea13853 100644 --- a/feature_sources/coinbase_kline_feature_source.py +++ b/feature_sources/coinbase_kline_feature_source.py @@ -12,6 +12,7 @@ import statistics import time from time_util import current_interval_ms, time_span_ms +from urllib.parse import quote, urlencode class CoinbaseKlineField(IntEnum): @@ -60,17 +61,23 @@ def __init__( ): if source_interval_ms not in self._INTERVALS: raise ValueError(f"interval_ms {source_interval_ms} is not supported.") - query_interval = int(source_interval_ms / time_span_ms(seconds=1)) + query_interval = str(int(source_interval_ms / time_span_ms(seconds=1))) feature_ids = list(feature_mappings.keys()) self.VALID_FEATURE_IDS = feature_ids super().__init__(feature_ids, feature_dtypes, default_dtype) - 
self._symbol = symbol + query_parameters = { + "granularity": query_interval, + } + + symbol = quote(symbol, safe="") + self._source_interval_ms = source_interval_ms - self._query_interval = query_interval self._feature_mappings = feature_mappings self._fields = list(feature_mappings.values()) + self._url = f"https://api.exchange.coinbase.com/products/{symbol}/candles" + self._query_parameters = query_parameters self._retries = retries self._logger = getLogger(self.__class__.__name__) @@ -83,7 +90,7 @@ def _convert_samples(self, data_rows: list[list]) -> None: def _compact_samples(self, samples: list[list]) -> list: result = samples[-1].copy() - for field in CoinbaseKlineField: + for field in self._fields: compaction = self._FIELD_COMPACTIONS.get(field, FeatureCompaction.LAST) if compaction == FeatureCompaction.LAST: continue @@ -116,32 +123,39 @@ def get_feature_samples( _OPEN_TIME = CoinbaseKlineField.OPEN_TIME # Coinbase uses open time for queries - open_start_time_ms = start_time_ms - interval_ms + query_start_time_ms = start_time_ms - interval_ms # Align on interval so queries for 1 sample include at least 1 sample - open_start_time_ms = current_interval_ms( - open_start_time_ms, self._source_interval_ms + query_start_time_ms = current_interval_ms( + query_start_time_ms, self._source_interval_ms ) + end_time_ms = start_time_ms + (interval_ms * (sample_count - 1)) + + query_parameters = self._query_parameters.copy() data_rows = [] retries = self._retries - samples_left = sample_count # Loop for pagination - while True: - page_sample_count = min(samples_left, self._QUERY_LIMIT) - open_end_time_ms = open_start_time_ms + ( - interval_ms * (page_sample_count - 1) - ) + while query_start_time_ms < end_time_ms: + page_sample_count = ( + end_time_ms - query_start_time_ms + ) / self._source_interval_ms + page_sample_count = int(min(page_sample_count, self._QUERY_LIMIT)) - query_start_time = int(open_start_time_ms / time_span_ms(seconds=1)) - query_end_time = 
int(open_end_time_ms / time_span_ms(seconds=1)) + if page_sample_count < 1: + break - url = ( - f"https://api.exchange.coinbase.com/products/{self._symbol}/candles" - f"?granularity={self._query_interval}&start={query_start_time}" - f"&end={query_end_time}" + query_end_time_ms = query_start_time_ms + ( + self._source_interval_ms * (page_sample_count - 1) ) + query_start_time = int(query_start_time_ms / time_span_ms(seconds=1)) + query_end_time = int(query_end_time_ms / time_span_ms(seconds=1)) + + query_parameters["start"] = str(query_start_time) + query_parameters["end"] = str(query_end_time) + url = self._url + "?" + urlencode(query_parameters) + success = False # Loop for retries while True: @@ -177,13 +191,7 @@ def get_feature_samples( retries -= 1 time.sleep(self.RETRY_DELAY) - samples_left -= page_sample_count - if samples_left <= 0: - break - - open_start_time_ms = ( - data_rows[-1][_OPEN_TIME] * time_span_ms(seconds=1) - ) + self._source_interval_ms + query_start_time_ms = query_end_time_ms + self._source_interval_ms row_count = len(data_rows) if row_count == 0: diff --git a/feature_sources/kraken_kline_feature_source.py b/feature_sources/kraken_kline_feature_source.py index eb14e20..d2a8abf 100644 --- a/feature_sources/kraken_kline_feature_source.py +++ b/feature_sources/kraken_kline_feature_source.py @@ -11,6 +11,7 @@ import statistics import time from time_util import current_interval_ms, time_span_ms +from urllib.parse import urlencode class KrakenKlineField(IntEnum): @@ -34,7 +35,8 @@ class KrakenKlineFeatureSource(FeatureSource): _QUERY_LIMIT = 720 - # Must be in ascending order + _URL = "https://api.kraken.com/0/public/OHLC" + _INTERVALS = [ time_span_ms(minutes=1), time_span_ms(minutes=5), @@ -83,11 +85,16 @@ def __init__( self.VALID_FEATURE_IDS = feature_ids super().__init__(feature_ids, feature_dtypes, default_dtype) + query_parameters = { + "pair": symbol, + "interval": query_interval, + } + self._symbol = symbol self._source_interval_ms = 
source_interval_ms - self._query_interval = query_interval self._feature_mappings = feature_mappings self._fields = list(feature_mappings.values()) + self._query_parameters = query_parameters self._retries = retries self._logger = getLogger(self.__class__.__name__) @@ -106,7 +113,7 @@ def _convert_samples(self, data_rows: list[list]) -> None: def _compact_samples(self, samples: list[list]) -> list: result = samples[-1].copy() - for field in KrakenKlineField: + for field in self._fields: compaction = self._FIELD_COMPACTIONS.get(field, FeatureCompaction.LAST) if compaction == FeatureCompaction.LAST: continue @@ -138,34 +145,40 @@ def get_feature_samples( ) -> dict[FeatureID, ndarray]: _OPEN_TIME = KrakenKlineField.OPEN_TIME - if sample_count > self._QUERY_LIMIT: - raise ValueError( - f"sample_count {interval_ms} is greater than the " - f"maximum of {self._QUERY_LIMIT}." - ) - # Kraken uses open time for queries - open_start_time_ms = start_time_ms - interval_ms + query_start_time_ms = start_time_ms - interval_ms # Align on interval so queries for 1 sample include at least 1 sample - open_start_time_ms = current_interval_ms( - open_start_time_ms, self._source_interval_ms + query_start_time_ms = current_interval_ms( + query_start_time_ms, self._source_interval_ms ) - open_end_time_ms = open_start_time_ms + (interval_ms * (sample_count - 2)) + end_time_ms = start_time_ms + (interval_ms * (sample_count - 1)) - query_since = int(open_start_time_ms / time_span_ms(seconds=1)) + page_sample_count = ( + end_time_ms - query_start_time_ms + ) / self._source_interval_ms + page_sample_count = int(min(page_sample_count, self._QUERY_LIMIT)) - url = ( - "https://api.kraken.com/0/public/OHLC" - f"?pair={self._symbol}&interval={self._query_interval}&since={query_since}" - ) + if page_sample_count > self._QUERY_LIMIT: + raise ValueError( + f"sample_count {sample_count} at interval_ms {interval_ms} with " + f"source_interval_ms {self._source_interval_ms} requires " + 
f"{page_sample_count} samples, which is greater than the " + f"maximum query limit of {self._QUERY_LIMIT}." + ) + + query_since = int(query_start_time_ms / time_span_ms(seconds=1)) + + query_parameters = self._query_parameters.copy() + query_parameters["since"] = query_since + url = self._URL + "?" + urlencode(query_parameters) data_rows = [] retries = self._retries success = False # Loop for retries - while True: + while page_sample_count > 0: try: response = requests.get(url) @@ -201,7 +214,7 @@ def get_feature_samples( first_row = data_rows[0] first_open_time_ms = first_row[_OPEN_TIME] * time_span_ms(seconds=1) - if first_open_time_ms > open_end_time_ms: + if first_open_time_ms > end_time_ms: raise RuntimeError("Requested timeframe is too far in the past.") self._convert_samples(data_rows) diff --git a/feature_sources/lunarcrush_time_series_feature_source.py b/feature_sources/lunarcrush_time_series_feature_source.py new file mode 100644 index 0000000..5bbf552 --- /dev/null +++ b/feature_sources/lunarcrush_time_series_feature_source.py @@ -0,0 +1,370 @@ +# developer: taoshi-mbrown +# Copyright © 2024 Taoshi Inc +from enum import Enum +from features import FeatureCompaction, FeatureID, FeatureSource +from http import HTTPStatus +from json import JSONDecodeError +from logging import getLogger +import math +import numpy as np +from numpy import ndarray +import os +import requests +import statistics +import time +from time_util import current_interval_ms, time_span_ms +import urllib.parse + + +class LunarCrushMetric(str, Enum): + OPEN_TIME = "time" + + POSTS_CREATED = "posts_created" + POSTS_ACTIVE = "posts_active" + INTERACTIONS = "interactions" + CONTRIBUTORS_CREATED = "contributors_created" + CONTRIBUTORS_ACTIVE = "contributors_active" + SENTIMENT = "sentiment" + SPAM = "spam" + + PRICE_OPEN = "open" + PRICE_CLOSE = "close" + PRICE_HIGH = "high" + PRICE_LOW = "low" + PRICE_FLOOR = "floor_price" + VOLUME = "volume" + VOLUME_24H = "volume_24h" + MARKET_CAP = 
"market_cap" + CIRCULATING_SUPPLY = "circulating_supply" + GALAXY_SCORE = "galaxy_score" + + VOLATILITY = "volatility" + ALT_RANK = "alt_rank" + SOCIAL_DOMINANCE = "social_dominance" + + +class LunarCrushTimeSeriesFeatureSource(FeatureSource): + DEFAULT_RETRIES = 3 + RETRY_DELAY = 1.0 + + _BASE_URL = "https://lunarcrush.com/api4/public" + _METRIC = "time-series" + _VERSION = "v1" + + _BUCKETS = { + time_span_ms(hours=1): "hour", + time_span_ms(days=1): "day", + } + + # Default is using last sample, so on only include other types + _METRIC_COMPACTIONS = { + LunarCrushMetric.POSTS_CREATED: FeatureCompaction.SUM, + LunarCrushMetric.POSTS_ACTIVE: FeatureCompaction.SUM, + LunarCrushMetric.INTERACTIONS: FeatureCompaction.SUM, + LunarCrushMetric.CONTRIBUTORS_CREATED: FeatureCompaction.SUM, + LunarCrushMetric.CONTRIBUTORS_ACTIVE: FeatureCompaction.SUM, + LunarCrushMetric.SPAM: FeatureCompaction.SUM, + LunarCrushMetric.PRICE_OPEN: FeatureCompaction.FIRST, + LunarCrushMetric.PRICE_HIGH: FeatureCompaction.MAX, + LunarCrushMetric.PRICE_LOW: FeatureCompaction.MIN, + LunarCrushMetric.VOLUME: FeatureCompaction.SUM, + LunarCrushMetric.VOLUME_24H: FeatureCompaction.SUM, + } + + def __init__( + self, + kind: str, + selector: str, + source_interval_ms: int, + feature_mappings: dict[FeatureID, LunarCrushMetric], + feature_dtypes: list[np.dtype] = None, + default_dtype: np.dtype = np.dtype(np.float32), + api_key: str = None, + retries: int = DEFAULT_RETRIES, + ): + bucket = self._BUCKETS.get(source_interval_ms) + if bucket is None: + raise ValueError(f"interval_ms {source_interval_ms} is not supported.") + + feature_ids = list(feature_mappings.keys()) + self.VALID_FEATURE_IDS = feature_ids + super().__init__(feature_ids, feature_dtypes, default_dtype) + + if api_key is None: + api_key = os.environ.get("LC_API_KEY") + + headers = {"Authorization": f"Bearer {api_key}"} + + kind = urllib.parse.quote(kind, safe="") + selector = urllib.parse.quote(selector, safe="") + + 
self._source_interval_ms = source_interval_ms + self._bucket = bucket + self._retries = retries + self._metrics = list(feature_mappings.values()) + self._convert_metrics = [LunarCrushMetric.OPEN_TIME, *self._metrics] + self._url = f"{self._BASE_URL}/{kind}/{selector}/{self._METRIC}/{self._VERSION}" + self._headers = headers + self._logger = getLogger(self.__class__.__name__) + + # noinspection PyMethodMayBeStatic + def _convert_metric(self, metric: str, value): + match metric: + case LunarCrushMetric.OPEN_TIME: + value *= time_span_ms(seconds=1) + case _: + if value is None: + value = 0 + else: + value = float(value) + return value + + def _convert_sample(self, sample: dict) -> None: + for metric in self._convert_metrics: + sample_value = sample.get(metric, 0) + sample[metric.value] = self._convert_metric(metric, sample_value) + + def _convert_samples(self, data_rows: list[dict]) -> None: + for row in data_rows: + self._convert_sample(row) + + def _compact_samples(self, samples: list[dict]) -> dict: + result = samples[-1].copy() + for metric in self._metrics: + compaction = self._METRIC_COMPACTIONS.get(metric, FeatureCompaction.LAST) + if compaction == FeatureCompaction.LAST: + continue + elif compaction == FeatureCompaction.FIRST: + result[metric] = samples[0][metric] + else: + values = [sample[metric] for sample in samples] + match compaction: + case FeatureCompaction.MIN: + metric_result = min(values) + case FeatureCompaction.MAX: + metric_result = max(values) + case FeatureCompaction.MEAN: + metric_result = statistics.mean(values) + case FeatureCompaction.MEDIAN: + metric_result = statistics.median(values) + case FeatureCompaction.MODE: + metric_result = statistics.mode(values) + case _: + metric_result = math.fsum(values) + result[metric] = metric_result + return result + + def get_feature_samples( + self, + start_time_ms: int, + interval_ms: int, + sample_count: int, + ) -> dict[FeatureID, ndarray]: + _OPEN_TIME = LunarCrushMetric.OPEN_TIME + + # LunarCrush 
uses open time for queries + open_start_time_ms = start_time_ms - interval_ms + + # Align on interval so queries for 1 sample include at least 1 sample + open_start_time_ms = current_interval_ms( + open_start_time_ms, self._source_interval_ms + ) + + open_start_time = int(open_start_time_ms / time_span_ms(seconds=1)) + end_time_ms = start_time_ms + (interval_ms * (sample_count - 2)) + end_time = int(end_time_ms / time_span_ms(seconds=1)) + + # Ensure that the end time is not the same as the start time + if end_time == open_start_time: + end_time += 1 + + query_parameters = { + "start": open_start_time, + "end": end_time, + "bucket": self._bucket, + } + url = self._url + "?" + urllib.parse.urlencode(query_parameters) + + data_rows = [] + retries = self._retries + + success = False + # Loop for retries + while True: + try: + response = requests.get(url, headers=self._headers) + + if response.status_code >= HTTPStatus.BAD_REQUEST: + try: + error_response = response.json() + error_message = error_response.get("error") + lunarcrush_error = f", LunarCrush error: {error_message}" + except JSONDecodeError: + lunarcrush_error = "" + self._logger.error( + f"HTTP error {response.status_code}: {response.reason}" + f"{lunarcrush_error}", + ) + else: + response_rows = response.json() + data_rows = response_rows["data"] + success = True + + except Exception as e: + self._logger.warning( + "Exception occurred requesting feature samples using " f"{url}: {e}" + ) + + if success or (retries == 0): + break + + retries -= 1 + time.sleep(self.RETRY_DELAY) + + row_count = len(data_rows) + if row_count == 0: + raise RuntimeError("No samples received.") + + self._convert_samples(data_rows) + feature_samples = self._create_feature_samples(sample_count) + + sample_time_ms = start_time_ms + interval_rows = [] + row_index = 0 + last_row_index = row_count - 1 + compact_samples = self._compact_samples + for sample_index in range(sample_count): + while True: + row = data_rows[row_index] + 
row_time_ms = row[_OPEN_TIME] + self._source_interval_ms + if row_time_ms > sample_time_ms: + break + interval_rows.append(row) + if row_index == last_row_index: + break + row_index += 1 + + interval_row_count = len(interval_rows) + if interval_row_count == 1: + row = interval_rows[0] + elif interval_row_count > 1: + row = compact_samples(interval_rows) + + for feature_index, metric in enumerate(self._metrics): + feature_samples[feature_index][sample_index] = row[metric] + + interval_rows.clear() + sample_time_ms += interval_ms + + results = { + self.feature_ids[feature_index]: feature_samples[feature_index] + for feature_index in range(self.feature_count) + } + + self._check_feature_samples(results, start_time_ms, interval_ms) + + return results + + +class LunarCrushTimeSeriesTopic(LunarCrushTimeSeriesFeatureSource): + SOURCE_NAME = "LunarCrushTimeSeriesTopic" + + def __init__( + self, + topic: str, + source_interval_ms: int, + feature_mappings: dict[FeatureID, LunarCrushMetric], + feature_dtypes: list[np.dtype] = None, + default_dtype: np.dtype = np.dtype(np.float32), + api_key: str = None, + retries: int = LunarCrushTimeSeriesFeatureSource.DEFAULT_RETRIES, + ): + super().__init__( + "topic", + topic, + source_interval_ms, + feature_mappings, + feature_dtypes, + default_dtype, + api_key, + retries, + ) + + +class LunarCrushTimeSeriesCategory(LunarCrushTimeSeriesFeatureSource): + SOURCE_NAME = "LunarCrushTimeSeriesCategory" + + def __init__( + self, + category: str, + source_interval_ms: int, + feature_mappings: dict[FeatureID, LunarCrushMetric], + feature_dtypes: list[np.dtype] = None, + default_dtype: np.dtype = np.dtype(np.float32), + api_key: str = None, + retries: int = LunarCrushTimeSeriesFeatureSource.DEFAULT_RETRIES, + ): + super().__init__( + "category", + category, + source_interval_ms, + feature_mappings, + feature_dtypes, + default_dtype, + api_key, + retries, + ) + + +class LunarCrushTimeSeriesCoin(LunarCrushTimeSeriesFeatureSource): + SOURCE_NAME = 
"LunarCrushTimeSeriesCoin" + + _VERSION = "v2" + + def __init__( + self, + coin: int | str, + source_interval_ms: int, + feature_mappings: dict[FeatureID, LunarCrushMetric], + feature_dtypes: list[np.dtype] = None, + default_dtype: np.dtype = np.dtype(np.float32), + api_key: str = None, + retries: int = LunarCrushTimeSeriesFeatureSource.DEFAULT_RETRIES, + ): + super().__init__( + "coins", + str(coin), + source_interval_ms, + feature_mappings, + feature_dtypes, + default_dtype, + api_key, + retries, + ) + + +class LunarCrushTimeSeriesStock(LunarCrushTimeSeriesFeatureSource): + SOURCE_NAME = "LunarCrushTimeSeriesStock" + + _VERSION = "v2" + + def __init__( + self, + stock: int | str, + source_interval_ms: int, + feature_mappings: dict[FeatureID, LunarCrushMetric], + feature_dtypes: list[np.dtype] = None, + default_dtype: np.dtype = np.dtype(np.float32), + api_key: str = None, + retries: int = LunarCrushTimeSeriesFeatureSource.DEFAULT_RETRIES, + ): + super().__init__( + "stocks", + str(stock), + source_interval_ms, + feature_mappings, + feature_dtypes, + default_dtype, + api_key, + retries, + ) diff --git a/feature_sources/yahoo_finance_kline_feature_source.py b/feature_sources/yahoo_finance_kline_feature_source.py new file mode 100644 index 0000000..7fe2402 --- /dev/null +++ b/feature_sources/yahoo_finance_kline_feature_source.py @@ -0,0 +1,183 @@ +# developer: taoshi-mbrown +# Copyright © 2024 Taoshi Inc +from enum import Enum +from features import FeatureCompaction, FeatureID, FeatureSource +import math +import numpy as np +from numpy import ndarray +import statistics +from time_util import current_interval_ms, datetime, time_span_ms +from yfinance import Ticker + + +class YahooFinanceKlineField(str, Enum): + OPEN_TIME = "Date" + + PRICE_OPEN = "Open" + PRICE_CLOSE = "Close" + PRICE_HIGH = "High" + PRICE_LOW = "Low" + VOLUME = "Volume" + DIVIDENDS = "Dividends" + SPLITS = "Stock Splits" + + +class YahooFinanceKlineFeatureSource(FeatureSource): + SOURCE_NAME = 
"YahooFinanceKline" + + _INTERVALS = { + time_span_ms(minutes=1): "1m", + time_span_ms(minutes=2): "2m", + time_span_ms(minutes=5): "5m", + time_span_ms(minutes=15): "15m", + time_span_ms(minutes=30): "30m", + time_span_ms(hours=1): "60m", + time_span_ms(hours=3): "90m", + time_span_ms(days=1): "1d", + time_span_ms(days=3): "5d", + time_span_ms(weeks=1): "1wk", + time_span_ms(days=30): "1mo", + time_span_ms(days=90): "3mo", + } + + # Default is using last sample, so on only include other types + _METRIC_COMPACTIONS = { + YahooFinanceKlineField.PRICE_OPEN: FeatureCompaction.FIRST, + YahooFinanceKlineField.PRICE_HIGH: FeatureCompaction.MAX, + YahooFinanceKlineField.PRICE_LOW: FeatureCompaction.MIN, + YahooFinanceKlineField.VOLUME: FeatureCompaction.SUM, + YahooFinanceKlineField.DIVIDENDS: FeatureCompaction.SUM, + YahooFinanceKlineField.SPLITS: FeatureCompaction.SUM, + } + + def __init__( + self, + ticker: str, + source_interval_ms: int, + feature_mappings: dict[FeatureID, YahooFinanceKlineField], + feature_dtypes: list[np.dtype] = None, + default_dtype: np.dtype = np.dtype(np.float32), + ): + query_interval = self._INTERVALS.get(source_interval_ms) + if query_interval is None: + raise ValueError(f"interval_ms {source_interval_ms} is not supported.") + + feature_ids = list(feature_mappings.keys()) + self.VALID_FEATURE_IDS = feature_ids + super().__init__(feature_ids, feature_dtypes, default_dtype) + + self._source_interval_ms = source_interval_ms + self._query_interval = query_interval + self._metrics = list(feature_mappings.values()) + self._convert_metrics = [YahooFinanceKlineField.OPEN_TIME, *self._metrics] + self._client = Ticker(ticker) + + # noinspection PyMethodMayBeStatic + def _convert_samples(self, data_rows: list[dict]) -> None: + _OPEN_TIME = YahooFinanceKlineField.OPEN_TIME + _SEC_TO_MS = time_span_ms(seconds=1) + for row in data_rows: + open_time = row[_OPEN_TIME] + row[_OPEN_TIME] = int(open_time.timestamp() * _SEC_TO_MS) + + def _compact_samples(self, 
samples: list[dict]) -> dict: + result = samples[-1].copy() + for metric in self._metrics: + compaction = self._METRIC_COMPACTIONS.get(metric, FeatureCompaction.LAST) + if compaction == FeatureCompaction.LAST: + continue + elif compaction == FeatureCompaction.FIRST: + result[metric] = samples[0][metric] + else: + values = [sample[metric] for sample in samples] + match compaction: + case FeatureCompaction.MIN: + metric_result = min(values) + case FeatureCompaction.MAX: + metric_result = max(values) + case FeatureCompaction.MEAN: + metric_result = statistics.mean(values) + case FeatureCompaction.MEDIAN: + metric_result = statistics.median(values) + case FeatureCompaction.MODE: + metric_result = statistics.mode(values) + case _: + metric_result = math.fsum(values) + result[metric] = metric_result + return result + + def get_feature_samples( + self, + start_time_ms: int, + interval_ms: int, + sample_count: int, + ) -> dict[FeatureID, ndarray]: + _OPEN_TIME = YahooFinanceKlineField.OPEN_TIME + + # Yahoo Finance uses open time for queries + query_start_time_ms = start_time_ms - interval_ms + + # Align on interval so queries for 1 sample include at least 1 sample + query_start_time_ms = current_interval_ms( + query_start_time_ms, self._source_interval_ms + ) + + end_time_ms = start_time_ms + (interval_ms * (sample_count - 2)) + + start_time = datetime.fromtimestamp_ms(query_start_time_ms) + end_time = datetime.fromtimestamp_ms(end_time_ms) + + data_frame = self._client.history( + interval=self._query_interval, + start=start_time, + end=end_time, + timeout=None, + raise_errors=True, + ) + # Ensure the index is accessible as a time field + data_frame[_OPEN_TIME.value] = data_frame.index + data_rows = data_frame.to_dict(orient="records") + + row_count = len(data_rows) + if row_count == 0: + raise RuntimeError("No samples received.") + + self._convert_samples(data_rows) + feature_samples = self._create_feature_samples(sample_count) + + sample_time_ms = start_time_ms + 
interval_rows = [] + row_index = 0 + last_row_index = row_count - 1 + compact_samples = self._compact_samples + for sample_index in range(sample_count): + while True: + row = data_rows[row_index] + row_time_ms = row[_OPEN_TIME] + self._source_interval_ms + if row_time_ms > sample_time_ms: + break + interval_rows.append(row) + if row_index == last_row_index: + break + row_index += 1 + + interval_row_count = len(interval_rows) + if interval_row_count == 1: + row = interval_rows[0] + elif interval_row_count > 1: + row = compact_samples(interval_rows) + + for feature_index, metric in enumerate(self._metrics): + feature_samples[feature_index][sample_index] = row[metric] + + interval_rows.clear() + sample_time_ms += interval_ms + + results = { + self.feature_ids[feature_index]: feature_samples[feature_index] + for feature_index in range(self.feature_count) + } + + self._check_feature_samples(results, start_time_ms, interval_ms) + + return results diff --git a/features/feature_id.py b/features/feature_id.py index 67df002..47abf1e 100644 --- a/features/feature_id.py +++ b/features/feature_id.py @@ -13,9 +13,18 @@ class FeatureID(IntEnum): TIME_OF_MONTH = 102 TIME_OF_YEAR = 103 + CRYPTO_SOCIAL_POSTS_CREATED = 20000 + CRYPTO_SOCIAL_POSTS_ACTIVE = 20001 + CRYPTO_SOCIAL_INTERACTIONS = 20002 + CRYPTO_SOCIAL_CONTRIBUTORS_CREATED = 20003 + CRYPTO_SOCIAL_CONTRIBUTORS_ACTIVE = 20004 + CRYPTO_SOCIAL_SENTIMENT = 20005 + CRYPTO_SOCIAL_SPAM = 20006 + BTC_HASH_RATE = 100000 - BTC_MARKET_CAP_USD = 100010 + BTC_USD_MARKET_CAP = 100010 + BTC_CIRCULATING_SUPPLY = 100011 BTC_ADDR_COUNT_100K_USD = 100105 BTC_ADDR_COUNT_1M_USD = 100106 @@ -25,6 +34,9 @@ class FeatureID(IntEnum): BTC_MCRC = 100201 BTC_MOMR = 100202 + BTC_GALAXY_SCORE = 100210 + BTC_ALT_RANK = 100211 + BTC_USD_OPEN = 101000 BTC_USD_CLOSE = 101001 BTC_USD_HIGH = 101002 @@ -47,3 +59,30 @@ class FeatureID(IntEnum): BTC_USD_FUTURES_LIQUIDATIONS_BUY_USD = 102201 BTC_USD_FUTURES_LIQUIDATIONS_SELL = 102202 
BTC_USD_FUTURES_LIQUIDATIONS_SELL_USD = 102203 + + BTC_SOCIAL_POSTS_CREATED = 151000 + BTC_SOCIAL_POSTS_ACTIVE = 151001 + BTC_SOCIAL_INTERACTIONS = 151002 + BTC_SOCIAL_CONTRIBUTORS_CREATED = 151003 + BTC_SOCIAL_CONTRIBUTORS_ACTIVE = 151004 + BTC_SOCIAL_SENTIMENT = 151005 + BTC_SOCIAL_SPAM = 151006 + BTC_SOCIAL_DOMINANCE = 151007 + + SPX_USD_OPEN = 201000 + SPX_USD_CLOSE = 201001 + SPX_USD_HIGH = 201002 + SPX_USD_LOW = 201003 + SPX_USD_VOLUME = 201004 + + EUR_USD_OPEN = 301000 + EUR_USD_CLOSE = 301001 + EUR_USD_HIGH = 301002 + EUR_USD_LOW = 301003 + EUR_USD_VOLUME = 301004 + + NVDA_USD_OPEN = 701000 + NVDA_USD_CLOSE = 701001 + NVDA_USD_HIGH = 701002 + NVDA_USD_LOW = 701003 + NVDA_USD_VOLUME = 701004 diff --git a/requirements.txt b/requirements.txt index ec8ca1c..cda7877 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ requests>=2.31.0 scikit-learn>=1.4.1.post1 scipy>=1.12.0 tensorflow==2.13.1 +yfinance==0.2.37 diff --git a/streams/btcusd_5m/__init__.py b/streams/btcusd_5m/__init__.py index ef80b9b..54955c1 100644 --- a/streams/btcusd_5m/__init__.py +++ b/streams/btcusd_5m/__init__.py @@ -126,7 +126,7 @@ FeatureID.BTC_MCTC: CoinMetric.MCTC, FeatureID.BTC_MCRC: CoinMetric.MCRC, FeatureID.BTC_MOMR: CoinMetric.MOMR, - FeatureID.BTC_MARKET_CAP_USD: CoinMetric.MARKET_CAP_USD, + FeatureID.BTC_USD_MARKET_CAP: CoinMetric.MARKET_CAP_USD, }, ) diff --git a/tests/feature_tests/test_coin_metrics_sources.py b/tests/feature_tests/test_coin_metrics_sources.py new file mode 100644 index 0000000..0d769c0 --- /dev/null +++ b/tests/feature_tests/test_coin_metrics_sources.py @@ -0,0 +1,374 @@ +# developer: taoshi-mbrown +# Copyright © 2024 Taoshi Inc +from features import FeatureID +from feature_sources import ( + CoinMetric, + CoinMetricsAssetMetrics, + CoinMetricsExchangeMetrics, + CoinMetricsExchangeAssetMetrics, + CoinMetricsMarketMetrics, + CoinMetricsPairMetrics, + CoinMetricsPairCandles, + CoinMetricsInstitutionMetrics, + CoinMetricsMarketTrades, + 
CoinMetricsMarketOpenInterest, + CoinMetricsMarketLiquidations, + CoinMetricsMarketFundingRates, + CoinMetricsMarketQuotes, + CoinMetricsMarketCandles, + CoinMetricsMarketContractPrices, + CoinMetricsMarketImpliedVolatility, + CoinMetricsMarketGreeks, + CoinMetricsIndexCandles, + CoinMetricsIndexLevels, +) +from time_util import datetime, time_span_ms +import unittest + + +class TestCoinMetricsFeatureSources(unittest.TestCase): + def test_asset_metrics_feature_source(self): + _START_TIME_MS = datetime.parse("2023-01-01 00:00:00").timestamp_ms() + _INTERVAL_MS = time_span_ms(minutes=10) + _SAMPLE_COUNT = 2000 + + test_source = CoinMetricsAssetMetrics( + kind="btc", + source_interval_ms=_INTERVAL_MS, + feature_mappings={ + FeatureID.BTC_USD_VOLATILITY: CoinMetric.VOLATILITY_REALIZED_USD_ROLLING_24H, + }, + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT + ) + + expected_values = { + 0: { + FeatureID.BTC_USD_VOLATILITY: 0.1137799, + }, + 1249: { + FeatureID.BTC_USD_VOLATILITY: 0.2130412, + }, + -1: { + FeatureID.BTC_USD_VOLATILITY: 0.9794625, + }, + } + + for index, samples in expected_values.items(): + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertAlmostEqual( + test_value, + expected_value, + places=5, + msg=f"index: {index} feature_id: {feature_id}", + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, 1 + ) + + index = 0 + samples = expected_values[index] + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertAlmostEqual( + test_value, + expected_value, + places=5, + msg=f"index: {index} feature_id: {feature_id}", + ) + + def test_exchange_metrics_feature_source(self): + _START_TIME_MS = datetime.parse("2023-01-01 00:00:00").timestamp_ms() + _INTERVAL_MS = time_span_ms(hours=1) + _SAMPLE_COUNT = 2000 + + 
test_source = CoinMetricsExchangeMetrics( + kind="binance", + source_interval_ms=_INTERVAL_MS, + feature_mappings={ + FeatureID.BTC_USD_FUTURES_LIQUIDATIONS_BUY: CoinMetric.LIQUIDATIONS_BUY_UNITS_5M, + FeatureID.BTC_USD_FUTURES_LIQUIDATIONS_SELL: CoinMetric.LIQUIDATIONS_SELL_UNITS_5M, + }, + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT + ) + + expected_values = { + 0: { + FeatureID.BTC_USD_FUTURES_LIQUIDATIONS_BUY: 0.0, + FeatureID.BTC_USD_FUTURES_LIQUIDATIONS_SELL: 443.527, + }, + 1249: { + FeatureID.BTC_USD_FUTURES_LIQUIDATIONS_BUY: 347535.1, + FeatureID.BTC_USD_FUTURES_LIQUIDATIONS_SELL: 1894658.0, + }, + -1: { + FeatureID.BTC_USD_FUTURES_LIQUIDATIONS_BUY: 7858.7, + FeatureID.BTC_USD_FUTURES_LIQUIDATIONS_SELL: 1034368.94, + }, + } + + for index, samples in expected_values.items(): + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertAlmostEqual( + test_value, + expected_value, + places=1, + msg=f"index: {index} feature_id: {feature_id}", + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, 1 + ) + + index = 0 + samples = expected_values[index] + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertAlmostEqual( + test_value, + expected_value, + places=2, + msg=f"index: {index} feature_id: {feature_id}", + ) + + def test_market_metrics_feature_source(self): + _START_TIME_MS = datetime.parse("2023-01-01 00:00:00").timestamp_ms() + _INTERVAL_MS = time_span_ms(minutes=1) + _SAMPLE_COUNT = 2000 + + test_source = CoinMetricsMarketMetrics( + kind="binance-btc-usdt-spot", + source_interval_ms=_INTERVAL_MS, + feature_mappings={ + FeatureID.BTC_USD_SPREAD: CoinMetric.LIQUIDITY_BID_ASK_SPREAD_PERCENT_1M, + }, + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT + ) + + 
expected_values = { + 0: { + FeatureID.BTC_USD_SPREAD: 0.0034465278, + }, + 1249: { + FeatureID.BTC_USD_SPREAD: 0.0024788794, + }, + -1: { + FeatureID.BTC_USD_SPREAD: 0.001673903, + }, + } + + for index, samples in expected_values.items(): + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertAlmostEqual( + test_value, + expected_value, + places=4, + msg=f"index: {index} feature_id: {feature_id}", + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, 1 + ) + + index = 0 + samples = expected_values[index] + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertAlmostEqual( + test_value, + expected_value, + places=2, + msg=f"index: {index} feature_id: {feature_id}", + ) + + def test_market_open_interest_feature_source(self): + _START_TIME_MS = datetime.parse("2023-01-01 00:00:00").timestamp_ms() + _INTERVAL_MS = time_span_ms(minutes=1) + _SAMPLE_COUNT = 2000 + + test_source = CoinMetricsMarketOpenInterest( + kind="binance-BTCUSD_PERP-future", + source_interval_ms=_INTERVAL_MS, + feature_mappings={ + FeatureID.BTC_USD_FUTURES_OPEN_CONTRACTS: CoinMetric.CONTRACT_COUNT, + }, + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT + ) + + expected_values = { + 0: { + FeatureID.BTC_USD_FUTURES_OPEN_CONTRACTS: 3390107.0, + }, + 1249: { + FeatureID.BTC_USD_FUTURES_OPEN_CONTRACTS: 3425057.0, + }, + -1: { + FeatureID.BTC_USD_FUTURES_OPEN_CONTRACTS: 3468191.0, + }, + } + + for index, samples in expected_values.items(): + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertAlmostEqual( + test_value, + expected_value, + places=2, + msg=f"index: {index} feature_id: {feature_id}", + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, 
_INTERVAL_MS, 1 + ) + + index = 0 + samples = expected_values[index] + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertAlmostEqual( + test_value, + expected_value, + places=2, + msg=f"index: {index} feature_id: {feature_id}", + ) + + def test_market_funding_rates_feature_source(self): + _START_TIME_MS = datetime.parse("2023-01-01 01:00:00").timestamp_ms() + _INTERVAL_MS = time_span_ms(hours=1) + _SAMPLE_COUNT = 2000 + + test_source = CoinMetricsMarketFundingRates( + kind="binance-BTCUSD_PERP-future", + source_interval_ms=_INTERVAL_MS, + feature_mappings={ + FeatureID.BTC_USD_FUTURES_FUNDING_RATE: CoinMetric.RATE, + }, + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT + ) + + expected_values = { + 0: { + FeatureID.BTC_USD_FUTURES_FUNDING_RATE: -1.609e-05, + }, + 1249: { + FeatureID.BTC_USD_FUTURES_FUNDING_RATE: 1e-04, + }, + -1: { + FeatureID.BTC_USD_FUTURES_FUNDING_RATE: -7.29e-05, + }, + } + + for index, samples in expected_values.items(): + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertAlmostEqual( + test_value, + expected_value, + places=2, + msg=f"index: {index} feature_id: {feature_id}", + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, 1 + ) + + index = 0 + samples = expected_values[index] + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertAlmostEqual( + test_value, + expected_value, + places=2, + msg=f"index: {index} feature_id: {feature_id}", + ) + + def test_market_candles_feature_source(self): + _START_TIME_MS = datetime.parse("2023-01-01 00:00:00").timestamp_ms() + _INTERVAL_MS = time_span_ms(minutes=5) + _SAMPLE_COUNT = 7500 + + test_source = CoinMetricsMarketCandles( + kind="binance-btc-usdt-spot", + 
source_interval_ms=_INTERVAL_MS, + feature_mappings={ + FeatureID.BTC_USD_CLOSE: CoinMetric.PRICE_CLOSE, + FeatureID.BTC_USD_HIGH: CoinMetric.PRICE_HIGH, + FeatureID.BTC_USD_LOW: CoinMetric.PRICE_LOW, + FeatureID.BTC_USD_VOLUME: CoinMetric.VOLUME, + }, + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT + ) + + # Expected values are the same as those used in + # TestKlineFeatureSource.test_binance_kline_feature_source + expected_values = { + 0: { + FeatureID.BTC_USD_CLOSE: 16542.40000000, + FeatureID.BTC_USD_HIGH: 16544.47000000, + FeatureID.BTC_USD_LOW: 16535.05000000, + FeatureID.BTC_USD_VOLUME: 227.06684000, + }, + 6249: { + FeatureID.BTC_USD_CLOSE: 22831.88000000, + FeatureID.BTC_USD_HIGH: 22831.88000000, + FeatureID.BTC_USD_LOW: 22797.00000000, + FeatureID.BTC_USD_VOLUME: 665.50900000, + }, + -1: { + FeatureID.BTC_USD_CLOSE: 22913.75000000, + FeatureID.BTC_USD_HIGH: 22982.91000000, + FeatureID.BTC_USD_LOW: 22897.02000000, + FeatureID.BTC_USD_VOLUME: 1445.37762000, + }, + } + + for index, samples in expected_values.items(): + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertAlmostEqual( + test_value, + expected_value, + places=2, + msg=f"index: {index} feature_id: {feature_id}", + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, 1 + ) + + index = 0 + samples = expected_values[index] + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertAlmostEqual( + test_value, + expected_value, + places=2, + msg=f"index: {index} feature_id: {feature_id}", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/feature_tests/test_feature_aggregator.py b/tests/feature_tests/test_feature_aggregator.py index 316e16c..9f19b0f 100644 --- a/tests/feature_tests/test_feature_aggregator.py +++ 
b/tests/feature_tests/test_feature_aggregator.py @@ -18,8 +18,7 @@ class TestFeatureAggregator(unittest.TestCase): def test_binance_bybit_coinbase_kline_feature_aggregator(self): _START_TIME_MS = datetime.parse("2023-01-01 00:00:00").timestamp_ms() _INTERVAL_MS = time_span_ms(minutes=5) - _SAMPLE_COUNT = 2500 - _ITERATIONS = 3 + _SAMPLE_COUNT = 7500 binance_source = BinanceKlineFeatureSource( symbol="BTCUSDT", @@ -65,17 +64,9 @@ def test_binance_bybit_coinbase_kline_feature_aggregator(self): }, ) - test_feature_samples = { - feature_id: [] for feature_id in test_aggregator.feature_ids - } - start_time_ms = _START_TIME_MS - for i in range(_ITERATIONS): - feature_samples = test_aggregator.get_feature_samples( - start_time_ms, _INTERVAL_MS, _SAMPLE_COUNT - ) - for feature_id, samples in feature_samples.items(): - test_feature_samples[feature_id].extend(samples) - start_time_ms += _INTERVAL_MS * _SAMPLE_COUNT + test_feature_samples = test_aggregator.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT + ) expected_values = { # Open time: 1672530900000 @@ -103,7 +94,7 @@ def test_binance_bybit_coinbase_kline_feature_aggregator(self): for index, samples in expected_values.items(): for feature_id, expected_value in samples.items(): - test_value = test_feature_samples[feature_id][index] + test_value = float(test_feature_samples[feature_id][index]) self.assertAlmostEqual( test_value, expected_value, diff --git a/tests/feature_tests/test_kline_feature_sources.py b/tests/feature_tests/test_kline_feature_sources.py index cd7c1e2..525f0ac 100644 --- a/tests/feature_tests/test_kline_feature_sources.py +++ b/tests/feature_tests/test_kline_feature_sources.py @@ -19,8 +19,7 @@ class TestKlineFeatureSource(unittest.TestCase): def test_binance_kline_feature_source(self): _START_TIME_MS = datetime.parse("2023-01-01 00:00:00").timestamp_ms() _INTERVAL_MS = time_span_ms(minutes=5) - _SAMPLE_COUNT = 2500 - _ITERATIONS = 3 + _SAMPLE_COUNT = 7500 test_source = 
BinanceKlineFeatureSource( symbol="BTCUSDT", @@ -33,17 +32,9 @@ def test_binance_kline_feature_source(self): }, ) - test_feature_samples = { - feature_id: [] for feature_id in test_source.feature_ids - } - start_time_ms = _START_TIME_MS - for i in range(_ITERATIONS): - feature_samples = test_source.get_feature_samples( - start_time_ms, _INTERVAL_MS, _SAMPLE_COUNT - ) - for feature_id, samples in feature_samples.items(): - test_feature_samples[feature_id].extend(samples) - start_time_ms += _INTERVAL_MS * _SAMPLE_COUNT + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT + ) expected_values = { # Open time: 1672530900000 @@ -71,18 +62,33 @@ def test_binance_kline_feature_source(self): for index, samples in expected_values.items(): for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) self.assertAlmostEqual( - test_feature_samples[feature_id][index], + test_value, expected_value, places=2, msg=f"index: {index} feature_id: {feature_id}", ) + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, 1 + ) + + index = 0 + samples = expected_values[index] + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertAlmostEqual( + test_value, + expected_value, + places=2, + msg=f"index: {index} feature_id: {feature_id}", + ) + def test_bybit_kline_feature_source(self): _START_TIME_MS = datetime.parse("2023-01-01 00:00:00").timestamp_ms() _INTERVAL_MS = time_span_ms(minutes=5) - _SAMPLE_COUNT = 2500 - _ITERATIONS = 3 + _SAMPLE_COUNT = 7500 test_source = BybitKlineFeatureSource( category="spot", @@ -96,17 +102,9 @@ def test_bybit_kline_feature_source(self): }, ) - test_feature_samples = { - feature_id: [] for feature_id in test_source.feature_ids - } - start_time_ms = _START_TIME_MS - for i in range(_ITERATIONS): - feature_samples = test_source.get_feature_samples( - 
start_time_ms, _INTERVAL_MS, _SAMPLE_COUNT - ) - for feature_id, samples in feature_samples.items(): - test_feature_samples[feature_id].extend(samples) - start_time_ms += _INTERVAL_MS * _SAMPLE_COUNT + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT + ) expected_values = { # Open time: 1672530900000 @@ -134,18 +132,33 @@ def test_bybit_kline_feature_source(self): for index, samples in expected_values.items(): for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) self.assertAlmostEqual( - test_feature_samples[feature_id][index], + test_value, expected_value, places=2, msg=f"index: {index} feature_id: {feature_id}", ) + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, 1 + ) + + index = 0 + samples = expected_values[index] + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertAlmostEqual( + test_value, + expected_value, + places=2, + msg=f"index: {index} feature_id: {feature_id}", + ) + def test_coinbase_kline_feature_source(self): _START_TIME_MS = datetime.parse("2023-01-01 00:00:00").timestamp_ms() _INTERVAL_MS = time_span_ms(minutes=5) - _SAMPLE_COUNT = 2500 - _ITERATIONS = 3 + _SAMPLE_COUNT = 7500 test_source = CoinbaseKlineFeatureSource( symbol="BTC-USD", @@ -158,17 +171,9 @@ def test_coinbase_kline_feature_source(self): }, ) - test_feature_samples = { - feature_id: [] for feature_id in test_source.feature_ids - } - start_time_ms = _START_TIME_MS - for i in range(_ITERATIONS): - feature_samples = test_source.get_feature_samples( - start_time_ms, _INTERVAL_MS, _SAMPLE_COUNT - ) - for feature_id, samples in feature_samples.items(): - test_feature_samples[feature_id].extend(samples) - start_time_ms += _INTERVAL_MS * _SAMPLE_COUNT + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT + ) 
expected_values = { # Open time: 1672530900 (seconds) @@ -196,13 +201,29 @@ def test_coinbase_kline_feature_source(self): for index, samples in expected_values.items(): for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) self.assertAlmostEqual( - test_feature_samples[feature_id][index], + test_value, expected_value, places=2, msg=f"index: {index} feature_id: {feature_id}", ) + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, 1 + ) + + index = 0 + samples = expected_values[index] + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertAlmostEqual( + test_value, + expected_value, + places=2, + msg=f"index: {index} feature_id: {feature_id}", + ) + def test_kraken_kline_feature_source(self): _INTERVAL_MS = time_span_ms(minutes=5) _SKIP_COUNT = 10 @@ -210,8 +231,8 @@ def test_kraken_kline_feature_source(self): _START_TIME_MS = previous_interval_ms( datetime.now().timestamp_ms(), _INTERVAL_MS ) - ((_SAMPLE_COUNT + _SKIP_COUNT) * _INTERVAL_MS) - _BTC_USD_LOW_MIN = 30000 - _BTC_USD_HIGH_MAX = 70000 + _BTC_USD_LOW_MIN = 40000 + _BTC_USD_HIGH_MAX = 120000 test_source = KrakenKlineFeatureSource( symbol="XXBTZUSD", @@ -246,6 +267,24 @@ def test_kraken_kline_feature_source(self): assert low > _BTC_USD_LOW_MIN assert volume != last_volume + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT + ) + + i = 0 + close = test_feature_samples[FeatureID.BTC_USD_CLOSE][i] + high = test_feature_samples[FeatureID.BTC_USD_HIGH][i] + low = test_feature_samples[FeatureID.BTC_USD_LOW][i] + volume = test_feature_samples[FeatureID.BTC_USD_VOLUME][i] + assert close != last_close + assert high != last_high + assert high >= close + assert high < _BTC_USD_HIGH_MAX + assert low != last_low + assert low <= close + assert low > _BTC_USD_LOW_MIN + assert volume != last_volume + if 
__name__ == "__main__": unittest.main() diff --git a/tests/feature_tests/test_lunarcrush_sources.py b/tests/feature_tests/test_lunarcrush_sources.py new file mode 100644 index 0000000..5d8330a --- /dev/null +++ b/tests/feature_tests/test_lunarcrush_sources.py @@ -0,0 +1,404 @@ +# developer: taoshi-mbrown +# Copyright © 2024 Taoshi Inc +from features import FeatureID +from feature_sources import ( + LunarCrushMetric, + LunarCrushTimeSeriesCategory, + LunarCrushTimeSeriesCoin, + LunarCrushTimeSeriesStock, + LunarCrushTimeSeriesTopic, +) +import math +from time_util import datetime, time_span_ms +import unittest + + +class TestLunarCrushFeatureSources(unittest.TestCase): + def test_lunarcrush_category_feature_source(self): + _START_TIME_MS = datetime.parse("2024-02-01 01:00:00").timestamp_ms() + _INTERVAL_MS = time_span_ms(hours=1) + _SAMPLE_COUNT = 500 + + test_source = LunarCrushTimeSeriesCategory( + category="cryptocurrencies", + source_interval_ms=time_span_ms(hours=1), + feature_mappings={ + FeatureID.CRYPTO_SOCIAL_POSTS_CREATED: LunarCrushMetric.POSTS_CREATED, + FeatureID.CRYPTO_SOCIAL_POSTS_ACTIVE: LunarCrushMetric.POSTS_ACTIVE, + FeatureID.CRYPTO_SOCIAL_INTERACTIONS: LunarCrushMetric.INTERACTIONS, + FeatureID.CRYPTO_SOCIAL_CONTRIBUTORS_CREATED: LunarCrushMetric.CONTRIBUTORS_CREATED, + FeatureID.CRYPTO_SOCIAL_CONTRIBUTORS_ACTIVE: LunarCrushMetric.CONTRIBUTORS_ACTIVE, + FeatureID.CRYPTO_SOCIAL_SENTIMENT: LunarCrushMetric.SENTIMENT, + FeatureID.CRYPTO_SOCIAL_SPAM: LunarCrushMetric.SPAM, + }, + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT + ) + + expected_values = { + # start: 1706745600 + 0: { + FeatureID.CRYPTO_SOCIAL_POSTS_CREATED: 7508, + FeatureID.CRYPTO_SOCIAL_POSTS_ACTIVE: 7631, + FeatureID.CRYPTO_SOCIAL_INTERACTIONS: 300300, + FeatureID.CRYPTO_SOCIAL_CONTRIBUTORS_CREATED: 5339, + FeatureID.CRYPTO_SOCIAL_CONTRIBUTORS_ACTIVE: 10441, + FeatureID.CRYPTO_SOCIAL_SENTIMENT: 79, + 
FeatureID.CRYPTO_SOCIAL_SPAM: 900, + }, + # start: 1707656400 + 253: { + FeatureID.CRYPTO_SOCIAL_POSTS_CREATED: 7098, + FeatureID.CRYPTO_SOCIAL_POSTS_ACTIVE: 150224, + FeatureID.CRYPTO_SOCIAL_INTERACTIONS: 28660679, + FeatureID.CRYPTO_SOCIAL_CONTRIBUTORS_CREATED: 4645, + FeatureID.CRYPTO_SOCIAL_CONTRIBUTORS_ACTIVE: 66932, + FeatureID.CRYPTO_SOCIAL_SENTIMENT: 83, + FeatureID.CRYPTO_SOCIAL_SPAM: 0, + }, + # start: 1708542000 + -1: { + FeatureID.CRYPTO_SOCIAL_POSTS_CREATED: 6947, + FeatureID.CRYPTO_SOCIAL_POSTS_ACTIVE: 135999, + FeatureID.CRYPTO_SOCIAL_INTERACTIONS: 19205146, + FeatureID.CRYPTO_SOCIAL_CONTRIBUTORS_CREATED: 5023, + FeatureID.CRYPTO_SOCIAL_CONTRIBUTORS_ACTIVE: 61985, + FeatureID.CRYPTO_SOCIAL_SENTIMENT: 83, + FeatureID.CRYPTO_SOCIAL_SPAM: 1389, + }, + } + + for index, samples in expected_values.items(): + for feature_id, expected_value in samples.items(): + test_value = test_feature_samples[feature_id][index] + self.assertTrue( + math.isclose( + test_value, + expected_value, + rel_tol=1e-07, + ), + msg=f"index: {index} feature_id: {feature_id.name} " + f"test_value:{test_value} expected_value: {expected_value}", + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, 1 + ) + + index = 0 + samples = expected_values[index] + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertTrue( + math.isclose( + test_value, + expected_value, + rel_tol=1e-07, + ), + msg=f"index: {index} feature_id: {feature_id.name} " + f"test_value:{test_value} expected_value: {expected_value}", + ) + + def test_lunarcrush_coin_feature_source(self): + _START_TIME_MS = datetime.parse("2024-02-01 01:00:00").timestamp_ms() + _INTERVAL_MS = time_span_ms(hours=1) + _SAMPLE_COUNT = 500 + + test_source = LunarCrushTimeSeriesCoin( + coin="bitcoin", + source_interval_ms=time_span_ms(hours=1), + feature_mappings={ + FeatureID.BTC_USD_OPEN: LunarCrushMetric.PRICE_OPEN, + 
FeatureID.BTC_USD_CLOSE: LunarCrushMetric.PRICE_CLOSE, + FeatureID.BTC_USD_HIGH: LunarCrushMetric.PRICE_HIGH, + FeatureID.BTC_USD_LOW: LunarCrushMetric.PRICE_LOW, + FeatureID.BTC_USD_VOLUME: LunarCrushMetric.VOLUME_24H, + FeatureID.BTC_USD_MARKET_CAP: LunarCrushMetric.MARKET_CAP, + FeatureID.BTC_CIRCULATING_SUPPLY: LunarCrushMetric.CIRCULATING_SUPPLY, + FeatureID.BTC_SOCIAL_SENTIMENT: LunarCrushMetric.SENTIMENT, + FeatureID.BTC_GALAXY_SCORE: LunarCrushMetric.GALAXY_SCORE, + FeatureID.BTC_USD_VOLATILITY: LunarCrushMetric.VOLATILITY, + FeatureID.BTC_ALT_RANK: LunarCrushMetric.ALT_RANK, + FeatureID.BTC_SOCIAL_POSTS_CREATED: LunarCrushMetric.POSTS_CREATED, + FeatureID.BTC_SOCIAL_POSTS_ACTIVE: LunarCrushMetric.POSTS_ACTIVE, + FeatureID.BTC_SOCIAL_CONTRIBUTORS_CREATED: LunarCrushMetric.CONTRIBUTORS_CREATED, + FeatureID.BTC_SOCIAL_CONTRIBUTORS_ACTIVE: LunarCrushMetric.CONTRIBUTORS_ACTIVE, + FeatureID.BTC_SOCIAL_INTERACTIONS: LunarCrushMetric.INTERACTIONS, + FeatureID.BTC_SOCIAL_DOMINANCE: LunarCrushMetric.SOCIAL_DOMINANCE, + }, + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT + ) + + expected_values = { + # start: 1706745600 + 0: { + FeatureID.BTC_USD_OPEN: 42569.76139843987, + FeatureID.BTC_USD_CLOSE: 42421.03004299571, + FeatureID.BTC_USD_HIGH: 42657.611350802246, + FeatureID.BTC_USD_LOW: 42222.44063408701, + FeatureID.BTC_USD_VOLUME: 24549499233.53, + FeatureID.BTC_USD_MARKET_CAP: 832064112201.09, + FeatureID.BTC_CIRCULATING_SUPPLY: 19614425, + FeatureID.BTC_SOCIAL_SENTIMENT: 78, + FeatureID.BTC_GALAXY_SCORE: 75, + FeatureID.BTC_USD_VOLATILITY: 0.008550716661002312, + FeatureID.BTC_ALT_RANK: 32, + FeatureID.BTC_SOCIAL_POSTS_CREATED: 1117, + FeatureID.BTC_SOCIAL_POSTS_ACTIVE: 32130, + FeatureID.BTC_SOCIAL_CONTRIBUTORS_CREATED: 757, + FeatureID.BTC_SOCIAL_CONTRIBUTORS_ACTIVE: 16399, + FeatureID.BTC_SOCIAL_INTERACTIONS: 4263864, + FeatureID.BTC_SOCIAL_DOMINANCE: 0, + }, + # start: 1707656400 + 253: { + 
FeatureID.BTC_USD_OPEN: 48321.70719797294, + FeatureID.BTC_USD_CLOSE: 48100.66526716035, + FeatureID.BTC_USD_HIGH: 48371.15928848688, + FeatureID.BTC_USD_LOW: 48065.92147415219, + FeatureID.BTC_USD_VOLUME: 19354861474.99, + FeatureID.BTC_USD_MARKET_CAP: 943966801546.9432, + FeatureID.BTC_CIRCULATING_SUPPLY: 19624818, + FeatureID.BTC_SOCIAL_SENTIMENT: 81, + FeatureID.BTC_GALAXY_SCORE: 70, + FeatureID.BTC_USD_VOLATILITY: 0.0089, + FeatureID.BTC_ALT_RANK: 47, + FeatureID.BTC_SOCIAL_POSTS_CREATED: 1411, + FeatureID.BTC_SOCIAL_POSTS_ACTIVE: 33865, + FeatureID.BTC_SOCIAL_CONTRIBUTORS_CREATED: 935, + FeatureID.BTC_SOCIAL_CONTRIBUTORS_ACTIVE: 15689, + FeatureID.BTC_SOCIAL_INTERACTIONS: 8055386, + FeatureID.BTC_SOCIAL_DOMINANCE: 22.54300244967515, + }, + # start: 1708542000 + -1: { + FeatureID.BTC_USD_OPEN: 50864.29735630042, + FeatureID.BTC_USD_CLOSE: 51048.33614009864, + FeatureID.BTC_USD_HIGH: 51048.33614009864, + FeatureID.BTC_USD_LOW: 50664.7403283826, + FeatureID.BTC_USD_VOLUME: 27995847309.48, + FeatureID.BTC_USD_MARKET_CAP: 1002280479357.89, + FeatureID.BTC_CIRCULATING_SUPPLY: 19633950, + FeatureID.BTC_SOCIAL_SENTIMENT: 82, + FeatureID.BTC_GALAXY_SCORE: 70, + FeatureID.BTC_USD_VOLATILITY: 0.0083, + FeatureID.BTC_ALT_RANK: 87, + FeatureID.BTC_SOCIAL_POSTS_CREATED: 1834, + FeatureID.BTC_SOCIAL_POSTS_ACTIVE: 49629, + FeatureID.BTC_SOCIAL_CONTRIBUTORS_CREATED: 1234, + FeatureID.BTC_SOCIAL_CONTRIBUTORS_ACTIVE: 23053, + FeatureID.BTC_SOCIAL_INTERACTIONS: 9905471, + FeatureID.BTC_SOCIAL_DOMINANCE: 36.49217987060547, + }, + } + + for index, samples in expected_values.items(): + for feature_id, expected_value in samples.items(): + test_value = test_feature_samples[feature_id][index] + self.assertTrue( + math.isclose( + test_value, + expected_value, + rel_tol=1e-07, + ), + msg=f"index: {index} feature_id: {feature_id.name} " + f"test_value:{test_value} expected_value: {expected_value}", + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, 
_INTERVAL_MS, 1 + ) + + index = 0 + samples = expected_values[index] + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertTrue( + math.isclose( + test_value, + expected_value, + rel_tol=1e-07, + ), + msg=f"index: {index} feature_id: {feature_id.name} " + f"test_value:{test_value} expected_value: {expected_value}", + ) + + def lunarcrush_stock_feature_source(self): + _START_TIME_MS = datetime.parse("2024-02-01 01:00:00").timestamp_ms() + _INTERVAL_MS = time_span_ms(hours=1) + _SAMPLE_COUNT = 500 + + test_source = LunarCrushTimeSeriesStock( + stock="nvda", + source_interval_ms=time_span_ms(hours=1), + feature_mappings={ + FeatureID.NVDA_USD_OPEN: LunarCrushMetric.PRICE_OPEN, + FeatureID.NVDA_USD_CLOSE: LunarCrushMetric.PRICE_CLOSE, + FeatureID.NVDA_USD_HIGH: LunarCrushMetric.PRICE_HIGH, + FeatureID.NVDA_USD_LOW: LunarCrushMetric.PRICE_LOW, + FeatureID.NVDA_USD_VOLUME: LunarCrushMetric.VOLUME, + }, + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT + ) + + expected_values = { + # start: 1706745600 + 0: { + FeatureID.NVDA_USD_OPEN: 616.93, + FeatureID.NVDA_USD_CLOSE: 618.23, + FeatureID.NVDA_USD_HIGH: 618.25, + FeatureID.NVDA_USD_LOW: 616.65, + FeatureID.NVDA_USD_VOLUME: 49327112.28, + }, + # start: 1707656400 + # (markets are closed on Sunday, so no volume) + 253: { + FeatureID.NVDA_USD_OPEN: 721.31, + FeatureID.NVDA_USD_CLOSE: 721.31, + FeatureID.NVDA_USD_HIGH: 721.31, + FeatureID.NVDA_USD_LOW: 721.31, + FeatureID.NVDA_USD_VOLUME: 0, + }, + # start: 1707742800 + 277: { + FeatureID.NVDA_USD_OPEN: 727, + FeatureID.NVDA_USD_CLOSE: 725.68, + FeatureID.NVDA_USD_HIGH: 728.4799, + FeatureID.NVDA_USD_LOW: 722, + FeatureID.NVDA_USD_VOLUME: 393150789.56, + }, + # start: 1708542000 + -1: { + FeatureID.NVDA_USD_OPEN: 670.46, + FeatureID.NVDA_USD_CLOSE: 667.736, + FeatureID.NVDA_USD_HIGH: 674.95, + FeatureID.NVDA_USD_LOW: 666.74, + 
FeatureID.NVDA_USD_VOLUME: 4099908587.69, + }, + } + + for index, samples in expected_values.items(): + for feature_id, expected_value in samples.items(): + test_value = test_feature_samples[feature_id][index] + self.assertTrue( + math.isclose( + test_value, + expected_value, + rel_tol=1e-07, + ), + msg=f"index: {index} feature_id: {feature_id.name} " + f"test_value:{test_value} expected_value: {expected_value}", + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, 1 + ) + + index = 0 + samples = expected_values[index] + for feature_id, expected_value in samples.items(): + test_value = float(test_feature_samples[feature_id][index]) + self.assertTrue( + math.isclose( + test_value, + expected_value, + rel_tol=1e-07, + ), + msg=f"index: {index} feature_id: {feature_id.name} " + f"test_value:{test_value} expected_value: {expected_value}", + ) + + def test_lunarcrush_topic_feature_source(self): + _START_TIME_MS = datetime.parse("2023-01-01 01:00:00").timestamp_ms() + _INTERVAL_MS = time_span_ms(hours=1) + _SAMPLE_COUNT = 7500 + + test_source = LunarCrushTimeSeriesTopic( + topic="bitcoin", + source_interval_ms=time_span_ms(hours=1), + feature_mappings={ + FeatureID.BTC_SOCIAL_POSTS_CREATED: LunarCrushMetric.POSTS_CREATED, + FeatureID.BTC_SOCIAL_POSTS_ACTIVE: LunarCrushMetric.POSTS_ACTIVE, + FeatureID.BTC_SOCIAL_INTERACTIONS: LunarCrushMetric.INTERACTIONS, + FeatureID.BTC_SOCIAL_CONTRIBUTORS_CREATED: LunarCrushMetric.CONTRIBUTORS_CREATED, + FeatureID.BTC_SOCIAL_CONTRIBUTORS_ACTIVE: LunarCrushMetric.CONTRIBUTORS_ACTIVE, + FeatureID.BTC_SOCIAL_SENTIMENT: LunarCrushMetric.SENTIMENT, + }, + ) + + test_feature_samples = test_source.get_feature_samples( + _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT + ) + + expected_values = { + # start: 1672531200 + 0: { + FeatureID.BTC_SOCIAL_POSTS_CREATED: 317, + FeatureID.BTC_SOCIAL_POSTS_ACTIVE: 3957, + FeatureID.BTC_SOCIAL_INTERACTIONS: 278276, + FeatureID.BTC_SOCIAL_CONTRIBUTORS_CREATED: 255, + 
FeatureID.BTC_SOCIAL_CONTRIBUTORS_ACTIVE: 2898,
+                FeatureID.BTC_SOCIAL_SENTIMENT: 80,
+            },
+            # start: 1695027600
+            6249: {
+                FeatureID.BTC_SOCIAL_POSTS_CREATED: 1216,
+                FeatureID.BTC_SOCIAL_POSTS_ACTIVE: 18201,
+                FeatureID.BTC_SOCIAL_INTERACTIONS: 2200823,
+                FeatureID.BTC_SOCIAL_CONTRIBUTORS_CREATED: 1005,
+                FeatureID.BTC_SOCIAL_CONTRIBUTORS_ACTIVE: 10481,
+                FeatureID.BTC_SOCIAL_SENTIMENT: 79,
+            },
+            # start: 1699527600
+            -1: {
+                FeatureID.BTC_SOCIAL_POSTS_CREATED: 676,
+                FeatureID.BTC_SOCIAL_POSTS_ACTIVE: 41184,
+                FeatureID.BTC_SOCIAL_INTERACTIONS: 4800962,
+                FeatureID.BTC_SOCIAL_CONTRIBUTORS_CREATED: 556,
+                FeatureID.BTC_SOCIAL_CONTRIBUTORS_ACTIVE: 16607,
+                FeatureID.BTC_SOCIAL_SENTIMENT: 80,
+            },
+        }
+
+        for index, samples in expected_values.items():
+            for feature_id, expected_value in samples.items():
+                test_value = test_feature_samples[feature_id][index]
+                self.assertTrue(
+                    math.isclose(
+                        test_value,
+                        expected_value,
+                        rel_tol=1e-07,
+                    ),
+                    msg=f"index: {index} feature_id: {feature_id.name} "
+                    f"test_value:{test_value} expected_value: {expected_value}",
+                )
+
+        test_feature_samples = test_source.get_feature_samples(
+            _START_TIME_MS, _INTERVAL_MS, 1
+        )
+
+        index = 0
+        samples = expected_values[index]
+        for feature_id, expected_value in samples.items():
+            test_value = float(test_feature_samples[feature_id][index])
+            self.assertTrue(
+                math.isclose(
+                    test_value,
+                    expected_value,
+                    rel_tol=1e-07,
+                ),
+                msg=f"index: {index} feature_id: {feature_id.name} "
+                f"test_value:{test_value} expected_value: {expected_value}",
+            )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/feature_tests/test_yahoo_finance_source.py b/tests/feature_tests/test_yahoo_finance_source.py
new file mode 100644
index 0000000..5adf987
--- /dev/null
+++ b/tests/feature_tests/test_yahoo_finance_source.py
@@ -0,0 +1,204 @@
+# developer: taoshi-mbrown
+# Copyright © 2024 Taoshi Inc
+from features import FeatureID
+from feature_sources import (
+    YahooFinanceKlineField,
+    YahooFinanceKlineFeatureSource,
+)
+from time_util import datetime, time_span_ms, previous_interval_ms
+import unittest
+
+
+class TestYahooFinanceKlineFeatureSource(unittest.TestCase):
+    def test_yahoo_finance_feature_source_btc_usd_5m(self):
+        _INTERVAL_MS = time_span_ms(minutes=5)
+        _SKIP_COUNT = 10
+        _SAMPLE_COUNT = 100
+        _START_TIME_MS = previous_interval_ms(
+            datetime.now().timestamp_ms(), _INTERVAL_MS
+        ) - ((_SAMPLE_COUNT + _SKIP_COUNT) * _INTERVAL_MS)
+        _BTC_USD_LOW_MIN = 40000
+        _BTC_USD_HIGH_MAX = 120000
+
+        test_source = YahooFinanceKlineFeatureSource(
+            ticker="BTC-USD",
+            source_interval_ms=_INTERVAL_MS,
+            feature_mappings={
+                FeatureID.BTC_USD_CLOSE: YahooFinanceKlineField.PRICE_CLOSE,
+                FeatureID.BTC_USD_HIGH: YahooFinanceKlineField.PRICE_HIGH,
+                FeatureID.BTC_USD_LOW: YahooFinanceKlineField.PRICE_LOW,
+                FeatureID.BTC_USD_VOLUME: YahooFinanceKlineField.VOLUME,
+            },
+        )
+
+        test_feature_samples = test_source.get_feature_samples(
+            _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT
+        )
+
+        last_close = None
+        last_high = None
+        last_low = None
+        last_volume = None
+        for i in range(_SAMPLE_COUNT):
+            close = test_feature_samples[FeatureID.BTC_USD_CLOSE][i]
+            high = test_feature_samples[FeatureID.BTC_USD_HIGH][i]
+            low = test_feature_samples[FeatureID.BTC_USD_LOW][i]
+            volume = test_feature_samples[FeatureID.BTC_USD_VOLUME][i]
+            assert close != last_close
+            assert high != last_high
+            assert high >= close
+            assert high < _BTC_USD_HIGH_MAX
+            assert low != last_low
+            assert low <= close
+            assert low > _BTC_USD_LOW_MIN
+            assert volume != last_volume
+            # Remember this sample so the != checks compare consecutive samples
+            last_close = close
+            last_high = high
+            last_low = low
+            last_volume = volume
+
+        test_feature_samples = test_source.get_feature_samples(
+            _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT
+        )
+
+        i = 0
+        close = test_feature_samples[FeatureID.BTC_USD_CLOSE][i]
+        high = test_feature_samples[FeatureID.BTC_USD_HIGH][i]
+        low = test_feature_samples[FeatureID.BTC_USD_LOW][i]
+        volume = test_feature_samples[FeatureID.BTC_USD_VOLUME][i]
+        assert close != last_close
+        assert high != last_high
+        assert high >= close
+        assert high < _BTC_USD_HIGH_MAX
+        assert low != last_low
+        assert low <= close
+        assert low > _BTC_USD_LOW_MIN
+        assert volume != last_volume
+
+    def test_yahoo_finance_feature_source_spx_usd_5m(self):
+        _INTERVAL_MS = time_span_ms(minutes=5)
+        _SKIP_COUNT = 10
+        _SAMPLE_COUNT = 100
+        # TODO: Adjust for market opening
+        _START_DATE = datetime.parse("2024-03-12T15:00:00-04:00")
+        _START_TIME_MS = previous_interval_ms(
+            _START_DATE.timestamp_ms(), _INTERVAL_MS
+        ) - ((_SAMPLE_COUNT + _SKIP_COUNT) * _INTERVAL_MS)
+        _SPX_USD_LOW_MIN = 3000
+        _SPX_USD_HIGH_MAX = 7000
+
+        test_source = YahooFinanceKlineFeatureSource(
+            ticker="^GSPC",
+            source_interval_ms=_INTERVAL_MS,
+            feature_mappings={
+                FeatureID.SPX_USD_CLOSE: YahooFinanceKlineField.PRICE_CLOSE,
+                FeatureID.SPX_USD_HIGH: YahooFinanceKlineField.PRICE_HIGH,
+                FeatureID.SPX_USD_LOW: YahooFinanceKlineField.PRICE_LOW,
+                FeatureID.SPX_USD_VOLUME: YahooFinanceKlineField.VOLUME,
+            },
+        )
+
+        test_feature_samples = test_source.get_feature_samples(
+            _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT
+        )
+
+        last_close = None
+        last_high = None
+        last_low = None
+        last_volume = None
+        for i in range(_SAMPLE_COUNT):
+            close = test_feature_samples[FeatureID.SPX_USD_CLOSE][i]
+            high = test_feature_samples[FeatureID.SPX_USD_HIGH][i]
+            low = test_feature_samples[FeatureID.SPX_USD_LOW][i]
+            volume = test_feature_samples[FeatureID.SPX_USD_VOLUME][i]
+            assert close != last_close
+            assert high != last_high
+            assert high >= close
+            assert high < _SPX_USD_HIGH_MAX
+            assert low != last_low
+            assert low <= close
+            assert low > _SPX_USD_LOW_MIN
+            assert volume != last_volume
+            # Remember this sample so the != checks compare consecutive samples
+            last_close = close
+            last_high = high
+            last_low = low
+            last_volume = volume
+
+        test_feature_samples = test_source.get_feature_samples(
+            _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT
+        )
+
+        i = 0
+        close = test_feature_samples[FeatureID.SPX_USD_CLOSE][i]
+        high = test_feature_samples[FeatureID.SPX_USD_HIGH][i]
+        low = test_feature_samples[FeatureID.SPX_USD_LOW][i]
+        volume = test_feature_samples[FeatureID.SPX_USD_VOLUME][i]
+        assert close != last_close
+        assert high != last_high
+        assert high >= close
+        assert high < _SPX_USD_HIGH_MAX
+        assert low != last_low
+        assert low <= close
+        assert low > _SPX_USD_LOW_MIN
+        assert volume != last_volume
+
+    def test_yahoo_finance_feature_source_eur_usd_5m(self):
+        _INTERVAL_MS = time_span_ms(minutes=5)
+        _SKIP_COUNT = 10
+        _SAMPLE_COUNT = 100
+        _START_TIME_MS = previous_interval_ms(
+            datetime.now().timestamp_ms(), _INTERVAL_MS
+        ) - ((_SAMPLE_COUNT + _SKIP_COUNT) * _INTERVAL_MS)
+        _EUR_USD_LOW_MIN = 1.0
+        _EUR_USD_HIGH_MAX = 1.3
+
+        test_source = YahooFinanceKlineFeatureSource(
+            ticker="EURUSD=X",
+            source_interval_ms=_INTERVAL_MS,
+            feature_mappings={
+                FeatureID.EUR_USD_CLOSE: YahooFinanceKlineField.PRICE_CLOSE,
+                FeatureID.EUR_USD_HIGH: YahooFinanceKlineField.PRICE_HIGH,
+                FeatureID.EUR_USD_LOW: YahooFinanceKlineField.PRICE_LOW,
+            },
+        )
+
+        test_feature_samples = test_source.get_feature_samples(
+            _START_TIME_MS, _INTERVAL_MS, _SAMPLE_COUNT
+        )
+
+        last_close = None
+        last_high = None
+        last_low = None
+        for i in range(_SAMPLE_COUNT):
+            close = test_feature_samples[FeatureID.EUR_USD_CLOSE][i]
+            high = test_feature_samples[FeatureID.EUR_USD_HIGH][i]
+            low = test_feature_samples[FeatureID.EUR_USD_LOW][i]
+            assert close != last_close
+            assert high != last_high
+            assert high >= close
+            assert high < _EUR_USD_HIGH_MAX
+            assert low != last_low
+            assert low <= close
+            assert low > _EUR_USD_LOW_MIN
+            # Remember this sample so the != checks compare consecutive samples
+            last_close = close
+            last_high = high
+            last_low = low
+
+        i = 0
+        close = test_feature_samples[FeatureID.EUR_USD_CLOSE][i]
+        high = test_feature_samples[FeatureID.EUR_USD_HIGH][i]
+        low = test_feature_samples[FeatureID.EUR_USD_LOW][i]
+        assert close != last_close
+        assert high != last_high
+        assert high >= close
+        assert high < _EUR_USD_HIGH_MAX
+        assert low != last_low
+        assert low <= close
+        assert low > _EUR_USD_LOW_MIN
+
+
+if __name__ == "__main__":
+    unittest.main()