From dcd53b12f4427729d50bb9fe0c83784226f3008d Mon Sep 17 00:00:00 2001 From: sansbacon Date: Sun, 1 Aug 2021 20:09:41 -0500 Subject: [PATCH] Added downcast option for dataframes with default as True --- nfl_data_py/__init__.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py index 3d0c31c..df66ac4 100644 --- a/nfl_data_py/__init__.py +++ b/nfl_data_py/__init__.py @@ -5,8 +5,18 @@ import datetime -def import_pbp_data(years, columns=None): +def import_pbp_data(years, columns=None, downcast=True): + """Imports play-by-play data + + Args: + years (List[int]): years to get PBP data for + columns (List[str]): only return these columns + downcast (bool): convert float64 to float32, default True + + Returns: + DataFrame + """ if not isinstance(years, (list, range)): raise ValueError('Input must be list or range.') @@ -41,12 +51,27 @@ def import_pbp_data(years, columns=None): except: print('Data not available for ' + str(year)) + + # converts float64 to float32, saves ~30% memory; assign whole columns so the new dtype sticks + if downcast: + cols = plays.select_dtypes(include=[numpy.float64]).columns + plays[cols] = plays[cols].astype(numpy.float32) return plays -def import_weekly_data(years, columns=None): +def import_weekly_data(years, columns=None, downcast=True): + """Imports weekly player data + + Args: + years (List[int]): years to get weekly data for + columns (List[str]): only return these columns + downcast (bool): convert float64 to float32, default True + + Returns: + DataFrame + """ if not isinstance(years, (list, range)): raise ValueError('Input must be list or range.') @@ -62,6 +87,11 @@ def import_weekly_data(years, columns=None): if len(columns) > 0: data = data[columns] + # converts float64 to float32, saves ~30% memory; assign whole columns so the new dtype sticks + if downcast: + cols = data.select_dtypes(include=[numpy.float64]).columns + data[cols] = data[cols].astype(numpy.float32) + return data