From cc3fff587dda73fe88d6315c0e50f47ecd95cfaf Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Tue, 6 Aug 2024 22:41:31 -0500 Subject: [PATCH 1/4] update tests, metadata --- .gitignore | 3 +++ nfl_data_py/tests/nfl_test.py | 14 ++++++-------- setup.py | 13 +++++++------ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index a15111e..48f5afe 100644 --- a/.gitignore +++ b/.gitignore @@ -139,3 +139,6 @@ dmypy.json # Cython debug symbols cython_debug/ + +# Mac local files +.DS_Store \ No newline at end of file diff --git a/nfl_data_py/tests/nfl_test.py b/nfl_data_py/tests/nfl_test.py index 290b968..763a886 100644 --- a/nfl_data_py/tests/nfl_test.py +++ b/nfl_data_py/tests/nfl_test.py @@ -1,6 +1,7 @@ from unittest import TestCase from pathlib import Path import shutil +import random import pandas as pd @@ -20,7 +21,7 @@ def test_is_df_with_data_thread_requests(self): def test_uses_cache_when_cache_is_true(self): - cache = Path(__file__).parent/"tmpcache" + cache = Path(__file__).parent/f"tmpcache-{random.randint(0, 10000)}" self.assertRaises( ValueError, nfl.import_pbp_data, [2020], cache=True, alt_path=cache @@ -268,17 +269,14 @@ def test_is_df_with_data_thread_requests(self): class test_cache(TestCase): def test_cache(self): - cache = Path(__file__).parent/"tmpcache" + cache = Path(__file__).parent/f"tmpcache-{random.randint(0, 10000)}" self.assertFalse(cache.is_dir()) nfl.cache_pbp([2020], alt_path=cache) - new_paths = list(cache.glob("**/*")) - self.assertEqual(len(new_paths), 2) - self.assertTrue(new_paths[0].is_dir()) - self.assertTrue(new_paths[1].is_file()) - - pbp2020 = pd.read_parquet(new_paths[1]) + self.assertTrue(cache.is_dir()) + + pbp2020 = pd.read_parquet(cache/"season=2020"/"part.0.parquet") self.assertIsInstance(pbp2020, pd.DataFrame) self.assertFalse(pbp2020.empty) diff --git a/setup.py b/setup.py index 4293e74..11a06d0 100644 --- a/setup.py +++ b/setup.py @@ -14,22 +14,23 @@ # Package meta-data. 
NAME = 'nfl_data_py' DESCRIPTION = 'python library for interacting with NFL data sourced from nflfastR' -URL = 'https://github.com/cooperdff/nfl_data_py' -EMAIL = 'cooper.dff11@gmail.com' -AUTHOR = 'cooperdff' -REQUIRES_PYTHON = '>=3.6.0' +URL = 'https://github.com/nflverse/nfl_data_py' +EMAIL = 'alec.ostrander@gmail.com' +AUTHOR = 'Alec Ostrander' +REQUIRES_PYTHON = '>=3.9.0' VERSION = '0.3.1' # What packages are required for this module to be executed? REQUIRED = [ - 'pandas>1', + 'pandas>=2', + 'numpy>=2', 'appdirs>1', - 'fastparquet>0.5', ] # What packages are optional? EXTRAS = { + "fastparquet": ['fastparquet>0.5'] } # The rest you shouldn't have to touch too much :) From 293fe05e6f8e30d4ec62250d187874d1aa4c1d1b Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Tue, 6 Aug 2024 23:16:17 -0500 Subject: [PATCH 2/4] clean up lint issues --- nfl_data_py/__init__.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py index 604ba7c..5fbdceb 100644 --- a/nfl_data_py/__init__.py +++ b/nfl_data_py/__init__.py @@ -150,7 +150,7 @@ def import_pbp_data( pbp_data.append(raw) print(str(year) + ' done.') - except Error as e: + except Exception as e: print(e) print('Data not available for ' + str(year)) @@ -1138,18 +1138,6 @@ def clean_nfl_data(df): 'Southern Miss': 'Southern Mississippi', 'Louisiana State': 'LSU' } - - pro_tm_repl = { - 'GNB': 'GB', - 'KAN': 'KC', - 'LA': 'LAR', - 'LVR': 'LV', - 'NWE': 'NE', - 'NOR': 'NO', - 'SDG': 'SD', - 'SFO': 'SF', - 'TAM': 'TB' - } na_replace = { 'NA':numpy.nan @@ -1164,8 +1152,4 @@ def clean_nfl_data(df): if 'col_team' in df.columns: df.replace({'col_team': col_tm_repl}, inplace=True) - if 'name' in df.columns: - for z in player_col_tm_repl: - df[df['name'] == z[0]] = df[df['name'] == z[0]].replace({z[1]: z[2]}) - return df From 753ec6c372e6a71ec3fff798661c60ed574ae905 Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Tue, 6 Aug 2024 23:56:38 -0500 
Subject: [PATCH 3/4] add development dependencies --- requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..df06964 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +appdirs +fastparquet +numpy +pandas \ No newline at end of file From e94511ba08b512c2da85c0cd092ff815ed55f739 Mon Sep 17 00:00:00 2001 From: justinrobinson1020 <79549296+justinrobinson1020@users.noreply.github.com> Date: Mon, 16 Sep 2024 22:26:08 -0400 Subject: [PATCH 4/4] Add column data types to import_ids() (#88) --- nfl_data_py/__init__.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py index 5fbdceb..740ba7f 100644 --- a/nfl_data_py/__init__.py +++ b/nfl_data_py/__init__.py @@ -760,7 +760,17 @@ def import_ids(columns=None, ids=None): raise ValueError('ids variable can only contain ' + ', '.join(avail_sites)) # import data - df = pandas.read_csv(r'https://raw.githubusercontent.com/dynastyprocess/data/master/files/db_playerids.csv') + dtypes = { + 'mfl_id': str, 'sportradar_id': str, 'fantasypros_id': str, 'gsis_id': str, + 'pff_id': str, 'sleeper_id': str, 'nfl_id': str, 'espn_id': str, 'yahoo_id': str, + 'fleaflicker_id': str, 'cbs_id': str, 'pfr_id': str, 'cfbref_id': str, + 'rotowire_id': str, 'rotoworld_id': str, 'ktc_id': str, 'stats_id': str, + 'stats_global_id': str, 'fantasy_data_id': str, 'swish_id': str, 'name': str, + 'merge_name': str, 'position': str, 'team': str, 'age': 'Float64', + 'draft_year': 'Int64', 'draft_round': 'Int64', 'draft_pick': 'Int64', 'draft_ovr': 'Int64', + 'twitter_username': str, 'height': 'Int64', 'weight': 'Int64', 'college': str, 'db_season': 'Int64' + } + df = pandas.read_csv(r'https://raw.githubusercontent.com/dynastyprocess/data/master/files/db_playerids.csv', dtype=dtypes, parse_dates=['birthdate']) rem_cols = [x for x in df.columns if x not in 
avail_ids] tgt_ids = [x + '_id' for x in ids]