From cc3fff587dda73fe88d6315c0e50f47ecd95cfaf Mon Sep 17 00:00:00 2001
From: Alec Ostrander <alec.ostrander@gmail.com>
Date: Tue, 6 Aug 2024 22:41:31 -0500
Subject: [PATCH 1/4] update tests, metadata

---
 .gitignore                    |  3 +++
 nfl_data_py/tests/nfl_test.py | 14 ++++++--------
 setup.py                      | 13 +++++++------
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/.gitignore b/.gitignore
index a15111e..48f5afe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -139,3 +139,6 @@ dmypy.json
 
 # Cython debug symbols
 cython_debug/
+
+# Mac local files
+.DS_Store
\ No newline at end of file
diff --git a/nfl_data_py/tests/nfl_test.py b/nfl_data_py/tests/nfl_test.py
index 290b968..763a886 100644
--- a/nfl_data_py/tests/nfl_test.py
+++ b/nfl_data_py/tests/nfl_test.py
@@ -1,6 +1,7 @@
 from unittest import TestCase
 from pathlib import Path
 import shutil
+import random
 
 import pandas as pd
 
@@ -20,7 +21,7 @@ def test_is_df_with_data_thread_requests(self):
 		
         
     def test_uses_cache_when_cache_is_true(self):
-        cache = Path(__file__).parent/"tmpcache"
+        cache = Path(__file__).parent/f"tmpcache-{random.randint(0, 10000)}"
         self.assertRaises(
             ValueError,
             nfl.import_pbp_data, [2020], cache=True, alt_path=cache
@@ -268,17 +269,14 @@ def test_is_df_with_data_thread_requests(self):
         
 class test_cache(TestCase):
     def test_cache(self):
-        cache = Path(__file__).parent/"tmpcache"
+        cache = Path(__file__).parent/f"tmpcache-{random.randint(0, 10000)}"
         self.assertFalse(cache.is_dir())
         
         nfl.cache_pbp([2020], alt_path=cache)
         
-        new_paths = list(cache.glob("**/*"))
-        self.assertEqual(len(new_paths), 2)
-        self.assertTrue(new_paths[0].is_dir())
-        self.assertTrue(new_paths[1].is_file())
-        
-        pbp2020 = pd.read_parquet(new_paths[1])
+        self.assertTrue(cache.is_dir())
+
+        pbp2020 = pd.read_parquet(cache/"season=2020"/"part.0.parquet")
         self.assertIsInstance(pbp2020, pd.DataFrame)
         self.assertFalse(pbp2020.empty)
         
diff --git a/setup.py b/setup.py
index 4293e74..11a06d0 100644
--- a/setup.py
+++ b/setup.py
@@ -14,22 +14,23 @@
 # Package meta-data.
 NAME = 'nfl_data_py'
 DESCRIPTION = 'python library for interacting with NFL data sourced from nflfastR'
-URL = 'https://github.com/cooperdff/nfl_data_py'
-EMAIL = 'cooper.dff11@gmail.com'
-AUTHOR = 'cooperdff'
-REQUIRES_PYTHON = '>=3.6.0'
+URL = 'https://github.com/nflverse/nfl_data_py'
+EMAIL = 'alec.ostrander@gmail.com'
+AUTHOR = 'Alec Ostrander'
+REQUIRES_PYTHON = '>=3.9.0'
 VERSION = '0.3.1'
 
 
 # What packages are required for this module to be executed?
 REQUIRED = [
-    'pandas>1',
+    'pandas>=2',
+    'numpy>=2',
     'appdirs>1',
-    'fastparquet>0.5',
 ]
 
 # What packages are optional?
 EXTRAS = {
+    "fastparquet": ['fastparquet>0.5']
 }
 
 # The rest you shouldn't have to touch too much :)

From 293fe05e6f8e30d4ec62250d187874d1aa4c1d1b Mon Sep 17 00:00:00 2001
From: Alec Ostrander <alec.ostrander@gmail.com>
Date: Tue, 6 Aug 2024 23:16:17 -0500
Subject: [PATCH 2/4] clean up lint issues

---
 nfl_data_py/__init__.py | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py
index 604ba7c..5fbdceb 100644
--- a/nfl_data_py/__init__.py
+++ b/nfl_data_py/__init__.py
@@ -150,7 +150,7 @@ def import_pbp_data(
                 pbp_data.append(raw)
                 print(str(year) + ' done.')
 
-            except Error as e:
+            except Exception as e:
                 print(e)
                 print('Data not available for ' + str(year))
     
@@ -1138,18 +1138,6 @@ def clean_nfl_data(df):
         'Southern Miss': 'Southern Mississippi',
         'Louisiana State': 'LSU'
     }
-
-    pro_tm_repl = {
-        'GNB': 'GB',
-        'KAN': 'KC',
-        'LA': 'LAR',
-        'LVR': 'LV',
-        'NWE': 'NE',
-        'NOR': 'NO',
-        'SDG': 'SD',
-        'SFO': 'SF',
-        'TAM': 'TB'
-    }
     
     na_replace = {
         'NA':numpy.nan
@@ -1164,8 +1152,4 @@ def clean_nfl_data(df):
     if 'col_team' in df.columns:
         df.replace({'col_team': col_tm_repl}, inplace=True)
 
-        if 'name' in df.columns:
-            for z in player_col_tm_repl:
-                df[df['name'] == z[0]] = df[df['name'] == z[0]].replace({z[1]: z[2]})
-
     return df

From 753ec6c372e6a71ec3fff798661c60ed574ae905 Mon Sep 17 00:00:00 2001
From: Alec Ostrander <alec.ostrander@gmail.com>
Date: Tue, 6 Aug 2024 23:56:38 -0500
Subject: [PATCH 3/4] add development dependencies

---
 requirements.txt | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..df06964
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+appdirs
+fastparquet
+numpy
+pandas
\ No newline at end of file

From e94511ba08b512c2da85c0cd092ff815ed55f739 Mon Sep 17 00:00:00 2001
From: justinrobinson1020
 <79549296+justinrobinson1020@users.noreply.github.com>
Date: Mon, 16 Sep 2024 22:26:08 -0400
Subject: [PATCH 4/4] Add column data types to import_ids() (#88)

---
 nfl_data_py/__init__.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py
index 5fbdceb..740ba7f 100644
--- a/nfl_data_py/__init__.py
+++ b/nfl_data_py/__init__.py
@@ -760,7 +760,17 @@ def import_ids(columns=None, ids=None):
         raise ValueError('ids variable can only contain ' + ', '.join(avail_sites))
         
     # import data
-    df = pandas.read_csv(r'https://raw.githubusercontent.com/dynastyprocess/data/master/files/db_playerids.csv')
+    dtypes = {
+        'mfl_id': str, 'sportradar_id': str, 'fantasypros_id': str, 'gsis_id': str,
+        'pff_id': str, 'sleeper_id': str, 'nfl_id': str, 'espn_id': str, 'yahoo_id': str,
+        'fleaflicker_id': str, 'cbs_id': str, 'pfr_id': str, 'cfbref_id': str,
+        'rotowire_id': str, 'rotoworld_id': str, 'ktc_id': str, 'stats_id': str,
+        'stats_global_id': str, 'fantasy_data_id': str, 'swish_id': str, 'name': str,
+        'merge_name': str, 'position': str, 'team': str, 'age': 'Float64',
+        'draft_year': 'Int64', 'draft_round': 'Int64', 'draft_pick': 'Int64', 'draft_ovr': 'Int64',
+        'twitter_username': str, 'height': 'Int64', 'weight': 'Int64', 'college': str, 'db_season': 'Int64'
+    }
+    df = pandas.read_csv(r'https://raw.githubusercontent.com/dynastyprocess/data/master/files/db_playerids.csv', dtype=dtypes, parse_dates=['birthdate'])
     
     rem_cols = [x for x in df.columns if x not in avail_ids]
     tgt_ids = [x + '_id' for x in ids]