diff --git a/.tox/dist/nfl_data_py-0.1.5.zip b/.tox/dist/nfl_data_py-0.1.5.zip new file mode 100644 index 0000000..5efb7e2 Binary files /dev/null and b/.tox/dist/nfl_data_py-0.1.5.zip differ diff --git a/.tox/log/GLOB-0.log b/.tox/log/GLOB-0.log new file mode 100644 index 0000000..011f412 --- /dev/null +++ b/.tox/log/GLOB-0.log @@ -0,0 +1,40 @@ +action: GLOB, msg: packaging +cwd: C:\Users\JCoop\Documents\GitHub\nflfastPy +cmd: 'c:\users\jcoop\anaconda3\envs\myenv\python.exe' setup.py sdist --formats=zip --dist-dir '.tox\dist' +running sdist +running egg_info +writing nfl_data_py.egg-info\PKG-INFO +writing dependency_links to nfl_data_py.egg-info\dependency_links.txt +writing requirements to nfl_data_py.egg-info\requires.txt +writing top-level names to nfl_data_py.egg-info\top_level.txt +reading manifest file 'nfl_data_py.egg-info\SOURCES.txt' +writing manifest file 'nfl_data_py.egg-info\SOURCES.txt' +running check +creating nfl_data_py-0.1.5 +creating nfl_data_py-0.1.5\nfl_data_py +creating nfl_data_py-0.1.5\nfl_data_py.egg-info +copying files to nfl_data_py-0.1.5... +copying README.md -> nfl_data_py-0.1.5 +copying setup.py -> nfl_data_py-0.1.5 +copying nfl_data_py\__init__.py -> nfl_data_py-0.1.5\nfl_data_py +copying nfl_data_py.egg-info\PKG-INFO -> nfl_data_py-0.1.5\nfl_data_py.egg-info +copying nfl_data_py.egg-info\SOURCES.txt -> nfl_data_py-0.1.5\nfl_data_py.egg-info +copying nfl_data_py.egg-info\dependency_links.txt -> nfl_data_py-0.1.5\nfl_data_py.egg-info +copying nfl_data_py.egg-info\requires.txt -> nfl_data_py-0.1.5\nfl_data_py.egg-info +copying nfl_data_py.egg-info\top_level.txt -> nfl_data_py-0.1.5\nfl_data_py.egg-info +Writing nfl_data_py-0.1.5\setup.cfg +creating '.tox\dist\nfl_data_py-0.1.5.zip' and adding 'nfl_data_py-0.1.5' to it +adding 'nfl_data_py-0.1.5' +adding 'nfl_data_py-0.1.5\nfl_data_py' +adding 'nfl_data_py-0.1.5\nfl_data_py.egg-info' +adding 'nfl_data_py-0.1.5\PKG-INFO' +adding 'nfl_data_py-0.1.5\README.md' +adding 'nfl_data_py-0.1.5\setup.cfg' +adding 'nfl_data_py-0.1.5\setup.py' +adding 'nfl_data_py-0.1.5\nfl_data_py\__init__.py' +adding 'nfl_data_py-0.1.5\nfl_data_py.egg-info\dependency_links.txt' +adding 'nfl_data_py-0.1.5\nfl_data_py.egg-info\PKG-INFO' +adding 'nfl_data_py-0.1.5\nfl_data_py.egg-info\requires.txt' +adding 'nfl_data_py-0.1.5\nfl_data_py.egg-info\SOURCES.txt' +adding 'nfl_data_py-0.1.5\nfl_data_py.egg-info\top_level.txt' +removing 'nfl_data_py-0.1.5' (and everything under it) diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py index 488b0e6..2787d2f 100644 --- a/nfl_data_py/__init__.py +++ b/nfl_data_py/__init__.py @@ -36,9 +36,9 @@ def import_pbp_data(years, columns=None, downcast=True): try: if len(columns) != 0: - data = pandas.read_parquet(url1 + str(year) + url2, columns=columns, engine='fastparquet') + data = pandas.read_parquet(url1 + str(year) + url2, columns=columns, engine='auto') else: - data = pandas.read_parquet(url1 + str(year) + url2, engine='fastparquet') + data = pandas.read_parquet(url1 + str(year) + url2, engine='auto') raw = pandas.DataFrame(data) raw['season'] = year @@ -84,7 +84,8 @@ def import_weekly_data(years, columns=None, downcast=True): columns = [] # read weekly data - data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/player_stats.parquet', engine='fastparquet') + data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/player_stats.parquet', engine='auto') + data = data[data['season'].isin(years)] if len(columns) > 0: @@ -120,7 +121,7 @@ def import_seasonal_data(years, s_type='REG'): raise ValueError('Only REG, ALL, POST allowed for s_type.') # import weekly data - data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/player_stats.parquet', engine='fastparquet') + data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/player_stats.parquet', engine='auto') # filter to appropriate season_type if s_type == 'ALL': @@ -179,7 +180,8 @@ def see_pbp_cols(): """ # load pbp file, identify columns - data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/play_by_play_2020.parquet', engine='fastparquet') + data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/play_by_play_2020.parquet', engine='auto') + cols = data.columns return cols @@ -193,7 +195,8 @@ def see_weekly_cols(): """ # load weekly file, identify columns - data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/player_stats.parquet', engine='fastparquet') + data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/player_stats.parquet', engine='auto') + cols = data.columns return cols @@ -284,14 +287,8 @@ def import_schedules(years): scheds = pandas.DataFrame() # import schedule for specified years - for x in years: - - try: - temp = pandas.read_csv(r'https://raw.githubusercontent.com/cooperdff/nfl_data_py/main/data/schedules//' + str(x) + '.csv').drop('Unnamed: 0', axis=1) - scheds = scheds.append(temp) - - except: - print('Data not available for ' + str(x)) + scheds = pandas.read_csv(r'http://www.habitatring.com/games.csv') + scheds = scheds[scheds['season'].isin(years)] return scheds @@ -513,6 +510,58 @@ def import_ids(columns=None, ids=None): return df +def import_ids(columns=None, ids=None): + """Import mapping table of ids for most major data providers + + Args: + columns (List[str]): list of columns to return + ids (List[str]): list of specific ids to return + + Returns: + DataFrame + """ + + # create list of id options + avail_ids = ['mfl_id', 'sportradar_id', 'fantasypros_id', 'gsis_id', 'pff_id', + 'sleeper_id', 'nfl_id', 'espn_id', 'yahoo_id', 'fleaflicker_id', + 'cbs_id', 'rotowire_id', 'rotoworld_id', 'ktc_id', 'pfr_id', + 'cfbref_id', 'stats_id', 'stats_global_id', 'fantasy_data_id'] + avail_sites = [x[:-3] for x in avail_ids] + + # check variable types + if columns is None: + columns = [] + + if ids is None: + ids = [] + + if not isinstance(columns, list): + raise ValueError('columns variable must be list.') + + if not isinstance(ids, list): + raise ValueError('ids variable must be list.') + + # confirm id is in table + if False in [x in avail_sites for x in ids]: + raise ValueError('ids variable can only contain ' + ', '.join(avail_sites)) + + # import data + df = pandas.read_csv(r'https://raw.githubusercontent.com/dynastyprocess/data/master/files/db_playerids.csv') + + rem_cols = [x for x in df.columns if x not in avail_ids] + tgt_ids = [x + '_id' for x in ids] + + # filter df to just specified columns + if len(columns) > 0 and len(ids) > 0: + df = df[set(tgt_ids + columns)] + elif len(columns) > 0 and len(ids) == 0: + df = df[set(avail_ids + columns)] + elif len(columns) == 0 and len(ids) > 0: + df = df[set(tgt_ids + rem_cols)] + + return df + + def clean_nfl_data(df): """Cleans descriptive data for players and teams to help with consistency across datasets @@ -592,6 +641,13 @@ def clean_nfl_data(df): 'SFO': 'SF', 'TAM': 'TB' } + + na_replace = { + 'NA':np.nan + } + + for col in df.columns: + df.replace({col:na_replace}, inplace=True) if 'name' in df.columns: df.replace({'name': name_repl}, inplace=True) diff --git a/nfl_data_py/__pycache__/__init__.cpython-39.pyc b/nfl_data_py/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..e901b4b Binary files /dev/null and b/nfl_data_py/__pycache__/__init__.cpython-39.pyc differ diff --git a/nfl_data_py/tests/__pycache__/nfl_test.cpython-37-pytest-6.2.4.pyc b/nfl_data_py/tests/__pycache__/nfl_test.cpython-37-pytest-6.2.4.pyc new file mode 100644 index 0000000..4ee0a20 Binary files /dev/null and b/nfl_data_py/tests/__pycache__/nfl_test.cpython-37-pytest-6.2.4.pyc differ diff --git a/nfl_data_py/tests/__pycache__/nfl_test.cpython-39-pytest-6.2.4.pyc b/nfl_data_py/tests/__pycache__/nfl_test.cpython-39-pytest-6.2.4.pyc new file mode 100644 index 0000000..c167e43 Binary files /dev/null and b/nfl_data_py/tests/__pycache__/nfl_test.cpython-39-pytest-6.2.4.pyc differ diff --git a/nfl_data_py/tests/nfl_test.py b/nfl_data_py/tests/nfl_test.py new file mode 100644 index 0000000..f45268d --- /dev/null +++ b/nfl_data_py/tests/nfl_test.py @@ -0,0 +1,75 @@ +from unittest import TestCase +import pandas as pd + +import nfl_data_py as nfl + +class test_pbp(TestCase): + def test_is_df(self): + s = nfl.import_pbp_data([2020]) + self.assertEqual(True, isinstance(s, pd.DataFrame)) + +class test_weekly(TestCase): + def test_is_df(self): + s = nfl.import_weekly_data([2020]) + self.assertEqual(True, isinstance(s, pd.DataFrame)) + +class test_seasonal(TestCase): + def test_is_df(self): + s = nfl.import_seasonal_data([2020]) + self.assertEqual(True, isinstance(s, pd.DataFrame)) + +class test_pbp_cols(TestCase): + def test_is_list(self): + s = nfl.see_pbp_cols() + self.assertEqual(True, isinstance(set(nfl.see_pbp_cols()), set)) + +class test_weekly_cols(TestCase): + def test_is_list(self): + s = nfl.see_weekly_cols() + self.assertEqual(True, isinstance(set(nfl.see_pbp_cols()), set)) + +class test_rosters(TestCase): + def test_is_df(self): + s = nfl.import_rosters([2020]) + self.assertEqual(True, isinstance(s, pd.DataFrame)) + +class test_team_desc(TestCase): + def test_is_df(self): + s = nfl.import_team_desc() + self.assertEqual(True, isinstance(s, pd.DataFrame)) + +class test_schedules(TestCase): + def test_is_df(self): + s = nfl.import_schedules([2020]) + self.assertEqual(True, isinstance(s, pd.DataFrame)) + +class test_wins(TestCase): + def test_is_df(self): + s = nfl.import_win_totals([2020]) + self.assertEqual(True, isinstance(s, pd.DataFrame)) + +class test_officials(TestCase): + def test_is_df(self): + s = nfl.import_officials([2020]) + self.assertEqual(True, isinstance(s, pd.DataFrame)) + +class test_draft_picks(TestCase): + def test_is_df(self): + s = nfl.import_draft_picks([2020]) + self.assertEqual(True, isinstance(s, pd.DataFrame)) + +class test_draft_values(TestCase): + def test_is_df(self): + s = nfl.import_draft_values() + self.assertEqual(True, isinstance(s, pd.DataFrame)) + +class test_combine(TestCase): + def test_is_df(self): + s = nfl.import_combine_data([2020]) + self.assertEqual(True, isinstance(s, pd.DataFrame)) + +class test_ids(TestCase): + def test_is_df(self): + s = nfl.import_ids() + self.assertEqual(True, isinstance(s, pd.DataFrame)) + diff --git a/setup.py b/setup.py index 04325e1..1c8ace5 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,8 @@ EMAIL = 'cooper.dff11@gmail.com' AUTHOR = 'cooperdff' REQUIRES_PYTHON = '>=3.6.0' -VERSION = '0.1.5' +VERSION = '0.1.6' + # What packages are required for this module to be executed? REQUIRED = [ @@ -107,12 +108,9 @@ def run(self): python_requires=REQUIRES_PYTHON, url=URL, packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]), - # If your package is a single module, use this instead of 'packages': - # py_modules=['mypackage'], + test_suite='nose.collector', + tests_require=['nose'], - # entry_points={ - # 'console_scripts': ['mycli=mymodule:cli'], - # }, install_requires=REQUIRED, extras_require=EXTRAS, include_package_data=True, diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..4db5190 --- /dev/null +++ b/tox.ini @@ -0,0 +1,8 @@ +# content of: tox.ini , put in same dir as setup.py +[tox] +envlist = python2.7,python3.5,python3.6,python3.7,python3.8,python3.9 +[testenv] +deps = pytest +# run the tests +# ... or run any other command line tool you need to run here +commands = pytest \ No newline at end of file