Skip to content

Commit

Permalink
Merge pull request #14 from cooperdff/features
Browse files Browse the repository at this point in the history
Features
  • Loading branch information
cooperdff authored Aug 7, 2021
2 parents 6704850 + 37f7a4d commit a29b442
Show file tree
Hide file tree
Showing 9 changed files with 197 additions and 20 deletions.
Binary file added .tox/dist/nfl_data_py-0.1.5.zip
Binary file not shown.
40 changes: 40 additions & 0 deletions .tox/log/GLOB-0.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
action: GLOB, msg: packaging
cwd: C:\Users\JCoop\Documents\GitHub\nflfastPy
cmd: 'c:\users\jcoop\anaconda3\envs\myenv\python.exe' setup.py sdist --formats=zip --dist-dir '.tox\dist'
running sdist
running egg_info
writing nfl_data_py.egg-info\PKG-INFO
writing dependency_links to nfl_data_py.egg-info\dependency_links.txt
writing requirements to nfl_data_py.egg-info\requires.txt
writing top-level names to nfl_data_py.egg-info\top_level.txt
reading manifest file 'nfl_data_py.egg-info\SOURCES.txt'
writing manifest file 'nfl_data_py.egg-info\SOURCES.txt'
running check
creating nfl_data_py-0.1.5
creating nfl_data_py-0.1.5\nfl_data_py
creating nfl_data_py-0.1.5\nfl_data_py.egg-info
copying files to nfl_data_py-0.1.5...
copying README.md -> nfl_data_py-0.1.5
copying setup.py -> nfl_data_py-0.1.5
copying nfl_data_py\__init__.py -> nfl_data_py-0.1.5\nfl_data_py
copying nfl_data_py.egg-info\PKG-INFO -> nfl_data_py-0.1.5\nfl_data_py.egg-info
copying nfl_data_py.egg-info\SOURCES.txt -> nfl_data_py-0.1.5\nfl_data_py.egg-info
copying nfl_data_py.egg-info\dependency_links.txt -> nfl_data_py-0.1.5\nfl_data_py.egg-info
copying nfl_data_py.egg-info\requires.txt -> nfl_data_py-0.1.5\nfl_data_py.egg-info
copying nfl_data_py.egg-info\top_level.txt -> nfl_data_py-0.1.5\nfl_data_py.egg-info
Writing nfl_data_py-0.1.5\setup.cfg
creating '.tox\dist\nfl_data_py-0.1.5.zip' and adding 'nfl_data_py-0.1.5' to it
adding 'nfl_data_py-0.1.5'
adding 'nfl_data_py-0.1.5\nfl_data_py'
adding 'nfl_data_py-0.1.5\nfl_data_py.egg-info'
adding 'nfl_data_py-0.1.5\PKG-INFO'
adding 'nfl_data_py-0.1.5\README.md'
adding 'nfl_data_py-0.1.5\setup.cfg'
adding 'nfl_data_py-0.1.5\setup.py'
adding 'nfl_data_py-0.1.5\nfl_data_py\__init__.py'
adding 'nfl_data_py-0.1.5\nfl_data_py.egg-info\dependency_links.txt'
adding 'nfl_data_py-0.1.5\nfl_data_py.egg-info\PKG-INFO'
adding 'nfl_data_py-0.1.5\nfl_data_py.egg-info\requires.txt'
adding 'nfl_data_py-0.1.5\nfl_data_py.egg-info\SOURCES.txt'
adding 'nfl_data_py-0.1.5\nfl_data_py.egg-info\top_level.txt'
removing 'nfl_data_py-0.1.5' (and everything under it)
84 changes: 70 additions & 14 deletions nfl_data_py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ def import_pbp_data(years, columns=None, downcast=True):

try:
if len(columns) != 0:
data = pandas.read_parquet(url1 + str(year) + url2, columns=columns, engine='fastparquet')
data = pandas.read_parquet(url1 + str(year) + url2, columns=columns, engine='auto')
else:
data = pandas.read_parquet(url1 + str(year) + url2, engine='fastparquet')
data = pandas.read_parquet(url1 + str(year) + url2, engine='auto')

raw = pandas.DataFrame(data)
raw['season'] = year
Expand Down Expand Up @@ -84,7 +84,8 @@ def import_weekly_data(years, columns=None, downcast=True):
columns = []

# read weekly data
data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/player_stats.parquet', engine='fastparquet')
data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/player_stats.parquet', engine='auto')

data = data[data['season'].isin(years)]

if len(columns) > 0:
Expand Down Expand Up @@ -120,7 +121,7 @@ def import_seasonal_data(years, s_type='REG'):
raise ValueError('Only REG, ALL, POST allowed for s_type.')

# import weekly data
data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/player_stats.parquet', engine='fastparquet')
data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/player_stats.parquet', engine='auto')

# filter to appropriate season_type
if s_type == 'ALL':
Expand Down Expand Up @@ -179,7 +180,8 @@ def see_pbp_cols():
"""

# load pbp file, identify columns
data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/play_by_play_2020.parquet', engine='fastparquet')
data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/play_by_play_2020.parquet', engine='auto')

cols = data.columns

return cols
Expand All @@ -193,7 +195,8 @@ def see_weekly_cols():
"""

# load weekly file, identify columns
data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/player_stats.parquet', engine='fastparquet')
data = pandas.read_parquet(r'https://github.com/nflverse/nflfastR-data/raw/master/data/player_stats.parquet', engine='auto')

cols = data.columns

return cols
Expand Down Expand Up @@ -284,14 +287,8 @@ def import_schedules(years):
scheds = pandas.DataFrame()

# import schedule for specified years
for x in years:

try:
temp = pandas.read_csv(r'https://raw.githubusercontent.com/cooperdff/nfl_data_py/main/data/schedules//' + str(x) + '.csv').drop('Unnamed: 0', axis=1)
scheds = scheds.append(temp)

except:
print('Data not available for ' + str(x))
scheds = pandas.read_csv(r'http://www.habitatring.com/games.csv')
scheds = scheds[scheds['season'].isin(years)]

return scheds

Expand Down Expand Up @@ -513,6 +510,58 @@ def import_ids(columns=None, ids=None):
return df


def import_ids(columns=None, ids=None):
    """Import mapping table of ids for most major data providers.

    Args:
        columns (List[str]): list of columns to return
        ids (List[str]): list of specific ids to return, given as site
            names without the ``_id`` suffix (e.g. ``'espn'`` selects
            ``'espn_id'``)

    Returns:
        DataFrame: the full table when neither argument is given; otherwise
        the selected id columns followed by the selected other columns.

    Raises:
        ValueError: if ``columns`` or ``ids`` is not a list, or if ``ids``
            contains a site that is not in the table.
    """
    # NOTE(review): the surrounding diff context suggests an identical
    # import_ids is already defined just above this one; if so, this later
    # definition shadows it and one of the two should be removed - confirm.

    # create list of id options
    avail_ids = ['mfl_id', 'sportradar_id', 'fantasypros_id', 'gsis_id', 'pff_id',
                 'sleeper_id', 'nfl_id', 'espn_id', 'yahoo_id', 'fleaflicker_id',
                 'cbs_id', 'rotowire_id', 'rotoworld_id', 'ktc_id', 'pfr_id',
                 'cfbref_id', 'stats_id', 'stats_global_id', 'fantasy_data_id']
    avail_sites = [x[:-3] for x in avail_ids]

    # check variable types
    if columns is None:
        columns = []

    if ids is None:
        ids = []

    if not isinstance(columns, list):
        raise ValueError('columns variable must be list.')

    if not isinstance(ids, list):
        raise ValueError('ids variable must be list.')

    # confirm id is in table
    if not all(x in avail_sites for x in ids):
        raise ValueError('ids variable can only contain ' + ', '.join(avail_sites))

    # import data
    df = pandas.read_csv(r'https://raw.githubusercontent.com/dynastyprocess/data/master/files/db_playerids.csv')

    rem_cols = [x for x in df.columns if x not in avail_ids]
    tgt_ids = [x + '_id' for x in ids]

    # Choose which id columns and which other columns to keep; with no
    # filters at all the table is returned untouched.
    if columns and ids:
        wanted = tgt_ids + columns
    elif columns:
        wanted = avail_ids + columns
    elif ids:
        wanted = tgt_ids + rem_cols
    else:
        return df

    # De-duplicate while preserving order. A set must not be used as the
    # indexer: pandas deprecated set indexers and newer versions reject them,
    # and a set would also make the column order arbitrary.
    return df[list(dict.fromkeys(wanted))]


def clean_nfl_data(df):
"""Cleans descriptive data for players and teams to help with consistency across datasets
Expand Down Expand Up @@ -592,6 +641,13 @@ def clean_nfl_data(df):
'SFO': 'SF',
'TAM': 'TB'
}

na_replace = {
'NA':np.nan
}

for col in df.columns:
df.replace({col:na_replace}, inplace=True)

if 'name' in df.columns:
df.replace({'name': name_repl}, inplace=True)
Expand Down
Binary file added nfl_data_py/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
75 changes: 75 additions & 0 deletions nfl_data_py/tests/nfl_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from unittest import TestCase
import pandas as pd

import nfl_data_py as nfl

class test_pbp(TestCase):
    """Smoke test for nfl.import_pbp_data."""

    def test_is_df(self):
        """import_pbp_data returns a pandas DataFrame for the 2020 season."""
        s = nfl.import_pbp_data([2020])
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(s, pd.DataFrame)

class test_weekly(TestCase):
    """Smoke test for nfl.import_weekly_data."""

    def test_is_df(self):
        """import_weekly_data returns a pandas DataFrame for the 2020 season."""
        s = nfl.import_weekly_data([2020])
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(s, pd.DataFrame)

class test_seasonal(TestCase):
    """Smoke test for nfl.import_seasonal_data."""

    def test_is_df(self):
        """import_seasonal_data returns a pandas DataFrame for the 2020 season."""
        s = nfl.import_seasonal_data([2020])
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(s, pd.DataFrame)

class test_pbp_cols(TestCase):
    """Smoke test for nfl.see_pbp_cols."""

    def test_is_list(self):
        """see_pbp_cols returns a non-empty collection of column labels."""
        s = nfl.see_pbp_cols()
        # Check the returned object itself: wrapping the result in set() and
        # testing for set is always true and says nothing about the function.
        # see_pbp_cols returns DataFrame.columns, i.e. a pandas Index.
        self.assertIsInstance(s, pd.Index)
        self.assertGreater(len(s), 0)

class test_weekly_cols(TestCase):
    """Smoke test for nfl.see_weekly_cols."""

    def test_is_list(self):
        """see_weekly_cols returns a non-empty collection of column labels."""
        s = nfl.see_weekly_cols()
        # Assert on the weekly result; the previous assertion called
        # see_pbp_cols() and so never checked this function's output.
        # see_weekly_cols returns DataFrame.columns, i.e. a pandas Index.
        self.assertIsInstance(s, pd.Index)
        self.assertGreater(len(s), 0)

class test_rosters(TestCase):
    """Smoke test for nfl.import_rosters."""

    def test_is_df(self):
        """import_rosters returns a pandas DataFrame for the 2020 season."""
        s = nfl.import_rosters([2020])
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(s, pd.DataFrame)

class test_team_desc(TestCase):
    """Smoke test for nfl.import_team_desc."""

    def test_is_df(self):
        """import_team_desc returns a pandas DataFrame."""
        s = nfl.import_team_desc()
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(s, pd.DataFrame)

class test_schedules(TestCase):
    """Smoke test for nfl.import_schedules."""

    def test_is_df(self):
        """import_schedules returns a pandas DataFrame for the 2020 season."""
        s = nfl.import_schedules([2020])
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(s, pd.DataFrame)

class test_wins(TestCase):
    """Smoke test for nfl.import_win_totals."""

    def test_is_df(self):
        """import_win_totals returns a pandas DataFrame for the 2020 season."""
        s = nfl.import_win_totals([2020])
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(s, pd.DataFrame)

class test_officials(TestCase):
    """Smoke test for nfl.import_officials."""

    def test_is_df(self):
        """import_officials returns a pandas DataFrame for the 2020 season."""
        s = nfl.import_officials([2020])
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(s, pd.DataFrame)

class test_draft_picks(TestCase):
    """Smoke test for nfl.import_draft_picks."""

    def test_is_df(self):
        """import_draft_picks returns a pandas DataFrame for the 2020 season."""
        s = nfl.import_draft_picks([2020])
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(s, pd.DataFrame)

class test_draft_values(TestCase):
    """Smoke test for nfl.import_draft_values."""

    def test_is_df(self):
        """import_draft_values returns a pandas DataFrame."""
        s = nfl.import_draft_values()
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(s, pd.DataFrame)

class test_combine(TestCase):
    """Smoke test for nfl.import_combine_data."""

    def test_is_df(self):
        """import_combine_data returns a pandas DataFrame for the 2020 season."""
        s = nfl.import_combine_data([2020])
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(s, pd.DataFrame)

class test_ids(TestCase):
    """Smoke test for nfl.import_ids."""

    def test_is_df(self):
        """import_ids returns a pandas DataFrame when called without filters."""
        s = nfl.import_ids()
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(s, pd.DataFrame)

10 changes: 4 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
EMAIL = '[email protected]'
AUTHOR = 'cooperdff'
REQUIRES_PYTHON = '>=3.6.0'
VERSION = '0.1.5'
VERSION = '0.1.6'


# What packages are required for this module to be executed?
REQUIRED = [
Expand Down Expand Up @@ -107,12 +108,9 @@ def run(self):
python_requires=REQUIRES_PYTHON,
url=URL,
packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]),
# If your package is a single module, use this instead of 'packages':
# py_modules=['mypackage'],
test_suite='nose.collector',
tests_require=['nose'],

# entry_points={
# 'console_scripts': ['mycli=mymodule:cli'],
# },
install_requires=REQUIRED,
extras_require=EXTRAS,
include_package_data=True,
Expand Down
8 changes: 8 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# tox configuration; keep this file in the same directory as setup.py.
[tox]
# Only interpreters permitted by setup.py (REQUIRES_PYTHON = '>=3.6.0') are
# listed. Environments use tox's "pyXY" factor names so that tox selects the
# matching interpreter for each one.
envlist = py36,py37,py38,py39
[testenv]
deps = pytest
# Run the test suite; replace or extend with any other command line tool.
commands = pytest

0 comments on commit a29b442

Please sign in to comment.