Skip to content

Commit

Permalink
Update for 2023-2024 Season (#290)
Browse files Browse the repository at this point in the history
* Updated data for 2023-24 season

* Update xgboost ml train model file

* Updated main

* Update requirements.txt

* Update NN model for ML

* Update NN model for UO

* Update get odds data

* Clean up NN runner

* Clean up get_data

* Clean up create games

* Updated xgboost ml model

* Clean up xgboost uo model training
  • Loading branch information
kyleskom authored Oct 7, 2023
1 parent 069d2a2 commit b141a1d
Show file tree
Hide file tree
Showing 45 changed files with 67 additions and 60 deletions.
Binary file modified Data/dataset.sqlite
Binary file not shown.
Binary file modified Data/odds.sqlite
Binary file not shown.
Binary file modified Data/teams.sqlite
Binary file not shown.
1 change: 0 additions & 1 deletion Models/XGBoost_Models/XGBoost_68.6%_ML-2.json

This file was deleted.

1 change: 1 addition & 0 deletions Models/XGBoost_Models/XGBoost_68.9%_ML-3.json

Large diffs are not rendered by default.

Binary file removed Odds-Data/Odds-Data-Clean/2007-08.xlsx
Binary file not shown.
Binary file removed Odds-Data/Odds-Data-Clean/2008-09.xlsx
Binary file not shown.
Binary file removed Odds-Data/Odds-Data-Clean/2009-10.xlsx
Binary file not shown.
Binary file removed Odds-Data/Odds-Data-Clean/2010-11.xlsx
Binary file not shown.
Binary file removed Odds-Data/Odds-Data-Clean/2011-12.xlsx
Binary file not shown.
Binary file removed Odds-Data/Odds-Data-Clean/2012-13.xlsx
Binary file not shown.
Binary file removed Odds-Data/Odds-Data-Clean/2013-14.xlsx
Binary file not shown.
Binary file removed Odds-Data/Odds-Data-Clean/2014-15.xlsx
Binary file not shown.
Binary file removed Odds-Data/Odds-Data-Clean/2015-16.xlsx
Binary file not shown.
Binary file removed Odds-Data/Odds-Data-Clean/2016-17.xlsx
Binary file not shown.
Binary file removed Odds-Data/Odds-Data-Clean/2017-18.xlsx
Binary file not shown.
Binary file removed Odds-Data/Odds-Data-Clean/2018-19.xlsx
Binary file not shown.
Binary file removed Odds-Data/Odds-Data-Clean/2019-20.xlsx
Binary file not shown.
Binary file removed Odds-Data/Odds-Data-Clean/2020-21.xlsx
Binary file not shown.
Binary file removed Odds-Data/Odds-Data-Clean/2021-22.xlsx
Binary file not shown.
Binary file removed Odds-Data/Odds-Data-Clean/2022-23.xlsx
Binary file not shown.
Binary file removed Odds-Data/nba odds 2007-08.xlsx
Binary file not shown.
Binary file removed Odds-Data/nba odds 2008-09.xlsx
Binary file not shown.
Binary file removed Odds-Data/nba odds 2009-10.xlsx
Binary file not shown.
Binary file removed Odds-Data/nba odds 2010-11.xlsx
Binary file not shown.
Binary file removed Odds-Data/nba odds 2012-13.xlsx
Binary file not shown.
Binary file removed Odds-Data/nba odds 2013-14.xlsx
Binary file not shown.
Binary file removed Odds-Data/nba odds 2014-15.xlsx
Binary file not shown.
Binary file removed Odds-Data/nba odds 2015-16.xlsx
Binary file not shown.
Binary file removed Odds-Data/nba odds 2016-17.xlsx
Binary file not shown.
Binary file removed Odds-Data/nba odds 2017-18.xlsx
Binary file not shown.
Binary file removed Odds-Data/nba odds 2018-19.xlsx
Binary file not shown.
Binary file removed Odds-Data/nba odds 2019-20.xlsx
Binary file not shown.
Binary file removed Odds-Data/nba odds 2020-21.xlsx
Binary file not shown.
Binary file removed Odds-Data/nba odds 2021-22.xlsx
Binary file not shown.
20 changes: 10 additions & 10 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import argparse
from datetime import datetime, timedelta
import time
from colorama import Fore, Style

import pandas as pd
import tensorflow as tf
from colorama import Fore, Style

from src.DataProviders.SbrOddsProvider import SbrOddsProvider
from src.Predict import NN_Runner, XGBoost_Runner
from src.Utils.Dictionaries import team_index_current
from src.Utils.tools import create_todays_games_from_odds, get_json_data, to_data_frame, get_todays_games_json, create_todays_games
from src.DataProviders.SbrOddsProvider import SbrOddsProvider


todays_games_url = 'https://data.nba.com/data/10s/v2015/json/mobile_teams/nba/2022/scores/00_todays_scores.json'
data_url = 'https://stats.nba.com/stats/leaguedashteamstats?' \
Expand All @@ -26,7 +26,7 @@ def createTodaysGames(games, df, odds):
todays_games_uo = []
home_team_odds = []
away_team_odds = []
# todo: get the days rest for current games

home_team_days_rest = []
away_team_days_rest = []

Expand All @@ -38,7 +38,7 @@ def createTodaysGames(games, df, odds):
if odds is not None:
game_odds = odds[home_team + ':' + away_team]
todays_games_uo.append(game_odds['under_over_odds'])

home_team_odds.append(game_odds[home_team]['money_line_odds'])
away_team_odds.append(game_odds[away_team]['money_line_odds'])

Expand All @@ -47,7 +47,7 @@ def createTodaysGames(games, df, odds):

home_team_odds.append(input(home_team + ' odds: '))
away_team_odds.append(input(away_team + ' odds: '))

# calculate days rest for both teams
dateparse = lambda x: datetime.strptime(x, '%d/%m/%Y %H:%M')
schedule_df = pd.read_csv('Data/nba-2023-UTC.csv', parse_dates=['Date'], date_parser=dateparse)
Expand Down Expand Up @@ -86,9 +86,9 @@ def main():
if len(games) == 0:
print("No games found.")
return
if((games[0][0]+':'+games[0][1]) not in list(odds.keys())):
print(games[0][0]+':'+games[0][1])
print(Fore.RED, "--------------Games list not up to date for todays games!!! Scraping disabled until list is updated.--------------")
if (games[0][0] + ':' + games[0][1]) not in list(odds.keys()):
print(games[0][0] + ':' + games[0][1])
print(Fore.RED,"--------------Games list not up to date for todays games!!! Scraping disabled until list is updated.--------------")
print(Style.RESET_ALL)
odds = None
else:
Expand Down
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
colorama==0.4.6
pandas==2.1.1
sbrscrape==0.0.8
sbrscrape==0.0.9
tensorflow==2.14.0
tensorflow-metal==1.1.0
xgboost==2.0.0
tqdm==4.66.1
flask==3.0.0
flask==3.0.0
scikit-learn==1.3.1
8 changes: 4 additions & 4 deletions src/Predict/NN_Runner.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import copy

import numpy as np
import pandas as pd
import tensorflow as tf
from colorama import Fore, Style, init, deinit
from tensorflow.keras.models import load_model
from keras.models import load_model

from src.Utils import Expected_Value
from src.Utils import Kelly_Criterion as kc

Expand Down Expand Up @@ -70,8 +71,7 @@ def nn_runner(data, todays_games_uo, frame_ml, games, home_team_odds, away_team_
if home_team_odds[count] and away_team_odds[count]:
ev_home = float(Expected_Value.expected_value(ml_predictions_array[count][0][1], int(home_team_odds[count])))
ev_away = float(Expected_Value.expected_value(ml_predictions_array[count][0][0], int(away_team_odds[count])))
expected_value_colors = {'home_color': Fore.GREEN if ev_home > 0 else Fore.RED,
'away_color': Fore.GREEN if ev_away > 0 else Fore.RED}
expected_value_colors = {'home_color': Fore.GREEN if ev_home > 0 else Fore.RED, 'away_color': Fore.GREEN if ev_away > 0 else Fore.RED}
bankroll_descriptor = ' Fraction of Bankroll: '
bankroll_fraction_home = bankroll_descriptor + str(kc.calculate_kelly_criterion(home_team_odds[count], ml_predictions_array[count][0][1])) + '%'
bankroll_fraction_away = bankroll_descriptor + str(kc.calculate_kelly_criterion(away_team_odds[count], ml_predictions_array[count][0][0])) + '%'
Expand Down
8 changes: 5 additions & 3 deletions src/Process-Data/Create_Games.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import os
import sqlite3
import numpy as np
import pandas as pd
import sys
from datetime import datetime

import numpy as np
import pandas as pd
from tqdm import tqdm

sys.path.insert(1, os.path.join(sys.path[0], '..'))
from Utils.Dictionaries import team_index_07, team_index_08, team_index_12, team_index_13, team_index_14, team_index_current
from src.Utils.Dictionaries import team_index_07, team_index_08, team_index_12, team_index_13, team_index_14, team_index_current

# season_array = ["2007-08", "2008-09", "2009-10", "2010-11", "2011-12", "2012-13", "2013-14", "2014-15", "2015-16",
# "2016-17", "2017-18", "2018-19", "2019-20", "2020-21", "2021-22", "2022-23"]
Expand Down
14 changes: 8 additions & 6 deletions src/Process-Data/Get_Data.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import os
import random
import sqlite3
import time
import sys
import time
from datetime import date, datetime, timedelta

from tqdm import tqdm

from src.Utils.tools import get_json_data, to_data_frame

sys.path.insert(1, os.path.join(sys.path[0], '..'))
from Utils.tools import get_json_data, to_data_frame

url = 'https://stats.nba.com/stats/' \
'leaguedashteamstats?Conference=&' \
Expand All @@ -29,7 +30,8 @@
# "2017-18", "2018-19", "2019-20", "2020-2021", "2021-2022"]

month = [10, 11, 12, 1, 2, 3, 4, 5, 6]
days = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
days = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31]

begin_year_pointer = year[0]
end_year_pointer = year[0]
Expand All @@ -45,7 +47,7 @@
for day1 in tqdm(days):
if month1 == 10 and day1 < 19:
continue
if month1 in [4,6,9,11] and day1 > 30:
if month1 in [4, 6, 9, 11] and day1 > 30:
continue
if month1 == 2 and day1 > 28:
continue
Expand All @@ -61,8 +63,8 @@

x = str(real_date).split('-')
general_df.to_sql(f"teams_{season1}-{str(int(x[1]))}-{str(int(x[2]))}", con, if_exists="replace")

time.sleep(random.randint(1, 3))
begin_year_pointer = year[count]

con.close()
con.close()
30 changes: 16 additions & 14 deletions src/Process-Data/Get_Odds_Data.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import os
import random
import time
import pandas as pd
import sqlite3
import os
import sys

import time
from datetime import datetime, timedelta
from tqdm import tqdm

import pandas as pd
from sbrscrape import Scoreboard
from tqdm import tqdm

sys.path.insert(1, os.path.join(sys.path[0], '..'))
from Utils.tools import get_date
from src.Utils.tools import get_date

year = [2022, 2023]
year = ["2022", "2023"]
season = ["2022-23"]

month = [10, 11, 12, 1, 2, 3, 4, 5, 6]
Expand All @@ -21,7 +22,7 @@
end_year_pointer = year[0]
count = 0

sportsbook='fanduel'
sportsbook = 'fanduel'
df_data = []

con = sqlite3.connect("../../Data/odds.sqlite")
Expand All @@ -35,7 +36,7 @@
for day1 in tqdm(days):
if month1 == 10 and day1 < 19:
continue
if month1 in [4,6,9,11] and day1 > 30:
if month1 in [4, 6, 9, 11] and day1 > 30:
continue
if month1 == 2 and day1 > 28:
continue
Expand All @@ -44,13 +45,14 @@
continue
if month1 > datetime.now().month:
continue
print(f"{end_year_pointer}-{month1:02}-{day1:02}")
sb = Scoreboard(date=f"{end_year_pointer}-{month1:02}-{day1:02}")
if not hasattr(sb, "games"):
continue
for game in sb.games:
if game['home_team'] not in teams_last_played:
teams_last_played[game['home_team']] = get_date(f"{season1}-{month1:02}{day1:02}")
home_games_rested = timedelta(days=7) # start of season, big number
home_games_rested = timedelta(days=7) # start of season, big number
else:
current_date = get_date(f"{season1}-{month1:02}{day1:02}")
home_games_rested = current_date - teams_last_played[game['home_team']]
Expand All @@ -59,12 +61,12 @@

if game['away_team'] not in teams_last_played:
teams_last_played[game['away_team']] = get_date(f"{season1}-{month1:02}{day1:02}")
away_games_rested = timedelta(days=7) # start of season, big number
away_games_rested = timedelta(days=7) # start of season, big number
else:
current_date = get_date(f"{season1}-{month1:02}{day1:02}")
away_games_rested = current_date - teams_last_played[game['away_team']]
teams_last_played[game['away_team']] = current_date

try:
df_data.append({
'Unnamed: 0': 0,
Expand All @@ -85,6 +87,6 @@
time.sleep(random.randint(1, 3))
begin_year_pointer = year[count]

df = pd.DataFrame(df_data,)
df = pd.DataFrame(df_data, )
df.to_sql(f"odds_{season1}", con, if_exists="replace")
con.close()
con.close()
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import sqlite3
import time

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint

current_time = str(time.time())

Expand Down Expand Up @@ -34,7 +35,6 @@
model.add(tf.keras.layers.Dense(2, activation=tf.nn.softmax))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=50, validation_split=0.1, batch_size=32,
callbacks=[tensorboard, earlyStopping, mcp_save])
model.fit(x_train, y_train, epochs=50, validation_split=0.1, batch_size=32, callbacks=[tensorboard, earlyStopping, mcp_save])

print('Done')
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import sqlite3
import time

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint

current_time = str(time.time())

Expand Down Expand Up @@ -35,7 +36,6 @@
model.add(tf.keras.layers.Dense(3, activation=tf.nn.softmax))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=50, validation_split=0.1, batch_size=32,
callbacks=[tensorboard, earlyStopping, mcp_save])
model.fit(x_train, y_train, epochs=50, validation_split=0.1, batch_size=32, callbacks=[tensorboard, earlyStopping, mcp_save])

print('Done')
13 changes: 7 additions & 6 deletions src/Train-Models/XGBoost_Model_ML.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import sqlite3

import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import numpy as np

dataset = "dataset_2012-23"
con = sqlite3.connect("../../Data/dataset.sqlite")
Expand All @@ -19,19 +20,19 @@

data = data.astype(float)
acc_results = []
for x in tqdm(range(100)):
for x in tqdm(range(300)):
x_train, x_test, y_train, y_test = train_test_split(data, margin, test_size=.1)

train = xgb.DMatrix(x_train, label=y_train)
test = xgb.DMatrix(x_test, label=y_test)

param = {
'max_depth': 2,
'max_depth': 3,
'eta': 0.01,
'objective': 'multi:softprob',
'num_class': 2
}
epochs = 500
epochs = 750

model = xgb.train(param, train, epochs)
predictions = model.predict(test)
Expand All @@ -40,9 +41,9 @@
for z in predictions:
y.append(np.argmax(z))

acc = round(accuracy_score(y_test, y)*100, 1)
acc = round(accuracy_score(y_test, y) * 100, 1)
print(f"{acc}%")
acc_results.append(acc)
# only save results if they are the best so far
if acc == max(acc_results):
model.save_model('../../Models/XGBoost_{}%_ML-2.json'.format(acc))
model.save_model('../../Models/XGBoost_{}%_ML-3.json'.format(acc))
15 changes: 7 additions & 8 deletions src/Train-Models/XGBoost_Model_UO.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
import sqlite3

import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import numpy as np


dataset = "dataset_2012-23"
con = sqlite3.connect("../../Data/dataset.sqlite")
data = pd.read_sql_query(f"select * from \"{dataset}\"", con, index_col="index")
con.close()
OU = data['OU-Cover']
total = data['OU']
data.drop(['Score', 'Home-Team-Win', 'TEAM_NAME', 'Date', 'TEAM_NAME.1', 'Date.1', 'OU-Cover', 'OU'], axis=1,
inplace=True)
data.drop(['Score', 'Home-Team-Win', 'TEAM_NAME', 'Date', 'TEAM_NAME.1', 'Date.1', 'OU-Cover', 'OU'], axis=1, inplace=True)

data['OU'] = np.asarray(total)
data = data.values
Expand All @@ -28,12 +27,12 @@
test = xgb.DMatrix(x_test)

param = {
'max_depth': 6,
'max_depth': 20,
'eta': 0.05,
'objective': 'multi:softprob',
'num_class': 3
}
epochs = 300
epochs = 750

model = xgb.train(param, train, epochs)

Expand All @@ -43,9 +42,9 @@
for z in predictions:
y.append(np.argmax(z))

acc = round(accuracy_score(y_test, y)*100, 1)
acc = round(accuracy_score(y_test, y) * 100, 1)
print(f"{acc}%")
acc_results.append(acc)
# only save results if they are the best so far
if acc == max(acc_results):
model.save_model('../../Models/XGBoost_{}%_UO-8.json'.format(acc))
model.save_model('../../Models/XGBoost_{}%_UO-20-01.json'.format(acc))

1 comment on commit b141a1d

@ethan-hurst
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from src.Utils.tools import get_json_data, to_data_frame

Adding this line seems to cause an error. I can reproduce it on both Windows and macOS.

Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/ethan/NBA-Machine-Learning-Sports-Betting/src/Process-Data/Get_Data.py", line 10, in <module>
    from src.Utils.tools import get_json_data, to_data_frame
ModuleNotFoundError: No module named 'src'

Please sign in to comment.