-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(data-tools): Add dbstats.ipynb and .gitignore files
- Added dbstats.ipynb file to perform database queries and data processing - Created a .gitignore file to exclude the 'data/' directory from version control
- Loading branch information
1 parent
d0e0ca1
commit 616535e
Showing
2 changed files
with
193 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
data/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"metadata": {}, | ||
"cell_type": "code", | ||
"source": "region = 'eu'", | ||
"id": "bcac0e82dd6465d8", | ||
"outputs": [], | ||
"execution_count": null | ||
}, | ||
{ | ||
"metadata": { | ||
"SqlCellData": { | ||
"data_source_name": "[email protected]", | ||
"variableName$1": "df_players" | ||
} | ||
}, | ||
"cell_type": "code", | ||
"source": [ | ||
"%%sql\n", | ||
"/* SQL: Import players from database */\n", | ||
"SELECT * FROM \"Players\"" | ||
], | ||
"id": "dd6907f7abb39669", | ||
"outputs": [], | ||
"execution_count": null | ||
}, | ||
{ | ||
"metadata": { | ||
"SqlCellData": { | ||
"data_source_name": "[email protected]", | ||
"variableName$1": "df_posts" | ||
} | ||
}, | ||
"cell_type": "code", | ||
"source": [ | ||
"%%sql\n", | ||
"/* SQL: Import posts from database */\n", | ||
"SELECT * FROM \"Posts\"" | ||
], | ||
"id": "336f355903439be5", | ||
"outputs": [], | ||
"execution_count": null | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"id": "initial_id", | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"source": [ | ||
"# Importing the libraries\n", | ||
"from pathlib import Path\n", | ||
"\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"import numpy as np\n", | ||
"import pandas as pd" | ||
], | ||
"outputs": [], | ||
"execution_count": null | ||
}, | ||
{ | ||
"metadata": {}, | ||
"cell_type": "code", | ||
"source": [ | ||
"# Posts: the Flairs column is a bitfield, so we need to convert it into a list of flair labels:\n", | ||
"# Neutral=0\n", | ||
"# Performance: Good=1 Bad=2\n", | ||
"# Teamplay: Good=4 Bad=8\n", | ||
"# Courtesy: Good=16 Bad=32\n", | ||
"\n", | ||
"# The following helpers convert the flairs bitfield into one 'positive' / 'negative' / 'neutral' label per category\n", | ||
"class PostFlairs:\n", | ||
"    \"\"\"Bit values of the Flairs bitfield: one good/bad bit pair per category.\"\"\"\n", | ||
"\n", | ||
"    Neutral = 0  # no bit set\n", | ||
"\n", | ||
"    # Performance\n", | ||
"    PerformanceGood = 1 << 0\n", | ||
"    PerformanceBad = 1 << 1\n", | ||
"\n", | ||
"    # Teamplay\n", | ||
"    TeamplayGood = 1 << 2\n", | ||
"    TeamplayBad = 1 << 3\n", | ||
"\n", | ||
"    # Courtesy\n", | ||
"    CourtesyGood = 1 << 4\n", | ||
"    CourtesyBad = 1 << 5\n", | ||
"\n", | ||
"def flairs_to_maskvalue(flairs, neg_mask, pos_mask):\n", | ||
"    \"\"\"Classify one flair category of a post from its flairs bitfield.\n", | ||
"\n", | ||
"    Args:\n", | ||
"        flairs: Integer bitfield of the post's flairs; may be missing (None, NaN or pd.NA).\n", | ||
"        neg_mask: Bit that marks the category as bad.\n", | ||
"        pos_mask: Bit that marks the category as good.\n", | ||
"\n", | ||
"    Returns:\n", | ||
"        'positive' if the pos_mask bit is set, otherwise 'negative' if the neg_mask\n", | ||
"        bit is set, otherwise 'neutral' (which is also returned for missing flairs).\n", | ||
"    \"\"\"\n", | ||
"    # Missing values must be handled before any integer operation, otherwise they raise TypeError\n", | ||
"    if pd.isna(flairs): return 'neutral'\n", | ||
"\n", | ||
"    # NOTE(review): a column containing NULLs may be loaded as floats, which do not support '&' - confirm\n", | ||
"    flairs = int(flairs)\n", | ||
"\n", | ||
"    if flairs & pos_mask != 0: return 'positive'\n", | ||
"    elif flairs & neg_mask != 0: return 'negative'\n", | ||
"    else: return 'neutral'\n", | ||
"\n", | ||
"def flairs_to_list(flairs):\n", | ||
"    \"\"\"Return the [performance, teamplay, courtesy] labels for a flairs bitfield.\"\"\"\n", | ||
"    # (bad bit, good bit) per category, in output order\n", | ||
"    category_masks = (\n", | ||
"        (PostFlairs.PerformanceBad, PostFlairs.PerformanceGood),\n", | ||
"        (PostFlairs.TeamplayBad, PostFlairs.TeamplayGood),\n", | ||
"        (PostFlairs.CourtesyBad, PostFlairs.CourtesyGood),\n", | ||
"    )\n", | ||
"    return [flairs_to_maskvalue(flairs, bad_bit, good_bit) for bad_bit, good_bit in category_masks]" | ||
], | ||
"id": "fdbeb06bdfd97c4d", | ||
"outputs": [], | ||
"execution_count": null | ||
}, | ||
{ | ||
"metadata": {}, | ||
"cell_type": "code", | ||
"source": [ | ||
"# Work on a copy of the posts and add one label column per flair category\n", | ||
"df_posts_flairs = df_posts.copy()\n", | ||
"(\n", | ||
"    df_posts_flairs['Performance'],\n", | ||
"    df_posts_flairs['Teamplay'],\n", | ||
"    df_posts_flairs['Courtesy'],\n", | ||
") = zip(*df_posts_flairs['Flairs'].apply(flairs_to_list))" | ||
], | ||
"id": "28207ecdd331f213", | ||
"outputs": [], | ||
"execution_count": null | ||
}, | ||
{ | ||
"metadata": {}, | ||
"cell_type": "code", | ||
"source": "df_posts_flairs", | ||
"id": "d7300e72dc20fd37", | ||
"outputs": [], | ||
"execution_count": null | ||
}, | ||
{ | ||
"metadata": {}, | ||
"cell_type": "code", | ||
"source": [ | ||
"# Define the post type from the balance (sum) of the flairs: Negative gives -1, Neutral gives 0, Positive gives 1.\n", | ||
"# The balance is then compared to 0: greater, lesser, or equal gives respectively the types Positive, Negative, Neutral\n", | ||
"def flairs_to_type(metrics):\n", | ||
"    \"\"\"Reduce per-category labels to a single post type.\n", | ||
"\n", | ||
"    Each 'positive' label counts +1 and each 'negative' label counts -1; any other\n", | ||
"    value counts 0. Returns 'positive', 'negative' or 'neutral' according to the\n", | ||
"    sign of the total.\n", | ||
"    \"\"\"\n", | ||
"    score = sum(\n", | ||
"        1 if label == 'positive' else -1 if label == 'negative' else 0\n", | ||
"        for label in metrics\n", | ||
"    )\n", | ||
"\n", | ||
"    if score > 0:\n", | ||
"        return 'positive'\n", | ||
"    if score < 0:\n", | ||
"        return 'negative'\n", | ||
"    return 'neutral'" | ||
], | ||
"id": "efccd08d5b98a45f", | ||
"outputs": [], | ||
"execution_count": null | ||
}, | ||
{ | ||
"metadata": {}, | ||
"cell_type": "code", | ||
"source": [ | ||
"# Derive each post's overall type from its three flair label columns\n", | ||
"df_posts_flairs['Type'] = df_posts_flairs.loc[:, ['Performance', 'Teamplay', 'Courtesy']].apply(\n", | ||
"    flairs_to_type,\n", | ||
"    axis='columns',\n", | ||
")" | ||
], | ||
"id": "302b5eff474e7d17", | ||
"outputs": [], | ||
"execution_count": null | ||
}, | ||
{ | ||
"metadata": {}, | ||
"cell_type": "code", | ||
"source": "df_posts_flairs", | ||
"id": "baf9c9bd66f630e2", | ||
"outputs": [], | ||
"execution_count": null | ||
}, | ||
{ | ||
"metadata": {}, | ||
"cell_type": "code", | ||
"source": [ | ||
"# Export the dataframe to a JSON file\n", | ||
"output_path = Path('data') / f'{region}_posts.json'\n", | ||
"# data/ is git-ignored, so it does not exist on a fresh clone: create it before writing\n", | ||
"output_path.parent.mkdir(parents=True, exist_ok=True)\n", | ||
"df_posts_flairs.to_json(output_path, indent=1, orient='records')" | ||
], | ||
"id": "5c61f8949161b427", | ||
"outputs": [], | ||
"execution_count": null | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 2 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython2", | ||
"version": "2.7.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |