Skip to content

Commit

Permalink
feat(data-tools): Add dbstats.ipynb and .gitignore files
Browse files Browse the repository at this point in the history
- Added dbstats.ipynb file to perform database queries and data processing
- Created a .gitignore file to exclude the 'data/' directory from version control
  • Loading branch information
SakuraIsayeki committed Mar 15, 2024
1 parent d0e0ca1 commit 616535e
Show file tree
Hide file tree
Showing 2 changed files with 193 additions and 0 deletions.
1 change: 1 addition & 0 deletions wowskarma_data_tools/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
data/
192 changes: 192 additions & 0 deletions wowskarma_data_tools/dbstats.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
{
"cells": [
{
"metadata": {},
"cell_type": "code",
"source": "# Region tag used to name the exported file (data/<region>_posts.json)\n# NOTE(review): presumably this must match the region of the data source selected in the SQL cells - confirm\nregion = 'eu'",
"id": "bcac0e82dd6465d8",
"outputs": [],
"execution_count": null
},
{
"metadata": {
"SqlCellData": {
"data_source_name": "[email protected]",
"variableName$1": "df_players"
}
},
"cell_type": "code",
"source": [
"%%sql\n",
"/* SQL: Import players from database */\n",
"/* NOTE(review): the cell metadata names df_players as the result variable - confirm in the IDE's SQL cell settings */\n",
"SELECT * FROM \"Players\""
],
"id": "dd6907f7abb39669",
"outputs": [],
"execution_count": null
},
{
"metadata": {
"SqlCellData": {
"data_source_name": "[email protected]",
"variableName$1": "df_posts"
}
},
"cell_type": "code",
"source": [
"%%sql\n",
"/* SQL: Import posts from database */\n",
"/* NOTE(review): the cell metadata names df_posts as the result variable - confirm in the IDE's SQL cell settings */\n",
"SELECT * FROM \"Posts\""
],
"id": "336f355903439be5",
"outputs": [],
"execution_count": null
},
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true
},
"source": [
"# Importing the libraries\n",
"from pathlib import Path\n",
"\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd"
],
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# Posts: The Flairs column is actually a bitfield, so we need to convert it to a list of flairs:\n",
"# Neutral=0\n",
"# Performance: Good=1 Bad=2\n",
"# Teamplay: Good=4 Bad=8\n",
"# Courtesy: Good=16 Bad=32\n",
"\n",
"# We will use the following class and functions to convert the flairs bitfield into a 'positive' / 'negative' / 'neutral' rating per category\n",
"class PostFlairs:\n",
"    \"\"\"Bit values of the Flairs bitfield: one good bit and one bad bit per category.\"\"\"\n",
"    Neutral = 0\n",
"    PerformanceGood = 1 << 0\n",
"    PerformanceBad = 1 << 1\n",
"    TeamplayGood = 1 << 2\n",
"    TeamplayBad = 1 << 3\n",
"    CourtesyGood = 1 << 4\n",
"    CourtesyBad = 1 << 5\n",
"\n",
"def flairs_to_maskvalue(flairs, neg_mask, pos_mask):\n",
"    \"\"\"Rate one flair category of a post as 'positive', 'negative' or 'neutral'.\n",
"\n",
"    flairs:   the post's flairs bitfield as an int, or None when the post has none.\n",
"    neg_mask: bit(s) that mark the category as bad.\n",
"    pos_mask: bit(s) that mark the category as good.\n",
"\n",
"    When both the good and the bad bit are set, the good bit wins.\n",
"    \"\"\"\n",
"    # A missing or empty bitfield carries no flairs. This must be tested before any\n",
"    # bitwise operation, because those raise TypeError on None.\n",
"    if flairs is None or flairs == 0:\n",
"        return 'neutral'\n",
"    if (flairs & pos_mask) != 0:\n",
"        return 'positive'\n",
"    if (flairs & neg_mask) != 0:\n",
"        return 'negative'\n",
"    return 'neutral'\n",
"\n",
"def flairs_to_list(flairs):\n",
"    \"\"\"Return the [Performance, Teamplay, Courtesy] ratings encoded in a flairs bitfield.\"\"\"\n",
"    # (bad bit, good bit) per category, in output order\n",
"    category_masks = (\n",
"        (PostFlairs.PerformanceBad, PostFlairs.PerformanceGood),  # Performance\n",
"        (PostFlairs.TeamplayBad, PostFlairs.TeamplayGood),  # Teamplay\n",
"        (PostFlairs.CourtesyBad, PostFlairs.CourtesyGood),  # Courtesy\n",
"    )\n",
"    return [flairs_to_maskvalue(flairs, bad, good) for bad, good in category_masks]"
],
"id": "fdbeb06bdfd97c4d",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# Process flairs and add each property to the dataframe\n",
"df_posts_flairs = df_posts.copy()\n",
"\n",
"# Decode every bitfield once, then fill one column per category by position.\n",
"# Plain lists are used rather than unpacking zip(*...), which raises ValueError when there are no posts.\n",
"flair_rows = df_posts_flairs['Flairs'].apply(flairs_to_list).tolist()\n",
"for position, column in enumerate(['Performance', 'Teamplay', 'Courtesy']):\n",
"    df_posts_flairs[column] = [row[position] for row in flair_rows]"
],
"id": "28207ecdd331f213",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "df_posts_flairs",
"id": "d7300e72dc20fd37",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# Define the post type from the balance of its flairs: Negative gives -1, Neutral gives 0, Positive gives 1.\n",
"# A balance greater than, lesser than, or equal to 0 gives us respectively the following types: Positive, Negative, Neutral\n",
"def flairs_to_type(metrics):\n",
"    \"\"\"Return 'positive', 'negative' or 'neutral' according to the net balance of the given ratings.\"\"\"\n",
"    score = sum(\n",
"        1 if rating == 'positive' else -1 if rating == 'negative' else 0\n",
"        for rating in metrics\n",
"    )\n",
"\n",
"    if score > 0:\n",
"        return 'positive'\n",
"    if score < 0:\n",
"        return 'negative'\n",
"    return 'neutral'"
],
"id": "efccd08d5b98a45f",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# Derive the overall post type from the three per-category ratings and store it in the dataframe\n",
"df_posts_flairs['Type'] = (\n",
"    df_posts_flairs[['Performance', 'Teamplay', 'Courtesy']]\n",
"    .apply(flairs_to_type, axis=1)\n",
")"
],
"id": "302b5eff474e7d17",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "df_posts_flairs",
"id": "baf9c9bd66f630e2",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# Export the dataframe to a JSON file\n",
"export_path = Path('data') / f'{region}_posts.json'\n",
"\n",
"# The data/ directory is git-ignored, so it does not exist on a fresh checkout: create it before writing.\n",
"export_path.parent.mkdir(parents=True, exist_ok=True)\n",
"df_posts_flairs.to_json(export_path, indent=1, orient='records')"
],
"id": "5c61f8949161b427",
"outputs": [],
"execution_count": null
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 616535e

Please sign in to comment.