Skip to content

Commit

Permalink
Demo app for Dash to connect to PostgreSQL (#259)
Browse files Browse the repository at this point in the history
* Demo dash app pgsql

* Close connections properly

* Cleaned up data

Only keep readable data and added more detailed description.

* Close connections properly

* Removed secrets

Added more detailed steps on how to upload dataset and configure secret variables in README

* More info in README

Additionally: removed Dockerfile and removed extra package from requirements.txt

* Reworded README

* Updated local testing in README

* Additional info on uploading data

* minor language adjustment

* Removed unnecessary code

Modified local testing instruction in README

* Modified instruction for gunicorn

* Added instruction on obtaining dataset

* Removed print statements
  • Loading branch information
bchen39 authored Jul 29, 2024
1 parent 46ec9af commit 59a76f0
Show file tree
Hide file tree
Showing 7 changed files with 213 additions and 0 deletions.
38 changes: 38 additions & 0 deletions examples/dash/dash-connect-pgsql/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Dash App Connected to PostgreSQL Database

An interactive Dash application connected to a PostgreSQL database.

![](app.png)

## Set up local testing environment
To use the app, store the information below in a local `.env` file, which `upload.py` reads to connect to the database. These values can be found in the `Parameters only` section under `connection details` in your [Neon](https://console.neon.tech/) PostgreSQL dashboard.
```
PGHOST='YOUR_HOST'
PGDATABASE='test'
PGUSER='test_owner'
PGPASSWORD='your_password'
```

Run `python -m pip install -r requirements.txt` to install all necessary packages.

## Upload dataset to your Postgres server
You can download the dataset used in this example [here](https://archive.ics.uci.edu/dataset/320/student+performance) and store the CSV files in the `data` folder. Next, `cd data` and run `python csv_mod.py` to obtain `student-mat-min.csv` and `student-por-min.csv`, the extracted datasets that we will be uploading. Then return to the app directory (`cd ..`) and run `python upload.py` locally to upload the datasets to your PostgreSQL database.

## Local testing
Add the below lines to your `app.py`
```python
from dotenv import load_dotenv

# Load environment variables
load_dotenv(".env")
```

You should remove them once you are done with local testing to avoid errors.

Run `gunicorn app:server run --bind 0.0.0.0:80`. You should be able to access the app at `0.0.0.0:80`.

## Upload to Ploomber Cloud
Compress and upload the files below for deployment. Make sure to specify the above environment variables in your [secrets](https://docs.cloud.ploomber.io/en/latest/user-guide/secrets.html). For more details, please refer to our [Dash deployment guide](https://docs.cloud.ploomber.io/en/latest/apps/dash.html).
- app.py
- assets/style.css
- requirements.txt
Binary file added examples/dash/dash-connect-pgsql/app.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
111 changes: 111 additions & 0 deletions examples/dash/dash-connect-pgsql/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from dash import Dash, html, dcc, callback, Output, Input
import plotly.express as px
import pandas as pd
from sqlalchemy import URL, create_engine
import os

# Assemble the PostgreSQL connection URL from the PG* environment variables.
connection_string = URL.create(
    drivername="postgresql",
    username=os.getenv("PGUSER"),
    password=os.getenv("PGPASSWORD"),
    host=os.getenv("PGHOST"),
    database=os.getenv("PGDATABASE"),
)

# Tables the user can pick from in the "db-selection" dropdown.
DB_LIST = ["math", "portuguese"]

app = Dash(__name__)
server = app.server  # WSGI entry point (e.g. `gunicorn app:server`)

app.title = "Student Data - Ploomber Cloud Dash Application with PostgreSQL"


def _dropdown_row(label, *dropdowns):
    """Return one labelled row of dropdowns wrapped in a "drop-wrapper" div."""
    return html.Div(
        [html.P(children=label), *dropdowns],
        className="drop-wrapper",
    )


# Page layout: a title, three rows of selectors, then the two graphs.
app.layout = html.Div(
    [
        html.H1(
            children="Dash Application with PostgreSQL Demo",
            style={"textAlign": "center"},
        ),
        html.Div(
            [
                _dropdown_row(
                    "Database selection:",
                    dcc.Dropdown(
                        options=DB_LIST,
                        value="math",
                        id="db-selection",
                        className="drop-list",
                    ),
                ),
                _dropdown_row(
                    "Select x, y axis and facet for scatter plot:",
                    dcc.Dropdown(id="scatter-selection-x", className="drop-list"),
                    dcc.Dropdown(id="scatter-selection-y", className="drop-list"),
                    dcc.Dropdown(id="scatter-selection-facet", className="drop-list"),
                ),
                _dropdown_row(
                    "Select x axis for bar chart:",
                    dcc.Dropdown(id="bar-selection-x", className="drop-list"),
                ),
            ]
        ),
        dcc.Graph(id="graph-content"),
        dcc.Graph(id="graph-bar"),
    ]
)

@callback(
    [
        Output("scatter-selection-x", "options"),
        Output("scatter-selection-y", "options"),
        Output("scatter-selection-facet", "options"),
        Output("bar-selection-x", "options"),
        Output("scatter-selection-x", "value"),
        Output("scatter-selection-y", "value"),
        Output("scatter-selection-facet", "value"),
        Output("bar-selection-x", "value"),
    ],
    Input("db-selection", "value")
)
def update_dropdown(value):
    """Update the axis/facet dropdowns for the selected table.

    Args:
        value: Table name chosen in the "db-selection" dropdown.

    Returns:
        An 8-tuple: the option lists for the scatter x, scatter y, scatter
        facet and bar x dropdowns, followed by the default value for each of
        them, in the same order as the declared outputs.

    Raises:
        PreventUpdate: If ``value`` is not one of ``DB_LIST`` (for example
            when the dropdown has been cleared).
    """
    # Local import keeps this callback self-contained.
    from dash.exceptions import PreventUpdate

    # Callback inputs come from the browser, and a table name cannot be sent
    # as a bound parameter, so only names from the fixed list may reach SQL.
    if value not in DB_LIST:
        raise PreventUpdate

    e = create_engine(connection_string)
    try:
        with e.connect() as conn:
            # Only the column names are needed, so do not fetch any rows.
            df = pd.read_sql(f"SELECT * FROM {value} LIMIT 0", conn)
    finally:
        # Release pooled connections even if the query fails.
        e.dispose()

    cols = list(df.columns)
    # Column 0 is skipped everywhere; presumably it is the index column
    # written by data/csv_mod.py -- TODO confirm.
    return (
        cols[4:9],  # scatter x options
        cols[9:],   # scatter y options
        cols[1:4],  # scatter facet options
        cols[1:],   # bar x options
        cols[4],    # default scatter x
        cols[9],    # default scatter y
        cols[1],    # default scatter facet
        cols[1],    # default bar x
    )

@callback(
    Output("graph-content", "figure"),
    [
        Input("db-selection", "value"),
        Input("scatter-selection-x", "value"),
        Input("scatter-selection-y", "value"),
        Input("scatter-selection-facet", "value"),
    ]
)
def update_graph(db_name, val_x, val_y, val_facet):
    """Update the scatter plot for the selected table, axes and facet.

    Args:
        db_name: Table name chosen in the "db-selection" dropdown.
        val_x: Column plotted on the x axis.
        val_y: Column plotted on the y axis.
        val_facet: Column used to split the plot into facet columns.

    Returns:
        The Plotly Express scatter figure for the "graph-content" graph.

    Raises:
        PreventUpdate: If ``db_name`` is not one of ``DB_LIST`` (for example
            when the dropdown has been cleared).
    """
    # Local import keeps this callback self-contained.
    from dash.exceptions import PreventUpdate

    # Callback inputs come from the browser, and a table name cannot be sent
    # as a bound parameter, so only names from the fixed list may reach SQL.
    if db_name not in DB_LIST:
        raise PreventUpdate

    e = create_engine(connection_string)
    try:
        with e.connect() as conn:
            df = pd.read_sql(f"SELECT * FROM {db_name}", conn)
    finally:
        # Release pooled connections even if the query fails.
        e.dispose()

    title = f"Distribution of student {val_y} based on {val_x}, separated by student {val_facet}"
    return px.scatter(df, x=val_x, y=val_y, facet_col=val_facet, title=title)

@callback(
    Output("graph-bar", "figure"),
    [
        Input("db-selection", "value"),
        Input("bar-selection-x", "value"),
    ]
)
def update_bar(db_name, val_x):
    """Update the bar chart counting students per value of ``val_x``.

    Args:
        db_name: Table name chosen in the "db-selection" dropdown.
        val_x: Column to group by; its values form the x axis.

    Returns:
        The Plotly Express bar figure for the "graph-bar" graph.

    Raises:
        PreventUpdate: If ``db_name`` is not one of ``DB_LIST`` or no column
            is selected.
    """
    # Local imports keep this callback self-contained.
    from dash.exceptions import PreventUpdate
    from sqlalchemy import column, func, select, table

    # Callback inputs come from the browser: restrict the table to the fixed
    # list and skip the query when the column dropdown has been cleared.
    if db_name not in DB_LIST or val_x is None:
        raise PreventUpdate

    # Build the statement from SQLAlchemy constructs so the column name is
    # rendered as a (quoted where needed) identifier instead of being pasted
    # into the SQL text.
    group_col = column(val_x)
    query = (
        select(group_col, func.count(group_col).label("count"))
        .select_from(table(db_name))
        .group_by(group_col)
    )

    e = create_engine(connection_string)
    try:
        with e.connect() as conn:
            df = pd.read_sql(query, conn)
    finally:
        # Release pooled connections even if the query fails.
        e.dispose()

    title = f"Number of students based on {val_x}"
    return px.bar(df, x=val_x, y="count", title=title)
11 changes: 11 additions & 0 deletions examples/dash/dash-connect-pgsql/assets/style.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/* Row container used by app.py for a label followed by its dropdown(s). */
.drop-wrapper {
display: flex;
}

/* Applied to every dcc.Dropdown in app.py: fixed width with spacing around it. */
.drop-list {
margin-left: 15px;
width: 150px;
white-space: pre;
/* NOTE(review): text-overflow only has an effect when overflow is not
   "visible"; no overflow rule is set here -- confirm the intended result. */
text-overflow: ellipsis;
margin-top: 5px;
}
17 changes: 17 additions & 0 deletions examples/dash/dash-connect-pgsql/data/csv_mod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pandas as pd

# Raw input file -> trimmed output file (both relative to this folder).
csv_mods = {
    "student-mat.csv": "student-mat-min.csv",
    "student-por.csv": "student-por-min.csv",
}

# Terse headers of the raw UCI Student Performance files mapped to the
# readable names selected below. Headers that are absent (for example in a
# file that was already renamed) are left untouched by DataFrame.rename.
# NOTE(review): mapping follows the published UCI variable list -- confirm
# against the downloaded files.
COLUMN_RENAMES = {
    "romantic": "romantic_status",
    "Mjob": "mother_occupation",
    "Fjob": "father_occupation",
    "studytime": "study_time",
    "G3": "final_grade",
}

# Columns to keep, in the order they are written out.
KEPT_COLUMNS = [
    "school", "sex", "romantic_status", "age",
    "mother_occupation", "father_occupation",
    "health", "study_time", "absences", "final_grade",
]


def extract_columns(df):
    """Return ``df`` reduced to ``KEPT_COLUMNS`` with readable column names.

    Args:
        df: Student data using either the raw UCI headers or the readable
            names.

    Returns:
        A new DataFrame containing only ``KEPT_COLUMNS``, in that order.

    Raises:
        KeyError: If a kept column is missing under both names.
    """
    return df.rename(columns=COLUMN_RENAMES)[KEPT_COLUMNS]


if __name__ == "__main__":
    for csv_cur, csv_conv in csv_mods.items():
        df = pd.read_csv(csv_cur, sep=";")

        # Select columns to keep. The default index column is still written;
        # NOTE(review): app.py's positional column slices appear to rely on
        # it being column 0 -- confirm before passing index=False.
        extract_columns(df).to_csv(csv_conv)


6 changes: 6 additions & 0 deletions examples/dash/dash-connect-pgsql/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
dash
pandas
gunicorn
sqlalchemy
psycopg2-binary
python-dotenv
30 changes: 30 additions & 0 deletions examples/dash/dash-connect-pgsql/upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pandas as pd
from sqlalchemy import URL, create_engine
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv(".env")

# Connect to PostgreSQL database
connection_string = URL.create(
    'postgresql',
    username=os.getenv("PGUSER"),
    password=os.getenv("PGPASSWORD"),
    host=os.getenv("PGHOST"),
    database=os.getenv("PGDATABASE")
)

engine = create_engine(connection_string)

# Destination table name -> CSV file to load into it.
csv_files = {
    "math": "data/student-mat-min.csv",
    "portuguese": "data/student-por-min.csv"
}

try:
    for db_name, db_file in csv_files.items():
        df = pd.read_csv(db_file, sep=",")  # Load data from csv file
        # Replace any existing table so the script can be re-run safely.
        df.to_sql(db_name, engine, if_exists='replace', index=False)  # Upload data to database
        print(f"Successfully uploaded database {db_name}")
finally:
    # Close connections even if reading or uploading a file fails.
    engine.dispose()

0 comments on commit 59a76f0

Please sign in to comment.