Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Demo app for Dash to connect to PostgreSQL #259

Merged
merged 14 commits into from
Jul 29, 2024
38 changes: 38 additions & 0 deletions examples/dash/dash-connect-pgsql/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Dash App Connected to PostgreSQL Database

An interactive Dash application connected to a PostgreSQL database.
neelasha23 marked this conversation as resolved.
Show resolved Hide resolved

![](app.png)

## Set up local testing environment
To use the app, store the information below in a local `.env` file for `upload.py`. These variables can be found in the `Parameters only` section under `connection details` on your [Neon](https://console.neon.tech/) PostgreSQL dashboard.
```
PGHOST='YOUR_HOST'
PGDATABASE='test'
PGUSER='test_owner'
PGPASSWORD='your_password'
```

Run `python -m pip install -r requirements.txt` to install all necessary packages.

## Upload dataset to your Postgres server
bryannho marked this conversation as resolved.
Show resolved Hide resolved
You can download the dataset used in this example [here](https://archive.ics.uci.edu/dataset/320/student+performance) and store its files in the `data` folder. Next, `cd data` and run `python csv_mod.py` to obtain `student-mat-min.csv` and `student-por-min.csv`, the extracted datasets that we will be uploading. Then return to the example's root folder (`cd ..`) and run `python upload.py` locally to upload the datasets to your PostgreSQL database.

## Local testing
Add the below lines to your `app.py`
```python
from dotenv import load_dotenv

# Load environment variables
load_dotenv(".env")
```

Remove these lines once you are done with local testing to avoid errors.

Run `gunicorn app:server run --bind 0.0.0.0:80`. You should be able to access the app at `0.0.0.0:80`.

## Upload to Ploomber Cloud
Compress and upload the below files for deployment. Make sure to specify the above environment variables in your [secrets](https://docs.cloud.ploomber.io/en/latest/user-guide/secrets.html). For more details, please refer to our [Dash deployment guide](https://docs.cloud.ploomber.io/en/latest/apps/dash.html)
- app.py
- assets/style.css
- requirements.txt
Binary file added examples/dash/dash-connect-pgsql/app.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
111 changes: 111 additions & 0 deletions examples/dash/dash-connect-pgsql/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from dash import Dash, html, dcc, callback, Output, Input
import plotly.express as px
import pandas as pd
from sqlalchemy import URL, create_engine
import os

# Build the PostgreSQL connection URL from the PG* environment variables.
# Any variable that is unset is passed through as None.
connection_string = URL.create(
    drivername="postgresql",
    username=os.getenv("PGUSER"),
    password=os.getenv("PGPASSWORD"),
    host=os.getenv("PGHOST"),
    database=os.getenv("PGDATABASE"),
)

# Table names offered in the "Database selection" dropdown.
DB_LIST = ["math", "portuguese"]

# Dash application object and the browser tab title.
app = Dash(__name__)
app.title = "Student Data - Ploomber Cloud Dash Application with PostgreSQL"

# WSGI entry point (the README starts the app with `gunicorn app:server`).
server = app.server

# Page layout: a heading, three rows of dropdown controls, then the scatter
# plot and the bar chart that the callbacks fill in.
app.layout = html.Div(
    children=[
        html.H1(
            "Dash Application with PostgreSQL Demo",
            style={"textAlign": "center"},
        ),
        html.Div(
            children=[
                # Row 1: which table to read from.
                html.Div(
                    className="drop-wrapper",
                    children=[
                        html.P("Database selection:"),
                        dcc.Dropdown(
                            options=DB_LIST,
                            value="math",
                            id="db-selection",
                            className="drop-list",
                        ),
                    ],
                ),
                # Row 2: scatter plot axes and facet column.
                html.Div(
                    className="drop-wrapper",
                    children=[
                        html.P("Select x, y axis and facet for scatter plot:"),
                        dcc.Dropdown(
                            id="scatter-selection-x", className="drop-list"
                        ),
                        dcc.Dropdown(
                            id="scatter-selection-y", className="drop-list"
                        ),
                        dcc.Dropdown(
                            id="scatter-selection-facet", className="drop-list"
                        ),
                    ],
                ),
                # Row 3: bar chart category.
                html.Div(
                    className="drop-wrapper",
                    children=[
                        html.P("Select x axis for bar chart:"),
                        dcc.Dropdown(
                            id="bar-selection-x", className="drop-list"
                        ),
                    ],
                ),
            ],
        ),
        dcc.Graph(id="graph-content"),
        dcc.Graph(id="graph-bar"),
    ],
)

@callback(
    [
        Output("scatter-selection-x", "options"),
        Output("scatter-selection-y", "options"),
        Output("scatter-selection-facet", "options"),
        Output("bar-selection-x", "options"),
        Output("scatter-selection-x", "value"),
        Output("scatter-selection-y", "value"),
        Output("scatter-selection-facet", "value"),
        Output("bar-selection-x", "value"),
    ],
    Input("db-selection", "value")
)
def update_dropdown(value):
    """Update the axis/facet dropdowns based on the selected table.

    Args:
        value: Table name chosen in the ``db-selection`` dropdown, or
            ``None`` when the dropdown has been cleared.

    Returns:
        A tuple of four option lists (scatter x, scatter y, scatter facet,
        bar x) followed by the four default values, in the order of the
        outputs declared on the callback.

    Raises:
        PreventUpdate: If ``value`` is not one of the names in ``DB_LIST``
            (this includes ``None``); the dropdowns are left unchanged.
    """
    # Local import: dash is already a dependency of this file.
    from dash.exceptions import PreventUpdate

    # The table name is interpolated into the SQL text below, so only the
    # known table names may pass. Callback inputs come from the browser and
    # must not be trusted.
    if value not in DB_LIST:
        raise PreventUpdate

    e = create_engine(connection_string)
    try:
        with e.connect() as conn:
            # Only the column names are needed, so fetch no rows.
            df = pd.read_sql(f"SELECT * FROM {value} LIMIT 0", conn)
    finally:
        # Release the connection pool even when the query fails.
        e.dispose()

    cols = list(df.columns)
    # Column 0 is never offered.
    # NOTE(review): presumably it is the index column written by the CSV
    # export - confirm against the upload scripts.
    return (
        cols[4:9],
        cols[9:],
        cols[1:4],
        cols[1:],
        cols[4],
        cols[9],
        cols[1],
        cols[1],
    )

@callback(
    Output("graph-content", "figure"),
    [
        Input("db-selection", "value"),
        Input("scatter-selection-x", "value"),
        Input("scatter-selection-y", "value"),
        Input("scatter-selection-facet", "value"),
    ]
)
def update_graph(db_name, val_x, val_y, val_facet):
    """Update the scatter plot based on the selected table, axes and facet.

    Args:
        db_name: Table name chosen in the ``db-selection`` dropdown.
        val_x: Column plotted on the x axis.
        val_y: Column plotted on the y axis.
        val_facet: Column used to split the plot into facets, or ``None``
            for a single, un-faceted plot.

    Returns:
        The Plotly Express scatter figure for ``graph-content``.

    Raises:
        PreventUpdate: If ``db_name`` is not one of the names in ``DB_LIST``
            or either axis is unset; the current figure is left unchanged.
    """
    # Local import: dash is already a dependency of this file.
    from dash.exceptions import PreventUpdate

    # The table name is interpolated into the SQL text below, so only the
    # known table names may pass. A cleared axis dropdown arrives as None.
    if db_name not in DB_LIST or val_x is None or val_y is None:
        raise PreventUpdate

    e = create_engine(connection_string)
    try:
        with e.connect() as conn:
            df = pd.read_sql(f"SELECT * FROM {db_name}", conn)
    finally:
        # Release the connection pool even when the query fails.
        e.dispose()

    title = f"Distribution of student {val_y} based on {val_x}"
    # Only mention the facet when one is selected, so the title never
    # reads "separated by student None".
    if val_facet is not None:
        title += f", separated by student {val_facet}"
    return px.scatter(df, x=val_x, y=val_y, facet_col=val_facet, title=title)

@callback(
    Output("graph-bar", "figure"),
    [
        Input("db-selection", "value"),
        Input("bar-selection-x", "value"),
    ]
)
def update_bar(db_name, val_x):
    """Update the bar chart of student counts per value of a column.

    Args:
        db_name: Table name chosen in the ``db-selection`` dropdown.
        val_x: Column whose distinct values become the bars.

    Returns:
        The Plotly Express bar figure for ``graph-bar``.

    Raises:
        PreventUpdate: If ``db_name`` is not one of the names in ``DB_LIST``
            or ``val_x`` is unset; the current figure is left unchanged.
    """
    # Local imports: dash and sqlalchemy are already dependencies of this file.
    from dash.exceptions import PreventUpdate
    from sqlalchemy import column, func, select, table

    # Callback inputs come from the browser and must not be trusted.
    if db_name not in DB_LIST or val_x is None:
        raise PreventUpdate

    # Build the statement from SQL expression constructs so SQLAlchemy
    # quotes the identifiers, instead of pasting them into the SQL text.
    col = column(val_x)
    query = (
        select(col, func.count(col).label("count"))
        .select_from(table(db_name))
        .group_by(col)
    )

    e = create_engine(connection_string)
    try:
        with e.connect() as conn:
            df = pd.read_sql(query, conn)
    finally:
        # Release the connection pool even when the query fails.
        e.dispose()

    title = f"Number of students based on {val_x}"
    return px.bar(df, x=val_x, y="count", title=title)
11 changes: 11 additions & 0 deletions examples/dash/dash-connect-pgsql/assets/style.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/* Row container used by app.py for each label + dropdown group:
   flex layout places the label and its dropdowns side by side. */
.drop-wrapper {
display: flex;
}

/* Applied to every dcc.Dropdown in app.py: fixed width with a gap to the
   left of each control and a small top offset. */
.drop-list {
margin-left: 15px;
width: 150px;
/* Keep option text on one line. */
white-space: pre;
/* NOTE(review): text-overflow normally only takes effect together with a
   non-visible overflow value, which is not set here - confirm whether the
   ellipsis is actually rendered. */
text-overflow: ellipsis;
margin-top: 5px;
}
17 changes: 17 additions & 0 deletions examples/dash/dash-connect-pgsql/data/csv_mod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pandas as pd

# Source CSV -> reduced CSV written next to it.
csv_mods = {
    "student-mat.csv": "student-mat-min.csv",
    "student-por.csv": "student-por-min.csv"
}

# Short raw headers -> the descriptive names selected below.
# NOTE(review): the UCI "Student Performance" files appear to ship with these
# short headers - confirm against the downloaded CSVs. Renaming does nothing
# for headers that are absent, so already-renamed files still work.
column_renames = {
    "romantic": "romantic_status",
    "Mjob": "mother_occupation",
    "Fjob": "father_occupation",
    "studytime": "study_time",
    "G3": "final_grade",
}

# Columns to keep, in the order they are written out.
keep_columns = [
    "school", "sex", "romantic_status", "age",
    "mother_occupation", "father_occupation",
    "health", "study_time", "absences", "final_grade",
]

for csv_cur, csv_conv in csv_mods.items():
    df = pd.read_csv(csv_cur, sep=";")
    df = df.rename(columns=column_renames)

    # Select columns to keep
    df = df[keep_columns]

    # The DataFrame index is written as a leading unnamed column (the
    # to_csv default); this is kept as-is.
    # NOTE(review): app.py slices columns from position 1, presumably to
    # skip that index column - confirm before passing index=False here.
    df.to_csv(csv_conv)


6 changes: 6 additions & 0 deletions examples/dash/dash-connect-pgsql/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
dash
pandas
gunicorn
sqlalchemy
psycopg2-binary
python-dotenv
30 changes: 30 additions & 0 deletions examples/dash/dash-connect-pgsql/upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pandas as pd
from sqlalchemy import URL, create_engine
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv(".env")
bryannho marked this conversation as resolved.
Show resolved Hide resolved

# Connect to PostgreSQL database using the PG* environment variables
connection_string = URL.create(
    'postgresql',
    username=os.getenv("PGUSER"),
    password=os.getenv("PGPASSWORD"),
    host=os.getenv("PGHOST"),
    database=os.getenv("PGDATABASE")
)

engine = create_engine(connection_string)

# Target table name -> CSV file to load into it
csv_files = {
    "math": "data/student-mat-min.csv",
    "portuguese": "data/student-por-min.csv"
}

try:
    for db_name, db_file in csv_files.items():
        df = pd.read_csv(db_file, sep=",")  # Load data from csv file
        # Upload data to database; an existing table of that name is replaced
        df.to_sql(db_name, engine, if_exists='replace', index=False)
        print(f"Successfully uploaded database {db_name}")
finally:
    # Close connection even if reading or uploading one of the files fails
    engine.dispose()