Demo app for Dash to connect to PostgreSQL (#259)

* Demo dash app pgsql * Close connections properly * Cleaned up data Only keep readable data and added more detailed description. * Close connections properly * Removed secrets Added more detailed steps on how to upload dataset and configure secret variables in README * More info in README Additionally: removed Dockerfile and removed extra package from requirements.txt * Reworded README * Updated local testing in README * Additional info on uploading data * minor language adjustment * Removed unnecessary code Modified local testing instruction in README * Modified instruction for gunicorn * Added instruction on obtaining dataset * Removed print statements
ploomber · Jul 29, 2024 · 59a76f0 · 59a76f0
1 parent 46ec9af
commit 59a76f0
Show file tree

Hide file tree

Showing 7 changed files with 213 additions and 0 deletions.
diff --git a/examples/dash/dash-connect-pgsql/README.md b/examples/dash/dash-connect-pgsql/README.md
@@ -0,0 +1,38 @@
+# Dash App Connected to PostgreSQL Database
+
+Interactive Dash Application, connected to PostgreSQL database.
+
+![](app.png)
+
+## Set up local testing environment
+To use the app, store the information below in a local `.env` file; `upload.py` reads it when uploading the dataset. These values can be found in the `Parameters only` section under `connection details` on your [Neon](https://console.neon.tech/) PostgreSQL dashboard.
+```
+PGHOST='YOUR_HOST'
+PGDATABASE='test'
+PGUSER='test_owner'
+PGPASSWORD='your_password'
+```
+
+Run `python -m pip install -r requirements.txt` to install all necessary packages.
+
+## Upload dataset to your Postgres server
+You can download the dataset used in this demo [here](https://archive.ics.uci.edu/dataset/320/student+performance) and store the files in the `data` folder. Next, `cd data` and run `python csv_mod.py` to obtain `student-mat-min.csv` and `student-por-min.csv`, the extracted datasets that we will be uploading. Then return to the project folder (`cd ..`) and run `python upload.py` locally to upload the datasets to your PostgreSQL database; `upload.py` looks for the CSV files under `data/`.
+
+## Local testing
+Add the below lines to your `app.py`
+```python
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv(".env")
+```
+
+Remove these lines once you are done with local testing to avoid errors.
+
+Run `gunicorn app:server run --bind 0.0.0.0:80`. You should be able to access the app at `0.0.0.0:80`.
+
+## Upload to Ploomber Cloud
+Compress and upload the files below for deployment. Make sure to specify the above environment variables in your [secrets](https://docs.cloud.ploomber.io/en/latest/user-guide/secrets.html). For more details, please refer to our [Dash deployment guide](https://docs.cloud.ploomber.io/en/latest/apps/dash.html).
+- app.py
+- assets/style.css
+- requirements.txt
diff --git a/examples/dash/dash-connect-pgsql/app.png b/examples/dash/dash-connect-pgsql/app.png
diff --git a/examples/dash/dash-connect-pgsql/app.py b/examples/dash/dash-connect-pgsql/app.py
@@ -0,0 +1,111 @@
+from dash import Dash, html, dcc, callback, Output, Input
+import plotly.express as px
+import pandas as pd
+from sqlalchemy import URL, create_engine
+import os
+
+# Connect to PostgreSQL database
+connection_string = URL.create(
+  'postgresql',
+  username=os.getenv("PGUSER"),
+  password=os.getenv("PGPASSWORD"),
+  host=os.getenv("PGHOST"),
+  database=os.getenv("PGDATABASE")
+)
+
+DB_LIST = ['math', 'portuguese']
+
+app = Dash(__name__)
+server = app.server
+
+app.title = "Student Data - Ploomber Cloud Dash Application with PostgreSQL"
+
+app.layout = html.Div(
+    [
+        html.H1(children="Dash Application with PostgreSQL Demo", style={"textAlign": "center"}),
+        html.Div(
+            [
+                html.Div([
+                    html.P(children="Database selection:"),
+                    dcc.Dropdown(
+                        DB_LIST, "math", id="db-selection", 
+                        className="drop-list"
+                    )
+                ], className="drop-wrapper"),
+                html.Div([
+                    html.P(children="Select x, y axis and facet for scatter plot:"),
+                    dcc.Dropdown(id="scatter-selection-x", className="drop-list"),
+                    dcc.Dropdown(id="scatter-selection-y", className="drop-list"),
+                    dcc.Dropdown(id="scatter-selection-facet", className="drop-list")
+                ], className="drop-wrapper"),
+                html.Div([
+                    html.P(children="Select x axis for bar chart:"),
+                    dcc.Dropdown(id="bar-selection-x", className="drop-list")
+                ], className="drop-wrapper")
+            ]
+        ),
+        dcc.Graph(id="graph-content"),
+        dcc.Graph(id="graph-bar")
+    ]
+)
+
+@callback(
+    [
+        Output("scatter-selection-x", "options"), 
+        Output("scatter-selection-y", "options"),
+        Output("scatter-selection-facet", "options"),
+        Output("bar-selection-x", "options"),
+        Output("scatter-selection-x", "value"),
+        Output("scatter-selection-y", "value"),
+        Output("scatter-selection-facet", "value"),
+        Output("bar-selection-x", "value"),
+    ], 
+    Input("db-selection", "value")
+)
+def update_dropdown(value):
+    """Updates dropdown list based on selected db."""
+    e = create_engine(connection_string)
+    query = f"SELECT * FROM {value}"
+    with e.connect() as conn:
+        df = pd.read_sql(query, conn)
+        conn.close()
+    e.dispose()
+    cols = list(df.columns)
+    return cols[4:9], cols[9:], cols[1:4], cols[1:], cols[4], cols[9], cols[1], cols[1]
+
+@callback(
+    Output("graph-content", "figure"), 
+    [
+        Input("db-selection", "value"),
+        Input("scatter-selection-x", "value"),
+        Input("scatter-selection-y", "value"),
+        Input("scatter-selection-facet", "value"),
+    ]
+)
+def update_graph(db_name, val_x, val_y, val_facet):
+    """Updates scatter plot based on selected x and y axis."""
+    e = create_engine(connection_string)
+    query = f"SELECT * FROM {db_name}"
+    with e.connect() as conn:
+        df = pd.read_sql(query, conn)
+        conn.close()
+    e.dispose()
+    title = f"Distribution of student {val_y} based on {val_x}, separated by student {val_facet}"
+    return px.scatter(df, x=val_x, y=val_y, facet_col=val_facet, title=title)
+
+@callback(
+    Output("graph-bar", "figure"), 
+    [
+        Input("db-selection", "value"),
+        Input("bar-selection-x", "value"),
+    ]
+)
+def update_bar(db_name, val_x):
+    e = create_engine(connection_string)
+    query = f"SELECT {val_x}, COUNT({val_x}) FROM {db_name} GROUP BY {val_x}"
+    with e.connect() as conn:
+        df = pd.read_sql(query, conn)
+        conn.close()
+    e.dispose()
+    title =f"Number of students based on {val_x}"
+    return px.bar(df, x=val_x, y="count", title=title)
diff --git a/examples/dash/dash-connect-pgsql/assets/style.css b/examples/dash/dash-connect-pgsql/assets/style.css
@@ -0,0 +1,11 @@
+/* Row container used in app.py: lays out a label and its dropdowns side by side. */
+.drop-wrapper {
+    display: flex;
+}
+
+/* Individual dropdown inside a .drop-wrapper row. */
+/* NOTE(review): text-overflow only takes effect when overflow is not
+   `visible`; no overflow is set here, so confirm the ellipsis is applied. */
+.drop-list {
+    margin-left: 15px;
+    width: 150px;
+    white-space: pre;
+    text-overflow: ellipsis;
+    margin-top: 5px;
+}
diff --git a/examples/dash/dash-connect-pgsql/data/csv_mod.py b/examples/dash/dash-connect-pgsql/data/csv_mod.py
@@ -0,0 +1,17 @@
+import pandas as pd
+
+csv_mods = {
+    "student-mat.csv": "student-mat-min.csv",
+    "student-por.csv": "student-por-min.csv"
+}
+
+for csv_cur, csv_conv in csv_mods.items():
+    df = pd.read_csv(csv_cur, sep=";")
+
+    # Select rows to keep
+    df = df[["school", "sex", "romantic_status", "age", 
+        "mother_occupation", "father_occupation", 
+            "health", "study_time", "absences", "final_grade"]]
+    df.to_csv(csv_conv)
+
+
diff --git a/examples/dash/dash-connect-pgsql/requirements.txt b/examples/dash/dash-connect-pgsql/requirements.txt
@@ -0,0 +1,6 @@
+dash
+pandas
+gunicorn
+sqlalchemy
+psycopg2-binary
+python-dotenv
diff --git a/examples/dash/dash-connect-pgsql/upload.py b/examples/dash/dash-connect-pgsql/upload.py
@@ -0,0 +1,30 @@
+import pandas as pd
+from sqlalchemy import URL, create_engine
+import os
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv(".env")
+
+# Connect to PostgreSQL database
+connection_string = URL.create(
+  'postgresql',
+  username=os.getenv("PGUSER"),
+  password=os.getenv("PGPASSWORD"),
+  host=os.getenv("PGHOST"),
+  database=os.getenv("PGDATABASE")
+)
+
+engine = create_engine(connection_string)
+
+csv_files = { 
+    "math": "data/student-mat-min.csv",
+    "portuguese": "data/student-por-min.csv"
+}
+
+for db_name, db_file in csv_files.items():
+    df = pd.read_csv(db_file, sep = ",") # Load data from csv file
+    df.to_sql(db_name, engine, if_exists='replace', index=False) # Upload data to database
+    print(f"Successfully uploaded database {db_name}")
+
+engine.dispose() # Close connection