Skip to content

Commit

Permalink
Merge pull request #277 from EHDEN/dev
Browse files Browse the repository at this point in the history
Update Master with #274 #275 #276
  • Loading branch information
joaorafaelalmeida authored Mar 10, 2023
2 parents 1c848a4 + 9ec21fd commit dc525ff
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 5 deletions.
13 changes: 13 additions & 0 deletions dashboard_viewer/dashboard_viewer/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,11 @@
"materialized_queries_manager",
"tabsManager",
"uploader",
"corsheaders",
]

MIDDLEWARE = [
"corsheaders.middleware.CorsMiddleware",
"django.middleware.security.SecurityMiddleware",
"django.contrib.sessions.middleware.SessionMiddleware",
"django.middleware.common.CommonMiddleware",
Expand Down Expand Up @@ -119,6 +121,17 @@

WSGI_APPLICATION = "dashboard_viewer.wsgi.application"

# CORS ALLOWED ORIGINS - Separate each allowed domain by comma

# Build the allow-list once from the environment.  str.split never yields
# None, so only empty entries (unset variable, trailing/duplicate commas)
# need to be filtered out; an unset variable yields an empty list, which
# django-cors-headers treats as "no cross-origin requests allowed".
CORS_ALLOWED_ORIGINS = [
    origin
    for origin in os.environ.get("CORS_ALLOWED_ORIGINS", "").split(",")
    if origin
]

# Database
DATABASES = {
Expand Down
1 change: 1 addition & 0 deletions dashboard_viewer/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ django-bootstrap4==3.0.1 # forms beautifier
django-celery-results==2.2.0 # reports results from workers
django-compressor==2.4.1 # combines and minifies linked and inline Javascript or CSS in a Django template into cacheable static files
django-constance==2.8.0 # to implement settings that can change at runtime
django-cors-headers==3.13.0 # used to receive cross domain requests and allow them
django-markdownify==0.9.0 # markdown to html
django-model-utils==4.2.0 # get specific type of subclasses after requesting buttons on tabsManager app
django-sass-processor==1.1 # automate scss development
Expand Down
9 changes: 7 additions & 2 deletions dashboard_viewer/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#
# This file is autogenerated by pip-compile with python 3.9
# To update, run:
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile requirements.in
#
Expand Down Expand Up @@ -34,12 +34,15 @@ click-plugins==1.1.1
# via celery
click-repl==0.2.0
# via celery
colorama==0.4.6
# via click
django==3.2.13
# via
# -r requirements.in
# django-appconf
# django-bootstrap-datepicker-plus
# django-bootstrap4
# django-cors-headers
# django-markdownify
# django-model-utils
# django-redis
Expand All @@ -57,6 +60,8 @@ django-compressor==2.4.1
# via -r requirements.in
django-constance==2.8.0
# via -r requirements.in
django-cors-headers==3.13.0
# via -r requirements.in
django-markdownify==0.9.0
# via -r requirements.in
django-model-utils==4.2.0
Expand Down
108 changes: 107 additions & 1 deletion dashboard_viewer/uploader/file_handler/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@
import numpy
import pandas
from django.conf import settings
from django.core.cache import caches
from django.db import connections, transaction, utils
from redis_rw_lock import RWLock

from uploader.models import DataSource, UploadHistory
from materialized_queries_manager.models import MaterializedQuery
from uploader.models import AchillesResults, DataSource, UploadHistory


class FileChecksException(Exception):
Expand Down Expand Up @@ -37,6 +41,9 @@ class MissingFieldValue(FileChecksException):
class EqualFileAlreadyUploaded(FileChecksException):
    """Signals that an uploaded file duplicates a previous upload.

    NOTE(review): inferred from the name and from check_for_duplicated_files
    below — confirm there which comparison (checksum/content) is used.
    """

    pass

class FileDataCorrupted(FileChecksException):
    """Signals that the uploaded data is not usable by the dashboards.

    Raised by upload_data_to_tmp_table when replaying the materialized
    view definitions against the temporary upload table fails with a
    database DataError.
    """

    pass


def _generate_file_reader(uploaded_file):
"""
Expand Down Expand Up @@ -311,3 +318,102 @@ def check_for_duplicated_files(uploaded_file, data_source_id):
pass

#### Added For Checksum ##########################

def upload_data_to_tmp_table(data_source_id, file_metadata, pending_upload):
    """Validate an uploaded results file by loading it into a scratch table.

    The uploaded CSV is inserted, in chunks, into ``achilles_results_tmp``
    — a plain table cloned from the ``AchillesResults`` schema (a true
    TEMPORARY table is not used because materialized views cannot be
    refreshed/created over one).  Every non-tmp ``MaterializedQuery``
    definition is then replayed against the scratch table; a database
    ``DataError`` while doing so means the file content is not compatible
    with the dashboards.  The scratch table and any views created from it
    are always dropped before returning.

    :param data_source_id: id of the data source the file was uploaded to
    :param file_metadata: mapping with the expected ``columns`` and pandas
        ``types`` of the uploaded CSV
    :param pending_upload: the ``PendingUpload`` record whose
        ``uploaded_file`` holds the CSV content
    :raises InvalidCSVFile: if the file cannot be parsed or inserted
    :raises FileDataCorrupted: if a materialized view rejects the data
    """
    cache = caches["workers_locks"]

    # Take the writer side of the celery workers lock so no other worker
    # touches the achilles tables while the scratch table exists.
    with RWLock(
        cache.client.get_client(), "celery_worker_updating", RWLock.WRITE, expire=None
    ):

        # Re-read the uploaded file from the beginning.
        pending_upload.uploaded_file.seek(0)

        reader = pandas.read_csv(
            pending_upload.uploaded_file,
            header=0,
            dtype=file_metadata["types"],
            skip_blank_lines=False,
            index_col=False,
            names=file_metadata["columns"],
            chunksize=500,
        )

        # Build "tmp" variants of every materialized view definition,
        # pointing at achilles_results_tmp instead of achilles_results.
        all_mat_views = MaterializedQuery.objects.exclude(matviewname__contains="tmp")

        mat_views = {}

        for mat_view in all_mat_views:
            tmp_mat_view_name = mat_view.to_dict()["matviewname"] + "_tmp"

            # Run the mat views (with data) against the "temporary" table.
            # All mat views are considered, not only the ones matching the
            # data source's current draft state, because draft can change.
            tmp_definition = mat_view.to_dict()["definition"].replace(
                "achilles_results", "achilles_results_tmp"
            )

            mat_views[tmp_mat_view_name] = [tmp_definition]

            # Since draft can change with time, run the queries for both
            # draft = false and draft = true variants of the definition.
            if "draft = false" in tmp_definition:
                mat_views[tmp_mat_view_name].append(
                    tmp_definition.replace("draft = false", "draft = true")
                )

        # Create the "Temporary Upload" table, to store the data being
        # uploaded, and load the CSV into it chunk by chunk, mirroring the
        # structure of the actual upload process.
        with transaction.atomic(), connections[
            "achilles"
        ].cursor() as cursor, settings.ACHILLES_DB_SQLALCHEMY_ENGINE.connect() as pandas_connection, pandas_connection.begin():
            try:
                cursor.execute("DROP TABLE IF EXISTS achilles_results_tmp CASCADE")
                cursor.execute(
                    "CREATE TABLE IF NOT EXISTS achilles_results_tmp AS SELECT * FROM "
                    + AchillesResults._meta.db_table
                    + " WHERE FALSE"
                )
                cursor.execute("CREATE SEQUENCE IF NOT EXISTS achilles_results_tmp_seq_id")
                cursor.execute(
                    "ALTER TABLE achilles_results_tmp ALTER COLUMN id SET DEFAULT nextval('achilles_results_tmp_seq_id')"
                )
                cursor.execute(
                    "ALTER TABLE achilles_results_tmp ALTER COLUMN id SET NOT NULL"
                )

                for chunk in reader:
                    # Rows whose stratum_1 is the literal "0" are discarded
                    # (same filtering as the real upload, presumably — TODO
                    # confirm against update_achilles_results_data).
                    chunk = chunk[~chunk["stratum_1"].isin(["0"])]
                    chunk = chunk.assign(data_source_id=data_source_id)
                    chunk.to_sql(
                        "achilles_results_tmp",
                        pandas_connection,
                        if_exists="append",
                        index=False,
                    )
            except Exception as e:
                # Chain the original error so the real cause stays in logs.
                raise InvalidCSVFile("Error processing the file") from e

        # Replay each materialized view definition against the scratch table:
        # the definitions have their achilles_results references replaced by
        # achilles_results_tmp, and creating (then immediately dropping) the
        # view is enough to surface data the dashboards would choke on.
        try:
            with transaction.atomic(), connections["achilles"].cursor() as cursor:
                for tmp_mat_view_name, tmp_definitions in mat_views.items():
                    for tmp_definition in tmp_definitions:
                        cursor.execute(
                            f"CREATE MATERIALIZED VIEW {tmp_mat_view_name} AS {tmp_definition}"
                        )
                        cursor.execute(f"DROP MATERIALIZED VIEW {tmp_mat_view_name}")
        except utils.DataError as e:
            raise FileDataCorrupted("Uploaded file is not valid") from e
        finally:
            # Always discard the temporary upload table and its dependent
            # materialized views.  The previous version ran this DROP inside
            # the already-aborted transaction (an error on PostgreSQL) and
            # leaked the table on non-DataError failures; a fresh transaction
            # in a finally block covers every exit path.
            with transaction.atomic(), connections["achilles"].cursor() as cursor:
                cursor.execute("DROP TABLE IF EXISTS achilles_results_tmp CASCADE")
9 changes: 9 additions & 0 deletions dashboard_viewer/uploader/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .file_handler.checks import (
check_for_duplicated_files,
extract_data_from_uploaded_file,
upload_data_to_tmp_table,
)
from .file_handler.updates import update_achilles_results_data
from .models import AchillesResults, PendingUpload, UploadHistory
Expand Down Expand Up @@ -52,6 +53,14 @@ def upload_results_file(pending_upload_id: int):
pending_upload.uploaded_file
)

logger.info(
"Validating if data is not corrupted [datasource %d, pending upload %d]",
data_source.id,
pending_upload_id,
)

upload_data_to_tmp_table(data_source.id, file_metadata, pending_upload)

cache = caches["workers_locks"]

try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@
</style>
</head>
<body>
<h4>No data available: This database was not yet mapped into the CDM</h4>
<h4>No data is available at this moment</h4>
</body>
</html>
4 changes: 3 additions & 1 deletion dashboard_viewer/uploader/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,12 +302,14 @@ def data_source_dashboard(request, data_source):

if data_source.uploadhistory_set.exists():
config = constance.config
return HttpResponseRedirect(
resp = str(
f"{config.SUPERSET_HOST}/superset/dashboard/{config.DATABASE_DASHBOARD_IDENTIFIER}/"
"?standalone=1"
f'&preselect_filters={{"{config.DATABASE_FILTER_ID}":{{"acronym":["{data_source.acronym}"]}}}}'
)

return JsonResponse({"link": resp})

# This way if there is at least one successful upload it will redirect to the dashboards
# We could only check if the last upload for the data source was successful
# -> This will not show data but it can bring a more useful message since the new data may not be processed and the graphics will contain old data
Expand Down
1 change: 1 addition & 0 deletions docker/docker-compose-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ x-dashboard-environment: &dashboard-environment
REDIS_CELERY_WORKERS_LOCKS_DB: 5
SECRET_KEY: ${DASHBOARD_VIEWER_SECRET_KEY}
DASHBOARD_VIEWER_ENV: development
CORS_ALLOWED_ORIGINS: ${CORS_ALLOWED_ORIGINS}

version: "3.7"
services:
Expand Down
1 change: 1 addition & 0 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ x-dashboard-environment: &dashboard-environment
DASHBOARD_VIEWER_ENV: ${INSTALLATION_ENV}
SINGLE_APPLICATION_MODE: ${SINGLE_APPLICATION_MODE}
MAIN_APPLICATION_HOST: ${MAIN_APPLICATION_HOST}
CORS_ALLOWED_ORIGINS: ${CORS_ALLOWED_ORIGINS}

version: "3.7"
services:
Expand Down

0 comments on commit dc525ff

Please sign in to comment.