Merge pull request #1057 from devinit/develop
Develop to master prior to new prod release
akmiller01 committed Mar 4, 2024
2 parents 181e57c + 206b4c5 commit e6a68d9
Showing 45 changed files with 5,214 additions and 2,588 deletions.
56 changes: 27 additions & 29 deletions .github/workflows/CI.yml
@@ -21,13 +21,13 @@ jobs:
node-version: [16.x]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v3
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}

- uses: actions/cache@v3
- uses: actions/cache@v4
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
@@ -52,14 +52,14 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up Python 3.7
uses: actions/setup-python@v4
- uses: actions/checkout@v4
- name: Set up Python 3.8
uses: actions/setup-python@v5
with:
python-version: '3.7'
python-version: '3.8'
architecture: 'x64'

- uses: actions/cache@v3
- uses: actions/cache@v4
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
@@ -95,7 +95,7 @@ jobs:
pull-requests: write
contents: write
steps:
- uses: fastify/github-action-merge-dependabot@v3.6.0
- uses: fastify/github-action-merge-dependabot@v3.10.1
with:
github-token: ${{secrets.PA_TOKEN}}

@@ -109,7 +109,7 @@ jobs:
', github.ref) || startsWith(github.ref, 'refs/tags/v')

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set env for develop branch
if: endsWith(github.ref, '/develop') || endsWith(github.ref, '-beta')
run: |
@@ -133,31 +133,29 @@ jobs:

- name: copy deploy scripts to dev server
if: endsWith(github.ref, '/develop') || endsWith(github.ref, '-beta')
uses: appleboy/scp-action@master
env:
HOST: ${{ env.HOST }}
USERNAME: ${{ env.USERNAME }}
PORT: ${{ secrets.PORT }}
KEY: ${{ secrets.KEY }}
uses: appleboy/[email protected]
with:
host: ${{ env.HOST }}
username: ${{ env.USERNAME }}
port: ${{ secrets.PORT }}
key: ${{ secrets.KEY }}
source: "deploy_script.sh,deploy.sh"
target: "."

- name: copy deploy script to production server
if: startsWith(github.ref, 'refs/tags/v') && !endsWith(github.ref, '-beta')
uses: appleboy/scp-action@master
env:
HOST: ${{ env.HOST }}
USERNAME: ${{ env.USERNAME }}
PORT: ${{ secrets.PORT }}
KEY: ${{ secrets.KEY }}
uses: appleboy/[email protected]
with:
host: ${{ env.HOST }}
username: ${{ env.USERNAME }}
port: ${{ secrets.PORT }}
key: ${{ secrets.KEY }}
source: "deploy.sh"
target: "."

- name: ssh into remote dev server for develop branch
if: endsWith(github.ref, '/develop')
uses: appleboy/ssh-action@master
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ env.HOST }}
username: ${{ env.USERNAME }}
@@ -170,7 +168,7 @@ jobs:
- name: ssh into remote dev server for beta tag
if: endsWith(github.ref, '-beta')
uses: appleboy/ssh-action@master
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ env.HOST }}
username: ${{ env.USERNAME }}
@@ -186,7 +184,7 @@ jobs:
- name: ssh into remote production server
if: startsWith(github.ref, 'refs/tags/v') && !endsWith(github.ref, '-beta')
uses: appleboy/ssh-action@master
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ env.HOST }}
username: ${{ env.USERNAME }}
@@ -201,7 +199,7 @@ jobs:
./deploy.sh $version
- name: Build JS
uses: appleboy/ssh-action@master
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ env.HOST }}
username: ${{ env.USERNAME }}
@@ -228,13 +226,13 @@ jobs:
node-version: [16.x]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v3
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}

- uses: actions/cache@v3
- uses: actions/cache@v4
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,5 +1,5 @@
# start with a base image
FROM python:3.7
FROM python:3.8
LABEL maintainer="akmiller01 <Alex Miller, [email protected]>"

RUN mkdir /src
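Both the workflow and the Dockerfile above move from Python 3.7 to 3.8. Purely as an illustration (not repo code), code that depends on the newer interpreter can assert it at startup:

import sys

# Illustrative guard: fail fast if the interpreter is older than the new baseline.
assert sys.version_info >= (3, 8), "Python 3.8 or newer is required"
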
8 changes: 6 additions & 2 deletions README.md
@@ -6,7 +6,7 @@ The new and improved DDW Analyst UI interface

### Docker Deployment

1. Make sure you're starting with a clean DB volume, so Docker knows to create the new User
1. Make sure you're starting with a clean DB volume, so Docker knows to create the new User

`docker-compose down` `docker volume rm metadata2`

@@ -162,7 +162,11 @@ To create a test development DB, for local development (e.g. virtualenv steps be

npm install

12. Start frontend dev environment which watches and collects static files:
12. Dynamic API base URL setting

Add an `API_BASE_URL` entry to the `.env` file and assign it a localhost, staging, or production URL. If it is not set, the application defaults to the URL of the environment it is currently running in.

13. Start frontend dev environment which watches and collects static files:

npm start

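As an aside on the API_BASE_URL note in the README change above: only the variable name comes from the diff; the helper below is a hypothetical sketch of the described fallback, not repo code.

import os

# Hypothetical sketch: prefer API_BASE_URL when set, otherwise fall back to the
# URL of the environment the application is currently running in.
def get_api_base_url(current_host_url):
    return os.environ.get("API_BASE_URL", current_host_url)

print(get_api_base_url("http://localhost:8000"))
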
8 changes: 6 additions & 2 deletions core/query.py
@@ -3,7 +3,7 @@
from django.db import transaction
from core.pypika_utils import QueryBuilder
from data.db_manager import fetch_data, analyse_query, run_query
from core.models import FrozenData, OperationStep, Source, Operation, FrozenData
from core.models import FrozenData, OperationStep, Source, Operation, FrozenData, SourceColumnMap
from pypika import Table, Query
from pypika import functions as pypika_fn
from core.pypika_fts_utils import TableQueryBuilder
@@ -50,11 +50,15 @@ def delete_archive(id):
frozen_data = FrozenData.objects.get(pk=id)
table_name = frozen_data.frozen_db_table
# Delete from sources table and operation steps
frozen_source = Source.objects.filter(repo='archives', active_mirror_name=table_name)
frozen_source = Source.objects.filter(schema='archives', active_mirror_name=table_name)
operation_step_qs = OperationStep.objects.filter(source_id__in=frozen_source)
frozen_column_maps = SourceColumnMap.objects.filter(source_id__in=frozen_source)
operation = Operation.objects.filter(pk__in=operation_step_qs.values_list('operation_id', flat=True))
operation.delete()
operation_step_qs.delete()
#Delete frozen column maps before deleting frozen source
for column_map in frozen_column_maps:
column_map.delete()
frozen_source.delete()
frozen_data.delete()
query_builder = TableQueryBuilder(table_name, "archives")
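A side note on the delete_archive change above: the per-row loop over SourceColumnMap could also be a single bulk queryset delete. The sketch below is illustrative only; the model and field names are taken from the diff, the function around them is not.

from core.models import Source, SourceColumnMap

def drop_archive_source(table_name):
    # Illustrative sketch, not repo code.
    frozen_source = Source.objects.filter(schema='archives', active_mirror_name=table_name)
    # Remove the dependent column maps first so the Source delete is not blocked
    # by foreign key constraints; QuerySet.delete() does this in bulk.
    SourceColumnMap.objects.filter(source_id__in=frozen_source).delete()
    frozen_source.delete()

One reason to keep the explicit loop from the diff is that a bulk QuerySet.delete() does not call an overridden Model.delete() on each instance.
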
11 changes: 7 additions & 4 deletions data/management/commands/update_csv_files.py
@@ -43,6 +43,7 @@ def handle(self, *args, **options):

if options['validate'] and COLUMNS_META_FILE_NAME in file_content.path:
self.stdout.write("Checking columns...", ending='\n')
self.stdout.flush()
self.check_column_mapping(options["path"])
except Exception as e:
raise CommandError(e)
@@ -72,20 +73,22 @@ def update_affected_columns(self, source, column):
if steps.count():
self.stdout.write("{} steps found using the obsolete {} column in the {} table".format(
steps.count(), column.name, source.active_mirror_name), ending='\n')
self.stdout.flush()
for step in steps:
operation = step.operation
columns = operation.logs.get('columns', []) if operation.logs else []
if not column.name in columns:
columns.append(column.name)
steps = operation.logs.get('steps', []) if operation.logs else []
if not step.step_id in steps:
steps.append(step.step_id)
log_steps = operation.logs.get('steps', []) if operation.logs else []
if not step.step_id in log_steps:
log_steps.append(step.step_id)
operation.logs = {
'type': 'warning',
'message': 'Obsolete Columns',
'columns': columns,
'steps': steps
'steps': log_steps
}
operation.count_rows = False
operation.save()

step_columns = step.logs.get('columns', []) if step.logs else []
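For context on the steps to log_steps rename above: rebinding the name of the collection being iterated does not stop the loop, but it silently replaces the original reference. A small standalone illustration (not repo code):

steps = ["step-1", "step-2", "step-3"]
for step in steps:
    steps = []        # rebinding the name does not affect the running loop...
print(steps)          # ...but later code that expects the original 'steps' now sees []

Giving the inner value its own name, as the diff does with log_steps, keeps both values available.
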
3 changes: 3 additions & 0 deletions data_updates/Python/download_oecd.py
@@ -112,7 +112,10 @@ def download(scrape_path, download_path, output_folder_prefix):
to_get.sort()

# Download the files in order.
bad_file_names = [" ", "README "]
for file_to_get in to_get:
if file_to_get in bad_file_names:
continue

# Replace the "_" in the file ids with "-".
uri_suffix = files_to_download[file_to_get].replace("_", "-")
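The skip above could equivalently be written as an up-front filter; a tiny illustrative sketch (to_get here is stand-in data for the sorted list built earlier in the script):

bad_file_names = [" ", "README "]
to_get = ["file-a", "README ", "file-b", " "]  # stand-in data
to_get = [name for name in to_get if name not in bad_file_names]
print(to_get)  # ['file-a', 'file-b']
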
37 changes: 23 additions & 14 deletions data_updates/Python/iati_refresh.py
@@ -2,10 +2,11 @@
import json
import progressbar
import sqlalchemy
from sqlalchemy import create_engine, MetaData, Table, Column, String
from sqlalchemy import create_engine, MetaData, Table, Column, String, insert
from sqlalchemy.types import Boolean
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import time


DATA_SCHEMA = "repo"
@@ -35,12 +36,14 @@ def requests_retry_session(
def fetch_datasets():
results = []
api_url = "https://iatiregistry.org/api/3/action/package_search?rows=1000"
response = requests_retry_session().get(url=api_url, timeout=30).content
response = requests_retry_session().get(url=api_url, timeout=300).content
json_response = json.loads(response)
full_count = json_response["result"]["count"]
current_count = len(json_response["result"]["results"])
results += [{"id": resource["package_id"], "hash": resource["hash"], "url": resource["url"]} for result in json_response["result"]["results"] for resource in result["resources"]]
while current_count < full_count:
time.sleep(1)
print("{}/{}".format(current_count, full_count))
next_api_url = "{}&start={}".format(api_url, current_count)
response = requests_retry_session().get(url=next_api_url, timeout=30).content
json_response = json.loads(response)
@@ -52,15 +55,15 @@ def fetch_datasets():
def main():
engine = create_engine('postgresql://analyst_ui_user:analyst_ui_pass@db:5432/analyst_ui')
# engine = create_engine('postgresql://postgres@:5432/analyst_ui')
conn = engine.connect()
meta = MetaData(engine)
meta.reflect()

meta = MetaData()
meta.reflect(engine)

all_datasets = fetch_datasets()
new_count = 0

try:
datasets = Table(DATA_TABLENAME, meta, schema=DATA_SCHEMA, autoload=True)
datasets = Table(DATA_TABLENAME, meta, schema=DATA_SCHEMA, autoload_with=engine)
except sqlalchemy.exc.NoSuchTableError: # First run
datasets = Table(
DATA_TABLENAME,
@@ -76,13 +79,16 @@ def main():
)
meta.create_all(engine)
new_count += len(all_datasets)
conn.execute(datasets.insert(), all_datasets)
with engine.begin() as conn:
conn.execute(insert(datasets).values(all_datasets))

all_dataset_ids = [dataset["id"] for dataset in all_datasets]
cached_datasets = conn.execute(datasets.select()).fetchall()
cached_dataset_ids = [dataset["id"] for dataset in cached_datasets]
with engine.begin() as conn:
cached_datasets = conn.execute(datasets.select()).fetchall()
cached_dataset_ids = [dataset.id for dataset in cached_datasets]
stale_dataset_ids = list(set(cached_dataset_ids) - set(all_dataset_ids))
conn.execute(datasets.update().where(datasets.c.id.in_(stale_dataset_ids)).values(new=False, modified=False, stale=True, error=False))
with engine.begin() as conn:
conn.execute(datasets.update().where(datasets.c.id.in_(stale_dataset_ids)).values(new=False, modified=False, stale=True, error=False))

stale_count = len(stale_dataset_ids)
modified_count = 0
@@ -93,18 +99,21 @@
dataset["modified"] = False
dataset["stale"] = False
dataset["error"] = False
conn.execute(datasets.insert(dataset))
with engine.begin() as conn:
conn.execute(insert(datasets).values(dataset))
new_count += 1
except sqlalchemy.exc.IntegrityError: # Dataset ID already exists
cached_dataset = conn.execute(datasets.select().where(datasets.c.id == dataset["id"])).fetchone()
if cached_dataset["hash"] == dataset["hash"]: # If the hashes match, carry on
with engine.begin() as conn:
cached_dataset = conn.execute(datasets.select().where(datasets.c.id == dataset["id"])).fetchone()
if cached_dataset.hash == dataset["hash"]: # If the hashes match, carry on
continue
else: # Otherwise, mark it modified and update the metadata
dataset["new"] = False
dataset["modified"] = True
dataset["stale"] = False # If for some reason, we pick up a previously stale dataset
dataset["error"] = False
conn.execute(datasets.update().where(datasets.c.id == dataset["id"]).values(dataset))
with engine.begin() as conn:
conn.execute(datasets.update().where(datasets.c.id == dataset["id"]).values(dataset))
modified_count += 1

engine.dispose()
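The iati_refresh.py changes above move the script to the SQLAlchemy 1.4/2.0-style API: an unbound MetaData, reflection and autoloading that take the engine explicitly, insert() imported from sqlalchemy, engine.begin() for transactional connections, and attribute access on result rows. The condensed sketch below shows the same pattern against an in-memory SQLite database; the table shape is simplified and the engine URL is illustrative, not the repo's.

from sqlalchemy import create_engine, MetaData, Table, Column, String, insert, select

engine = create_engine("sqlite://")  # illustrative engine; the script targets Postgres

# 2.0 style: MetaData is no longer bound to an engine.
meta = MetaData()
datasets = Table(
    "datasets", meta,
    Column("id", String, primary_key=True),
    Column("hash", String),
)
meta.create_all(engine)

# engine.begin() yields a connection inside a transaction that commits on success.
with engine.begin() as conn:
    conn.execute(insert(datasets).values(id="abc", hash="123"))

# Reflection now receives the engine explicitly; autoload_with replaces autoload=True.
reflected = MetaData()
reflected.reflect(engine)
datasets_reflected = Table("datasets", reflected, autoload_with=engine)

with engine.begin() as conn:
    row = conn.execute(
        select(datasets_reflected).where(datasets_reflected.c.id == "abc")
    ).fetchone()
    print(row.id, row.hash)  # rows are accessed as attributes, not dict keys

engine.dispose()
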