Merge pull request #1057 from devinit/develop
Develop to master prior to new prod release
akmiller01 committed Mar 4, 2024
2 parents 181e57c + 206b4c5 commit e6a68d9
Showing 45 changed files with 5,214 additions and 2,588 deletions.
56 changes: 27 additions & 29 deletions .github/workflows/CI.yml
@@ -21,13 +21,13 @@ jobs:
node-version: [16.x]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v3
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}

- uses: actions/cache@v3
- uses: actions/cache@v4
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
@@ -52,14 +52,14 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up Python 3.7
uses: actions/setup-python@v4
- uses: actions/checkout@v4
- name: Set up Python 3.8
uses: actions/setup-python@v5
with:
python-version: '3.7'
python-version: '3.8'
architecture: 'x64'

- uses: actions/cache@v3
- uses: actions/cache@v4
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
@@ -95,7 +95,7 @@ jobs:
pull-requests: write
contents: write
steps:
- uses: fastify/github-action-merge-dependabot@v3.6.0
- uses: fastify/github-action-merge-dependabot@v3.10.1
with:
github-token: ${{secrets.PA_TOKEN}}

@@ -109,7 +109,7 @@ jobs:
', github.ref) || startsWith(github.ref, 'refs/tags/v')

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set env for develop branch
if: endsWith(github.ref, '/develop') || endsWith(github.ref, '-beta')
run: |
@@ -133,31 +133,29 @@ jobs:

- name: copy deploy scripts to dev server
if: endsWith(github.ref, '/develop') || endsWith(github.ref, '-beta')
uses: appleboy/scp-action@master
env:
HOST: ${{ env.HOST }}
USERNAME: ${{ env.USERNAME }}
PORT: ${{ secrets.PORT }}
KEY: ${{ secrets.KEY }}
uses: appleboy/[email protected]
with:
host: ${{ env.HOST }}
username: ${{ env.USERNAME }}
port: ${{ secrets.PORT }}
key: ${{ secrets.KEY }}
source: "deploy_script.sh,deploy.sh"
target: "."

- name: copy deploy script to production server
if: startsWith(github.ref, 'refs/tags/v') && !endsWith(github.ref, '-beta')
uses: appleboy/scp-action@master
env:
HOST: ${{ env.HOST }}
USERNAME: ${{ env.USERNAME }}
PORT: ${{ secrets.PORT }}
KEY: ${{ secrets.KEY }}
uses: appleboy/[email protected]
with:
host: ${{ env.HOST }}
username: ${{ env.USERNAME }}
port: ${{ secrets.PORT }}
key: ${{ secrets.KEY }}
source: "deploy.sh"
target: "."

- name: ssh into remote dev server for develop branch
if: endsWith(github.ref, '/develop')
uses: appleboy/ssh-action@master
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ env.HOST }}
username: ${{ env.USERNAME }}
@@ -170,7 +168,7 @@ jobs:
- name: ssh into remote dev server for beta tag
if: endsWith(github.ref, '-beta')
uses: appleboy/ssh-action@master
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ env.HOST }}
username: ${{ env.USERNAME }}
@@ -186,7 +184,7 @@ jobs:
- name: ssh into remote production server
if: startsWith(github.ref, 'refs/tags/v') && !endsWith(github.ref, '-beta')
uses: appleboy/ssh-action@master
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ env.HOST }}
username: ${{ env.USERNAME }}
@@ -201,7 +199,7 @@ jobs:
./deploy.sh $version
- name: Build JS
uses: appleboy/ssh-action@master
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ env.HOST }}
username: ${{ env.USERNAME }}
@@ -228,13 +226,13 @@ jobs:
node-version: [16.x]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v3
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}

- uses: actions/cache@v3
- uses: actions/cache@v4
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,5 +1,5 @@
# start with a base image
FROM python:3.7
FROM python:3.8
LABEL maintainer="akmiller01 <Alex Miller, [email protected]>"

RUN mkdir /src
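Both the workflow and the Dockerfile above move from Python 3.7 to 3.8. Purely as an illustration (not repo code), code that depends on the newer interpreter can assert it at startup:

import sys

# Illustrative guard: fail fast if the interpreter is older than the new baseline.
assert sys.version_info >= (3, 8), "Python 3.8 or newer is required"
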
8 changes: 6 additions & 2 deletions README.md
@@ -6,7 +6,7 @@ The new and improved DDW Analyst UI interface

### Docker Deployment

1. Make sure you're starting with a clean DB volume, so Docker knows to create the new User
1. Make sure you're starting with a clean DB volume, so Docker knows to create the new User

`docker-compose down` `docker volume rm metadata2`

@@ -162,7 +162,11 @@ To create a test development DB, for local development (e.g. virtualenv steps be

npm install

12. Start frontend dev environment which watches and collects static files:
12. Dynamic API base URL setting

Add an `API_BASE_URL` entry to the `.env` file and assign it a localhost, staging, or production URL. If it is not set, the application defaults to the URL of the environment it is currently running in.

13. Start frontend dev environment which watches and collects static files:

npm start

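As an aside on the API_BASE_URL note in the README change above: only the variable name comes from the diff; the helper below is a hypothetical sketch of the described fallback, not repo code.

import os

# Hypothetical sketch: prefer API_BASE_URL when set, otherwise fall back to the
# URL of the environment the application is currently running in.
def get_api_base_url(current_host_url):
    return os.environ.get("API_BASE_URL", current_host_url)

print(get_api_base_url("http://localhost:8000"))
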
8 changes: 6 additions & 2 deletions core/query.py
@@ -3,7 +3,7 @@
from django.db import transaction
from core.pypika_utils import QueryBuilder
from data.db_manager import fetch_data, analyse_query, run_query
from core.models import FrozenData, OperationStep, Source, Operation, FrozenData
from core.models import FrozenData, OperationStep, Source, Operation, FrozenData, SourceColumnMap
from pypika import Table, Query
from pypika import functions as pypika_fn
from core.pypika_fts_utils import TableQueryBuilder
@@ -50,11 +50,15 @@ def delete_archive(id):
frozen_data = FrozenData.objects.get(pk=id)
table_name = frozen_data.frozen_db_table
# Delete from sources table and operation steps
frozen_source = Source.objects.filter(repo='archives', active_mirror_name=table_name)
frozen_source = Source.objects.filter(schema='archives', active_mirror_name=table_name)
operation_step_qs = OperationStep.objects.filter(source_id__in=frozen_source)
frozen_column_maps = SourceColumnMap.objects.filter(source_id__in=frozen_source)
operation = Operation.objects.filter(pk__in=operation_step_qs.values_list('operation_id', flat=True))
operation.delete()
operation_step_qs.delete()
#Delete frozen column maps before deleting frozen source
for column_map in frozen_column_maps:
column_map.delete()
frozen_source.delete()
frozen_data.delete()
query_builder = TableQueryBuilder(table_name, "archives")
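A side note on the delete_archive change above: the per-row loop over SourceColumnMap could also be a single bulk queryset delete. The sketch below is illustrative only; the model and field names are taken from the diff, the function around them is not.

from core.models import Source, SourceColumnMap

def drop_archive_source(table_name):
    # Illustrative sketch, not repo code.
    frozen_source = Source.objects.filter(schema='archives', active_mirror_name=table_name)
    # Remove the dependent column maps first so the Source delete is not blocked
    # by foreign key constraints; QuerySet.delete() does this in bulk.
    SourceColumnMap.objects.filter(source_id__in=frozen_source).delete()
    frozen_source.delete()

One reason to keep the explicit loop from the diff is that a bulk QuerySet.delete() does not call an overridden Model.delete() on each instance.
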
11 changes: 7 additions & 4 deletions data/management/commands/update_csv_files.py
@@ -43,6 +43,7 @@ def handle(self, *args, **options):

if options['validate'] and COLUMNS_META_FILE_NAME in file_content.path:
self.stdout.write("Checking columns...", ending='\n')
self.stdout.flush()
self.check_column_mapping(options["path"])
except Exception as e:
raise CommandError(e)
@@ -72,20 +73,22 @@ def update_affected_columns(self, source, column):
if steps.count():
self.stdout.write("{} steps found using the obsolete {} column in the {} table".format(
steps.count(), column.name, source.active_mirror_name), ending='\n')
self.stdout.flush()
for step in steps:
operation = step.operation
columns = operation.logs.get('columns', []) if operation.logs else []
if not column.name in columns:
columns.append(column.name)
steps = operation.logs.get('steps', []) if operation.logs else []
if not step.step_id in steps:
steps.append(step.step_id)
log_steps = operation.logs.get('steps', []) if operation.logs else []
if not step.step_id in log_steps:
log_steps.append(step.step_id)
operation.logs = {
'type': 'warning',
'message': 'Obsolete Columns',
'columns': columns,
'steps': steps
'steps': log_steps
}
operation.count_rows = False
operation.save()

step_columns = step.logs.get('columns', []) if step.logs else []
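For context on the steps to log_steps rename above: rebinding the name of the collection being iterated does not stop the loop, but it silently replaces the original reference. A small standalone illustration (not repo code):

steps = ["step-1", "step-2", "step-3"]
for step in steps:
    steps = []        # rebinding the name does not affect the running loop...
print(steps)          # ...but later code that expects the original 'steps' now sees []

Giving the inner value its own name, as the diff does with log_steps, keeps both values available.
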
3 changes: 3 additions & 0 deletions data_updates/Python/download_oecd.py
@@ -112,7 +112,10 @@ def download(scrape_path, download_path, output_folder_prefix):
to_get.sort()

# Download the files in order.
bad_file_names = [" ", "README "]
for file_to_get in to_get:
if file_to_get in bad_file_names:
continue

# Replace the "_" in the file ids with "-".
uri_suffix = files_to_download[file_to_get].replace("_", "-")
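The skip above could equivalently be written as an up-front filter; a tiny illustrative sketch (to_get here is stand-in data for the sorted list built earlier in the script):

bad_file_names = [" ", "README "]
to_get = ["file-a", "README ", "file-b", " "]  # stand-in data
to_get = [name for name in to_get if name not in bad_file_names]
print(to_get)  # ['file-a', 'file-b']
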
37 changes: 23 additions & 14 deletions data_updates/Python/iati_refresh.py
@@ -2,10 +2,11 @@
import json
import progressbar
import sqlalchemy
from sqlalchemy import create_engine, MetaData, Table, Column, String
from sqlalchemy import create_engine, MetaData, Table, Column, String, insert
from sqlalchemy.types import Boolean
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import time


DATA_SCHEMA = "repo"
@@ -35,12 +36,14 @@ def requests_retry_session(
def fetch_datasets():
results = []
api_url = "https://iatiregistry.org/api/3/action/package_search?rows=1000"
response = requests_retry_session().get(url=api_url, timeout=30).content
response = requests_retry_session().get(url=api_url, timeout=300).content
json_response = json.loads(response)
full_count = json_response["result"]["count"]
current_count = len(json_response["result"]["results"])
results += [{"id": resource["package_id"], "hash": resource["hash"], "url": resource["url"]} for result in json_response["result"]["results"] for resource in result["resources"]]
while current_count < full_count:
time.sleep(1)
print("{}/{}".format(current_count, full_count))
next_api_url = "{}&start={}".format(api_url, current_count)
response = requests_retry_session().get(url=next_api_url, timeout=30).content
json_response = json.loads(response)
@@ -52,15 +55,15 @@ def fetch_datasets():
def main():
engine = create_engine('postgresql://analyst_ui_user:analyst_ui_pass@db:5432/analyst_ui')
# engine = create_engine('postgresql://postgres@:5432/analyst_ui')
conn = engine.connect()
meta = MetaData(engine)
meta.reflect()

meta = MetaData()
meta.reflect(engine)

all_datasets = fetch_datasets()
new_count = 0

try:
datasets = Table(DATA_TABLENAME, meta, schema=DATA_SCHEMA, autoload=True)
datasets = Table(DATA_TABLENAME, meta, schema=DATA_SCHEMA, autoload_with=engine)
except sqlalchemy.exc.NoSuchTableError: # First run
datasets = Table(
DATA_TABLENAME,
@@ -76,13 +79,16 @@ def main():
)
meta.create_all(engine)
new_count += len(all_datasets)
conn.execute(datasets.insert(), all_datasets)
with engine.begin() as conn:
conn.execute(insert(datasets).values(all_datasets))

all_dataset_ids = [dataset["id"] for dataset in all_datasets]
cached_datasets = conn.execute(datasets.select()).fetchall()
cached_dataset_ids = [dataset["id"] for dataset in cached_datasets]
with engine.begin() as conn:
cached_datasets = conn.execute(datasets.select()).fetchall()
cached_dataset_ids = [dataset.id for dataset in cached_datasets]
stale_dataset_ids = list(set(cached_dataset_ids) - set(all_dataset_ids))
conn.execute(datasets.update().where(datasets.c.id.in_(stale_dataset_ids)).values(new=False, modified=False, stale=True, error=False))
with engine.begin() as conn:
conn.execute(datasets.update().where(datasets.c.id.in_(stale_dataset_ids)).values(new=False, modified=False, stale=True, error=False))

stale_count = len(stale_dataset_ids)
modified_count = 0
@@ -93,18 +99,21 @@
dataset["modified"] = False
dataset["stale"] = False
dataset["error"] = False
conn.execute(datasets.insert(dataset))
with engine.begin() as conn:
conn.execute(insert(datasets).values(dataset))
new_count += 1
except sqlalchemy.exc.IntegrityError: # Dataset ID already exists
cached_dataset = conn.execute(datasets.select().where(datasets.c.id == dataset["id"])).fetchone()
if cached_dataset["hash"] == dataset["hash"]: # If the hashes match, carry on
with engine.begin() as conn:
cached_dataset = conn.execute(datasets.select().where(datasets.c.id == dataset["id"])).fetchone()
if cached_dataset.hash == dataset["hash"]: # If the hashes match, carry on
continue
else: # Otherwise, mark it modified and update the metadata
dataset["new"] = False
dataset["modified"] = True
dataset["stale"] = False # If for some reason, we pick up a previously stale dataset
dataset["error"] = False
conn.execute(datasets.update().where(datasets.c.id == dataset["id"]).values(dataset))
with engine.begin() as conn:
conn.execute(datasets.update().where(datasets.c.id == dataset["id"]).values(dataset))
modified_count += 1

engine.dispose()
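The iati_refresh.py changes above move the script to the SQLAlchemy 1.4/2.0-style API: an unbound MetaData, reflection and autoloading that take the engine explicitly, insert() imported from sqlalchemy, engine.begin() for transactional connections, and attribute access on result rows. The condensed sketch below shows the same pattern against an in-memory SQLite database; the table shape is simplified and the engine URL is illustrative, not the repo's.

from sqlalchemy import create_engine, MetaData, Table, Column, String, insert, select

engine = create_engine("sqlite://")  # illustrative engine; the script targets Postgres

# 2.0 style: MetaData is no longer bound to an engine.
meta = MetaData()
datasets = Table(
    "datasets", meta,
    Column("id", String, primary_key=True),
    Column("hash", String),
)
meta.create_all(engine)

# engine.begin() yields a connection inside a transaction that commits on success.
with engine.begin() as conn:
    conn.execute(insert(datasets).values(id="abc", hash="123"))

# Reflection now receives the engine explicitly; autoload_with replaces autoload=True.
reflected = MetaData()
reflected.reflect(engine)
datasets_reflected = Table("datasets", reflected, autoload_with=engine)

with engine.begin() as conn:
    row = conn.execute(
        select(datasets_reflected).where(datasets_reflected.c.id == "abc")
    ).fetchone()
    print(row.id, row.hash)  # rows are accessed as attributes, not dict keys

engine.dispose()
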