-
-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #300 from coderxio/jrlegrand/excipients
New mart: Products to inactive ingredients (excipients)
- Loading branch information
Showing
11 changed files
with
261 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,5 +17,5 @@ plugins | |
# Desktop Services Store | ||
.DS_Store | ||
|
||
#GCP | ||
gcp.json | ||
# GCP | ||
gcp.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import pendulum | ||
|
||
from airflow_operator import create_dag | ||
from airflow.providers.postgres.operators.postgres import PostgresOperator | ||
from airflow.decorators import task | ||
|
||
from common_dag_tasks import extract, transform, generate_sql_list, get_ds_folder | ||
from sagerx import read_sql_file | ||
|
||
dag_id = "fda_unii"

# NOTE(review): start_date is evaluated every time the file is parsed, so it
# moves forward each day. Airflow's guidance is to prefer a fixed start_date;
# confirm this is the intended project-wide convention.
dag = create_dag(
    dag_id=dag_id,
    schedule="0 4 * * *",
    start_date=pendulum.yesterday(),
    catchup=False,
    concurrency=2,
)

with dag:
    url = "https://precision.fda.gov/uniisearch/archive/latest/UNII_Data.zip"
    ds_folder = get_ds_folder(dag_id)

    extract_task = extract(dag_id, url)
    transform_task = transform(dag_id)

    @task
    def get_file_name(data_path) -> str:
        """Return the name of the UNII records file found in ``data_path``.

        Args:
            data_path: Directory to scan. It receives the output of the
                extract task, e.g. /opt/data/fda_unii/UNII_Data/.

        Returns:
            The name (not the full path) of the entry whose name starts with
            "UNII_Records", e.g. UNII_Records_22Jun2024.txt. If several
            entries match, the lexicographically last one is returned so the
            choice is deterministic (note: lexicographic, not chronological).

        Raises:
            FileNotFoundError: If no entry in ``data_path`` starts with
                "UNII_Records". Failing here keeps the error next to its
                cause instead of surfacing later as a failed COPY.
        """
        import logging
        import os

        logging.info(f'Data path: {data_path}')

        # os.listdir returns entries in arbitrary order; sort so the result
        # does not depend on the filesystem.
        candidates = sorted(
            entry
            for entry in os.listdir(data_path)
            if entry.startswith("UNII_Records")
        )

        if not candidates:
            logging.error('Could not find file_name.')
            raise FileNotFoundError(
                f"No file starting with 'UNII_Records' found in {data_path}"
            )

        return candidates[-1]

    file_name_task = get_file_name(extract_task)

    # Build one PostgresOperator per SQL file returned for this DAG.
    sql_tasks = []
    for sql in generate_sql_list(dag_id):
        sql_path = ds_folder / sql
        task_id = sql[:-4]  # strip the ".sql" extension
        sql_task = PostgresOperator(
            task_id=task_id,
            postgres_conn_id="postgres_default",
            # The placeholders are filled with task outputs, not plain
            # strings. Presumably their string form is a template that
            # Airflow resolves at run time; verify before refactoring.
            sql=read_sql_file(sql_path).format(
                data_path=extract_task,
                file_name=file_name_task,
            ),
            dag=dag,
        )
        sql_tasks.append(sql_task)

    # All load statements run after the file name is known and before the
    # transform step.
    file_name_task >> sql_tasks >> transform_task
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/* sagerx_lake.fda_unii */
-- Rebuilds the raw FDA UNII table from scratch and bulk-loads the records file.
-- NOTE: the data_path and file_name placeholders in the COPY statement are
-- filled in by Python str.format before execution, so no other curly braces
-- may appear anywhere in this file (comments included).

-- CASCADE also drops objects that depend on the table (e.g. views).
DROP TABLE IF EXISTS sagerx_lake.fda_unii CASCADE;

-- Every column is loaded as TEXT; only unii is required.
CREATE TABLE sagerx_lake.fda_unii (
unii                TEXT NOT NULL,
display_name        TEXT,
rn                  TEXT,
ec                  TEXT,
ncit                TEXT,
rxcui               TEXT,
pubchem             TEXT,
epa_comptox         TEXT,
catalogue_of_life   TEXT,
itis                TEXT,
ncbi                TEXT,
plants              TEXT,
grin                TEXT,
mpns                TEXT,
inn_id              TEXT,
usan_id             TEXT,
mf                  TEXT,
inchikey            TEXT,
smiles              TEXT,
ingredient_type     TEXT,
substance_type      TEXT,
uuid                TEXT,
dailymed            TEXT
);

-- Tab-delimited file with a header row, read as WIN1252.
COPY sagerx_lake.fda_unii
FROM '{data_path}/{file_name}' DELIMITER E'\t' CSV HEADER ENCODING 'WIN1252';
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
version: 2 | ||
|
||
sources: | ||
- name: fda_unii | ||
description: FDA UNII codes. | ||
schema: sagerx_lake | ||
tables: | ||
- name: fda_unii | ||
description: FDA UNII codes. |
18 changes: 18 additions & 0 deletions
18
dbt/sagerx/models/staging/fda_unii/stg_fda_unii__unii_codes.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
-- stg_fda_unii__unii_codes.sql
-- Staging model: selects the UNII code, its display name and a subset of
-- cross-reference columns from the fda_unii source table.

with unii_source as (

    select * from {{ source('fda_unii', 'fda_unii') }}

)

select
    unii_source.unii,
    unii_source.display_name,
    unii_source.rxcui,
    unii_source.pubchem,
    unii_source.rn,
    unii_source.ncit,
    unii_source.ncbi,
    unii_source.dailymed
from unii_source
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
version: 2 | ||
|
||
seeds: | ||
- name: usp_preservatives | ||
description: | | ||
A list of CAS RN identifiers and USP product names obtained manually from | ||
searching the [USP catalog](https://store.usp.org/preservatives/category/USP-1213) | ||
for products in the "Preservatives" category. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
cas_rn,usp_product_name | ||
17927-65-0,Aluminum Sulfate (2 g) | ||
60-00-4,Edetic Acid (200 mg) | ||
79-09-4,Propionic Acid (1.5 mL/ampule; 3 ampules) | ||
6001-64-5,Chlorobutanol (200 mg) | ||
59-51-8,Racemethionine (200 mg) | ||
128-37-0,Butylated Hydroxytoluene (500 mg) | ||
5793-89-5,Calcium Saccharate (200 mg) | ||
121-00-6,3-tert-Butyl-4-hydroxyanisole (200 mg) | ||
137-40-6,Sodium Propionate (200 mg) | ||
89-65-6,Erythorbic Acid (50 mg) | ||
122-99-6,Phenoxyethanol (500 mg) (2-Phenoxyethanol) | ||
94-13-3,Propylparaben (200 mg) | ||
8001-54-5,Benzalkonium Chloride (5 mL of approx. 4% aqueous solution) | ||
7681-57-4,Sodium Metabisulfite (2 X 500 mg) | ||
110-44-1,Sorbic Acid (1 g) | ||
100-51-6,Benzyl Alcohol (500 mg/ampule) | ||
99-76-3,Methylparaben (125 mg) | ||
590-00-1,Potassium Sorbate (1 g) | ||
24634-61-5,Potassium Sorbate (1 g) | ||
532-32-1,Sodium Benzoate (1 g) | ||
88-32-4,2-tert-Butyl-4-hydroxyanisole (200 mg) | ||
120-47-8,Ethylparaben (200 mg) | ||
90-64-2,Mandelic Acid (500 mg) | ||
121-79-9,Propyl Gallate (200 mg) | ||
4075-81-4,Calcium Propionate (100 mg) | ||
94-26-8,Butylparaben (200 mg) | ||
39236-46-9,Imidurea (200 mg) | ||
520-45-6,Dehydroacetic Acid (200 mg) | ||
57-09-0,Cetrimonium Bromide (1 g) |