Skip to content

Commit

Permalink
wip: debug syh dr data model
Browse files Browse the repository at this point in the history
  • Loading branch information
jaanphare committed Apr 6, 2024
1 parent 6f595c6 commit 6dc81b7
Show file tree
Hide file tree
Showing 15 changed files with 185 additions and 128 deletions.
16 changes: 12 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,21 +100,29 @@ cd healthcare_data
python scripts/generate_syh_dr_data_models.py ~/data/syh_dr https://www.ahrq.gov/sites/default/files/wysiwyg/data/SyH-DR-Codebook.pdf
```

2. Generate the synthetic healthcare data (takes ~5 minutes):
2. Generate the synthetic healthcare data (takes ~2.5 minutes with 8 threads on a MacBook):

```bash
dbt run
dbt run --threads 8
```

3. Verify that you can query the data on the command line:

```bash


## To build a specific data model:
## To build a specific data model

Use `--select` in dbt:

```bash
syhdr_medicare_outpatient_2016
dbt run --select "syhdr_medicare_outpatient_2016"
```

## To build a specific figure for visualization with Observable Framework

Use `--select` in dbt to select models, e.g. in order to build all histograms:

```bash
dbt run --select "*histogram*"
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{{ config(materialized='external', location=var('output_path') + '/' + this.name + '.parquet') }}

-- Histogram input for 2016 inpatient plan payments.
--
-- Output: one row per (Insurance, Payment) pair, where
--   Payment   = a distinct PLAN_PMT_AMT value found in the upstream model,
--   count     = number of upstream rows carrying that value,
--   Insurance = literal label identifying which upstream model the row came from.
--
-- The upstream inpatient models are referenced with ref() instead of absolute
-- parquet paths so that:
--   * dbt records the dependency and builds those models before this one
--     (otherwise a multi-threaded run can read missing or partially written files);
--   * the model does not depend on a single machine's directory layout.
--
-- NOTE(review): assumes each upstream model exposes a column named PLAN_PMT_AMT
-- -- confirm the cleaned amount expression upstream carries that alias.

WITH commercial_data AS (
    SELECT
        PLAN_PMT_AMT AS Payment,
        COUNT(*) AS count,
        'Commercial' AS Insurance
    FROM {{ ref('syhdr_commercial_inpatient_2016') }}
    GROUP BY PLAN_PMT_AMT
),

medicaid_data AS (
    SELECT
        PLAN_PMT_AMT AS Payment,
        COUNT(*) AS count,
        'Medicaid' AS Insurance
    FROM {{ ref('syhdr_medicaid_inpatient_2016') }}
    GROUP BY PLAN_PMT_AMT
),

medicare_data AS (
    SELECT
        PLAN_PMT_AMT AS Payment,
        COUNT(*) AS count,
        'Medicare' AS Insurance
    FROM {{ ref('syhdr_medicare_inpatient_2016') }}
    GROUP BY PLAN_PMT_AMT
),

-- The three inputs are disjoint by construction (distinct Insurance labels),
-- so UNION ALL is sufficient; no de-duplication is needed.
combined_data AS (
    SELECT Payment, count, Insurance FROM commercial_data
    UNION ALL
    SELECT Payment, count, Insurance FROM medicaid_data
    UNION ALL
    SELECT Payment, count, Insurance FROM medicare_data
)

SELECT
    Payment,
    count,
    Insurance
FROM combined_data
ORDER BY Insurance, Payment
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
{{ config(materialized='external', location=var('output_path') + '/' + this.name + '.parquet') }}

SELECT
PERSON_ID::VARCHAR,
PERSON_WGHT::VARCHAR,
FACILITY_ID::VARCHAR,
CLM_CNTL_NUM::VARCHAR,
PERSON_ID::UBIGINT,
PERSON_WGHT::NUMERIC,
FACILITY_ID::UBIGINT,
CLM_CNTL_NUM::NUMERIC,
AT_SPCLTY::VARCHAR,
SRVC_BEG_DATE::VARCHAR,
SRVC_END_DATE::VARCHAR,
LOS::VARCHAR,
SRVC_BEG_DATE::DATE,
SRVC_END_DATE::DATE,
LOS::UINTEGER,
ADMSN_TYPE::VARCHAR,
TOB_CD::VARCHAR,
CLM_TYPE_CD::VARCHAR,
Expand Down Expand Up @@ -101,6 +101,6 @@ SELECT
CPT_PRCDR_CD_33::VARCHAR,
CPT_PRCDR_CD_34::VARCHAR,
CPT_PRCDR_CD_35::VARCHAR,
PLAN_PMT_AMT::VARCHAR,
TOT_CHRG_AMT::VARCHAR
replace(replace(PLAN_PMT_AMT, '$', ''), ',', '')::FLOAT,
replace(replace(TOT_CHRG_AMT, '$', ''), ',', '')::FLOAT
FROM read_csv('/Users/me/data/syh_dr/syhdr_commercial_inpatient_2016.CSV', header=True, null_padding=true, types={'CPT_PRCDR_CD_1': 'VARCHAR', 'CPT_PRCDR_CD_2': 'VARCHAR', 'CPT_PRCDR_CD_3': 'VARCHAR', 'CPT_PRCDR_CD_4': 'VARCHAR', 'CPT_PRCDR_CD_5': 'VARCHAR', 'CPT_PRCDR_CD_6': 'VARCHAR', 'CPT_PRCDR_CD_7': 'VARCHAR', 'CPT_PRCDR_CD_8': 'VARCHAR', 'CPT_PRCDR_CD_9': 'VARCHAR', 'CPT_PRCDR_CD_10': 'VARCHAR', 'CPT_PRCDR_CD_11': 'VARCHAR', 'CPT_PRCDR_CD_12': 'VARCHAR', 'CPT_PRCDR_CD_13': 'VARCHAR', 'CPT_PRCDR_CD_14': 'VARCHAR', 'CPT_PRCDR_CD_15': 'VARCHAR', 'CPT_PRCDR_CD_16': 'VARCHAR', 'CPT_PRCDR_CD_17': 'VARCHAR', 'CPT_PRCDR_CD_18': 'VARCHAR', 'CPT_PRCDR_CD_19': 'VARCHAR', 'CPT_PRCDR_CD_20': 'VARCHAR', 'CPT_PRCDR_CD_21': 'VARCHAR', 'CPT_PRCDR_CD_22': 'VARCHAR', 'CPT_PRCDR_CD_23': 'VARCHAR', 'CPT_PRCDR_CD_24': 'VARCHAR', 'CPT_PRCDR_CD_25': 'VARCHAR', 'CPT_PRCDR_CD_26': 'VARCHAR', 'CPT_PRCDR_CD_27': 'VARCHAR', 'CPT_PRCDR_CD_28': 'VARCHAR', 'CPT_PRCDR_CD_29': 'VARCHAR', 'CPT_PRCDR_CD_30': 'VARCHAR', 'CPT_PRCDR_CD_31': 'VARCHAR', 'CPT_PRCDR_CD_32': 'VARCHAR', 'CPT_PRCDR_CD_33': 'VARCHAR', 'CPT_PRCDR_CD_34': 'VARCHAR', 'CPT_PRCDR_CD_35': 'VARCHAR'}, ignore_errors=true)
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
{{ config(materialized='external', location=var('output_path') + '/' + this.name + '.parquet') }}

SELECT
PERSON_ID::VARCHAR,
PERSON_WGHT::VARCHAR,
FACILITY_ID::VARCHAR,
CLM_CNTL_NUM::VARCHAR,
PERSON_ID::UBIGINT,
PERSON_WGHT::NUMERIC,
FACILITY_ID::UBIGINT,
CLM_CNTL_NUM::NUMERIC,
AT_SPCLTY::VARCHAR,
SRVC_BEG_DATE::VARCHAR,
SRVC_END_DATE::VARCHAR,
LOS::VARCHAR,
SRVC_BEG_DATE::DATE,
SRVC_END_DATE::DATE,
LOS::UINTEGER,
ADMSN_TYPE::VARCHAR,
TOB_CD::VARCHAR,
CLM_TYPE_CD::VARCHAR,
Expand Down Expand Up @@ -101,6 +101,6 @@ SELECT
CPT_PRCDR_CD_33::VARCHAR,
CPT_PRCDR_CD_34::VARCHAR,
CPT_PRCDR_CD_35::VARCHAR,
PLAN_PMT_AMT::VARCHAR,
TOT_CHRG_AMT::VARCHAR
replace(replace(PLAN_PMT_AMT, '$', ''), ',', '')::FLOAT,
replace(replace(TOT_CHRG_AMT, '$', ''), ',', '')::FLOAT
FROM read_csv('/Users/me/data/syh_dr/syhdr_commercial_outpatient_2016.CSV', header=True, null_padding=true, types={'CPT_PRCDR_CD_1': 'VARCHAR', 'CPT_PRCDR_CD_2': 'VARCHAR', 'CPT_PRCDR_CD_3': 'VARCHAR', 'CPT_PRCDR_CD_4': 'VARCHAR', 'CPT_PRCDR_CD_5': 'VARCHAR', 'CPT_PRCDR_CD_6': 'VARCHAR', 'CPT_PRCDR_CD_7': 'VARCHAR', 'CPT_PRCDR_CD_8': 'VARCHAR', 'CPT_PRCDR_CD_9': 'VARCHAR', 'CPT_PRCDR_CD_10': 'VARCHAR', 'CPT_PRCDR_CD_11': 'VARCHAR', 'CPT_PRCDR_CD_12': 'VARCHAR', 'CPT_PRCDR_CD_13': 'VARCHAR', 'CPT_PRCDR_CD_14': 'VARCHAR', 'CPT_PRCDR_CD_15': 'VARCHAR', 'CPT_PRCDR_CD_16': 'VARCHAR', 'CPT_PRCDR_CD_17': 'VARCHAR', 'CPT_PRCDR_CD_18': 'VARCHAR', 'CPT_PRCDR_CD_19': 'VARCHAR', 'CPT_PRCDR_CD_20': 'VARCHAR', 'CPT_PRCDR_CD_21': 'VARCHAR', 'CPT_PRCDR_CD_22': 'VARCHAR', 'CPT_PRCDR_CD_23': 'VARCHAR', 'CPT_PRCDR_CD_24': 'VARCHAR', 'CPT_PRCDR_CD_25': 'VARCHAR', 'CPT_PRCDR_CD_26': 'VARCHAR', 'CPT_PRCDR_CD_27': 'VARCHAR', 'CPT_PRCDR_CD_28': 'VARCHAR', 'CPT_PRCDR_CD_29': 'VARCHAR', 'CPT_PRCDR_CD_30': 'VARCHAR', 'CPT_PRCDR_CD_31': 'VARCHAR', 'CPT_PRCDR_CD_32': 'VARCHAR', 'CPT_PRCDR_CD_33': 'VARCHAR', 'CPT_PRCDR_CD_34': 'VARCHAR', 'CPT_PRCDR_CD_35': 'VARCHAR'}, ignore_errors=true)
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
{{ config(materialized='external', location=var('output_path') + '/' + this.name + '.parquet') }}

SELECT
PERSON_ID::VARCHAR,
PERSON_WGHT::VARCHAR,
AGE_LOW::VARCHAR,
AGE_HIGH::VARCHAR,
PERSON_ID::UBIGINT,
PERSON_WGHT::NUMERIC,
AGE_LOW::NUMERIC,
AGE_HIGH::NUMERIC,
SEX_IDENT_CD::VARCHAR,
STATE_CD::VARCHAR,
COUNTY_FIPS_CD::VARCHAR,
ZIP_CD::VARCHAR,
PHRMCY_CVRG_1::VARCHAR,
PHRMCY_CVRG_1::NUMERIC,
PHRMCY_CVRG_2::VARCHAR,
PHRMCY_CVRG_3::VARCHAR,
PHRMCY_CVRG_4::VARCHAR,
Expand All @@ -21,8 +21,8 @@ SELECT
PHRMCY_CVRG_9::VARCHAR,
PHRMCY_CVRG_10::VARCHAR,
PHRMCY_CVRG_11::VARCHAR,
PHRMCY_CVRG_12::VARCHAR,
CMRCL_INSRC_1::VARCHAR,
PHRMCY_CVRG_12::NUMERIC,
CMRCL_INSRC_1::NUMERIC,
CMRCL_INSRC_2::VARCHAR,
CMRCL_INSRC_3::VARCHAR,
CMRCL_INSRC_4::VARCHAR,
Expand All @@ -33,5 +33,5 @@ SELECT
CMRCL_INSRC_9::VARCHAR,
CMRCL_INSRC_10::VARCHAR,
CMRCL_INSRC_11::VARCHAR,
CMRCL_INSRC_12::VARCHAR
CMRCL_INSRC_12::NUMERIC
FROM read_csv('/Users/me/data/syh_dr/syhdr_commercial_person_2016.CSV', header=True, null_padding=true)
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
{{ config(materialized='external', location=var('output_path') + '/' + this.name + '.parquet') }}

SELECT
PERSON_ID::VARCHAR,
PERSON_WGHT::VARCHAR,
PHMCY_CLM_NUM::VARCHAR,
CLM_CNTL_NUM::VARCHAR,
PERSON_ID::UBIGINT,
PERSON_WGHT::NUMERIC,
PHMCY_CLM_NUM::NUMERIC,
CLM_CNTL_NUM::NUMERIC,
LINE_NBR::VARCHAR,
FILL_DT::VARCHAR,
FILL_DT::DATE,
SYNTHETIC_DRUG_ID::VARCHAR,
GENERIC_DRUG_NAME::VARCHAR,
PLAN_PMT_AMT::VARCHAR,
TOT_CHRG_AMT::VARCHAR
replace(replace(PLAN_PMT_AMT, '$', ''), ',', '')::FLOAT,
replace(replace(TOT_CHRG_AMT, '$', ''), ',', '')::FLOAT
FROM read_csv('/Users/me/data/syh_dr/syhdr_commercial_pharmacy_2016.CSV', header=True, null_padding=true)
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
{{ config(materialized='external', location=var('output_path') + '/' + this.name + '.parquet') }}

SELECT
PERSON_ID::VARCHAR,
PERSON_WGHT::VARCHAR,
FACILITY_ID::VARCHAR,
CLM_CNTL_NUM::VARCHAR,
PERSON_ID::UBIGINT,
PERSON_WGHT::NUMERIC,
FACILITY_ID::UBIGINT,
CLM_CNTL_NUM::NUMERIC,
AT_SPCLTY::VARCHAR,
SRVC_BEG_DATE::VARCHAR,
SRVC_END_DATE::VARCHAR,
LOS::VARCHAR,
SRVC_BEG_DATE::DATE,
SRVC_END_DATE::DATE,
LOS::UINTEGER,
ADMSN_TYPE::VARCHAR,
TOB_CD::VARCHAR,
CLM_TYPE_CD::VARCHAR,
Expand Down Expand Up @@ -101,6 +101,6 @@ SELECT
CPT_PRCDR_CD_33::VARCHAR,
CPT_PRCDR_CD_34::VARCHAR,
CPT_PRCDR_CD_35::VARCHAR,
PLAN_PMT_AMT::VARCHAR,
TOT_CHRG_AMT::VARCHAR
replace(replace(PLAN_PMT_AMT, '$', ''), ',', '')::FLOAT,
replace(replace(TOT_CHRG_AMT, '$', ''), ',', '')::FLOAT
FROM read_csv('/Users/me/data/syh_dr/syhdr_medicaid_inpatient_2016.CSV', header=True, null_padding=true, types={'CPT_PRCDR_CD_1': 'VARCHAR', 'CPT_PRCDR_CD_2': 'VARCHAR', 'CPT_PRCDR_CD_3': 'VARCHAR', 'CPT_PRCDR_CD_4': 'VARCHAR', 'CPT_PRCDR_CD_5': 'VARCHAR', 'CPT_PRCDR_CD_6': 'VARCHAR', 'CPT_PRCDR_CD_7': 'VARCHAR', 'CPT_PRCDR_CD_8': 'VARCHAR', 'CPT_PRCDR_CD_9': 'VARCHAR', 'CPT_PRCDR_CD_10': 'VARCHAR', 'CPT_PRCDR_CD_11': 'VARCHAR', 'CPT_PRCDR_CD_12': 'VARCHAR', 'CPT_PRCDR_CD_13': 'VARCHAR', 'CPT_PRCDR_CD_14': 'VARCHAR', 'CPT_PRCDR_CD_15': 'VARCHAR', 'CPT_PRCDR_CD_16': 'VARCHAR', 'CPT_PRCDR_CD_17': 'VARCHAR', 'CPT_PRCDR_CD_18': 'VARCHAR', 'CPT_PRCDR_CD_19': 'VARCHAR', 'CPT_PRCDR_CD_20': 'VARCHAR', 'CPT_PRCDR_CD_21': 'VARCHAR', 'CPT_PRCDR_CD_22': 'VARCHAR', 'CPT_PRCDR_CD_23': 'VARCHAR', 'CPT_PRCDR_CD_24': 'VARCHAR', 'CPT_PRCDR_CD_25': 'VARCHAR', 'CPT_PRCDR_CD_26': 'VARCHAR', 'CPT_PRCDR_CD_27': 'VARCHAR', 'CPT_PRCDR_CD_28': 'VARCHAR', 'CPT_PRCDR_CD_29': 'VARCHAR', 'CPT_PRCDR_CD_30': 'VARCHAR', 'CPT_PRCDR_CD_31': 'VARCHAR', 'CPT_PRCDR_CD_32': 'VARCHAR', 'CPT_PRCDR_CD_33': 'VARCHAR', 'CPT_PRCDR_CD_34': 'VARCHAR', 'CPT_PRCDR_CD_35': 'VARCHAR'}, ignore_errors=true)
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
{{ config(materialized='external', location=var('output_path') + '/' + this.name + '.parquet') }}

SELECT
PERSON_ID::VARCHAR,
PERSON_WGHT::VARCHAR,
FACILITY_ID::VARCHAR,
CLM_CNTL_NUM::VARCHAR,
PERSON_ID::UBIGINT,
PERSON_WGHT::NUMERIC,
FACILITY_ID::UBIGINT,
CLM_CNTL_NUM::NUMERIC,
AT_SPCLTY::VARCHAR,
SRVC_BEG_DATE::VARCHAR,
SRVC_END_DATE::VARCHAR,
LOS::VARCHAR,
SRVC_BEG_DATE::DATE,
SRVC_END_DATE::DATE,
LOS::UINTEGER,
ADMSN_TYPE::VARCHAR,
TOB_CD::VARCHAR,
CLM_TYPE_CD::VARCHAR,
Expand Down Expand Up @@ -101,6 +101,6 @@ SELECT
CPT_PRCDR_CD_33::VARCHAR,
CPT_PRCDR_CD_34::VARCHAR,
CPT_PRCDR_CD_35::VARCHAR,
PLAN_PMT_AMT::VARCHAR,
TOT_CHRG_AMT::VARCHAR
replace(replace(PLAN_PMT_AMT, '$', ''), ',', '')::FLOAT,
replace(replace(TOT_CHRG_AMT, '$', ''), ',', '')::FLOAT
FROM read_csv('/Users/me/data/syh_dr/syhdr_medicaid_outpatient_2016.CSV', header=True, null_padding=true, types={'CPT_PRCDR_CD_1': 'VARCHAR', 'CPT_PRCDR_CD_2': 'VARCHAR', 'CPT_PRCDR_CD_3': 'VARCHAR', 'CPT_PRCDR_CD_4': 'VARCHAR', 'CPT_PRCDR_CD_5': 'VARCHAR', 'CPT_PRCDR_CD_6': 'VARCHAR', 'CPT_PRCDR_CD_7': 'VARCHAR', 'CPT_PRCDR_CD_8': 'VARCHAR', 'CPT_PRCDR_CD_9': 'VARCHAR', 'CPT_PRCDR_CD_10': 'VARCHAR', 'CPT_PRCDR_CD_11': 'VARCHAR', 'CPT_PRCDR_CD_12': 'VARCHAR', 'CPT_PRCDR_CD_13': 'VARCHAR', 'CPT_PRCDR_CD_14': 'VARCHAR', 'CPT_PRCDR_CD_15': 'VARCHAR', 'CPT_PRCDR_CD_16': 'VARCHAR', 'CPT_PRCDR_CD_17': 'VARCHAR', 'CPT_PRCDR_CD_18': 'VARCHAR', 'CPT_PRCDR_CD_19': 'VARCHAR', 'CPT_PRCDR_CD_20': 'VARCHAR', 'CPT_PRCDR_CD_21': 'VARCHAR', 'CPT_PRCDR_CD_22': 'VARCHAR', 'CPT_PRCDR_CD_23': 'VARCHAR', 'CPT_PRCDR_CD_24': 'VARCHAR', 'CPT_PRCDR_CD_25': 'VARCHAR', 'CPT_PRCDR_CD_26': 'VARCHAR', 'CPT_PRCDR_CD_27': 'VARCHAR', 'CPT_PRCDR_CD_28': 'VARCHAR', 'CPT_PRCDR_CD_29': 'VARCHAR', 'CPT_PRCDR_CD_30': 'VARCHAR', 'CPT_PRCDR_CD_31': 'VARCHAR', 'CPT_PRCDR_CD_32': 'VARCHAR', 'CPT_PRCDR_CD_33': 'VARCHAR', 'CPT_PRCDR_CD_34': 'VARCHAR', 'CPT_PRCDR_CD_35': 'VARCHAR'}, ignore_errors=true)
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@
{{ config(materialized='external', location=var('output_path') + '/' + this.name + '.parquet') }}

SELECT
PERSON_ID::VARCHAR,
MCAID_BENE_ID::VARCHAR,
PERSON_WGHT::VARCHAR,
AGE_LOW::VARCHAR,
AGE_HIGH::VARCHAR,
PERSON_ID::UBIGINT,
MCAID_BENE_ID::UBIGINT,
PERSON_WGHT::NUMERIC,
AGE_LOW::NUMERIC,
AGE_HIGH::NUMERIC,
SEX_IDENT_CD::VARCHAR,
RACE_CD::VARCHAR,
MCAID_SBMTTG_ST_CD::VARCHAR,
STATE_CD::VARCHAR,
COUNTY_FIPS_CD::VARCHAR,
ZIP_CD::VARCHAR,
RSN_ENRLMT_CD::VARCHAR,
MDCD_ENRLMT_1::VARCHAR,
MDCD_ENRLMT_1::NUMERIC,
MDCD_ENRLMT_2::VARCHAR,
MDCD_ENRLMT_3::VARCHAR,
MDCD_ENRLMT_4::VARCHAR,
Expand All @@ -25,8 +25,8 @@ SELECT
MDCD_ENRLMT_9::VARCHAR,
MDCD_ENRLMT_10::VARCHAR,
MDCD_ENRLMT_11::VARCHAR,
MDCD_ENRLMT_12::VARCHAR,
MDCD_MCO_ENRLMT_1::VARCHAR,
MDCD_ENRLMT_12::NUMERIC,
MDCD_MCO_ENRLMT_1::NUMERIC,
MDCD_MCO_ENRLMT_2::VARCHAR,
MDCD_MCO_ENRLMT_3::VARCHAR,
MDCD_MCO_ENRLMT_4::VARCHAR,
Expand All @@ -37,10 +37,10 @@ SELECT
MDCD_MCO_ENRLMT_9::VARCHAR,
MDCD_MCO_ENRLMT_10::VARCHAR,
MDCD_MCO_ENRLMT_11::VARCHAR,
MDCD_MCO_ENRLMT_12::VARCHAR,
MDCD_CHIP_ENRLMT::VARCHAR,
MDCD_MCO_ENRLMT_12::NUMERIC,
MDCD_CHIP_ENRLMT::NUMERIC,
RSTRCTD_BNFTS_IND::VARCHAR,
DUAL_ELGBL_1::VARCHAR,
DUAL_ELGBL_1::NUMERIC,
DUAL_ELGBL_2::VARCHAR,
DUAL_ELGBL_3::VARCHAR,
DUAL_ELGBL_4::VARCHAR,
Expand All @@ -51,5 +51,5 @@ SELECT
DUAL_ELGBL_9::VARCHAR,
DUAL_ELGBL_10::VARCHAR,
DUAL_ELGBL_11::VARCHAR,
DUAL_ELGBL_12::VARCHAR
DUAL_ELGBL_12::NUMERIC
FROM read_csv('/Users/me/data/syh_dr/syhdr_medicaid_person_2016.CSV', header=True, null_padding=true)
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
{{ config(materialized='external', location=var('output_path') + '/' + this.name + '.parquet') }}

SELECT
PERSON_ID::VARCHAR,
PERSON_WGHT::VARCHAR,
PHMCY_CLM_NUM::VARCHAR,
CLM_CNTL_NUM::VARCHAR,
PERSON_ID::UBIGINT,
PERSON_WGHT::NUMERIC,
PHMCY_CLM_NUM::NUMERIC,
CLM_CNTL_NUM::NUMERIC,
LINE_NBR::VARCHAR,
FILL_DT::VARCHAR,
FILL_DT::DATE,
SYNTHETIC_DRUG_ID::VARCHAR,
GENERIC_DRUG_NAME::VARCHAR,
PLAN_PMT_AMT::VARCHAR,
TOT_CHRG_AMT::VARCHAR
replace(replace(PLAN_PMT_AMT, '$', ''), ',', '')::FLOAT,
replace(replace(TOT_CHRG_AMT, '$', ''), ',', '')::FLOAT
FROM read_csv('/Users/me/data/syh_dr/syhdr_medicaid_pharmacy_2016.CSV', header=True, null_padding=true)
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
{{ config(materialized='external', location=var('output_path') + '/' + this.name + '.parquet') }}

SELECT
PERSON_ID::VARCHAR,
PERSON_WGHT::VARCHAR,
FACILITY_ID::VARCHAR,
CLM_CNTL_NUM::VARCHAR,
PERSON_ID::UBIGINT,
PERSON_WGHT::NUMERIC,
FACILITY_ID::UBIGINT,
CLM_CNTL_NUM::NUMERIC,
AT_SPCLTY::VARCHAR,
SRVC_BEG_DATE::VARCHAR,
SRVC_END_DATE::VARCHAR,
LOS::VARCHAR,
SRVC_BEG_DATE::DATE,
SRVC_END_DATE::DATE,
LOS::UINTEGER,
ADMSN_TYPE::VARCHAR,
TOB_CD::VARCHAR,
CLM_TYPE_CD::VARCHAR,
Expand Down Expand Up @@ -101,6 +101,6 @@ SELECT
CPT_PRCDR_CD_33::VARCHAR,
CPT_PRCDR_CD_34::VARCHAR,
CPT_PRCDR_CD_35::VARCHAR,
PLAN_PMT_AMT::VARCHAR,
TOT_CHRG_AMT::VARCHAR
replace(replace(PLAN_PMT_AMT, '$', ''), ',', '')::FLOAT,
replace(replace(TOT_CHRG_AMT, '$', ''), ',', '')::FLOAT
FROM read_csv('/Users/me/data/syh_dr/syhdr_medicare_inpatient_2016.CSV', header=True, null_padding=true, types={'CPT_PRCDR_CD_1': 'VARCHAR', 'CPT_PRCDR_CD_2': 'VARCHAR', 'CPT_PRCDR_CD_3': 'VARCHAR', 'CPT_PRCDR_CD_4': 'VARCHAR', 'CPT_PRCDR_CD_5': 'VARCHAR', 'CPT_PRCDR_CD_6': 'VARCHAR', 'CPT_PRCDR_CD_7': 'VARCHAR', 'CPT_PRCDR_CD_8': 'VARCHAR', 'CPT_PRCDR_CD_9': 'VARCHAR', 'CPT_PRCDR_CD_10': 'VARCHAR', 'CPT_PRCDR_CD_11': 'VARCHAR', 'CPT_PRCDR_CD_12': 'VARCHAR', 'CPT_PRCDR_CD_13': 'VARCHAR', 'CPT_PRCDR_CD_14': 'VARCHAR', 'CPT_PRCDR_CD_15': 'VARCHAR', 'CPT_PRCDR_CD_16': 'VARCHAR', 'CPT_PRCDR_CD_17': 'VARCHAR', 'CPT_PRCDR_CD_18': 'VARCHAR', 'CPT_PRCDR_CD_19': 'VARCHAR', 'CPT_PRCDR_CD_20': 'VARCHAR', 'CPT_PRCDR_CD_21': 'VARCHAR', 'CPT_PRCDR_CD_22': 'VARCHAR', 'CPT_PRCDR_CD_23': 'VARCHAR', 'CPT_PRCDR_CD_24': 'VARCHAR', 'CPT_PRCDR_CD_25': 'VARCHAR', 'CPT_PRCDR_CD_26': 'VARCHAR', 'CPT_PRCDR_CD_27': 'VARCHAR', 'CPT_PRCDR_CD_28': 'VARCHAR', 'CPT_PRCDR_CD_29': 'VARCHAR', 'CPT_PRCDR_CD_30': 'VARCHAR', 'CPT_PRCDR_CD_31': 'VARCHAR', 'CPT_PRCDR_CD_32': 'VARCHAR', 'CPT_PRCDR_CD_33': 'VARCHAR', 'CPT_PRCDR_CD_34': 'VARCHAR', 'CPT_PRCDR_CD_35': 'VARCHAR'}, ignore_errors=true)
Loading

0 comments on commit 6dc81b7

Please sign in to comment.