-
Notifications
You must be signed in to change notification settings - Fork 7
APP-356 Convert POTNTL_DUP_INV_SUM (Potential Duplicate Investigations) #3131
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
60e01c0
4970f48
a8399b9
a62cbfe
c73d401
e7ad140
fdce598
fae8225
fad7088
ba5b29b
b01e674
a7a9b99
1aabe0d
44e6f1c
0679b42
09ffc64
3fab55c
ecd93e6
501788f
f6610c0
f2fd4bb
3349cd9
22764c7
f64f9a9
e375a87
e6359e9
ca04489
7f3f99d
d239bc3
9ba225d
bf6e1a5
688dc51
287be73
3ecaf99
a4a9dce
db9e350
7ad22fe
aee7399
5201f3e
9ea33ba
6c5fa6b
95d38b6
da5bf06
d814b1d
1293fe3
86f46a3
6bb51ec
3a0e74d
955a6b8
756f107
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
-- Migrate the POTNTL_DUP_INV_SUM.SAS library entry to the python runner.
--
-- If the legacy SAS row exists it is retargeted in place; otherwise a fresh
-- row is inserted. The insert is guarded with NOT EXISTS so re-running the
-- script (after the SAS row has already been converted or removed) does not
-- create duplicate 'potntl_dup_inv_sum' rows.

USE [NBS_ODSE]

DECLARE @pyLib VARCHAR(50) = 'potntl_dup_inv_sum'
DECLARE @sasLib VARCHAR(50) = 'POTNTL_DUP_INV_SUM.SAS'
DECLARE @desc VARCHAR(300) = 'Potential Duplicate Investigations - Identifies potential duplicate investigations for the same patient with the
same disease within a user-specified number of days.'

IF EXISTS (SELECT * FROM [dbo].[Report_Library] WHERE UPPER(library_name) = @sasLib)
BEGIN
    -- Legacy SAS entry present: convert it to the python library in place.
    UPDATE [dbo].[Report_Library]
    SET
        library_name = @pyLib,
        runner = 'python',
        desc_txt = @desc,
        last_chg_time = CURRENT_TIMESTAMP,
        last_chg_user_id = 99999999
    WHERE
        UPPER(library_name) = @sasLib;
END
ELSE IF NOT EXISTS (SELECT * FROM [dbo].[Report_Library] WHERE library_name = @pyLib)
BEGIN
    -- No SAS row and no python row yet: create the library entry.
    -- (The NOT EXISTS guard makes the script idempotent; the original
    -- unconditionally inserted here and would duplicate the row on re-run.)
    INSERT INTO [dbo].[Report_Library] (
        library_name,
        desc_txt,
        runner,
        is_builtin_ind,
        add_time,
        add_user_id,
        last_chg_time,
        last_chg_user_id
    ) VALUES (
        @pyLib,
        @desc,
        'python',
        'Y',
        CURRENT_TIMESTAMP,
        99999999,
        CURRENT_TIMESTAMP,
        99999999
    );
END
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| from src.db_transaction import Transaction | ||
| from src.models import ReportResult | ||
|
|
||
|
|
||
def execute(
    trx: Transaction,
    subset_query: str,
    data_source_name: str,
    days_value: None | int,
    **kwargs,
):
    """Potential Duplicate Investigations report.

    Identifies potential duplicate investigations for the same patient,
    with the same disease, within a user-specified number of days.

    Args:
        trx: Open database transaction used to run the report query.
        subset_query: SQL producing the candidate investigation rows.
        data_source_name: Name of the data source (not used by this report;
            kept for the uniform report-runner signature).
        days_value: Maximum day gap between two events for them to count as
            potential duplicates. ``None`` falls back to 3650 days (~10
            years); ``0`` is a legitimate value (same-day only) and is
            honored as-is.
        **kwargs: Absorbs extra runner-supplied options; ignored here.

    Returns:
        ReportResult with a table of potential duplicate investigations.
    """
    # Only substitute the default when the caller did not supply a value;
    # 0 must be treated as a real threshold, not as "missing".
    if days_value is None:
        days_value = 3650
    # Defensive: the threshold is interpolated into the SQL text below, so
    # force it to an int to rule out injection via a non-integer value.
    days_value = int(days_value)

    full_query = f"""
    WITH subset AS ({subset_query})
    -- Capture SQL Server's physical row order
    , source_order AS (
        SELECT
            *,
            ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS sas_row_num
        FROM subset
    )
    , clean_data AS (
        SELECT
            PATIENT_LOCAL_ID,
            PATIENT_FIRST_NAME,
            PATIENT_LAST_NAME,
            PATIENT_DOB,
            INVESTIGATION_LOCAL_ID,
            DISEASE,
            CASE_STATUS,
            EVENT_DATE,
            EVENT_DATE_TYPE,
            MMWR_YEAR,
            NOTIFICATION_STATUS,
            DISEASE_CD,
            sas_row_num
        FROM source_order
        WHERE EVENT_DATE IS NOT NULL
          AND PATIENT_LOCAL_ID IS NOT NULL
          AND DISEASE_CD IS NOT NULL
    )
    -- Calculate days since previous and until next event
    , datediff_calc AS (
        SELECT
            *,
            DATEDIFF(day,
                LAG(EVENT_DATE) OVER (
                    PARTITION BY
                        PATIENT_LOCAL_ID,
                        DISEASE_CD
                    ORDER BY EVENT_DATE, sas_row_num
                ),
                EVENT_DATE
            ) AS days_since_prev,
            DATEDIFF(day,
                EVENT_DATE,
                LEAD(EVENT_DATE) OVER (
                    PARTITION BY PATIENT_LOCAL_ID,
                        DISEASE_CD
                    ORDER BY EVENT_DATE, sas_row_num
                )
            ) AS days_until_next
        FROM clean_data
    )
    -- Count events for each patient and disease to identify potential duplicates
    , event_counts AS (
        SELECT
            PATIENT_LOCAL_ID,
            DISEASE_CD,
            COUNT(*) AS event_count
        FROM clean_data
        GROUP BY PATIENT_LOCAL_ID, DISEASE_CD
    )
    -- Final selection of potential duplicates based on days thresholds
    SELECT
        d.PATIENT_LOCAL_ID AS [Patient Local ID],
        d.PATIENT_FIRST_NAME AS [Patient First Name],
        d.PATIENT_LAST_NAME AS [Patient Last Name],
        d.PATIENT_DOB AS DOB,
        d.INVESTIGATION_LOCAL_ID AS [Investigation Local ID],
        d.DISEASE AS Disease,
        d.CASE_STATUS AS [Case Status],
        d.EVENT_DATE AS [Event Date],
        d.EVENT_DATE_TYPE AS [Event Date Type],
        d.MMWR_YEAR AS [MMWR Year],
        d.NOTIFICATION_STATUS AS [Notification Record Status],
        d.DISEASE_CD AS [Disease Code]
    FROM datediff_calc d
    JOIN event_counts c
        ON d.PATIENT_LOCAL_ID = c.PATIENT_LOCAL_ID
        AND d.DISEASE_CD = c.DISEASE_CD
    WHERE c.event_count > 1
      AND (
            (d.days_since_prev IS NOT NULL AND d.days_since_prev <= {days_value})
         OR (d.days_until_next IS NOT NULL AND d.days_until_next <= {days_value})
      )
    ORDER BY
        d.PATIENT_LOCAL_ID COLLATE Latin1_General_BIN,
        d.DISEASE_CD COLLATE Latin1_General_BIN,
        d.EVENT_DATE,
        d.sas_row_num
    """

    content = trx.query(full_query)

    header = 'Potential Duplicate Investigations'
    subheader = f'Duplicate Investigations Time Frame: {days_value} Days'

    return ReportResult(
        content_type='table', content=content, header=header, subheader=subheader
    )
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ class ReportSpec(BaseModel): | |
| library_name: str = Field(min_length=1) | ||
| data_source_name: str = Field(min_length=1) | ||
| subset_query: str = Field(min_length=1) | ||
| days_value: int | None = None # Specific to potntl_dup_inv_sum | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (thought, nb): Probably not worth addressing at this moment, but if there end up being multiple reports that require unique properties like this, maybe we end up sketching out a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, definitely a good idea to revisit as we work through more translations! |
||
|
|
||
|
|
||
| # column names and values | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -153,6 +153,7 @@ def get_faker_sql(schema_name: str) -> str: | |||
|
|
||||
| # KLUDGE: NULL writing is not always correct | ||||
| result = result.replace(' nan,', ' NULL,') | ||||
| result = result.replace('nan', ' NULL') | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (q, nb): why did we need to add this one? there's a risk that a valid part of a string with |
||||
| result = result.replace(' nan)', ' NULL)') | ||||
| result = result.replace(' <NA>,', ' NULL,') | ||||
| result = result.replace(' <NA>)', ' NULL)') | ||||
|
|
@@ -167,7 +168,7 @@ def get_tables_from_faker(schema_name: str) -> tuple[list[str], list[str]]: | |||
| schema = yaml.safe_load(f.read()) | ||||
|
|
||||
| db_tables = [t['table_name'] for t in schema['tables']] | ||||
| fk_tables = schema['config']['nbs']['fk_tables'] | ||||
| fk_tables = schema['config'].get('nbs', {}).get('fk_tables', []) | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (q, nb): What's this change for?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so if there are no fk tables specified (not always relevant), then we default to empty list |
||||
|
|
||||
| return (db_tables, fk_tables) | ||||
|
|
||||
|
|
@@ -234,6 +235,7 @@ def insert_fake_data( | |||
| with db_transaction(conn_string) as trx: | ||||
| # Tables with foreign keys pointing to the table we want to replace need to | ||||
| # be backed up and cleared out to avoid FK constraint violations | ||||
|
|
||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||
| for fk_table in fk_tables: | ||||
| temp_fk_table = temp_name(fk_table) | ||||
| trx.execute( | ||||
|
|
||||
Uh oh!
There was an error while loading. Please reload this page.