From e6da3738530ee64ea57edca5b47db89933b4649b Mon Sep 17 00:00:00 2001 From: ^ Date: Tue, 30 Apr 2024 14:04:15 +0100 Subject: [PATCH] fix: more dedup fixes on mp_stripdiffs --- all.sas | 10 +++++++--- base/mp_stripdiffs.sas | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/all.sas b/all.sas index a3620d0..fbe7730 100644 --- a/all.sas +++ b/all.sas @@ -13483,7 +13483,7 @@ create table &ds1 (drop=libref dsn) as /* extract key values only */ %let ds2=%upcase(work.%mf_getuniquename(prefix=mpsd_pks)); create table &ds2 as - select key_hash, + select distinct key_hash, tgtvar_nm, tgtvar_type, coalescec(oldval_char,newval_char) as charval, @@ -13497,9 +13497,9 @@ create table &ds2 as %local pk; data _null_; set &ds2; - by key_hash; + by key_hash processed_dttm; call symputx('pk',catx(' ',symget('pk'),tgtvar_nm),'l'); - if last.key_hash then stop; + if last.processed_dttm then stop; run; %let ds3=%upcase(work.%mf_getuniquename(prefix=mpsd_keychar)); @@ -13521,6 +13521,7 @@ run; %mp_ds2squeeze(&ds3,outds=&ds3) %mp_ds2squeeze(&ds4,outds=&ds4) +/* now merge to get all key values and de-dup */ %let ds5=%upcase(work.%mf_getuniquename(prefix=mpsd_merged)); data &ds5; length key_hash $32 processed_dttm 8; @@ -13528,6 +13529,9 @@ data &ds5; by key_hash; if not missing(key_hash); run; +proc sort data=&ds5 nodupkey; + by &pk; +run; /* join to base table for preliminary stage DS */ proc sql; diff --git a/base/mp_stripdiffs.sas b/base/mp_stripdiffs.sas index 3243d2b..c4e1ed6 100644 --- a/base/mp_stripdiffs.sas +++ b/base/mp_stripdiffs.sas @@ -98,7 +98,7 @@ create table &ds1 (drop=libref dsn) as /* extract key values only */ %let ds2=%upcase(work.%mf_getuniquename(prefix=mpsd_pks)); create table &ds2 as - select key_hash, + select distinct key_hash, tgtvar_nm, tgtvar_type, coalescec(oldval_char,newval_char) as charval, @@ -112,9 +112,9 @@ create table &ds2 as %local pk; data _null_; set &ds2; - by key_hash; + by key_hash processed_dttm; call symputx('pk',catx(' ',symget('pk'),tgtvar_nm),'l'); - if last.key_hash then stop; + if last.processed_dttm then stop; run; %let ds3=%upcase(work.%mf_getuniquename(prefix=mpsd_keychar)); @@ -136,6 +136,7 @@ run; %mp_ds2squeeze(&ds3,outds=&ds3) %mp_ds2squeeze(&ds4,outds=&ds4) +/* now merge to get all key values and de-dup */ %let ds5=%upcase(work.%mf_getuniquename(prefix=mpsd_merged)); data &ds5; length key_hash $32 processed_dttm 8; @@ -143,6 +144,9 @@ data &ds5; by key_hash; if not missing(key_hash); run; +proc sort data=&ds5 nodupkey; + by &pk; +run; /* join to base table for preliminary stage DS */ proc sql;