Skip to content

Commit

Permalink
Merge pull request #369 from sasjs/368-enable-filter-by-variable-name…
Browse files Browse the repository at this point in the history
…-in-mp_filter-series

feat: enable filter by variable name in mp filter series
  • Loading branch information
allanbowe authored Dec 31, 2023
2 parents ec14b9c + c874b31 commit 6da578e
Show file tree
Hide file tree
Showing 3 changed files with 225 additions and 25 deletions.
111 changes: 99 additions & 12 deletions all.sas
Original file line number Diff line number Diff line change
Expand Up @@ -6377,15 +6377,14 @@ drop table &ds1, &ds2;
/**
* Sanitise the values based on valid value lists, then strip out
* quotes, commas, periods and spaces.
* Only numeric values should remain
*/
%local reason_cd nobs;
%let nobs=0;
data &outds;
/*length GROUP_LOGIC SUBGROUP_LOGIC $3 SUBGROUP_ID 8 VARIABLE_NM $32
OPERATOR_NM $10 RAW_VALUE $4000;*/
set &inds;
length reason_cd $4032 vtype $1 vnum dsid 8 tmp $4000;
set &inds end=last;
length reason_cd $4032 vtype vtype2 $1 vnum dsid 8 tmp $4000;
drop tmp;

/* quick check to ensure column exists */
Expand All @@ -6401,7 +6400,8 @@ data &outds;
end;

/* need to open the dataset to get the column type */
dsid=open("&targetds","i");
retain dsid;
if _n_=1 then dsid=open("&targetds","i");
if dsid>0 then do;
vnum=varnum(dsid,VARIABLE_NM);
if vnum<1 then do;
Expand All @@ -6411,11 +6411,19 @@ data &outds;
call symputx('reason_cd',reason_cd,'l');
call symputx('nobs',_n_,'l');
output;
return;
goto endstep;
end;
/* now we can get the type */
else vtype=vartype(dsid,vnum);
end;
else do;
REASON_CD=cats("Could not open &targetds");
putlog REASON_CD= dsid=;
call symputx('reason_cd',reason_cd,'l');
call symputx('nobs',_n_,'l');
output;
stop;
end;

/* closed list checks */
if GROUP_LOGIC not in ('AND','OR') then do;
Expand Down Expand Up @@ -6450,15 +6458,40 @@ data &outds;
end;

/* special missing logic */
if vtype='N'
and OPERATOR_NM in ('=','>','<','<=','>=','NE','GE','LE')
and cats(upcase(raw_value)) in (
if vtype='N' & OPERATOR_NM in ('=','>','<','<=','>=','NE','GE','LE') then do;
if cats(upcase(raw_value)) in (
'.','.A','.B','.C','.D','.E','.F','.G','.H','.I','.J','.K','.L','.M','.N'
'.N','.O','.P','.Q','.R','.S','.T','.U','.V','.W','.X','.Y','.Z','._'
)
then do;
/* valid numeric - exit data step loop */
return;
then do;
/* valid numeric - exit data step loop */
return;
end;
else if subpad(upcase(raw_value),1,1) in (
'A','B','C','D','E','F','G','H','I','J','K','L','M','N'
'N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_'
)
then do;
/* check if the raw_value contains a valid variable NAME */
vnum=varnum(dsid,subpad(raw_value,1,32));
if vnum>0 then do;
/* now we can get the type */
vtype2=vartype(dsid,vnum);
/* check type matches */
if vtype2=vtype then do;
/* valid target var - exit loop */
return;
end;
else do;
REASON_CD=cats("Compared Type (",vtype2,") is not (",vtype,")");
putlog REASON_CD= dsid=;
call symputx('reason_cd',reason_cd,'l');
call symputx('nobs',_n_,'l');
output;
goto endstep;
end;
end;
end;
end;

/* special logic */
Expand All @@ -6480,6 +6513,32 @@ data &outds;
if vtype='N' then do i=1 to countc(raw_value1, ',')+1;
tmp=scan(raw_value1,i,',');
if cats(tmp) ne '.' and input(tmp, ?? 8.) eq . then do;
if OPERATOR_NM ='BETWEEN' and subpad(upcase(tmp),1,1) in (
'A','B','C','D','E','F','G','H','I','J','K','L','M','N'
'N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_'
)
then do;
/* check if the raw_value contains a valid variable NAME */
/* is not valid syntax for IN or NOT IN */
vnum=varnum(dsid,subpad(tmp,1,32));
if vnum>0 then do;
/* now we can get the type */
vtype2=vartype(dsid,vnum);
/* check type matches */
if vtype2=vtype then do;
/* valid target var - exit loop */
return;
end;
else do;
REASON_CD=cats("Compared Type (",vtype2,") is not (",vtype,")");
putlog REASON_CD= dsid=;
call symputx('reason_cd',reason_cd,'l');
call symputx('nobs',_n_,'l');
output;
goto endstep;
end;
end;
end;
REASON_CD='Non Numeric value provided';
putlog REASON_CD= OPERATOR_NM= raw_value= raw_value1= ;
call symputx('reason_cd',reason_cd,'l');
Expand All @@ -6504,14 +6563,42 @@ data &outds;

/* output records that contain values other than digits and spaces */
if notdigit(compress(raw_value3,' '))>0 then do;
if vtype='C' and subpad(upcase(raw_value),1,1) in (
'A','B','C','D','E','F','G','H','I','J','K','L','M','N'
'N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_'
)
then do;
/* check if the raw_value contains a valid variable NAME */
vnum=varnum(dsid,subpad(raw_value,1,32));
if vnum>0 then do;
/* now we can get the type */
vtype2=vartype(dsid,vnum);
/* check type matches */
if vtype2=vtype then do;
/* valid target var - exit loop */
return;
end;
else do;
REASON_CD=cats("Compared Char Type (",vtype2,") is not (",vtype,")");
putlog REASON_CD= dsid=;
call symputx('reason_cd',reason_cd,'l');
call symputx('nobs',_n_,'l');
output;
goto endstep;
end;
end;
end;

putlog raw_value3= $hex32.;
REASON_CD=cats('Invalid RAW_VALUE:',raw_value);
putlog REASON_CD= raw_value= raw_value1= raw_value2= raw_value3=;
putlog (_all_)(=);
call symputx('reason_cd',reason_cd,'l');
call symputx('nobs',_n_,'l');
output;
end;

endstep:
if last then rc=close(dsid);
run;


Expand Down
111 changes: 99 additions & 12 deletions base/mp_filtercheck.sas
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,14 @@
/**
* Sanitise the values based on valid value lists, then strip out
* quotes, commas, periods and spaces.
* Only numeric values should remain
*/
%local reason_cd nobs;
%let nobs=0;
data &outds;
/*length GROUP_LOGIC SUBGROUP_LOGIC $3 SUBGROUP_ID 8 VARIABLE_NM $32
OPERATOR_NM $10 RAW_VALUE $4000;*/
set &inds;
length reason_cd $4032 vtype $1 vnum dsid 8 tmp $4000;
set &inds end=last;
length reason_cd $4032 vtype vtype2 $1 vnum dsid 8 tmp $4000;
drop tmp;

/* quick check to ensure column exists */
Expand All @@ -110,7 +109,8 @@ data &outds;
end;

/* need to open the dataset to get the column type */
dsid=open("&targetds","i");
retain dsid;
if _n_=1 then dsid=open("&targetds","i");
if dsid>0 then do;
vnum=varnum(dsid,VARIABLE_NM);
if vnum<1 then do;
Expand All @@ -120,11 +120,19 @@ data &outds;
call symputx('reason_cd',reason_cd,'l');
call symputx('nobs',_n_,'l');
output;
return;
goto endstep;
end;
/* now we can get the type */
else vtype=vartype(dsid,vnum);
end;
else do;
REASON_CD=cats("Could not open &targetds");
putlog REASON_CD= dsid=;
call symputx('reason_cd',reason_cd,'l');
call symputx('nobs',_n_,'l');
output;
stop;
end;

/* closed list checks */
if GROUP_LOGIC not in ('AND','OR') then do;
Expand Down Expand Up @@ -159,15 +167,40 @@ data &outds;
end;

/* special missing logic */
if vtype='N'
and OPERATOR_NM in ('=','>','<','<=','>=','NE','GE','LE')
and cats(upcase(raw_value)) in (
if vtype='N' & OPERATOR_NM in ('=','>','<','<=','>=','NE','GE','LE') then do;
if cats(upcase(raw_value)) in (
'.','.A','.B','.C','.D','.E','.F','.G','.H','.I','.J','.K','.L','.M','.N'
'.N','.O','.P','.Q','.R','.S','.T','.U','.V','.W','.X','.Y','.Z','._'
)
then do;
/* valid numeric - exit data step loop */
return;
then do;
/* valid numeric - exit data step loop */
return;
end;
else if subpad(upcase(raw_value),1,1) in (
'A','B','C','D','E','F','G','H','I','J','K','L','M','N'
'N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_'
)
then do;
/* check if the raw_value contains a valid variable NAME */
vnum=varnum(dsid,subpad(raw_value,1,32));
if vnum>0 then do;
/* now we can get the type */
vtype2=vartype(dsid,vnum);
/* check type matches */
if vtype2=vtype then do;
/* valid target var - exit loop */
return;
end;
else do;
REASON_CD=cats("Compared Type (",vtype2,") is not (",vtype,")");
putlog REASON_CD= dsid=;
call symputx('reason_cd',reason_cd,'l');
call symputx('nobs',_n_,'l');
output;
goto endstep;
end;
end;
end;
end;

/* special logic */
Expand All @@ -189,6 +222,32 @@ data &outds;
if vtype='N' then do i=1 to countc(raw_value1, ',')+1;
tmp=scan(raw_value1,i,',');
if cats(tmp) ne '.' and input(tmp, ?? 8.) eq . then do;
if OPERATOR_NM ='BETWEEN' and subpad(upcase(tmp),1,1) in (
'A','B','C','D','E','F','G','H','I','J','K','L','M','N'
'N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_'
)
then do;
/* check if the raw_value contains a valid variable NAME */
/* is not valid syntax for IN or NOT IN */
vnum=varnum(dsid,subpad(tmp,1,32));
if vnum>0 then do;
/* now we can get the type */
vtype2=vartype(dsid,vnum);
/* check type matches */
if vtype2=vtype then do;
/* valid target var - exit loop */
return;
end;
else do;
REASON_CD=cats("Compared Type (",vtype2,") is not (",vtype,")");
putlog REASON_CD= dsid=;
call symputx('reason_cd',reason_cd,'l');
call symputx('nobs',_n_,'l');
output;
goto endstep;
end;
end;
end;
REASON_CD='Non Numeric value provided';
putlog REASON_CD= OPERATOR_NM= raw_value= raw_value1= ;
call symputx('reason_cd',reason_cd,'l');
Expand All @@ -213,14 +272,42 @@ data &outds;

/* output records that contain values other than digits and spaces */
if notdigit(compress(raw_value3,' '))>0 then do;
if vtype='C' and subpad(upcase(raw_value),1,1) in (
'A','B','C','D','E','F','G','H','I','J','K','L','M','N'
'N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_'
)
then do;
/* check if the raw_value contains a valid variable NAME */
vnum=varnum(dsid,subpad(raw_value,1,32));
if vnum>0 then do;
/* now we can get the type */
vtype2=vartype(dsid,vnum);
/* check type matches */
if vtype2=vtype then do;
/* valid target var - exit loop */
return;
end;
else do;
REASON_CD=cats("Compared Char Type (",vtype2,") is not (",vtype,")");
putlog REASON_CD= dsid=;
call symputx('reason_cd',reason_cd,'l');
call symputx('nobs',_n_,'l');
output;
goto endstep;
end;
end;
end;

putlog raw_value3= $hex32.;
REASON_CD=cats('Invalid RAW_VALUE:',raw_value);
putlog REASON_CD= raw_value= raw_value1= raw_value2= raw_value3=;
putlog (_all_)(=);
call symputx('reason_cd',reason_cd,'l');
call symputx('nobs',_n_,'l');
output;
end;

endstep:
if last then rc=close(dsid);
run;


Expand Down
28 changes: 27 additions & 1 deletion tests/base/mp_filtercheck.test.sas
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@ AND,AND,1,age,=,.A
AND,AND,1,height,<,.B
AND,AND,1,age,IN,"(.a,.b,.)"
AND,AND,1,age,IN,"(.A)"

AND,AND,1,AGE,=,AGE
AND,AND,1,AGE,<,Weight
AND,AND,1,AGE,BETWEEN,"HEIGHT AND WEIGHT"
AND,OR,2,Name,=,name
;;;;
run;

Expand Down Expand Up @@ -204,3 +207,26 @@ run;
outds=work.test_results
)
%let syscc=0;


/* invalid IN value (cannot use var names) */
data work.inds;
infile datalines4 dsd;
input GROUP_LOGIC:$3. SUBGROUP_LOGIC:$3. SUBGROUP_ID:8. VARIABLE_NM:$32.
OPERATOR_NM:$10. RAW_VALUE:$4000.;
datalines4;
AND,AND,1,AGE,NOT IN,"(height, age)"
;;;;
run;

%mp_filtercheck(work.inds,
targetds=work.class,
outds=work.badrecords,
abort=NO
)
%let syscc=0;
%mp_assertdsobs(work.badrecords,
desc=Invalid IN syntax,
test=HASOBS,
outds=work.test_results
)

0 comments on commit 6da578e

Please sign in to comment.