Skip to content

Commit

Permalink
modification according AE output with SOC & PT
Browse files Browse the repository at this point in the history
  • Loading branch information
NusaibahIbr committed Nov 25, 2024
1 parent e5447f8 commit d8c1d68
Showing 1 changed file with 66 additions and 142 deletions.
208 changes: 66 additions & 142 deletions R/compare_soc.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,187 +2,111 @@
# output: if difference, dataframe with each row details of difference
# if no difference, dataframe displaying table R and table SAS side by side.
# author: Nusaibah

compare_soc <- function(tabR,tabSAS){
colnames(tabR)=gsub("all_patients_G","grade",colnames(tabR))
tab=list()
if (nrow(tabR)!=nrow(tabSAS)){#warning("Different number of grade levels")
if (nrow(tabR)!=nrow(tabSAS)){#warning("Different number of SOC")
if (nrow(tabR)<nrow(tabSAS)){

tab$level=c(tab$level,"Mineur")
tab$grade=c(tab$grade,tabSAS[which(!(tabSAS$grade%in% tabR$grade)),"grade"])
tab$soc=c(tab$soc,tabSAS[which(!(tabSAS$soc%in% tabR$soc)),"soc"])
tab$table=c(tab$table,"R")
tab$main=c(tab$main,"Missing grade level")

# tab$Mineur=c(tab$Mineur,
# paste0("Grade ",tabSAS[!(which(tabSAS$grade%in% tabR$grade)),"grade"], " missing in R table"))
if ("term_"%in% c(colnames(tabR),colnames(tabSAS))){
tab$term=c(tab$term,tabSAS[which(!(tabSAS$term_%in% tabR$term_)),"term_"])
tab$main=c(tab$main,"Missing PT item")
}else{

tab$main=c(tab$main,"Missing SOC item")
}
}else if (nrow(tabR)>nrow(tabSAS)){
tab$level=c(tab$level,"Mineur")
tab$grade=c(tab$grade,tabR[which(!(tabR$grade%in% tabSAS$grade)),"grade"])
tab$soc=c(tab$soc,tabR[which(!(tabR$soc%in% tabSAS$soc)),"soc"])
tab$table=c(tab$table,"SAS")
tab$main=c(tab$main,"Missing grade level")

# tab$Mineur=c(tab$Mineur,
# paste0("Grade ",tabR[!(which(tabR$grade%in% tabSAS$grade)),"grade"], " missing in SAS table"))
if ("term_"%in% c(colnames(tabR),colnames(tabSAS))){
tab$term=c(tab$term,tabR[which(!(tabR$term_%in% tabSAS$term_)),"term_"])
tab$main=c(tab$main,"Missing PT item")
}else{

tab$main=c(tab$main,"Missing SOC item")
}
}
}

if (ncol(tabR)!=ncol(tabSAS)){#warning("Different number of arm")
if (ncol(tabR)!=ncol(tabSAS)){#warning("Different number of column (arm or grade)")
if (ncol(tabR)<ncol(tabSAS)){

tab$level=c(tab$level,"Mineur")
tab$grade=c(tab$grade,tabSAS[!(which(tabSAS$grade%in% tabR$grade)),"grade"])
tab$grade=c(tab$grade,colnames(tabSAS)[-(which(colnames(tabSAS) %in% colnames(tabR)))])
tab$table=c(tab$table,"R")
tab$main=c(tab$main,"Missing arm")
tab$main=c(tab$main,"Missing grade")
tab$valueR=c(tab$valueR,NA)
tab$valueSAS=c(tab$valueSAS,"Filled")



# tab$Mineur=c(tab$Mineur,
# paste0("Grade ",tabSAS[!(which(tabSAS$grade%in% tabR$grade)),"grade"], " missing in R table"))

}else if (nrow(tabR)>nrow(tabSAS)){
}else if (ncol(tabR)>ncol(tabSAS)){

tab$level=c(tab$level,"Mineur")
tab$grade=c(tab$grade,tabR[!(which(tabR$grade%in% tabSAS$grade)),"grade"])
tab$grade=c(tab$grade,colnames(tabR)[-(which(colnames(tabR) %in% colnames(tabSAS)))])
tab$table=c(tab$table,"SAS")
tab$main=c(tab$main,"Missing arm")
tab$main=c(tab$main,"Missing grade")
tab$valueR=c(tab$valueR,"Filled")
tab$valueSAS=c(tab$valueSAS,NA)

# tab$Mineur=c(tab$Mineur,
# paste0("Grade ",tabR[!(which(tabR$grade%in% tabSAS$grade)),"grade"], " missing in SAS table"))

}}
if (all(dim(tabR)==dim(tabSAS)# & tabR$grade==tabSAS$grade
)){
# warning("Check: same dimension of tables & same grade") # à fusionner avec la condition similaire mais grade different
df=tabR%>%arrange(grade)%>%full_join(tabSAS,by="grade",suffix = c(".r",".sas"))

indice=df%>%
filter(grade %in% rbind(setdiff(tabR,tabSAS),setdiff(tabSAS,tabR))$grade)

indice2=which(df[,paste0(tabR%>%select(-grade)%>%colnames(),paste=".r")]!=df[,paste0(tabSAS%>%select(-grade)%>%colnames(),paste=".sas")],
arr.ind=TRUE)
indice2[,"col"]=indice2[,"col"]+1 # parce qu'on avait retiré le grade

if (all(dim(tabR)==dim(tabSAS)) | all(is.na(tabR[-(which(colnames(tabR) %in% colnames(tabSAS)))])) ){# warning("Check: same dimension of tables & same grade")
tabR=tabR[(which(colnames(tabR) %in% colnames(tabSAS)))]


if ("term_"%in% c(colnames(tabR),colnames(tabSAS))){
df=tabR%>%arrange(soc,term_)%>%
pivot_longer(-c("soc","term_"),names_to = "grade",values_to = "count")%>%
mutate(table="R")%>%
full_join(tabSAS%>%
pivot_longer(-c("soc","term_"),names_to = "grade",values_to = "count")%>%
mutate(table="SAS"),
by=c("soc","term_","grade"),
suffix = c(".r",".sas"))
indice=df[which(df$count.r!=df$count.sas | (is.na(df$count.r) & !is.na(df$count.sas)) |
(!is.na(df$count.r) & is.na(df$count.sas))
,arr.ind = T),]

}else{df=tabR%>%arrange(soc)%>%full_join(tabSAS,by="soc",suffix = c(".r",".sas"))


df=tabR%>%arrange(soc)%>%
pivot_longer(-c("soc"),names_to = "grade",values_to = "count")%>%
mutate(table="R")%>%
full_join(tabSAS%>%
pivot_longer(-c("soc"),names_to = "grade",values_to = "count")%>%
mutate(table="SAS"),
by=c("soc","grade"),
suffix = c(".r",".sas"))
indice=df[which(df$count.r!=df$count.sas | (is.na(df$count.r) & !is.na(df$count.sas)) |
(!is.na(df$count.r) & is.na(df$count.sas))
,arr.ind = T),]
}
if (nrow(indice)!=0){
# print(indice)
# warning(paste0("Comparison result: Warning! Different outputs.",
# nrow(indice)," mismatching between the two tables. Above, the indices."))
for (i in 1:nrow(indice)){
if (is.na(indice[i,"N1.r"])){

tab$level=c(tab$level,"Mineur")
tab$grade=c(tab$grade,indice[i,"grade"])
tab$table=c(tab$table,"R")
tab$main=c(tab$main,"Missing grade level")
tab$valueR=c(tab$valueR,NA)
tab$valueSAS=c(tab$valueSAS,paste0(indice[i,"N1.sas"],"(",indice[i,"pct1.sas"],"%)"))



# tab$Mineur=c(tab$Mineur,
# paste0("Missing grade ",indice[i,"grade"]," in table R"))
}else if (is.na(indice[i,"N1.sas"])){
tab$level=c(tab$level,"Mineur")
tab$grade=c(tab$grade,indice[i,"grade"])
tab$table=c(tab$table,"SAS")
tab$main=c(tab$main,"Missing grade level")

tab$valueR=c(tab$valueR,paste0(indice[i,"N1.r"],"(",indice[i,"pct1.r"],"%)"))
tab$valueSAS=c(tab$valueSAS,NA)



# tab$Mineur=c(tab$Mineur,
# paste0("Missing grade ",indice[i,"grade"]," in table SAS"))
}else if(indice2[i,"col"]%%2==0){

tab$level=c(tab$level,"MAJEUR")
tab$grade=c(tab$grade,df[indice2[i,"row"],"grade"])
tab$table=c(tab$table,"Both")
tab$main=c(tab$main,"Different number")
tab$valueR=c(tab$valueR,tabR[indice2[i,"row"],indice2[i,"col"]])
tab$valueSAS=c(tab$valueSAS,tabSAS[indice2[i,"row"],indice2[i,"col"]])



# tab$MAJEUR=c(tab$MAJEUR,
# paste0("Grade ",df[indice[i,"row"],"grade"],
# ". Different number: table SAS N=",tabSAS[indice[i,"row"],indice[i,"col"]], "; table R N=", tabR[indice[i,"row"],indice[i,"col"]]))
}else if (indice2[i,"col"]%%2==1){


tab$level=c(tab$level,"MAJEUR")
tab$grade=c(tab$grade,df[indice2[i,"row"],"grade"])
tab$table=c(tab$table,"Both")
tab$main=c(tab$main,"Different percentage")
tab$valueR=c(tab$valueR,tabR[indice2[i,"row"],indice2[i,"col"]])
tab$valueSAS=c(tab$valueSAS,tabSAS[indice2[i,"row"],indice2[i,"col"]])




# tab$MAJEUR=c(tab$MAJEUR,
# paste0("Grade ",df[indice[i,"row"],"grade"],
# ". Different percentage: table SAS ",tabSAS[indice[i,"row"],indice[i,"col"]], "%; table R ", tabR[indice[i,"row"],indice[i,"col"]],"%."))
}

tab =indice%>%
mutate(level="MAJEUR",
table="Both",
main="Different value")%>%
dplyr::rename(valueR="count.r",
valueSAS="count.sas")%>%
select(-c("table.r","table.sas"))

}
}



# else if(all(dim(tabR)==dim(tabSAS)) & !all(tabR$grade==tabSAS$grade)){
#
#
# warning("Check: same dimension of tables BUT missing grade(s)")
# df=tabR%>%arrange(grade)%>%full_join(tabSAS,by="grade",suffix = c(".r",".sas"))
#
#
# indice=df%>%
# filter(grade %in% c(setdiff(tabR$grade,tabSAS$grade),setdiff(tabSAS$grade,tabR$grade)))
#
#
# if (nrow(indice)!=0){
# print(indice)
# warning(paste0("Comparison result: Warning! Different outputs. ",
# nrow(indice)," mismatching between the two tables. Above, the indices."))
# for (i in 1:nrow(indice)){
#
# if (is.na(indice[i,"N1.r"])){
#
# tab$level=c(tab$level,"Mineur")
# tab$grade=c(tab$grade,indice[i,"grade"])
# tab$table=c(tab$table,"R")
# tab$main=c(tab$main,"Missing grade levels")
#
#
# # tab$Mineur=c(tab$Mineur,
# # paste0("Missing grade ",indice[i,"grade"]," in table R"))
# }else if (is.na(indice[i,"N1.sas"])){
# tab$level=c(tab$level,"Mineur")
# tab$grade=c(tab$grade,indice[i,"grade"])
# tab$table=c(tab$table,"SAS")
# tab$main=c(tab$main,"Missing grade levels")
#
#
#
# # tab$Mineur=c(tab$Mineur,
# # paste0("Missing grade ",indice[i,"grade"]," in table SAS"))
# }
#
# }
#
#
# }
#
}else{ # warning("Comparison result: same outputs")
tab=cbind(data.frame("R table"=""),tabR,data.frame("SAS table"=""),tabSAS)
}else{ # warning("Comparison result: same outputs")
tab=cbind(data.frame("R table"=""),tabR,data.frame("SAS table"=""),tabSAS)
}
tab=as.data.frame(tab)
return(tab)
}


0 comments on commit d8c1d68

Please sign in to comment.