Skip to content

Commit

Permalink
Address issue #9
Browse files Browse the repository at this point in the history
- Implement a function to remove constant metrics and categorical metrics
- Implement a function to get correlation groups according to VarClus
  • Loading branch information
Jirayus committed Mar 25, 2019
1 parent a692a3e commit 16baa39
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 19 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

S3method(plot,summarizedvc)
export(AutoSpearman)
export(check.constant.categorical)
export(fit)
export(get.importance)
export(loadDefectDataset)
Expand Down
27 changes: 8 additions & 19 deletions R/AutoSpearman.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' @param vif.threshold a numeric for a threshold of VIF score (default = 5)
#' @param verbose TRUE for printing
#' @keywords AutoSpearman
#' @examples
#' @examples
#' Data = loadDefectDataset('groovy-1_5_7','jira')
#' AutoSpearman(dataset = Data$data, metrics = Data$indep)
#' @export
Expand All @@ -22,25 +22,14 @@ AutoSpearman <-
spearman.threshold = 0.7,
vif.threshold = 5,
verbose = F) {
# Check constant metrics and categorical metrics
metrics <- check.constant.categorical(dataset, metrics)

# Check constant metrics
constant <- apply(dataset[, metrics], 2, function(x) max(x) == min(x))
constant <- names(constant[constant == TRUE])
# Remove constant metrics
if(length(constant) > 0){
metrics <- metrics[!metrics %in% constant]
}

# Check categorical metrics
category <- sapply(dataset[, metrics], class)
category <- names(category[category=="character"])
# Remove categorical metrics from Spearman Analysis
if(length(category) > 0){
metrics <- metrics[!metrics %in% category]
}
spearman.metrics <-
get.automated.spearman(dataset, metrics, spearman.threshold, verbose)
AutoSpearman.metrics <-
stepwise.vif(dataset, spearman.metrics, vif.threshold, verbose)

spearman.metrics <- get.automated.spearman(dataset, metrics, spearman.threshold, verbose)
AutoSpearman.metrics <- stepwise.vif(dataset, spearman.metrics, vif.threshold, verbose)

return(c(AutoSpearman.metrics,category))
return(AutoSpearman.metrics)
}
32 changes: 32 additions & 0 deletions R/check.constant.categorical.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#' Check for constant metrics and categorical metrics
#'
#' Filters a set of metric names, dropping every metric whose column in
#' \code{dataset} holds a single repeated value (constant) and every metric
#' whose column is a character vector (categorical).
#'
#' @param dataset a data frame for data
#' @param metrics a character or a vector of characters for independent variables
#' @return a character vector with the elements of \code{metrics} that are
#'   neither constant nor of type character, in their original order
#' @keywords constant categorical
#' @examples
#' Data = loadDefectDataset('groovy-1_5_7','jira')
#' check.constant.categorical(dataset = Data$data, metrics = Data$indep)
#' @export
check.constant.categorical <-
  function(dataset,
           metrics) {
    # drop = FALSE keeps a data frame even when only one metric is supplied;
    # without it the selection collapses to a plain vector.
    columns <- dataset[, metrics, drop = FALSE]

    # Check constant metrics column by column. Iterating over the data frame
    # (instead of apply(), which coerces it to a matrix) keeps each column's
    # own type, so numeric values are compared exactly rather than as
    # formatted strings. A column containing NA is never reported as constant.
    constant <- vapply(columns, function(x) {
      !anyNA(x) && length(unique(x)) == 1L
    }, logical(1))
    # Remove constant metrics
    metrics <- metrics[!constant]

    # Check categorical metrics (columns stored as character vectors)
    category <-
      vapply(dataset[, metrics, drop = FALSE], is.character, logical(1))
    # Remove categorical metrics from Spearman Analysis
    metrics <- metrics[!category]

    return(metrics)
  }
4 changes: 4 additions & 0 deletions R/get.vc.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
#' @keywords VarClus

get.vc <- function(dataset, metrics, similarity = 'spearman', varclus.threshold = 0.7){

# Check constant metrics and categorical metrics
metrics <- check.constant.categorical(dataset, metrics)

f <- as.formula(paste("~", paste(metrics, collapse = " + ")))
vc <-
Hmisc::varclus(f,
Expand Down
30 changes: 30 additions & 0 deletions R/get.vc.correlation.groups.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#' Get correlation groups according to VarClus based on the absolute Spearman correlation coefficients between metrics
#'
#' Runs a variable clustering analysis (\code{Hmisc::varclus}) on the given
#' metrics, cuts the resulting hierarchical clustering tree at height
#' \code{1 - varclus.threshold}, and reports the group each metric falls into.
#' Constant and categorical metrics are removed before the analysis.
#' @param dataset a data frame for data
#' @param metrics a character or a vector of characters for independent variables
#' @param similarity a character for similarity measures (e.g., Spearman rank correlation), default = spearman
#' @param varclus.threshold a numeric for correlation coefficient threshold value
#' @return a data frame with one row per analysed metric and two columns:
#'   \code{metrics} (the metric name) and \code{rank} (the group identifier
#'   assigned by \code{cutree})
#' @importFrom Hmisc varclus
#' @keywords VarClus

get.vc.correlation.groups <- function(dataset, metrics, similarity = 'spearman', varclus.threshold = 0.7){

  # Check constant metrics and categorical metrics
  metrics <- check.constant.categorical(dataset, metrics)

  f <- stats::as.formula(paste("~", paste(metrics, collapse = " + ")))
  vc <-
    Hmisc::varclus(f,
                   similarity = similarity,
                   data = dataset[, metrics, drop = FALSE],
                   trans = "abs")

  # Metrics that join below this height end up in the same group.
  var.clusters <-
    stats::cutree(vc$hclust, h = (1 - varclus.threshold))

  # cutree() names its result after the clustered variables, so the metric
  # names can be read directly; no reshaping helper is required.
  varclus.correlation.groups <-
    data.frame(metrics = names(var.clusters), rank = var.clusters)
  row.names(varclus.correlation.groups) <- NULL

  return(varclus.correlation.groups)
}
22 changes: 22 additions & 0 deletions man/check.constant.categorical.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/get.vc.correlation.groups.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 16baa39

Please sign in to comment.