From dd690d52b52efef26d2c3bfebe3eef8bc0cf3d2d Mon Sep 17 00:00:00 2001 From: Nathalie Vialaneix <nathalie.vialaneix@inrae.fr> Date: Wed, 11 Jan 2023 16:32:12 +0100 Subject: [PATCH] finished implementing 'selection' option for plot --- R/SFCB.R | 4 ++- R/plot_functions.R | 83 ++++++++++++++++++++++++++++++++++++---------- man/SFCB.Rd | 4 ++- 3 files changed, 71 insertions(+), 20 deletions(-) diff --git a/R/SFCB.R b/R/SFCB.R index e4da742..8d4cd14 100644 --- a/R/SFCB.R +++ b/R/SFCB.R @@ -272,11 +272,13 @@ print.SFCB <- function(x, ...) { plot.SFCB <- function(x, plot.type = c("dendrogram", "selection", "importance"), shape.imp = c("boxplot", "histogram"), - sel.type = c("importance", "selection")) { + sel.type = c("importance", "selection"), + threshold = "none") { args <- list("x" = x) args$"plot.type" <- match.arg(plot.type) args$"shape.imp" <- match.arg(shape.imp) args$"sel.type" <- match.arg(sel.type) + args$"threshold" <- threshold plot_function <- sprintf("plot_%s", plot.type) plot_function <- eval(as.name(plot_function)) diff --git a/R/plot_functions.R b/R/plot_functions.R index 1fc915a..f0db370 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -35,9 +35,25 @@ plot_importance <- function(x, shape.imp) { return(p) } -plot_selection <- function(x, sel.type) { +plot_selection <- function(x, sel.type, threshold) { is_selected <- "selected" %in% names(x) - if (sel.type == "selection" && !is_selected) + + if (threshold != "none") { + if (!is.numeric(threshold)) stop("'threshold' must be numeric.") + + if (is_selected) + stop(paste("A selection method has already been used: 'threshold' must", + "be 'none'.")) + + if (sel.type != "selection") { + warning(paste("A 'threshold' has passed to the plot function while", + "'sel.type' is not 'selection'. Automatically switching it", + "to 'selection'.")) + sel.type <- "selection" + } + } + + if (sel.type == "selection" && !is_selected && threshold == "none") stop(paste("Variable selection was not used on this result. Choose", "sel.type = 'importance'")) @@ -46,6 +62,29 @@ plot_selection <- function(x, sel.type) { basics <- ncol(x$importances[[1]]) != nrepeats var_names <- names(x$groups[[1]]) + if (threshold != "none") { + if (!basics) { # not basics, 'at' + best <- tapply(x$mse$mse, x$mse$clust, which.min) + x$selected <- mapply(function(a, b) { rownames(a)[a[, b] > threshold] }, + x$importances, best, SIMPLIFY = FALSE) + } else { + best <- tapply(x$mse$mse, x$mse$clust, which.min) + x$selected <- mapply(function(a, b) { + bmean <- paste0("mean", b) + bmean <- rownames(a)[a[, bmean] > threshold] + bsd <- paste0("sd", b) + bsd <- rownames(a)[a[, bsd] > threshold] + bsd <- na.omit(bsd) + variables <- unique(c(bmean, bsd)) + variables <- var_names[var_names %in% variables] + out <- data.frame("variable" = variables, + "in.mean" = variables %in% bmean, + "in.sd" = variables %in% bsd) + return(out) + }, x$importances, best, SIMPLIFY = FALSE) + } + } + if (sel.type == "selection") { # not basics, 'at' if (lobj == 1 & !basics) { dfline <- make_line_df(x$selected[[1]], var_names) @@ -247,22 +286,30 @@ fill_na <- function(dataset, var) { make_line_df <- function(selected, var_names, level = 1) { which_sel <- match(selected, var_names) - diff_which <- diff(which_sel) - starts <- which(diff_which > 1) + 1 - starts <- c(which_sel[1], which_sel[starts]) - ends <- which(diff_which > 1) - ends <- c(which_sel[ends], which_sel[length(which_sel)]) - df <- data.frame("xstart" = starts, "ystart" = rep(level, length(starts)), - "xend" = ends, "yend" = rep(level, length(starts))) - which_sel <- setdiff(seq_along(var_names), which_sel) - diff_which <- diff(which_sel) - starts <- which(diff_which > 1) + 1 - starts <- c(which_sel[1], which_sel[starts]) - ends <- which(diff_which > 1) - ends <- c(which_sel[ends], which_sel[length(which_sel)]) - df2 <- data.frame("xstart" = starts, "ystart" = rep(0, length(starts)), - "xend" = ends, "yend" = rep(0, length(starts))) - df <- rbind(df, df2) + if (length(which_sel) > 0 & length(which_sel) < length(var_names)) { + diff_which <- diff(which_sel) + starts <- which(diff_which > 1) + 1 + starts <- c(which_sel[1], which_sel[starts]) + ends <- which(diff_which > 1) + ends <- c(which_sel[ends], which_sel[length(which_sel)]) + df <- data.frame("xstart" = starts, "ystart" = rep(level, length(starts)), + "xend" = ends, "yend" = rep(level, length(starts))) + which_sel <- setdiff(seq_along(var_names), which_sel) + diff_which <- diff(which_sel) + starts <- which(diff_which > 1) + 1 + starts <- c(which_sel[1], which_sel[starts]) + ends <- which(diff_which > 1) + ends <- c(which_sel[ends], which_sel[length(which_sel)]) + df2 <- data.frame("xstart" = starts, "ystart" = rep(0, length(starts)), + "xend" = ends, "yend" = rep(0, length(starts))) + df <- rbind(df, df2) + } else if (length(which_sel) == 0) { + df <- data.frame("xstart" = 1, "ystart" = 0, "xend" = length(var_names), + "yend" = 0) + } else { + df <- data.frame("xstart" = 1, "ystart" = level, "xend" = length(var_names), + "yend" = level) + } return(df) } diff --git a/man/SFCB.Rd b/man/SFCB.Rd index 48c020f..5459969 100644 --- a/man/SFCB.Rd +++ b/man/SFCB.Rd @@ -15,7 +15,9 @@ \method{plot}{SFCB}( x, plot.type = c("dendrogram", "selection", "importance"), - shape.imp = c("boxplot", "histogram") + shape.imp = c("boxplot", "histogram"), + sel.type = c("importance", "selection"), + threshold = "none" ) } \arguments{ -- GitLab