From 3c985031d53e3f57924f30fceefa10432aa54100 Mon Sep 17 00:00:00 2001
From: Nathalie Vialaneix <nathalie.vialaneix@inrae.fr>
Date: Sat, 11 Mar 2023 15:23:00 +0100
Subject: [PATCH] methods: added the file missing two commits ahead (moved
 methods in a dedicated file)

---
 R/sfcb_methods.R | 273 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 273 insertions(+)
 create mode 100644 R/sfcb_methods.R

diff --git a/R/sfcb_methods.R b/R/sfcb_methods.R
new file mode 100644
index 0000000..1a048b3
--- /dev/null
+++ b/R/sfcb_methods.R
@@ -0,0 +1,273 @@
+# Methods for SFCB-class ####
+#' @title Methods for SFCB objects
+#' @name SFCB-class
+#' @export
+#' @aliases summary.SFCB
+#' @aliases print.SFCB
+#' @aliases plot.SFCB
+#' @aliases extract_at.SFCB
+#' @aliases quality.SFCB
+#' @aliases extract_at
+#' @aliases quality
+#' @aliases SFCB-class
+#' @description Print, plot, manipulate or compute quality for outputs of the
+#' \code{\link{sfcb}} function (\code{SFCB} object)
+#' @param object a \code{SFCB} object
+#' @param x a \code{SFCB} object
+#' @param ... not used
+#' @param plot.type type of the plot. Default to \code{"dendrogram"} (see
+#' Details)
+#' @param sel.type when \code{plot.type == "selection"}, criterion on which to
+#' base the selection. Default to \code{"importance"}
+#' @param threshold numeric. When \code{plot.type == "importance"}, numeric
+#' threshold to perform a selection (if none has been done before for \code{x})
+#' based on thresholding of importance. Default to \code{"none"} in which case
+#' no thresholding is performed
+#' @param shape.imp when \code{plot.type == "importance"}, type of plot to
+#' represent the importance. Default to \code{"boxplot"}
+#' @param quality.crit character vector (length 1 or 2) indicating one or two
+#' quality criteria to display. The values have to be taken in \{\code{"mse"},
+#' \code{"Precision"}, \code{"Recall"}, \code{"ARI"}, \code{"NMI"}\}
+#' @param ... not used
+#' @param at numeric vector. Set of the number of intervals to extract for 
+#' @param ground_truth numeric vector of ground truth. Target variables 
+#' to compute qualities correspond to non-zero entries of this vector
+#' @param threshold numeric value. If not \code{NULL}, selection of variables to
+#' compute qualities is based on a threshold of importance values
+#' \code{extract_at}
+#' @author {Remi Servien, \email{remi.servien@inrae.fr}\cr
+#' Nathalie Vialaneix, \email{nathalie.vialaneix@inrae.fr}}
+#' @details The \code{plot} functions can be used in four different ways to 
+#' extract information from the \code{SFCB} object: \itemize{
+#'  \item \code{plot.type == "dendrogram"} displays the dendrogram obtained at
+#'  the clustering step of the method. Depending on the cases, the dendrogram
+#'  comes with additional information on clusters, variable selections and/or
+#'  importance values;
+#'  \item \code{plot.type == "selection"} displays either the evolution of the
+#'  importance for the simulation with the best (smallest) MSE for each time
+#'  step in the range of the functional predictor or the evolution of the
+#'  selected intervals along the whole range of the functional prediction also 
+#'  for the best MSE;
+#'  \item \code{plot.type == "importance"} displays a summary of the importance
+#'  values over the whole range of the functional predictor and for the 
+#'  different experiments. This summary can take the form of a boxplot or of
+#'  an histogram;
+#'  \item \code{plot.type == "quality"} displays one or two quality distribution
+#'  with respect to the different experiments and different number of intervals.
+#' }
+#' @seealso \code{\link{sfcb}}
+#' @examples 
+#' data(truffles)
+#' out1 <- sfcb(rainfall, truffles, group.method = "adjclust", 
+#'              summary.method = "pls", selection.method = "relief")
+#' summary(out1)
+#' 
+#' \dontrun{
+#' plot(out1)
+#' plot(out1, plot.type = "selection")
+#' plot(out1, plot.type = "importance")
+#' }
+#' 
+#' out2 <- sfcb(rainfall, truffles, group.method = "adjclust", 
+#'              summary.method = "basics", selection.method = "none",
+#'              range.at = c(5, 7))
+#' out3 <- extract_at(out2, at = 6)
+#' summary(out3)
+#' 
+summary.SFCB <- function(object, ...) {
+  cat("\nCall:\n")
+  print(object$call)
+  cat("\nSFCB object with:\n")
+  lobj <- length(object$groups)
+  if (lobj == 1) {
+    cat("    -", length(unique(object$groups[[1]])), "interval(s)\n")
+  } else {
+    minl <- length(unique(object$groups[[1]]))
+    maxl <- length(unique(object$groups[[lobj]]))
+    cat("    -", minl, "-", maxl, "interval(s)\n")
+  }
+  if ("selected" %in% names(object)) {
+    if (!is.null(ncol(object$selected[[1]]))) {
+      selected <- lapply(object$selected, function(alist) {
+        apply(alist[, 2:3], 1, sum)
+      })
+    } else selected <- object$selected
+    if (lobj == 1) {
+      selected <- object$groups[[1]][selected[[1]]]
+      cat("    -", length(unique(selected)), "selected interval(s)\n")
+    } else {
+      selected <- mapply(function(a, b) a[b], object$groups, selected)
+      selected <- sapply(selected, function(x) length(unique(x)))
+      cat("    -", min(selected), "-", max(selected), "selected interval(s)\n")
+    }
+  }
+  cat("    -", nrow(object$mse) / lobj, "repeats\n")
+  cat("    - MSE ranging in [", min(object$mse$mse), ", ", max(object$mse$mse),
+      "]\n", sep = "")
+  if ("computational.times" %in% names(object)) {
+    cat("    - computational time (total):", sum(object$computational.times), 
+        "(seconds)\n\n")
+  }
+  if ("quality" %in% names(object)) {
+    cat("    - precision wrt ground truth in [", 
+        min(object$quality$Precision, na.rm = TRUE), ", ", 
+        max(object$quality$Precision, na.rm = TRUE), "]\n", sep = "")
+    cat("    - recall wrt ground truth in [", 
+        min(object$quality$Recall, na.rm = TRUE), ", ", 
+        max(object$quality$Recall, na.rm = TRUE), "]\n", sep = "")
+    if ("threshold" %in% names(object))
+      cat("    for threshold: ", object$threshold, "\n", sep = "")
+    cat("\n")
+  }
+  return(invisible(NULL))
+}
+
+#' @export
+#' @rdname SFCB-class
+print.SFCB <- function(x, ...) {
+  summary(x)
+}
+
+#' @rdname SFCB-class
+#' @export
+plot.SFCB <- function(x, ...,
+                      plot.type = c("dendrogram", "selection", "importance",
+                                    "quality"),
+                      sel.type = c("importance", "selection"),
+                      threshold = "none", shape.imp = c("boxplot", "histogram"),
+                      quality.crit = "mse") {
+  args <- list("x" = x)
+  plot.type <- match.arg(plot.type)
+  args$"shape.imp" <- match.arg(shape.imp)
+  args$"sel.type" <- match.arg(sel.type)
+  args$"threshold" <- threshold
+  args$"quality.crit" <- quality.crit
+  
+  plot_function <- sprintf("plot_%s", plot.type)
+  plot_function <- eval(as.name(plot_function))
+  args <- args[names(formals(plot_function))]
+  
+  p <- do.call("plot_function", args)
+  if (plot.type != "dendrogram") return(p)
+  
+  return(invisible())
+}
+
+#' @rdname SFCB-class
+#' @export
+extract_at <- function(object, at) {
+  UseMethod("extract_at")
+}
+
+#' @export
+extract_at.SFCB <- function(object, at) {
+  
+  extract_call <- match.call()
+  
+  if (!is.numeric(at)) stop("'at' must be a numeric vector")
+  orig_at <- names(object$groups)
+  at <- sort(at)
+  selected <- match(at, orig_at)
+  if (anyNA(selected)) {
+    stop("'at' must be included in the range of tested groups for 'object'.")
+  }
+  
+  out <- object
+  out$groups <- out$groups[selected]
+  out$summaries <- out$summaries[selected]
+  out$mse <- out$mse[out$mse$clust %in% at, ]
+  out$importances <- out$importances[selected]
+  if ("computational.times" %in% names(out)) out$"computational.times" <- NULL
+  if ("selected" %in% names(out)) out$selected <- out$selected[selected]
+  
+  out$call <- extract_call
+  
+  return(out)
+}
+
+#' @rdname SFCB-class
+#' @export
+quality <- function(object, ground_truth, threshold = NULL) {
+  UseMethod("quality")
+}
+
+#' @export
+#' @importFrom aricode ARI NID NMI AMI NVI
+quality.SFCB <- function(object, ground_truth, threshold = NULL) {
+  
+  if (length(ground_truth) != length(object$groups[[1]]))
+    stop(paste("'ground_truth' must have a length identical to initial number ",
+               "of variables."))
+
+  if (!is.null(threshold) && !is.numeric(threshold) && threshold <= 0) 
+    stop("'threshold' must be a positive number or NULL.")
+  
+  if (is.null(threshold) & !("selected" %in% names(object))) 
+    stop(paste("No selected interval in this 'SFCB' object and no 'threshold'",
+               "provided. Can not compute quality measures..."))
+  
+  all_at <- unique(object$mse$clust)
+  basics <- ncol(object$importances[[1]]) != (nrow(object$mse) / length(all_at))
+  out_obj <- object
+  
+  ground_truth_f <- sapply(ground_truth != 0, ifelse, yes = 1, no = 0)
+  ground_truth_f <- factor(ground_truth_f)
+  range_names <- names(object$groups)
+  var_names <- names(object$groups[[1]])
+  if (!is.null(threshold)) {
+    if (basics) {
+      object$importances <- lapply(object$importances, function(alist) {
+        colmean <- grep("mean", colnames(alist))
+        colsd <- grep("sd", colnames(alist))
+        impm <- lapply(seq_along(colmean), function(ind) {
+          pmax(alist[, colmean[ind]], alist[, colsd[ind]], na.rm = TRUE)
+        })
+        impm <- Reduce(cbind, impm)
+        rownames(impm) <- rownames(alist)
+        return(impm)
+      })
+    }
+    nbrep <- ncol(object$importances[[1]])
+    out <- lapply(object$importances, function(imp) {
+      quals <-  lapply(1:ncol(imp), 
+                       function(col) rownames(imp)[imp[, col] > threshold])
+      quals <- lapply(quals, compute_qualities, var_names, ground_truth_f)
+      quals <- Reduce(rbind, quals)
+      return(quals)
+    })
+    out <- data.frame("clust" = rep(range_names, sapply(out, nrow)),
+                      "repeats" = rep(1:nbrep, length(all_at)),
+                      Reduce(rbind, out))
+    rownames(out) <- NULL
+  } else {
+    if (basics) {
+      object$selected <- lapply(object$selected, function(alist) {
+        out <- alist$"in.mean" | alist$"in.sd"
+        out <- alist$variable[out]
+        return(out)
+      })
+    }
+    out <- lapply(object$selected, compute_qualities, var_names = var_names, 
+                  ground_truth_f = ground_truth_f)
+    out <- data.frame("clust" = range_names, Reduce(rbind, out))
+    rownames(out) <- NULL
+  }
+  
+  out_obj$truth <- ground_truth
+  out_obj$quality <- out
+  if (!is.null(threshold)) out_obj$threshold <- threshold
+  return(out_obj)
+}
+
+compute_qualities <- function(asel, var_names, ground_truth_f) {
+  sel_f <- sapply(var_names %in% asel, ifelse, yes = 1, no = 0)
+  sel_f <- factor(sel_f)
+  nbpos <- sum(sel_f == "1" & ground_truth_f == "1")
+  tpr <- nbpos / sum(sel_f == "1")
+  recall <- nbpos / sum(ground_truth_f == "1")
+  ari <- ARI(sel_f, ground_truth_f)
+  nmi <- NMI(sel_f, ground_truth_f)
+  out <- data.frame("Precision" = tpr, "Recall" = recall, "ARI" = ari, 
+                    "NMI" = nmi)
+  return(out)
+}
-- 
GitLab