diff --git a/R/treediff.R b/R/treediff.R index 2532a91f90f4d2d4043d2abb99eabdbd5f521a98..023d7f134029c2ee313ed79add47d45326e7b88d 100644 --- a/R/treediff.R +++ b/R/treediff.R @@ -2,6 +2,9 @@ #' #' @description Perform the treediff test to compare two sets of trees. #' +#' @aliases summary.treediff +#' @aliases print.treeTest +#' #' @details This function compares two sets of trees using a p-value aggregation #' method. The p-values are obtained by the treediff method, as described in #' (Neuvial \emph{et al.}, 2023). @@ -16,8 +19,13 @@ #' different from that of \code{trees1}. #' @param replicates A numeric vector of length 2 with the number of replicates #' for each condition. +#' @param scale Logical. If \code{TRUE}, the trees are all rescalled to have a +#' minimum height equal to 0 (translation) and a maximum height equal to 1 +#' (dilatation). +#' @param order_labels Logical. If \code{TRUE}, take a reference tree and keep +#' track of leave order. #' -#' @return An object of class \code{treeTest} with the following entries: +#' @return An object of class \code{treeTest} with the following entries: #' \itemize{ #' \item{p.value}{ the p-value for the treediff test.} #' \item{statistic}{ the value of the Student's statistic of each leaf pair of @@ -30,7 +38,7 @@ #' conditions.} #' } #' -#' @author Gwendaelle Cardenac\cr +#' @author Gwendaëlle Cardenas\cr #' Marie Chavent \email{marie.chavent@u-bordeaux.fr}\cr #' Sylvain Foissac \email{sylvain.foissac@inrae.fr}\cr #' Pierre Neuvial \email{pierre.neuvial@math.univ-toulouse.fr}\cr @@ -88,7 +96,9 @@ #' ## 4 p-values, one for each cluster #' tree_pvals$p.value -treediff <- function(trees1, trees2, replicates) { +treediff <- function(trees1, trees2, replicates, scale = FALSE, + order_labels = FALSE) { + # Check if `replicates` is numeric vector if (inherits(replicates, "numeric") != TRUE) { stop("`replicates` is not a numeric vector") @@ -101,8 +111,8 @@ treediff <- function(trees1, trees2, replicates) { # Check if the number of clusters is equal for both conditions if (length(trees1) / replicates[1] != length(trees2) / replicates[2]) { - stop(paste("The number of clusters is different between conditions (or", - "`replicates` is not correct).")) + stop("The number of clusters is different between conditions (or ", + "`replicates` is not correct).") } # Check the number of leaves is the same for each cluster @@ -120,14 +130,36 @@ treediff <- function(trees1, trees2, replicates) { # Merge trees from both conditions trees <- c(trees1, trees2) + ## if order_labels is TRUE, take a reference tree and keep track of leave order + if (order_labels) { + ref_order <- trees1[[1]]$labels + labels_perm <- lapply(trees, function(atree) match(ref_order, atree$labels)) + } + # Compute cophenetic distance coph_dist <- sapply(trees, cophenetic, simplify = FALSE) + # normalize + if (scale) { + coph_dist <- normalize_trees(coph_dist) + } + # Convert cophenetic distances to vector - coph_vect <- lapply(coph_dist, function(adist) { - adist <- as.matrix(adist) - return(adist[upper.tri(adist)]) - }) + if (order_labels) { + coph_vect <- mapply(function(adist, anorder) { + adist <- as.matrix(adist) + adist <- adist[anorder, anorder] + return(adist[upper.tri(adist)]) + }, coph_dist, labels_perm) + coph_vect <- lapply(1:ncol(coph_vect), function(col) { + return(as.vector(coph_vect[,col])) + }) + } else { + coph_vect <- lapply(coph_dist, function(adist) { + adist <- as.matrix(adist) + return(adist[upper.tri(adist)]) + }) + } # Compute squeeze factor outs <- compute_squeeze(coph_vect, replicates) @@ -156,13 +188,25 @@ treediff <- function(trees1, trees2, replicates) { return(out) } -print.treeTest <- function(x, prefix = "\t",...){ +#' @export +#' @rdname treediff + +print.treeTest <- function(x, prefix = "\t",...) { cat("\n") cat(strwrap(x$method, prefix = prefix), sep = "\n") cat("\n") cat("data: ", x$data.name, "\n", sep = "") - options("max.print" = 5) - print(colsplit(x$p.value, " ", "p.value :")) + print(colsplit(x$p.value, " ", "p.values :"), max = 5) +} + +#' @method summary treediff +#' @export +#' @rdname treediff + +summary.treediff <- function(object, ...) { + cat("\nSummary\n\n") + cat(" Class : ", class(object),"\n\n") + print(object) } compute_squeeze <- function(dist_coph, replicates) { @@ -261,3 +305,12 @@ compute_pvalue <- function(average_coph, squeezed_var, replicates) { # Return the output data frame return(out) } + +normalize_trees <- function(coph_dist) { + # tree_cophs is a list of 'dist' objects as returned by the function cophenetic + all_minima <- sapply(coph_dist, min) + out <- mapply("-", coph_dist, all_minima, SIMPLIFY = FALSE) + all_maxima <- sapply(out, max) + out <- mapply("/", out, all_maxima, SIMPLIFY = FALSE) + return(out) +}