diff --git a/DESCRIPTION b/DESCRIPTION index ebe51cb97b9563163b70851fca93a7d65034bffe..6cb44998bda58e6edb201fbfa4cb85c5b53f589f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,7 +5,7 @@ Authors@R: person("First", "Last", , "first.last@example.com", role = c("aut", "cre"), comment = c(ORCID = "YOUR-ORCID-ID")) Description: What the package does (one paragraph). -Imports: dplyr, limma, stats +Imports: dplyr, limma, reshape2, stats License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a license Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE index 0f83aa016788f2d0a0b5756222042acc216645ab..33c009a882565055865fa9060a8bee8058798e0a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand -export(fisher_mix) +S3method(print,treeTest) +S3method(summary,treediff) export(treediff) importFrom(dplyr,"%>%") importFrom(dplyr,group_by) @@ -9,4 +10,3 @@ importFrom(limma,squeezeVar) importFrom(reshape2,colsplit) importFrom(stats,cophenetic) importFrom(stats,pt) -importFrom(stats,rf) diff --git a/R/treediff.R b/R/treediff.R index 023d7f134029c2ee313ed79add47d45326e7b88d..7e95dcd1b97f39421ef0736905d84e069601a6e4 100644 --- a/R/treediff.R +++ b/R/treediff.R @@ -20,8 +20,7 @@ #' @param replicates A numeric vector of length 2 with the number of replicates #' for each condition. #' @param scale Logical. If \code{TRUE}, the trees are all rescalled to have a -#' minimum height equal to 0 (translation) and a maximum height equal to 1 -#' (dilatation). +#' minimum height equal to 0 and a maximum height equal to 1. #' @param order_labels Logical. If \code{TRUE}, take a reference tree and keep #' track of leave order. #' @@ -60,7 +59,6 @@ #' @importFrom stats cophenetic #' @importFrom stats pt #' -#' #' @examples #' leaves <- c(100, 120, 50, 80) #' @@ -139,7 +137,7 @@ treediff <- function(trees1, trees2, replicates, scale = FALSE, # Compute cophenetic distance coph_dist <- sapply(trees, cophenetic, simplify = FALSE) - # normalize + # Normalize if (scale) { coph_dist <- normalize_trees(coph_dist) } @@ -151,6 +149,8 @@ treediff <- function(trees1, trees2, replicates, scale = FALSE, adist <- adist[anorder, anorder] return(adist[upper.tri(adist)]) }, coph_dist, labels_perm) + + # Convert the result to a list of vectors coph_vect <- lapply(1:ncol(coph_vect), function(col) { return(as.vector(coph_vect[,col])) }) @@ -191,11 +191,17 @@ treediff <- function(trees1, trees2, replicates, scale = FALSE, #' @export #' @rdname treediff -print.treeTest <- function(x, prefix = "\t",...) { +print.treeTest <- function(x, ...) { + + # print method cat("\n") - cat(strwrap(x$method, prefix = prefix), sep = "\n") + cat(strwrap(x$method, prefix = "\t"), sep = "\n") cat("\n") + + # Print the name of the data used in the test cat("data: ", x$data.name, "\n", sep = "") + + # print the first 5 p-values print(colsplit(x$p.value, " ", "p.values :"), max = 5) } @@ -204,9 +210,25 @@ print.treeTest <- function(x, prefix = "\t",...) { #' @rdname treediff summary.treediff <- function(object, ...) { - cat("\nSummary\n\n") - cat(" Class : ", class(object),"\n\n") + + # Print a header for the summary + cat("\nSummary\n") + + # Print the class of the object + cat("\tClass : ", class(object)) + cat("\n") + + # Print the test output print(object) + cat("\n") + + # Print a summary of the p.value + cat("p.value\n") + print(summary(object$p.value)) + cat("\n") + + # Print a summary of the `statistic` and `p.value.indiv` + summary(data.frame("statistic" = object$statistic, "p.value.indiv" = object$p.value.indiv)) } compute_squeeze <- function(dist_coph, replicates) { @@ -254,16 +276,19 @@ compute_squeeze <- function(dist_coph, replicates) { colSums(Reduce(rbind, dist_coph[where_clust])^2) }) + # `data.frame` with the sum of squared values for both groups and the cluster + # number sum_sq_coph <- data.frame("set1" = Reduce(c, sum_sq_coph_trees1), "set2" = Reduce(c, sum_sq_coph_trees2), "cluster" = rep(unique(clusters1), cluster_length)) + # Compute the variance using the sum of squared values and the average values variances <- sum_sq_coph[, 1:2] - sq_average_coph variances <- rowSums(variances) variances <- as.vector(variances / (n1 + n2 - 2)) # Calculate squeezed variance - squeezed_var <- squeezeVar(variances, df = n1 + n2 - 2, robust = FALSE) + squeezed_var <- suppressWarnings(squeezeVar(variances, df = n1 + n2 - 2, robust = FALSE)) # Return list of average_coph and squeezed_var return(list("average_coph" = average_coph, "squeezed_var" = squeezed_var)) @@ -307,10 +332,19 @@ compute_pvalue <- function(average_coph, squeezed_var, replicates) { } normalize_trees <- function(coph_dist) { - # tree_cophs is a list of 'dist' objects as returned by the function cophenetic + + # Find the minimum value for each tree and store in all_minima all_minima <- sapply(coph_dist, min) + + # Subtract all_minima from each tree in coph_dist and store in out out <- mapply("-", coph_dist, all_minima, SIMPLIFY = FALSE) + + # Find the maximum value for each tree and store in all_maxima all_maxima <- sapply(out, max) + + # Divide each distance by all_maxima for each tree and store in out out <- mapply("/", out, all_maxima, SIMPLIFY = FALSE) + + # Return coph_dist normalized return(out) } diff --git a/man/treediff.Rd b/man/treediff.Rd index 4c69c8a1d6045c497c9b6b2a89d60d392af7f387..0287b3650e82aca7d26f6b98f7079412f3843226 100644 --- a/man/treediff.Rd +++ b/man/treediff.Rd @@ -2,9 +2,15 @@ % Please edit documentation in R/treediff.R \name{treediff} \alias{treediff} +\alias{summary.treediff} +\alias{print.treeTest} \title{Treediff} \usage{ -treediff(trees1, trees2, replicates) +treediff(trees1, trees2, replicates, scale = FALSE, order_labels = FALSE) + +\method{print}{treeTest}(x, ...) + +\method{summary}{treediff}(object, ...) } \arguments{ \item{trees1}{A list of trees corresponding to the first condition (set). @@ -19,6 +25,12 @@ different from that of \code{trees1}.} \item{replicates}{A numeric vector of length 2 with the number of replicates for each condition.} + +\item{scale}{Logical. If \code{TRUE}, the trees are all rescalled to have a +minimum height equal to 0 and a maximum height equal to 1.} + +\item{order_labels}{Logical. If \code{TRUE}, take a reference tree and keep +track of leave order.} } \value{ An object of class \code{treeTest} with the following entries: @@ -84,7 +96,7 @@ structure between families of trees. \emph{Preprint submitted for publication}. } \author{ -Gwendaelle Cardenac\cr +Gwendaëlle Cardenas\cr Marie Chavent \email{marie.chavent@u-bordeaux.fr}\cr Sylvain Foissac \email{sylvain.foissac@inrae.fr}\cr Pierre Neuvial \email{pierre.neuvial@math.univ-toulouse.fr}\cr diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000000000000000000000000000000000000..5ac89fa3504df350235873abdfb19728b3937d1f --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library("testthat") +library("treediff") + +test_check("treediff") \ No newline at end of file diff --git a/tests/testthat/test-treediff.R b/tests/testthat/test-treediff.R index 66b335215c8d3be3a00c2577eab2fc506d8a886f..cacb50d1bfe867d1c7b1e2900c71d8770172506f 100644 --- a/tests/testthat/test-treediff.R +++ b/tests/testthat/test-treediff.R @@ -1,17 +1,21 @@ +# 4 clusters leaves <- c(100,120,50,80) -trees <- sapply(leaves, FUN = function(l){ - base_data <- matrix(rnorm(2000), nrow = l, ncol = 200) +# Generate 4 trees from trees1 and trees from trees2 for each number of leaves +trees <- sapply(leaves, FUN = function(leaf){ + base_data <- matrix(rnorm(2000), nrow = leaf, ncol = 200) set1 <- replicate(4, sample(1:100, 50, replace = FALSE)) set2 <- replicate(6, sample(101:200, 50, replace = FALSE)) + # Compute hierarchical clustering for each tree in set1 trees1 <- apply(set1, 2, function(asample) { samples <- base_data[, asample] out <- hclust(dist(samples), method = "ward.D2") return(out) }) + # Compute hierarchical clustering for each tree in set2 trees2 <- apply(set2, 2, function(asample) { samples <- base_data[, asample] out <- hclust(dist(samples), method = "ward.D2") @@ -20,15 +24,109 @@ trees <- sapply(leaves, FUN = function(l){ return(list("trees1" = trees1, "trees2" = trees2)) }) +# Unlist the trees trees1 <- unlist(trees[1,], recursive = FALSE) trees2 <- unlist(trees[2,], recursive = FALSE) + +# Set the number of replicates replicates = c(4, 6) test_that("'treediff' works for simple cases", { + + # Call the function to be tested res <- treediff(trees1, trees2, replicates) + # Check the output object has the expected names expect_named(res, c("method", "data.name", "p.value", "statistic", "p.value.indiv")) - expect_equal(length(res$statistic), length(res$p.value.indiv)) + # Check the output object has the expected class + expect_s3_class(res, "treeTest") + + # Check that `p.value` is a numeric value + expect_is(res$p.value, "numeric") + + # Check that `statistic` is a numeric value + expect_is(res$statistic, "numeric") + + # Check that `p.value.indiv` is a numeric value + expect_is(res$p.value.indiv, "numeric") + + # Check that `method` is a character string + expect_is(res$method, "character") + + # Check that `data.name` is a character string + expect_is(res$data.name, "character") + + # Check the length of `statistic` matches `p.value.indiv` + expect_length(res$statistic, length(res$p.value.indiv)) + + # Check the length of `p-value` matches the number of clusters + expect_length(res$p.value, 4) + + # Check the length `statistic` matches the expected number of pairwise + # comparisons between the clusters + expect_length(res$statistic, sum(choose(leaves, 2))) +}) + +## Test errors + + +test_that("Test errors", { + # Test if replicates is numeric vector + expect_error(treediff(trees1 = trees1, trees2 = trees2, replicates = "abc"), + "`replicates` is not a numeric vector") + + # Test if replicates is a vector of length 2 + expect_error(treediff(trees1 = trees1, trees2 = trees2, replicates = c(1,2,3)), + "`replicates` must be a vector of length 2.") + + # Test if output is a list with class treeTest + out <- treediff(trees1, trees2, c(4,6)) + expect_true(inherits(out, "treeTest")) + + # Test if the number of leaves in each cluster is the same between the two + # sets of trees + trees2[[5]]$order <- c(trees2[[5]]$order, 101) + expect_error(treediff(trees1, trees2, c(4, 6)), "the number of leaves in one or more clusters is different between the two sets of trees.") +}) + + +# compute_squeeze +test_that("Test for compute_squeeze", { + + # Create a test data matrix + test_matrix <- matrix(rnorm(24500), nrow = 1225, ncol = 20) + replicates <- c(10,10) + + # Call the function + res <- compute_squeeze(test_matrix, replicates) + + # Compare the result with the expected value + expect_named(res, c("average_coph", "squeezed_var")) + expect_is(res, "list") + expect_is(res$average_coph, "data.frame") + expect_is(res$squeezed_var, "list") + expect_is(res$squeezed_var$var.post, "numeric") +}) + +# compute_pvalue +test_that("Test for compute_pvalue", { + + # Set up example inputs + test_matrix <- matrix(rnorm(24500), nrow = 1225, ncol = 20) + replicates <- c(10,10) + + # Calculate the average and variance of the test matrix + res <- compute_squeeze(test_matrix, replicates) + + # Call the function + pvals <- compute_pvalue(res$average_coph, res$squeezed_var, replicates) + + expect_named(pvals, c("statistics", "p.value", "cluster", "p")) + + # Check that the output is a data frame + expect_is(pvals, "data.frame") + expect_length(pvals$p.value, 1225) + expect_length(pvals, 4) })