corrections

76b94f1f · CARDENAS GWENDAELLE · cbac3d7e · 76b94f1f · 76b94f1f · 76b94f1f
Commit 76b94f1f authored 2 years ago by CARDENAS GWENDAELLE
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -5,7 +5,7 @@ Authors@R:
    person("First", "Last", , "first.last@example.com", role = c("aut", "cre"),
           comment = c(ORCID = "YOUR-ORCID-ID"))
 Description: What the package does (one paragraph).
-Imports: dplyr, limma, stats
+Imports: dplyr, limma, reshape2, stats
 License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a
    license
 Encoding: UTF-8

--- a/NAMESPACE
+++ b/NAMESPACE
 # Generated by roxygen2: do not edit by hand

-export(fisher_mix)
+S3method(print,treeTest)
+S3method(summary,treediff)
 export(treediff)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,group_by)
@@ -9,4 +10,3 @@ importFrom(limma,squeezeVar)
 importFrom(reshape2,colsplit)
 importFrom(stats,cophenetic)
 importFrom(stats,pt)
-importFrom(stats,rf)
--- a/R/treediff.R
+++ b/R/treediff.R
@@ -20,8 +20,7 @@
 #' @param replicates A numeric vector of length 2 with the number of replicates
 #' for each condition.
 #' @param scale Logical. If \code{TRUE}, the trees are all rescalled to have a
-#' minimum height equal to 0 (translation) and a maximum height equal to 1
-#' (dilatation).
+#' minimum height equal to 0 and a maximum height equal to 1.
 #' @param order_labels Logical. If \code{TRUE}, take a reference tree and keep
 #' track of leave order.
 #'
@@ -60,7 +59,6 @@
 #' @importFrom stats cophenetic
 #' @importFrom stats pt
 #'
-#'
 #' @examples
 #' leaves <- c(100, 120, 50, 80)
 #'
@@ -139,7 +137,7 @@ treediff <- function(trees1, trees2, replicates, scale = FALSE,
  # Compute cophenetic distance
  coph_dist <- sapply(trees, cophenetic, simplify = FALSE)

-  # normalize
+  # Normalize
  if (scale) {
    coph_dist <- normalize_trees(coph_dist)
  }
@@ -151,6 +149,8 @@ treediff <- function(trees1, trees2, replicates, scale = FALSE,
      adist <- adist[anorder, anorder]
      return(adist[upper.tri(adist)])
    }, coph_dist, labels_perm)
+
+    # Convert the result to a list of vectors
    coph_vect <- lapply(1:ncol(coph_vect), function(col) {
      return(as.vector(coph_vect[,col]))
    })
@@ -191,11 +191,17 @@ treediff <- function(trees1, trees2, replicates, scale = FALSE,
 #' @export
 #' @rdname treediff

-print.treeTest <- function(x, prefix = "\t",...) {
+print.treeTest <- function(x, ...) {
+
+  # print method
  cat("\n")
-  cat(strwrap(x$method, prefix = prefix), sep = "\n")
+  cat(strwrap(x$method, prefix = "\t"), sep = "\n")
  cat("\n")
+
+  # Print the name of the data used in the test
  cat("data:  ", x$data.name, "\n", sep = "")
+
+  # print the first 5 p-values
  print(colsplit(x$p.value, " ", "p.values :"), max = 5)
 }

@@ -204,9 +210,25 @@ print.treeTest <- function(x, prefix = "\t",...) {
 #' @rdname treediff

 summary.treediff <- function(object, ...) {
-  cat("\nSummary\n\n")
-  cat("      Class : ", class(object),"\n\n")
+
+  # Print a header for the summary
+  cat("\nSummary\n")
+
+  # Print the class of the object
+  cat("\tClass : ", class(object))
+  cat("\n")
+
+  # Print the test output
  print(object)
+  cat("\n")
+
+  # Print a summary of the p.value
+  cat("p.value\n")
+  print(summary(object$p.value))
+  cat("\n")
+
+  # Print a summary of the `statistic` and `p.value.indiv`
+  summary(data.frame("statistic" = object$statistic, "p.value.indiv" = object$p.value.indiv))
 }

 compute_squeeze <- function(dist_coph, replicates) {
@@ -254,16 +276,19 @@ compute_squeeze <- function(dist_coph, replicates) {
    colSums(Reduce(rbind, dist_coph[where_clust])^2)
  })

+  # `data.frame` with the sum of squared values for both groups and the cluster
+  # number
  sum_sq_coph <- data.frame("set1" = Reduce(c, sum_sq_coph_trees1),
                            "set2" = Reduce(c, sum_sq_coph_trees2),
                            "cluster" = rep(unique(clusters1), cluster_length))

+  # Compute the variance using the sum of squared values and the average values
  variances <- sum_sq_coph[, 1:2] - sq_average_coph
  variances <- rowSums(variances)
  variances <- as.vector(variances / (n1 + n2 - 2))

  # Calculate squeezed variance
-  squeezed_var <- squeezeVar(variances, df = n1 + n2 - 2, robust = FALSE)
+  squeezed_var <- suppressWarnings(squeezeVar(variances, df = n1 + n2 - 2, robust = FALSE))

  # Return list of average_coph and squeezed_var
  return(list("average_coph" = average_coph, "squeezed_var" = squeezed_var))
@@ -307,10 +332,19 @@ compute_pvalue <- function(average_coph, squeezed_var, replicates) {
 }

 normalize_trees <- function(coph_dist) {
-  # tree_cophs is a list of 'dist' objects as returned by the function cophenetic
+
+  # Find the minimum value for each tree and store in all_minima
  all_minima <- sapply(coph_dist, min)
+
+  # Subtract all_minima from each tree in coph_dist and store in out
  out <- mapply("-", coph_dist, all_minima, SIMPLIFY = FALSE)
+
+  # Find the maximum value for each tree and store in all_maxima
  all_maxima <- sapply(out, max)
+
+  # Divide each distance by all_maxima for each tree and store in out
  out <- mapply("/", out, all_maxima, SIMPLIFY = FALSE)
+
+  # Return coph_dist normalized
  return(out)
 }
--- a/man/treediff.Rd
+++ b/man/treediff.Rd
@@ -2,9 +2,15 @@
 % Please edit documentation in R/treediff.R
 \name{treediff}
 \alias{treediff}
+\alias{summary.treediff}
+\alias{print.treeTest}
 \title{Treediff}
 \usage{
-treediff(trees1, trees2, replicates)
+treediff(trees1, trees2, replicates, scale = FALSE, order_labels = FALSE)
+
+\method{print}{treeTest}(x, ...)
+
+\method{summary}{treediff}(object, ...)
 }
 \arguments{
 \item{trees1}{A list of trees corresponding to the first condition (set).
@@ -19,6 +25,12 @@ different from that of \code{trees1}.}

 \item{replicates}{A numeric vector of length 2 with the number of replicates
 for each condition.}
+
+\item{scale}{Logical. If \code{TRUE}, the trees are all rescalled to have a
+minimum height equal to 0 and a maximum height equal to 1.}
+
+\item{order_labels}{Logical. If \code{TRUE}, take a reference tree and keep
+track of leave order.}
 }
 \value{
 An object of class \code{treeTest} with the following entries:
@@ -84,7 +96,7 @@ structure between families of trees. \emph{Preprint submitted for
 publication}.
 }
 \author{
-Gwendaelle Cardenac\cr
+Gwendaëlle Cardenas\cr
 Marie Chavent \email{marie.chavent@u-bordeaux.fr}\cr
 Sylvain Foissac \email{sylvain.foissac@inrae.fr}\cr
 Pierre Neuvial \email{pierre.neuvial@math.univ-toulouse.fr}\cr

--- a/tests/testthat.R
+++ b/tests/testthat.R
+library("testthat")
+library("treediff")
+
+test_check("treediff")
\ No newline at end of file
--- a/tests/testthat/test-treediff.R
+++ b/tests/testthat/test-treediff.R
+# 4 clusters
 leaves <- c(100,120,50,80)

-trees <- sapply(leaves, FUN = function(l){
-  base_data <- matrix(rnorm(2000), nrow = l, ncol = 200)
+# Generate 4 trees from trees1 and  trees from trees2 for each number of leaves
+trees <- sapply(leaves, FUN = function(leaf){
+  base_data <- matrix(rnorm(2000), nrow = leaf, ncol = 200)

  set1 <- replicate(4, sample(1:100, 50, replace = FALSE))
  set2 <- replicate(6, sample(101:200, 50, replace = FALSE))

+  # Compute hierarchical clustering for each tree in set1
  trees1 <- apply(set1, 2, function(asample) {
    samples <- base_data[, asample]
    out <- hclust(dist(samples), method = "ward.D2")
    return(out)
  })

+  # Compute hierarchical clustering for each tree in set2
  trees2 <- apply(set2, 2, function(asample) {
    samples <- base_data[, asample]
    out <- hclust(dist(samples), method = "ward.D2")
@@ -20,15 +24,109 @@ trees <- sapply(leaves, FUN = function(l){
  return(list("trees1" = trees1, "trees2" = trees2))
 })

+# Unlist the trees
 trees1 <- unlist(trees[1,], recursive = FALSE)
 trees2 <- unlist(trees[2,], recursive = FALSE)
+
+# Set the number of replicates
 replicates = c(4, 6)

 test_that("'treediff' works for simple cases", {
+
+  # Call the function to be tested
  res <- treediff(trees1, trees2, replicates)

+  # Check the output object has the expected names
  expect_named(res, c("method", "data.name", "p.value",
                      "statistic", "p.value.indiv"))

-  expect_equal(length(res$statistic), length(res$p.value.indiv))
+  # Check the output object has the expected class
+  expect_s3_class(res, "treeTest")
+
+  # Check that `p.value` is a numeric value
+  expect_is(res$p.value, "numeric")
+
+  # Check that `statistic` is a numeric value
+  expect_is(res$statistic, "numeric")
+
+  # Check that `p.value.indiv` is a numeric value
+  expect_is(res$p.value.indiv, "numeric")
+
+  # Check that `method` is a character string
+  expect_is(res$method, "character")
+
+  # Check that `data.name` is a character string
+  expect_is(res$data.name, "character")
+
+  # Check the length of `statistic` matches `p.value.indiv`
+  expect_length(res$statistic, length(res$p.value.indiv))
+
+  # Check the length of `p-value` matches the number of clusters
+  expect_length(res$p.value, 4)
+
+  # Check the length `statistic` matches the expected number of pairwise
+  # comparisons between the clusters
+  expect_length(res$statistic, sum(choose(leaves, 2)))
+})
+
+## Test errors
+
+
+test_that("Test errors", {
+  # Test if replicates is numeric vector
+  expect_error(treediff(trees1 = trees1, trees2 = trees2, replicates = "abc"),
+               "`replicates` is not a numeric vector")
+
+  # Test if replicates is a vector of length 2
+  expect_error(treediff(trees1 = trees1, trees2 = trees2, replicates = c(1,2,3)),
+               "`replicates` must be a vector of length 2.")
+
+  # Test if output is a list with class treeTest
+  out <- treediff(trees1, trees2, c(4,6))
+  expect_true(inherits(out, "treeTest"))
+
+  # Test if the number of leaves in each cluster is the same between the two
+  # sets of trees
+  trees2[[5]]$order <- c(trees2[[5]]$order, 101)
+  expect_error(treediff(trees1, trees2, c(4, 6)), "the number of leaves in one or more clusters is different between the two sets of trees.")
+})
+
+
+# compute_squeeze
+test_that("Test for compute_squeeze", {
+
+  # Create a test data matrix
+  test_matrix <- matrix(rnorm(24500), nrow = 1225, ncol = 20)
+  replicates <- c(10,10)
+
+  # Call the function
+  res <- compute_squeeze(test_matrix, replicates)
+
+  # Compare the result with the expected value
+  expect_named(res, c("average_coph", "squeezed_var"))
+  expect_is(res, "list")
+  expect_is(res$average_coph, "data.frame")
+  expect_is(res$squeezed_var, "list")
+  expect_is(res$squeezed_var$var.post, "numeric")
+})
+
+# compute_pvalue
+test_that("Test for compute_pvalue", {
+
+  # Set up example inputs
+  test_matrix <- matrix(rnorm(24500), nrow = 1225, ncol = 20)
+  replicates <- c(10,10)
+
+  # Calculate the average and variance of the test matrix
+  res <- compute_squeeze(test_matrix, replicates)
+
+  # Call the function
+  pvals <- compute_pvalue(res$average_coph, res$squeezed_var, replicates)
+
+  expect_named(pvals, c("statistics", "p.value", "cluster", "p"))
+
+  # Check that the output is a data frame
+  expect_is(pvals, "data.frame")
+  expect_length(pvals$p.value, 1225)
+  expect_length(pvals, 4)
 })