comment corrections

b14fdc1a · CARDENAS GWENDAELLE · 4691eaae · b14fdc1a · b14fdc1a
Commit b14fdc1a authored 2 years ago by CARDENAS GWENDAELLE
--- a/R/treediff.R
+++ b/R/treediff.R
-#' Treediff
+#' @title Treediff
 #'
-#' The treediff function calculates the difference between two trees.
+#' @description Comparison of two sets of trees.
 #'
-#' @param trees1 A list of trees.
+#' @details This function compares two sets of trees using a p-value aggregation
+#' method. The p-values are obtained by performing a student test on the averaged
+#' cophenetic distances of the two sets of trees.
+#'
+#' @param trees1 A list of trees .
 #' @param trees2 A list of trees.
-#' @param replicats A vector of the number of replicates for each group.
+#' @param replicates A vector of the number of replicates for each group.
+#'
+#' @return A list with class "htest":
+#'   \item{p.value}{the p-value for the tree test.}
+#'   \item{statistic}{the value of the statistic of each leaf peer of the tree test.}
+#'   \item{p.value.indiv}{the p-value for each leaf peer of the tree test.}
+#'   \item{method}{a character string indicating what type of test was performed.}
+#'   \item{data.name}{a character string giving the names of the tree subset}
+#'
+#' @author
+#'
+#' @references
 #'
-#' @return A data frame with statistics and p-values.
 #' @export
 #'
 #' @importFrom dplyr %>%
@@ -32,29 +46,23 @@
 #' trees1 <- trees[1:16]
 #' trees2 <- trees[17:40]
 #'
-#' replicats <- c(4,6)
+#' replicates <- c(4,6)
 #'
-#' tree_pvals <- treediff(trees1, trees2, replicats)
+#' tree_pvals <- treediff(trees1, trees2, replicates)
 #' tree_pvals$p.value
-treediff <- function(trees1, trees2, replicats){

-  # Check if the length of replicats is 2
-  if (length(replicats) != 2){
-    stop("Incorrect replicats size, `replicats` must be a vector of 2 elements")
+treediff <- function(trees1, trees2, replicates){
+
+  # Check if the length of replicates is 2
+  if (length(replicates) != 2){
+    stop("Incorrect replicates size, `replicates` must be a vector of 2 elements")
  }

  # Check if the number of clusters is equal for both conditions
-  if (length(trees1) / replicats[1] != length(trees2) / replicats[2]){
-    stop("The number of clusters is different between conditions or `replicats' is uncorrect")
+  if (length(trees1) / replicates[1] != length(trees2) / replicates[2]){
+    stop("The number of clusters is different between conditions or `replicates' is uncorrect")
  }

-  # # Extract the number of pair of the trees
-  # tree_order <- lapply(trees1, "[[", "order")
-  # leaves <- sapply(tree_order, length)
-  #
-  # # Get unique values of the number of leaves
-  # leaves <- unique(leaves)
-
  # Merge trees from both conditions
  trees <- c(trees1, trees2)

@@ -67,31 +75,23 @@ treediff <- function(trees1, trees2, replicats){
    return(adist[upper.tri(adist)])
  })

-  # # Fill the cophenetic vector with NA if the number of leaves is different from the max.
-  # if (length(leaves) != 1){
-  #   len_max <- choose(max(leaves), 2)
-  #
-  #   coph_vect <- sapply(coph_vect, FUN = function(col) {
-  #     c(col, rep(NA, len_max - length(col)))
-  #   })
-  # }
-
  # Compute squeeze factor
-  outs <- compute_squeeze(coph_vect, replicats)
+  outs <- compute_squeeze(coph_vect, replicates)

  # Compute p-values
-  outp <- compute_pvalue(outs$average_coph, outs$squeezed_var, replicats)
+  outp <- compute_pvalue(outs$average_coph, outs$squeezed_var, replicates)

  # Aggregate p-values
  out_aggr <- outp %>% group_by(cluster) %>%
-    summarise("p.value" = min(sort(p.value) / (1:p)) * p) %>% unique()
+    summarise("p.value" = min(sort(p.value) / (1:p)) * p) %>%
+    unique()

  # Store results in a list
  out <- list("method" = "Tree test based on t-test",
              "data.name" = paste(substitute(trees1), "and", substitute(trees2)),
              "p.value" = out_aggr$p.value,
-              "res" = list("statistic" = outp$statistics,
-                           "p.value.indiv" = outp$p.value))
+              "statistic" = outp$statistics,
+              "p.value.indiv" = outp$p.value)

  # Assign class to the list
  class(out) <- "htest"
@@ -100,9 +100,9 @@ treediff <- function(trees1, trees2, replicats){
  return(out)
 }

-compute_squeeze <- function(dist_coph, replicats){
+compute_squeeze <- function(dist_coph, replicates){

-  if (class(dist_coph) == "list"){
+  if (inherits(dist_coph, "list") == TRUE){
    max_pr <- as.numeric(max(summary(dist_coph)[,1]))
    dist_coph <- sapply(dist_coph, FUN = function(col) {
      c(col, rep(NA, max_pr - length(col)))
@@ -110,14 +110,14 @@ compute_squeeze <- function(dist_coph, replicats){
  }

  # Calculate number of clusters
-  nb_cluster <- ncol(dist_coph)/sum(replicats)
+  nb_cluster <- ncol(dist_coph)/sum(replicates)

-  groups1 <- rep(1:nb_cluster, each = replicats[1])
-  groups2 <- rep(1:nb_cluster, each = replicats[2])
+  groups1 <- rep(1:nb_cluster, each = replicates[1])
+  groups2 <- rep(1:nb_cluster, each = replicates[2])

-  # Store replicats values for each group
-  n1 <- replicats[1]
-  n2 <- replicats[2]
+  # Store replicates values for each group
+  n1 <- replicates[1]
+  n2 <- replicates[2]

  # Indices for each group
  col1 <- 1:length(groups1)
@@ -143,7 +143,7 @@ compute_squeeze <- function(dist_coph, replicats){
  average_coph_woNA <- na.omit(average_coph)

  # Calculate variance
-  sq_average_coph <- sweep(average_coph_woNA[-3]^2, 2, replicats, "*")
+  sq_average_coph <- sweep(average_coph_woNA[-3]^2, 2, replicates, "*")

  # Sum of squared values for each group
  sum_sq_coph_trees1 <- unlist(by(t(dist_coph[,col1])^2, groups1, colSums))
@@ -162,7 +162,7 @@ compute_squeeze <- function(dist_coph, replicats){
              "squeezed_var" = squeezed_var))
 }

-compute_pvalue <- function(average_coph, squeezed_var, replicats){
+compute_pvalue <- function(average_coph, squeezed_var, replicates){

  # Extract the cluster information from the average_coph data frame
  cluster <- average_coph$cluster
@@ -170,9 +170,9 @@ compute_pvalue <- function(average_coph, squeezed_var, replicats){
  # Calculate the numerator for the t-statistic
  numerator <- Reduce("-", average_coph[,-3])

-  # Store replicats values for each group
-  n1 <- replicats[1]
-  n2 <- replicats[2]
+  # Store replicates values for each group
+  n1 <- replicates[1]
+  n2 <- replicates[2]

  # Calculate the denominator for the t-statistic
  denominator <- squeezed_var$var.post * (n1 + n2) / (n1 * n2)

--- a/man/treediff.Rd
+++ b/man/treediff.Rd
@@ -4,20 +4,30 @@
 \alias{treediff}
 \title{Treediff}
 \usage{
-treediff(trees1, trees2, replicats)
+treediff(trees1, trees2, replicates)
 }
 \arguments{
-\item{trees1}{A list of trees.}
+\item{trees1}{A list of trees .}

 \item{trees2}{A list of trees.}

-\item{replicats}{A vector of the number of replicates for each group.}
+\item{replicates}{A vector of the number of replicates for each group.}
 }
 \value{
-A data frame with statistics and p-values.
+A list with class "htest":
+\item{p.value}{the p-value for the tree test.}
+\item{statistic}{the value of the statistic of each leaf peer of the tree test.}
+\item{p.value.indiv}{the p-value for each leaf peer of the tree test.}
+\item{method}{a character string indicating what type of test was performed.}
+\item{data.name}{a character string giving the names of the tree subset}
 }
 \description{
-The treediff function calculates the difference between two trees.
+Comparison of two sets of trees.
+}
+\details{
+This function compares two sets of trees using a p-value aggregation
+method. The p-values are obtained by performing a student test on the averaged
+cophenetic distances of the two sets of trees.
 }
 \examples{

@@ -34,8 +44,8 @@ return(out)
 trees1 <- trees[1:16]
 trees2 <- trees[17:40]

-replicats <- c(4,6)
+replicates <- c(4,6)

-tree_pvals <- treediff(trees1, trees2, replicats)
+tree_pvals <- treediff(trees1, trees2, replicates)
 tree_pvals$p.value
 }