Skip to content
Snippets Groups Projects
Commit b14fdc1a authored by CARDENAS GWENDAELLE's avatar CARDENAS GWENDAELLE
Browse files

comment corrections

parent 4691eaae
No related branches found
No related tags found
No related merge requests found
#' Treediff
#' @title Treediff
#'
#' The treediff function calculates the difference between two trees.
#' @description Comparison of two sets of trees.
#'
#' @param trees1 A list of trees.
#' @details This function compares two sets of trees using a p-value aggregation
#' method. The p-values are obtained by performing a student test on the averaged
#' cophenetic distances of the two sets of trees.
#'
#' @param trees1 A list of trees .
#' @param trees2 A list of trees.
#' @param replicats A vector of the number of replicates for each group.
#' @param replicates A vector of the number of replicates for each group.
#'
#' @return A list with class "htest":
#' \item{p.value}{the p-value for the tree test.}
#' \item{statistic}{the value of the statistic of each leaf peer of the tree test.}
#' \item{p.value.indiv}{the p-value for each leaf peer of the tree test.}
#' \item{method}{a character string indicating what type of test was performed.}
#' \item{data.name}{a character string giving the names of the tree subset}
#'
#' @author
#'
#' @references
#'
#' @return A data frame with statistics and p-values.
#' @export
#'
#' @importFrom dplyr %>%
......@@ -32,29 +46,23 @@
#' trees1 <- trees[1:16]
#' trees2 <- trees[17:40]
#'
#' replicats <- c(4,6)
#' replicates <- c(4,6)
#'
#' tree_pvals <- treediff(trees1, trees2, replicats)
#' tree_pvals <- treediff(trees1, trees2, replicates)
#' tree_pvals$p.value
treediff <- function(trees1, trees2, replicats){
# Check if the length of replicats is 2
if (length(replicats) != 2){
stop("Incorrect replicats size, `replicats` must be a vector of 2 elements")
treediff <- function(trees1, trees2, replicates){
# Check if the length of replicates is 2
if (length(replicates) != 2){
stop("Incorrect replicates size, `replicates` must be a vector of 2 elements")
}
# Check if the number of clusters is equal for both conditions
if (length(trees1) / replicats[1] != length(trees2) / replicats[2]){
stop("The number of clusters is different between conditions or `replicats' is uncorrect")
if (length(trees1) / replicates[1] != length(trees2) / replicates[2]){
stop("The number of clusters is different between conditions or `replicates' is uncorrect")
}
# # Extract the number of pair of the trees
# tree_order <- lapply(trees1, "[[", "order")
# leaves <- sapply(tree_order, length)
#
# # Get unique values of the number of leaves
# leaves <- unique(leaves)
# Merge trees from both conditions
trees <- c(trees1, trees2)
......@@ -67,31 +75,23 @@ treediff <- function(trees1, trees2, replicats){
return(adist[upper.tri(adist)])
})
# # Fill the cophenetic vector with NA if the number of leaves is different from the max.
# if (length(leaves) != 1){
# len_max <- choose(max(leaves), 2)
#
# coph_vect <- sapply(coph_vect, FUN = function(col) {
# c(col, rep(NA, len_max - length(col)))
# })
# }
# Compute squeeze factor
outs <- compute_squeeze(coph_vect, replicats)
outs <- compute_squeeze(coph_vect, replicates)
# Compute p-values
outp <- compute_pvalue(outs$average_coph, outs$squeezed_var, replicats)
outp <- compute_pvalue(outs$average_coph, outs$squeezed_var, replicates)
# Aggregate p-values
out_aggr <- outp %>% group_by(cluster) %>%
summarise("p.value" = min(sort(p.value) / (1:p)) * p) %>% unique()
summarise("p.value" = min(sort(p.value) / (1:p)) * p) %>%
unique()
# Store results in a list
out <- list("method" = "Tree test based on t-test",
"data.name" = paste(substitute(trees1), "and", substitute(trees2)),
"p.value" = out_aggr$p.value,
"res" = list("statistic" = outp$statistics,
"p.value.indiv" = outp$p.value))
"statistic" = outp$statistics,
"p.value.indiv" = outp$p.value)
# Assign class to the list
class(out) <- "htest"
......@@ -100,9 +100,9 @@ treediff <- function(trees1, trees2, replicats){
return(out)
}
compute_squeeze <- function(dist_coph, replicats){
compute_squeeze <- function(dist_coph, replicates){
if (class(dist_coph) == "list"){
if (inherits(dist_coph, "list") == TRUE){
max_pr <- as.numeric(max(summary(dist_coph)[,1]))
dist_coph <- sapply(dist_coph, FUN = function(col) {
c(col, rep(NA, max_pr - length(col)))
......@@ -110,14 +110,14 @@ compute_squeeze <- function(dist_coph, replicats){
}
# Calculate number of clusters
nb_cluster <- ncol(dist_coph)/sum(replicats)
nb_cluster <- ncol(dist_coph)/sum(replicates)
groups1 <- rep(1:nb_cluster, each = replicats[1])
groups2 <- rep(1:nb_cluster, each = replicats[2])
groups1 <- rep(1:nb_cluster, each = replicates[1])
groups2 <- rep(1:nb_cluster, each = replicates[2])
# Store replicats values for each group
n1 <- replicats[1]
n2 <- replicats[2]
# Store replicates values for each group
n1 <- replicates[1]
n2 <- replicates[2]
# Indices for each group
col1 <- 1:length(groups1)
......@@ -143,7 +143,7 @@ compute_squeeze <- function(dist_coph, replicats){
average_coph_woNA <- na.omit(average_coph)
# Calculate variance
sq_average_coph <- sweep(average_coph_woNA[-3]^2, 2, replicats, "*")
sq_average_coph <- sweep(average_coph_woNA[-3]^2, 2, replicates, "*")
# Sum of squared values for each group
sum_sq_coph_trees1 <- unlist(by(t(dist_coph[,col1])^2, groups1, colSums))
......@@ -162,7 +162,7 @@ compute_squeeze <- function(dist_coph, replicats){
"squeezed_var" = squeezed_var))
}
compute_pvalue <- function(average_coph, squeezed_var, replicats){
compute_pvalue <- function(average_coph, squeezed_var, replicates){
# Extract the cluster information from the average_coph data frame
cluster <- average_coph$cluster
......@@ -170,9 +170,9 @@ compute_pvalue <- function(average_coph, squeezed_var, replicats){
# Calculate the numerator for the t-statistic
numerator <- Reduce("-", average_coph[,-3])
# Store replicats values for each group
n1 <- replicats[1]
n2 <- replicats[2]
# Store replicates values for each group
n1 <- replicates[1]
n2 <- replicates[2]
# Calculate the denominator for the t-statistic
denominator <- squeezed_var$var.post * (n1 + n2) / (n1 * n2)
......
......@@ -4,20 +4,30 @@
\alias{treediff}
\title{Treediff}
\usage{
treediff(trees1, trees2, replicats)
treediff(trees1, trees2, replicates)
}
\arguments{
\item{trees1}{A list of trees.}
\item{trees1}{A list of trees .}
\item{trees2}{A list of trees.}
\item{replicats}{A vector of the number of replicates for each group.}
\item{replicates}{A vector of the number of replicates for each group.}
}
\value{
A data frame with statistics and p-values.
A list with class "htest":
\item{p.value}{the p-value for the tree test.}
\item{statistic}{the value of the statistic of each leaf peer of the tree test.}
\item{p.value.indiv}{the p-value for each leaf peer of the tree test.}
\item{method}{a character string indicating what type of test was performed.}
\item{data.name}{a character string giving the names of the tree subset}
}
\description{
The treediff function calculates the difference between two trees.
Comparison of two sets of trees.
}
\details{
This function compares two sets of trees using a p-value aggregation
method. The p-values are obtained by performing a student test on the averaged
cophenetic distances of the two sets of trees.
}
\examples{
......@@ -34,8 +44,8 @@ return(out)
trees1 <- trees[1:16]
trees2 <- trees[17:40]
replicats <- c(4,6)
replicates <- c(4,6)
tree_pvals <- treediff(trees1, trees2, replicats)
tree_pvals <- treediff(trees1, trees2, replicates)
tree_pvals$p.value
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment