Skip to content
Snippets Groups Projects
Commit cba85db0 authored by Nathalie Vialaneix's avatar Nathalie Vialaneix
Browse files

updated treediff code (bug with variances still to be fixed)

parent 4a1d080d
No related branches found
No related tags found
No related merge requests found
......@@ -75,12 +75,13 @@ treediff <- function(trees1, trees2, replicates){
# Check if the length of replicates is 2
if (length(replicates) != 2){
stop("Incorrect replicates size, `replicates` must be a vector of 2 elements")
stop("`replicates` must be a vector of length 2.")
}
# Check if the number of clusters is equal for both conditions
if (length(trees1) / replicates[1] != length(trees2) / replicates[2]){
stop("The number of clusters is different between conditions or `replicates' is uncorrect")
if (length(trees1) / replicates[1] != length(trees2) / replicates[2]) {
stop(paste("The number of clusters is different between conditions (or",
"`replicates' is not correct)."))
}
# Merge trees from both conditions
......@@ -90,7 +91,7 @@ treediff <- function(trees1, trees2, replicates){
coph_dist <- sapply(trees, cophenetic, simplify = FALSE)
# Convert cophenetic distances to vector
coph_vect <- sapply(coph_dist, function(adist) {
coph_vect <- lapply(coph_dist, function(adist) {
adist <- as.matrix(adist)
return(adist[upper.tri(adist)])
})
......@@ -107,8 +108,9 @@ treediff <- function(trees1, trees2, replicates){
unique()
# Store results in a list
data_name <- paste(substitute(trees1), "and", substitute(trees2))
out <- list("method" = "Tree test based on t-test",
"data.name" = paste(substitute(trees1), "and", substitute(trees2)),
"data.name" = data_name,
"p.value" = out_aggr$p.value,
"statistic" = outp$statistics,
"p.value.indiv" = outp$p.value)
......@@ -120,57 +122,57 @@ treediff <- function(trees1, trees2, replicates){
return(out)
}
compute_squeeze <- function(dist_coph, replicates){
if (inherits(dist_coph, "list") == TRUE){
max_pr <- as.numeric(max(summary(dist_coph)[,1]))
dist_coph <- sapply(dist_coph, FUN = function(col) {
c(col, rep(NA, max_pr - length(col)))
})
}
compute_squeeze <- function(dist_coph, replicates) {
# Calculate number of clusters
nb_cluster <- ncol(dist_coph)/sum(replicates)
nb_cluster <- length(dist_coph) / sum(replicates)
groups1 <- rep(1:nb_cluster, each = replicates[1])
groups2 <- rep(1:nb_cluster, each = replicates[2])
clusters1 <- rep(1:nb_cluster, each = replicates[1])
clusters2 <- rep(1:nb_cluster, each = replicates[2])
# Store replicates values for each group
n1 <- replicates[1]
n2 <- replicates[2]
# Indices for each group
col1 <- 1:length(groups1)
col2 <- 1:length(groups2) + length(col1)
set1 <- 1:length(clusters1)
set2 <- 1:length(clusters2) + length(col1)
# Average per groups and conditions
average_coph_trees1 <- by(t(dist_coph[, col1]), groups1, colMeans)
condition1 <- as.vector(Reduce(cbind, average_coph_trees1))
average_coph_trees2 <- by(t(dist_coph[, col2]), groups2, colMeans)
condition2 <- as.vector(Reduce(cbind, average_coph_trees2))
# Cluster vector
len1 <- sapply(average_coph_trees1, length)
cluster1 <- rep(1:nb_cluster, len1)
cluster <- cluster1
average_coph_trees1 <- lapply(unique(clusters1), function(acluster) {
where_clust <- which(clusters1 == acluster)
colMeans(Reduce(rbind, dist_coph[where_clust]))
})
average_coph_trees2 <- lapply(unique(clusters1), function(acluster) {
where_clust <- which(clusters2 == acluster) + length(clusters1)
colMeans(Reduce(rbind, dist_coph[where_clust]))
})
# Merge average values and cluster vector
average_coph <- data.frame(condition1, condition2, cluster)
# Remove missing values from average_coph
average_coph_woNA <- na.omit(average_coph)
cluster_length <- sapply(average_coph_trees1, length)
average_coph <- data.frame("set1" = Reduce(c, average_coph_trees1),
"set2" = Reduce(c, average_coph_trees2),
"cluster" = rep(unique(clusters1), cluster_length))
# Calculate variance
sq_average_coph <- sweep(average_coph_woNA[-3]^2, 2, replicates, "*")
sq_average_coph <- sweep(average_coph[-3]^2, 2, replicates, "*")
# Sum of squared values for each group
sum_sq_coph_trees1 <- unlist(by(t(dist_coph[,col1])^2, groups1, colSums))
sum_sq_coph_trees2 <- unlist(by(t(dist_coph[,col2])^2, groups2, colSums))
sum_sq_coph <- data.frame(na.omit(sum_sq_coph_trees1), na.omit(sum_sq_coph_trees2))
## FIX IT: PROBLEM WITH VARIANCES HERE
sum_sq_coph_trees1 <- lapply(unique(clusters1), function(acluster) {
where_clust <- which(clusters1 == acluster)
colMeans(Reduce(rbind, dist_coph[where_clust])^2)
})
sum_sq_coph_trees2 <- lapply(unique(clusters1), function(acluster) {
where_clust <- which(clusters2 == acluster) + length(clusters1)
colMeans(Reduce(rbind, dist_coph[where_clust])^2)
})
sum_sq_coph <- data.frame("set1" = Reduce(c, sum_sq_coph_trees1),
"set2" = Reduce(c, sum_sq_coph_trees2),
"cluster" = rep(unique(clusters1), cluster_length))
variances <- sum_sq_coph - sq_average_coph
variances <- sum_sq_coph[, 1:2] - sq_average_coph
variances <- rowSums(variances)
variances <- as.vector(variances / (n1 + n2 - 2))
......@@ -178,11 +180,10 @@ compute_squeeze <- function(dist_coph, replicates){
squeezed_var <- squeezeVar(variances, df = n1 + n2 - 2, robust = FALSE)
# Return list of average_coph and squeezed_var
return(list("average_coph" = average_coph_woNA,
"squeezed_var" = squeezed_var))
return(list("average_coph" = average_coph, "squeezed_var" = squeezed_var))
}
compute_pvalue <- function(average_coph, squeezed_var, replicates){
compute_pvalue <- function(average_coph, squeezed_var, replicates) {
# Extract the cluster information from the average_coph data frame
cluster <- average_coph$cluster
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment