Skip to content
Snippets Groups Projects
Commit 76b94f1f authored by CARDENAS GWENDAELLE's avatar CARDENAS GWENDAELLE
Browse files

corrections

parent cbac3d7e
No related branches found
No related tags found
No related merge requests found
......@@ -5,7 +5,7 @@ Authors@R:
person("First", "Last", , "first.last@example.com", role = c("aut", "cre"),
comment = c(ORCID = "YOUR-ORCID-ID"))
Description: What the package does (one paragraph).
Imports: dplyr, limma, stats
Imports: dplyr, limma, reshape2, stats
License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a
license
Encoding: UTF-8
......
# Generated by roxygen2: do not edit by hand
export(fisher_mix)
S3method(print,treeTest)
S3method(summary,treediff)
export(treediff)
importFrom(dplyr,"%>%")
importFrom(dplyr,group_by)
......@@ -9,4 +10,3 @@ importFrom(limma,squeezeVar)
importFrom(reshape2,colsplit)
importFrom(stats,cophenetic)
importFrom(stats,pt)
importFrom(stats,rf)
......@@ -20,8 +20,7 @@
#' @param replicates A numeric vector of length 2 with the number of replicates
#' for each condition.
#' @param scale Logical. If \code{TRUE}, the trees are all rescalled to have a
#' minimum height equal to 0 (translation) and a maximum height equal to 1
#' (dilatation).
#' minimum height equal to 0 and a maximum height equal to 1.
#' @param order_labels Logical. If \code{TRUE}, take a reference tree and keep
#' track of leave order.
#'
......@@ -60,7 +59,6 @@
#' @importFrom stats cophenetic
#' @importFrom stats pt
#'
#'
#' @examples
#' leaves <- c(100, 120, 50, 80)
#'
......@@ -139,7 +137,7 @@ treediff <- function(trees1, trees2, replicates, scale = FALSE,
# Compute cophenetic distance
coph_dist <- sapply(trees, cophenetic, simplify = FALSE)
# normalize
# Normalize
if (scale) {
coph_dist <- normalize_trees(coph_dist)
}
......@@ -151,6 +149,8 @@ treediff <- function(trees1, trees2, replicates, scale = FALSE,
adist <- adist[anorder, anorder]
return(adist[upper.tri(adist)])
}, coph_dist, labels_perm)
# Convert the result to a list of vectors
coph_vect <- lapply(1:ncol(coph_vect), function(col) {
return(as.vector(coph_vect[,col]))
})
......@@ -191,11 +191,17 @@ treediff <- function(trees1, trees2, replicates, scale = FALSE,
#' @export
#' @rdname treediff
print.treeTest <- function(x, prefix = "\t",...) {
print.treeTest <- function(x, ...) {
# print method
cat("\n")
cat(strwrap(x$method, prefix = prefix), sep = "\n")
cat(strwrap(x$method, prefix = "\t"), sep = "\n")
cat("\n")
# Print the name of the data used in the test
cat("data: ", x$data.name, "\n", sep = "")
# print the first 5 p-values
print(colsplit(x$p.value, " ", "p.values :"), max = 5)
}
......@@ -204,9 +210,25 @@ print.treeTest <- function(x, prefix = "\t",...) {
#' @rdname treediff
summary.treediff <- function(object, ...) {
cat("\nSummary\n\n")
cat(" Class : ", class(object),"\n\n")
# Print a header for the summary
cat("\nSummary\n")
# Print the class of the object
cat("\tClass : ", class(object))
cat("\n")
# Print the test output
print(object)
cat("\n")
# Print a summary of the p.value
cat("p.value\n")
print(summary(object$p.value))
cat("\n")
# Print a summary of the `statistic` and `p.value.indiv`
summary(data.frame("statistic" = object$statistic, "p.value.indiv" = object$p.value.indiv))
}
compute_squeeze <- function(dist_coph, replicates) {
......@@ -254,16 +276,19 @@ compute_squeeze <- function(dist_coph, replicates) {
colSums(Reduce(rbind, dist_coph[where_clust])^2)
})
# `data.frame` with the sum of squared values for both groups and the cluster
# number
sum_sq_coph <- data.frame("set1" = Reduce(c, sum_sq_coph_trees1),
"set2" = Reduce(c, sum_sq_coph_trees2),
"cluster" = rep(unique(clusters1), cluster_length))
# Compute the variance using the sum of squared values and the average values
variances <- sum_sq_coph[, 1:2] - sq_average_coph
variances <- rowSums(variances)
variances <- as.vector(variances / (n1 + n2 - 2))
# Calculate squeezed variance
squeezed_var <- squeezeVar(variances, df = n1 + n2 - 2, robust = FALSE)
squeezed_var <- suppressWarnings(squeezeVar(variances, df = n1 + n2 - 2, robust = FALSE))
# Return list of average_coph and squeezed_var
return(list("average_coph" = average_coph, "squeezed_var" = squeezed_var))
......@@ -307,10 +332,19 @@ compute_pvalue <- function(average_coph, squeezed_var, replicates) {
}
normalize_trees <- function(coph_dist) {
# tree_cophs is a list of 'dist' objects as returned by the function cophenetic
# Find the minimum value for each tree and store in all_minima
all_minima <- sapply(coph_dist, min)
# Subtract all_minima from each tree in coph_dist and store in out
out <- mapply("-", coph_dist, all_minima, SIMPLIFY = FALSE)
# Find the maximum value for each tree and store in all_maxima
all_maxima <- sapply(out, max)
# Divide each distance by all_maxima for each tree and store in out
out <- mapply("/", out, all_maxima, SIMPLIFY = FALSE)
# Return coph_dist normalized
return(out)
}
......@@ -2,9 +2,15 @@
% Please edit documentation in R/treediff.R
\name{treediff}
\alias{treediff}
\alias{summary.treediff}
\alias{print.treeTest}
\title{Treediff}
\usage{
treediff(trees1, trees2, replicates)
treediff(trees1, trees2, replicates, scale = FALSE, order_labels = FALSE)
\method{print}{treeTest}(x, ...)
\method{summary}{treediff}(object, ...)
}
\arguments{
\item{trees1}{A list of trees corresponding to the first condition (set).
......@@ -19,6 +25,12 @@ different from that of \code{trees1}.}
\item{replicates}{A numeric vector of length 2 with the number of replicates
for each condition.}
\item{scale}{Logical. If \code{TRUE}, the trees are all rescalled to have a
minimum height equal to 0 and a maximum height equal to 1.}
\item{order_labels}{Logical. If \code{TRUE}, take a reference tree and keep
track of leave order.}
}
\value{
An object of class \code{treeTest} with the following entries:
......@@ -84,7 +96,7 @@ structure between families of trees. \emph{Preprint submitted for
publication}.
}
\author{
Gwendaelle Cardenac\cr
Gwendaëlle Cardenas\cr
Marie Chavent \email{marie.chavent@u-bordeaux.fr}\cr
Sylvain Foissac \email{sylvain.foissac@inrae.fr}\cr
Pierre Neuvial \email{pierre.neuvial@math.univ-toulouse.fr}\cr
......
library("testthat")
library("treediff")
test_check("treediff")
\ No newline at end of file
# 4 clusters
leaves <- c(100,120,50,80)
trees <- sapply(leaves, FUN = function(l){
base_data <- matrix(rnorm(2000), nrow = l, ncol = 200)
# Generate 4 trees from trees1 and trees from trees2 for each number of leaves
trees <- sapply(leaves, FUN = function(leaf){
base_data <- matrix(rnorm(2000), nrow = leaf, ncol = 200)
set1 <- replicate(4, sample(1:100, 50, replace = FALSE))
set2 <- replicate(6, sample(101:200, 50, replace = FALSE))
# Compute hierarchical clustering for each tree in set1
trees1 <- apply(set1, 2, function(asample) {
samples <- base_data[, asample]
out <- hclust(dist(samples), method = "ward.D2")
return(out)
})
# Compute hierarchical clustering for each tree in set2
trees2 <- apply(set2, 2, function(asample) {
samples <- base_data[, asample]
out <- hclust(dist(samples), method = "ward.D2")
......@@ -20,15 +24,109 @@ trees <- sapply(leaves, FUN = function(l){
return(list("trees1" = trees1, "trees2" = trees2))
})
# Unlist the trees
trees1 <- unlist(trees[1,], recursive = FALSE)
trees2 <- unlist(trees[2,], recursive = FALSE)
# Set the number of replicates
replicates = c(4, 6)
test_that("'treediff' works for simple cases", {
# Call the function to be tested
res <- treediff(trees1, trees2, replicates)
# Check the output object has the expected names
expect_named(res, c("method", "data.name", "p.value",
"statistic", "p.value.indiv"))
expect_equal(length(res$statistic), length(res$p.value.indiv))
# Check the output object has the expected class
expect_s3_class(res, "treeTest")
# Check that `p.value` is a numeric value
expect_is(res$p.value, "numeric")
# Check that `statistic` is a numeric value
expect_is(res$statistic, "numeric")
# Check that `p.value.indiv` is a numeric value
expect_is(res$p.value.indiv, "numeric")
# Check that `method` is a character string
expect_is(res$method, "character")
# Check that `data.name` is a character string
expect_is(res$data.name, "character")
# Check the length of `statistic` matches `p.value.indiv`
expect_length(res$statistic, length(res$p.value.indiv))
# Check the length of `p-value` matches the number of clusters
expect_length(res$p.value, 4)
# Check the length `statistic` matches the expected number of pairwise
# comparisons between the clusters
expect_length(res$statistic, sum(choose(leaves, 2)))
})
## Test errors
test_that("Test errors", {
# Test if replicates is numeric vector
expect_error(treediff(trees1 = trees1, trees2 = trees2, replicates = "abc"),
"`replicates` is not a numeric vector")
# Test if replicates is a vector of length 2
expect_error(treediff(trees1 = trees1, trees2 = trees2, replicates = c(1,2,3)),
"`replicates` must be a vector of length 2.")
# Test if output is a list with class treeTest
out <- treediff(trees1, trees2, c(4,6))
expect_true(inherits(out, "treeTest"))
# Test if the number of leaves in each cluster is the same between the two
# sets of trees
trees2[[5]]$order <- c(trees2[[5]]$order, 101)
expect_error(treediff(trees1, trees2, c(4, 6)), "the number of leaves in one or more clusters is different between the two sets of trees.")
})
# compute_squeeze
test_that("Test for compute_squeeze", {
# Create a test data matrix
test_matrix <- matrix(rnorm(24500), nrow = 1225, ncol = 20)
replicates <- c(10,10)
# Call the function
res <- compute_squeeze(test_matrix, replicates)
# Compare the result with the expected value
expect_named(res, c("average_coph", "squeezed_var"))
expect_is(res, "list")
expect_is(res$average_coph, "data.frame")
expect_is(res$squeezed_var, "list")
expect_is(res$squeezed_var$var.post, "numeric")
})
# compute_pvalue
test_that("Test for compute_pvalue", {
# Set up example inputs
test_matrix <- matrix(rnorm(24500), nrow = 1225, ncol = 20)
replicates <- c(10,10)
# Calculate the average and variance of the test matrix
res <- compute_squeeze(test_matrix, replicates)
# Call the function
pvals <- compute_pvalue(res$average_coph, res$squeezed_var, replicates)
expect_named(pvals, c("statistics", "p.value", "cluster", "p"))
# Check that the output is a data frame
expect_is(pvals, "data.frame")
expect_length(pvals$p.value, 1225)
expect_length(pvals, 4)
})
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment