Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
treediff
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Admin message
A compter du 1er avril, attention à vos pipelines :
Nouvelles limitations de Docker Hub
Show more breadcrumbs
SCALES
treediff
Commits
b14fdc1a
Commit
b14fdc1a
authored
2 years ago
by
CARDENAS GWENDAELLE
Browse files
Options
Downloads
Patches
Plain Diff
comment corrections
parent
4691eaae
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
R/treediff.R
+47
-47
47 additions, 47 deletions
R/treediff.R
man/treediff.Rd
+17
-7
17 additions, 7 deletions
man/treediff.Rd
with
64 additions
and
54 deletions
R/treediff.R
+
47
−
47
View file @
b14fdc1a
#' Treediff
#'
@title
Treediff
#'
#'
The treediff function calculates the difference between two
trees.
#'
@description Comparison of two sets of
trees.
#'
#' @param trees1 A list of trees.
#' @details This function compares two sets of trees using a p-value aggregation
#' method. The p-values are obtained by performing a student test on the averaged
#' cophenetic distances of the two sets of trees.
#'
#' @param trees1 A list of trees .
#' @param trees2 A list of trees.
#' @param replicats A vector of the number of replicates for each group.
#' @param replicates A vector of the number of replicates for each group.
#'
#' @return A list with class "htest":
#' \item{p.value}{the p-value for the tree test.}
#' \item{statistic}{the value of the statistic of each leaf peer of the tree test.}
#' \item{p.value.indiv}{the p-value for each leaf peer of the tree test.}
#' \item{method}{a character string indicating what type of test was performed.}
#' \item{data.name}{a character string giving the names of the tree subset}
#'
#' @author
#'
#' @references
#'
#' @return A data frame with statistics and p-values.
#' @export
#'
#' @importFrom dplyr %>%
...
...
@@ -32,29 +46,23 @@
#' trees1 <- trees[1:16]
#' trees2 <- trees[17:40]
#'
#' replicats <- c(4,6)
#' replicat
e
s <- c(4,6)
#'
#' tree_pvals <- treediff(trees1, trees2, replicats)
#' tree_pvals <- treediff(trees1, trees2, replicat
e
s)
#' tree_pvals$p.value
treediff
<-
function
(
trees1
,
trees2
,
replicats
){
# Check if the length of replicats is 2
if
(
length
(
replicats
)
!=
2
){
stop
(
"Incorrect replicats size, `replicats` must be a vector of 2 elements"
)
treediff
<-
function
(
trees1
,
trees2
,
replicates
){
# Check if the length of replicates is 2
if
(
length
(
replicates
)
!=
2
){
stop
(
"Incorrect replicates size, `replicates` must be a vector of 2 elements"
)
}
# Check if the number of clusters is equal for both conditions
if
(
length
(
trees1
)
/
replicats
[
1
]
!=
length
(
trees2
)
/
replicats
[
2
]){
stop
(
"The number of clusters is different between conditions or `replicats' is uncorrect"
)
if
(
length
(
trees1
)
/
replicat
e
s
[
1
]
!=
length
(
trees2
)
/
replicat
e
s
[
2
]){
stop
(
"The number of clusters is different between conditions or `replicat
e
s' is uncorrect"
)
}
# # Extract the number of pair of the trees
# tree_order <- lapply(trees1, "[[", "order")
# leaves <- sapply(tree_order, length)
#
# # Get unique values of the number of leaves
# leaves <- unique(leaves)
# Merge trees from both conditions
trees
<-
c
(
trees1
,
trees2
)
...
...
@@ -67,31 +75,23 @@ treediff <- function(trees1, trees2, replicats){
return
(
adist
[
upper.tri
(
adist
)])
})
# # Fill the cophenetic vector with NA if the number of leaves is different from the max.
# if (length(leaves) != 1){
# len_max <- choose(max(leaves), 2)
#
# coph_vect <- sapply(coph_vect, FUN = function(col) {
# c(col, rep(NA, len_max - length(col)))
# })
# }
# Compute squeeze factor
outs
<-
compute_squeeze
(
coph_vect
,
replicats
)
outs
<-
compute_squeeze
(
coph_vect
,
replicat
e
s
)
# Compute p-values
outp
<-
compute_pvalue
(
outs
$
average_coph
,
outs
$
squeezed_var
,
replicats
)
outp
<-
compute_pvalue
(
outs
$
average_coph
,
outs
$
squeezed_var
,
replicat
e
s
)
# Aggregate p-values
out_aggr
<-
outp
%>%
group_by
(
cluster
)
%>%
summarise
(
"p.value"
=
min
(
sort
(
p.value
)
/
(
1
:
p
))
*
p
)
%>%
unique
()
summarise
(
"p.value"
=
min
(
sort
(
p.value
)
/
(
1
:
p
))
*
p
)
%>%
unique
()
# Store results in a list
out
<-
list
(
"method"
=
"Tree test based on t-test"
,
"data.name"
=
paste
(
substitute
(
trees1
),
"and"
,
substitute
(
trees2
)),
"p.value"
=
out_aggr
$
p.value
,
"res"
=
list
(
"statistic"
=
outp
$
statistics
,
"p.value.indiv"
=
outp
$
p.value
)
)
"statistic"
=
outp
$
statistics
,
"p.value.indiv"
=
outp
$
p.value
)
# Assign class to the list
class
(
out
)
<-
"htest"
...
...
@@ -100,9 +100,9 @@ treediff <- function(trees1, trees2, replicats){
return
(
out
)
}
compute_squeeze
<-
function
(
dist_coph
,
replicats
){
compute_squeeze
<-
function
(
dist_coph
,
replicat
e
s
){
if
(
clas
s
(
dist_coph
)
==
"list"
){
if
(
inherit
s
(
dist_coph
,
"list"
)
==
TRUE
)
{
max_pr
<-
as.numeric
(
max
(
summary
(
dist_coph
)[,
1
]))
dist_coph
<-
sapply
(
dist_coph
,
FUN
=
function
(
col
)
{
c
(
col
,
rep
(
NA
,
max_pr
-
length
(
col
)))
...
...
@@ -110,14 +110,14 @@ compute_squeeze <- function(dist_coph, replicats){
}
# Calculate number of clusters
nb_cluster
<-
ncol
(
dist_coph
)
/
sum
(
replicats
)
nb_cluster
<-
ncol
(
dist_coph
)
/
sum
(
replicat
e
s
)
groups1
<-
rep
(
1
:
nb_cluster
,
each
=
replicats
[
1
])
groups2
<-
rep
(
1
:
nb_cluster
,
each
=
replicats
[
2
])
groups1
<-
rep
(
1
:
nb_cluster
,
each
=
replicat
e
s
[
1
])
groups2
<-
rep
(
1
:
nb_cluster
,
each
=
replicat
e
s
[
2
])
# Store replicats values for each group
n1
<-
replicats
[
1
]
n2
<-
replicats
[
2
]
# Store replicat
e
s values for each group
n1
<-
replicat
e
s
[
1
]
n2
<-
replicat
e
s
[
2
]
# Indices for each group
col1
<-
1
:
length
(
groups1
)
...
...
@@ -143,7 +143,7 @@ compute_squeeze <- function(dist_coph, replicats){
average_coph_woNA
<-
na.omit
(
average_coph
)
# Calculate variance
sq_average_coph
<-
sweep
(
average_coph_woNA
[
-3
]
^
2
,
2
,
replicats
,
"*"
)
sq_average_coph
<-
sweep
(
average_coph_woNA
[
-3
]
^
2
,
2
,
replicat
e
s
,
"*"
)
# Sum of squared values for each group
sum_sq_coph_trees1
<-
unlist
(
by
(
t
(
dist_coph
[,
col1
])
^
2
,
groups1
,
colSums
))
...
...
@@ -162,7 +162,7 @@ compute_squeeze <- function(dist_coph, replicats){
"squeezed_var"
=
squeezed_var
))
}
compute_pvalue
<-
function
(
average_coph
,
squeezed_var
,
replicats
){
compute_pvalue
<-
function
(
average_coph
,
squeezed_var
,
replicat
e
s
){
# Extract the cluster information from the average_coph data frame
cluster
<-
average_coph
$
cluster
...
...
@@ -170,9 +170,9 @@ compute_pvalue <- function(average_coph, squeezed_var, replicats){
# Calculate the numerator for the t-statistic
numerator
<-
Reduce
(
"-"
,
average_coph
[,
-3
])
# Store replicats values for each group
n1
<-
replicats
[
1
]
n2
<-
replicats
[
2
]
# Store replicat
e
s values for each group
n1
<-
replicat
e
s
[
1
]
n2
<-
replicat
e
s
[
2
]
# Calculate the denominator for the t-statistic
denominator
<-
squeezed_var
$
var.post
*
(
n1
+
n2
)
/
(
n1
*
n2
)
...
...
This diff is collapsed.
Click to expand it.
man/treediff.Rd
+
17
−
7
View file @
b14fdc1a
...
...
@@ -4,20 +4,30 @@
\alias{treediff}
\title{Treediff}
\usage{
treediff(trees1, trees2, replicats)
treediff(trees1, trees2, replicat
e
s)
}
\arguments{
\item{trees1}{A list of trees.}
\item{trees1}{A list of trees
.}
\item{trees2}{A list of trees.}
\item{replicats}{A vector of the number of replicates for each group.}
\item{replicat
e
s}{A vector of the number of replicates for each group.}
}
\value{
A data frame with statistics and p-values.
A list with class "htest":
\item{p.value}{the p-value for the tree test.}
\item{statistic}{the value of the statistic of each leaf peer of the tree test.}
\item{p.value.indiv}{the p-value for each leaf peer of the tree test.}
\item{method}{a character string indicating what type of test was performed.}
\item{data.name}{a character string giving the names of the tree subset}
}
\description{
The treediff function calculates the difference between two trees.
Comparison of two sets of trees.
}
\details{
This function compares two sets of trees using a p-value aggregation
method. The p-values are obtained by performing a student test on the averaged
cophenetic distances of the two sets of trees.
}
\examples{
...
...
@@ -34,8 +44,8 @@ return(out)
trees1 <- trees[1:16]
trees2 <- trees[17:40]
replicats <- c(4,6)
replicat
e
s <- c(4,6)
tree_pvals <- treediff(trees1, trees2, replicats)
tree_pvals <- treediff(trees1, trees2, replicat
e
s)
tree_pvals$p.value
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment