initial commit

e3c33a93 · CARDENAS GWENDAELLE · 06643f58 · e3c33a93 · e3c33a93 · e3c33a93
Commit e3c33a93 authored 2 years ago by CARDENAS GWENDAELLE
--- a/DESCRIPTION
+++ b/DESCRIPTION
 Package: treediff
-Type: Package
-Title: What the package does (short line)
-Version: 1.0
-Date: 2020-08-12
-Author: Who wrote it
-Maintainer: Who to complain to <yourfault@somewhere.net>
-Description: More about what it does (maybe more than one line)
-License: What license is it under?
+Title: What the Package Does (One Line, Title Case)
+Version: 0.0.0.9000
+Authors@R: 
+    person("First", "Last", , "first.last@example.com", role = c("aut", "cre"),
+           comment = c(ORCID = "YOUR-ORCID-ID"))
+Description: What the package does (one paragraph).
+Imports: dplyr, limma, stats
+License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a
+    license
 Encoding: UTF-8
-Imports: ClusterR, mclust
-RoxygenNote: 7.1.1
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.2.3
 Suggests: 
-    testthat
+    knitr,
+    rmarkdown
+VignetteBuilder: knitr
--- a/NAMESPACE
+++ b/NAMESPACE
 # Generated by roxygen2: do not edit by hand

-export(compute_stats)
-export(fisher_mix)
 export(treediff)
+importFrom(dplyr,"%>%")
+importFrom(dplyr,group_by)
+importFrom(dplyr,summarise)
 importFrom(limma,squeezeVar)
 importFrom(stats,cophenetic)
-importFrom(stats,punif)
-importFrom(stats,rf)
+importFrom(stats,na.omit)
+importFrom(stats,pt)
--- a/man/treediff.Rd
+++ b/man/treediff.Rd
@@ -2,97 +2,40 @@
 % Please edit documentation in R/treediff.R
 \name{treediff}
 \alias{treediff}
-\title{Titre}
+\title{Treediff}
 \usage{
-treediff(trees1, trees2, scale = TRUE, threshold.p = 0.1, nsim = 10000)
+treediff(trees1, trees2, replicats)
 }
 \arguments{
-\item{trees1}{List of trees (\code{\link[stats]{hclust}} types) of the first 
-condition}
+\item{trees1}{A list of trees.}

-\item{trees2}{list of trees (\code{\link[stats]{hclust}} types) of the second
-condition}
+\item{trees2}{A list of trees.}

-\item{scale}{Logical. If \code{TRUE} (the default value), the trees are all
-rescalled to have a minimum height equal to 0 (translation) and a maximum
-height equal to 1 (dilatation)}
-
-\item{threshold.p}{proportion threshold for the percentage of cophenetic 
-distance of a given entry across trees above which the entry is not 
-considered. Default to \code{0.1}}
-
-\item{nsim}{number of simulations performed for the estimation of the 
-empirical distribution. Default to \code{10000}}
+\item{replicats}{A numeric vector giving the number of replicates in each group (e.g., \code{c(4, 6)}).}
 }
 \value{
-A list with classes \code{htest} and \code{tree_test} containing the
-following components: \describe{ ## TODO
-  \item{\code{statistics}}{ the value of the observed statistic (Hotelling 
-  based statistics).}
-  \item{\code{parameter}}{ the rank of the observed statistic.}
-  \item{\code{p.value}}{ the pseudo p-value of the test.}
-  \item{\code{p.value_Simes}}{the p-value of the test obtained by Simes aggregation 
-  of univariate p-values}
-  \item{\code{alternative}}{ a character string describing the alternative 
-  hypothesis (always \code{"greater"} here).}
-  \item{\code{method}}{ a character string giving the method used.}
-  \item{\code{data.name}}{ a character string giving the name(s) of the data,
-  and the number of simulations.}
-  \item{\code{res}}{ a list with components \code{simulations} (\code{nsim}
-  simulated values of statistic, final value is observed statistic),
-  \code{cophenetics} (the cophenetic distances of the input trees),
-  \code{truncated_cophenetics} (the selected cophenetic distances after 
-  truncation) and \code{truncated_indices} (the indices of the entries 
-  corresponding to selected cophenetic distances after truncation),
-  \code{p} (the effective number of pairs of leaves after filtering),
-  \code{d0} (the additional degrees of freedom for the moderated test statistic),
-  \code{d} (the initial degrees of freedom for the moderated test statistic),
-  \code{Tk} (the vector of p test statistics before aggregation),
-  \code{pk} (the  vector of p p-values corresponding to Tk)}
-}
+A data frame with statistics and p-values.
 }
 \description{
-Description
-}
-\details{
-Long description (several lines)
-}
-\note{
-The returned p-value is based on the rank of the observed statistics
-when compared to all simulated values obtained from the theoretical 
-distribution. For \code{nsim = 99}, for instance, it can not be smaller than
-0.01.
+The treediff function tests for differences between two sets of trees (\code{trees1} and \code{trees2}) and returns the associated statistics and p-values.
 }
 \examples{
- 
-set.seed(12081238) # for reproducibility
-# data: generated from a normal distribution with nothing special
+
 base_data <- matrix(rnorm(20000), nrow = 100, ncol = 200)

-# then create two groups by simply subsampling the columns differently
-group1 <- replicate(20, sample(1:100, 50, replace = FALSE))
-group2 <- replicate(20, sample(101:200, 50, replace = FALSE))
-conditions <- factor(rep(c(1, 2), each = 20))
+group1 <- replicate(16, sample(1:100, 50, replace = FALSE))
+group2 <- replicate(24, sample(101:200, 50, replace = FALSE))

-# obtain hclust for the two groups
 trees <- apply(cbind(group1, group2), 2, function(asample) {
-samples <- base_data[ ,asample]
+  samples <- base_data[ ,asample]
  out <- hclust(dist(samples), method = "ward.D2")
-    return(out)
+return(out)
 })
+trees1 <- trees[1:16]
+trees2 <- trees[17:40]

-trees1 <- trees[1:20]
-trees2 <- trees[21:40]
-treediff(trees1, trees2, nsim = 1e5)$p.value
+replicats <- c(4,6)

-# with a weaker threshold
-treediff(trees1, trees2, threshold.p = 0.2, nsim = 1e5)
-}
-\references{
-TODO
-}
-\seealso{
-\code{\link{fisher_mix}} to obtain the empirical distribution under 
-the null hypothesis and \code{\link{compute_stats}} for intermediate 
-computation of statistics
+tree_pvals <- treediff(trees1, trees2, replicats)
+tree_pvals$p.value
 }