From ea80e69bf7f668a55270261dbd387e2066fd13d9 Mon Sep 17 00:00:00 2001
From: gcardenas <gwendaelle.cardenas@inrae.fr>
Date: Wed, 3 May 2023 17:02:10 +0200
Subject: [PATCH] add tests

---
 R/HiC2Tree.R                   |  12 ++--
 man/HiC2Tree.Rd                |  26 +++++++
 man/HiCDOCDataSet.Rd           |  26 +++++++
 man/clusterTree.Rd             |  16 +++++
 man/normalizeCount.Rd          |  15 +++++
 tests/testthat/test-HiC2Tree.R | 119 ++++++++++++++++++++++++---------
 6 files changed, 176 insertions(+), 38 deletions(-)

diff --git a/R/HiC2Tree.R b/R/HiC2Tree.R
index 3dc738e..b89e188 100644
--- a/R/HiC2Tree.R
+++ b/R/HiC2Tree.R
@@ -26,7 +26,7 @@
 #'   \item{testRes}{ A list of treediff results for each cluster.}
 #' }
 #'
-#' @example
+#' @examples
 #' dd <- "../data/"
 #' replicat <- 1:3
 #' cond <- c("90", "110")
@@ -130,7 +130,7 @@ HiC2Tree <- function(files, format, binsize = NULL, index = NULL,
 #' @importFrom purrr reduce
 #' @importFrom SummarizedExperiment assay
 #'
-#' @example
+#' @examples
 #' dd <- "../data/"
 #' replicat <- 1:3
 #' cond <- c("90", "110")
@@ -154,7 +154,7 @@ HiC2Tree <- function(files, format, binsize = NULL, index = NULL,
 #' files <- unlist(all_mat_chr)
 #' replicates <- c(3,3)
 #'
-#' DataFrameSet(files, format, binsize, chromosomes, index)
+#' HiCDOCDataSet(files, format, binsize, chromosomes, index)
 #'
 #' @export
 
@@ -261,7 +261,7 @@ HiCDOCDataSet <- function(files, format, binsize = NULL, chromosomes,
 #'
 #' @export
 #'
-#' @example
+#' @examples
 #' nb_row <- 120
 #' chromosome <- rep(1, nb_row)
 #' index1 <- sample(1:100, nb_row, replace = TRUE)
@@ -391,7 +391,7 @@ normalizeCount.HiCDOCDataSetList <- function(count_matrice){
 #'
 #' @export
 #'
-#' @example
+#' @examples
 #' n <- 5
 #'
 #' matrice <- matrix(runif(n*n), nrow = n, ncol = n)
@@ -403,7 +403,7 @@ normalizeCount.HiCDOCDataSetList <- function(count_matrice){
 #' index1 <- sample(1:100, n, replace = TRUE)
 #' index2 <- sample(1:100, n, replace = TRUE)
 #'
-#' mat <- cbind(chromosome, index1, index2, matrice_symetrique)
+#' mat <- cbind(chromosome, index1, index2, matrice)
 #'
 #' res <- clusterTree(mat)
 clusterTree <- function(mat){
diff --git a/man/HiC2Tree.Rd b/man/HiC2Tree.Rd
index e3bade9..20439c0 100644
--- a/man/HiC2Tree.Rd
+++ b/man/HiC2Tree.Rd
@@ -40,3 +40,29 @@ data, the bins size of the Hi-C array, the chromosomes to be included in the
 analysis, and the number of replicates. It returns a list containing all
 trees, metadata, index and treediff results.
 }
+\examples{
+dd <- "../data/"
+replicat <- 1:3
+cond <- c("90", "110")
+
+all_begins <- interaction(expand.grid(replicat, cond), sep = "-")
+all_begins <- as.character(all_begins)
+
+nb_chr <- 2
+chromosomes <- 1:nb_chr
+all_mat_chr <- lapply(chromosomes, function(chr){
+  all_mat <- lapply(all_begins, function(ab) {
+    mat_file <- paste0(dd, "Rep", ab, "-chr", chr, "_200000.bed")
+  })
+  all_mat <- unlist(all_mat)
+})
+
+index <- paste0(dd, "index.200000.longest18chr.abs.bed")
+format <- rep(rep("HiC-Pro", 6), nb_chr)
+binsize <- 200000
+files <- unlist(all_mat_chr)
+replicates <- c(3,3)
+
+HiC2Tree(files, format, binsize, index, chromosomes, replicates)
+
+}
diff --git a/man/HiCDOCDataSet.Rd b/man/HiCDOCDataSet.Rd
index b9fd020..854eaf1 100644
--- a/man/HiCDOCDataSet.Rd
+++ b/man/HiCDOCDataSet.Rd
@@ -38,3 +38,29 @@ This function creates a count matrix from a set of files
 in different formats, such as tabular, cooler, juicer or HiC-Pro. It returns
 a list of interaction matrices.
 }
+\examples{
+dd <- "../data/"
+replicat <- 1:3
+cond <- c("90", "110")
+
+all_begins <- interaction(expand.grid(replicat, cond), sep = "-")
+all_begins <- as.character(all_begins)
+
+nb_chr <- 2
+chromosomes <- 1:nb_chr
+all_mat_chr <- lapply(chromosomes, function(chr) {
+  all_mat <- lapply(all_begins, function(ab) {
+    mat_file <- paste0(dd, "Rep", ab, "-chr", chr, "_200000.bed")
+  })
+  all_mat <- unlist(all_mat)
+})
+
+index <- paste0(dd, "index.200000.longest18chr.abs.bed")
+format <- rep(rep("HiC-Pro", 6), nb_chr)
+binsize <- 200000
+files <- unlist(all_mat_chr)
+replicates <- c(3,3)
+
+HiCDOCDataSet(files, format, binsize, chromosomes, index)
+
+}
diff --git a/man/clusterTree.Rd b/man/clusterTree.Rd
index 80fa7e2..47b6d15 100644
--- a/man/clusterTree.Rd
+++ b/man/clusterTree.Rd
@@ -25,3 +25,19 @@ This function creates a hierarchical clustering tree for each
 group in a given matrix. The function breaks the chromosome into clusters
 using the "broken stick" method and then converts the clusters into trees.
 }
+\examples{
+n <- 5
+
+matrice <- matrix(runif(n*n), nrow = n, ncol = n)
+matrice[lower.tri(matrice)] <- t(matrice)[lower.tri(matrice)]
+matrice <- as.data.frame(matrice)
+names(matrice) <- c("mat_1", "mat_2", "mat_3", "mat_4", "mat_5")
+
+chromosome <- rep(1, n)
+index1 <- sample(1:100, n, replace = TRUE)
+index2 <- sample(1:100, n, replace = TRUE)
+
+mat <- cbind(chromosome, index1, index2, matrice)
+
+res <- clusterTree(mat)
+}
diff --git a/man/normalizeCount.Rd b/man/normalizeCount.Rd
index 1e2453a..63ac28a 100644
--- a/man/normalizeCount.Rd
+++ b/man/normalizeCount.Rd
@@ -18,3 +18,18 @@ normalizeCount(count_matrice)
 This function normalizes the count matrix using loess
 regression.
 }
+\examples{
+nb_row <- 120
+chromosome <- rep(1, nb_row)
+index1 <- sample(1:100, nb_row, replace = TRUE)
+index2 <- sample(1:100, nb_row, replace = TRUE)
+
+m <- data.frame("mat_1" =  sample(1:500, nb_row, replace = TRUE),
+                "mat_2" =  sample(1:500, nb_row, replace = TRUE),
+                "mat_3" =  sample(1:500, nb_row, replace = TRUE),
+                "mat_4" =  sample(1:500, nb_row, replace = TRUE))
+
+mat <- cbind(chromosome, index1, index2, m)
+
+normalizeCount(mat)
+}
diff --git a/tests/testthat/test-HiC2Tree.R b/tests/testthat/test-HiC2Tree.R
index 10f81d8..f72c9b8 100644
--- a/tests/testthat/test-HiC2Tree.R
+++ b/tests/testthat/test-HiC2Tree.R
@@ -1,4 +1,4 @@
-dd <- "path"
+dd <- "../../data"
 replicat <- 1:3
 cond <- c("90", "110")
 
@@ -54,58 +54,56 @@ test_that("Test errors", {
 
   files_test <- NULL
   # Test if `files` and `format` is correctly filled in
-  expect_error(HiC2Tree(files_test, format, binsize, index, chromosomes),
-               "`files` or `format` is incorrectly filled in.")
+  expect_error(HiC2Tree(files_test, format, binsize, index, chromosomes,
+  replicates),"`files`, `chromosomes` or `replicates` is incorrectly filled in.")
 
   binsize_test <- "test"
   # Test if `binzise` is a numeric vector
-  expect_error(HiC2Tree(files, format, binsize_test, index, chromosomes),
-               "`binsize` is not a numeric vector.")
+  expect_error(HiC2Tree(files, format, binsize_test, index, chromosomes,
+                        replicates), "`binsize` is not a numeric vector.")
 
   format_test <- format
   format_test[4] <- "test"
 
   # Test if `format` is a correctly filled in
-  expect_error(HiC2Tree(files, format_test, binsize, index, chromosomes),
-               "The `format` vector is incorrectly filled in.")
+  expect_error(HiC2Tree(files, format_test, binsize, index, chromosomes,
+                  replicates), "The `format` vector is incorrectly filled in.")
 
   index_test <- NULL
   # Test if `index` is not empty
-  expect_error(HiC2Tree(files, format, binsize, index_test, chromosomes),
-               "`index` is empty.")
+  expect_error(HiC2Tree(files, format, binsize, index_test, chromosomes,
+                        replicates), "`index` is empty.")
 
   binsize_test <- NULL
   # Test if `binsize` is not empty
-  expect_error(HiC2Tree(files, format, binsize_test, index, chromosomes),
-               "`binsize` is empty.")
+  expect_error(HiC2Tree(files, format, binsize_test, index, chromosomes,
+                        replicates), "`binsize` is empty.")
 })
 
 data <- HiCDOCDataSet(files, format, binsize, chromosomes, index)
-norm_count <- lapply(data$interactionMat, normalizeCount)
-resTree <- lapply(norm_count, clusterTree)
-
-trees1_1 <- lapply(seq_along(resTree[[1]]), function(i){
-  list(resTree[[1]][[i]]$mat_1, resTree[[1]][[i]]$mat_2, resTree[[1]][[i]]$mat_3)
-})
-trees1_2 <- lapply(seq_along(resTree[[2]]),function(i){
-  list(resTree[[2]][[i]]$mat_7, resTree[[2]][[i]]$mat_8, resTree[[2]][[i]]$mat_9)
-})
-trees1 <- unlist(c(trees1_1, trees1_2), recursive = FALSE)
-
-trees2_1 <- lapply(seq_along(resTree[[1]]), function(i){
-  list(resTree[[1]][[i]]$mat_4, resTree[[1]][[i]]$mat_5, resTree[[1]][[i]]$mat_6)
-})
-trees2_2 <- lapply(seq_along(resTree[[2]]),function(i){
-  list(resTree[[2]][[i]]$mat_10, resTree[[2]][[i]]$mat_11, resTree[[2]][[i]]$mat_12)
-})
-trees2 <- unlist(c(trees2_1, trees2_2), recursive = FALSE)
+norm_count <- normalizeCount(data$HiCDOCDataSet)
+resTree <- clusterTree(norm_count)
+tail(resTree$metadata)
+
+trees1 <- resTree$trees[which(resTree$metadata$mat == "mat_1" |
+                              resTree$metadata$mat == "mat_2" |
+                              resTree$metadata$mat == "mat_3" |
+                              resTree$metadata$mat == "mat_7" |
+                              resTree$metadata$mat == "mat_8" |
+                              resTree$metadata$mat ==  "mat_9")]
+trees2 <- resTree$trees[which(resTree$metadata$mat == "mat_4" |
+                              resTree$metadata$mat == "mat_5" |
+                              resTree$metadata$mat == "mat_6" |
+                              resTree$metadata$mat == "mat_10" |
+                              resTree$metadata$mat == "mat_11" |
+                              resTree$metadata$mat == "mat_12")]
 
 resTest <- treediff(trees1, trees2, replicates)
 
-test_that("'HiCDOCDataSet' works for simple cases", {
+test_that("Comparison HiC2Tree and sub-functions works for simple cases", {
 
   # Check the output object has the expected names
-  expect_named(data, c("interactionMat", "indexData", "index_mat_chr"))
+  expect_named(data, c("HiCDOCDataSet", "indexData", "index_mat_chr"))
 
   # Check that the column names of the `norm_count` data frame are as expected
   expect_equal(colnames(norm_count[[1]]), c("chromosome", "index1", "index2",
@@ -119,7 +117,7 @@ test_that("'HiCDOCDataSet' works for simple cases", {
 
   # Check that the `mat_1` element of the first element of `resTree` is an
   # "hclust" object
-  expect_is(resTree[[1]][[1]]$mat_1, "hclust")
+  expect_is(resTree$trees$tree1, "hclust")
 
   # Check that `resTest` is of class "treeTest"
   expect_s3_class(resTest, "treeTest")
@@ -131,3 +129,60 @@ test_that("'HiCDOCDataSet' works for simple cases", {
   # rows in `res$metadata`
   expect_length(c(trees1, trees2), nrow(res$metadata))
 })
+
+test_that("HiCDOCDataSet function", {
+
+  # Check the output object has the expected names
+  expect_named(data, c("HiCDOCDataSet", "indexData", "index_mat_chr"))
+
+  # Check the output object has the expected class
+  expect_s3_class(data$HiCDOCDataSet, "HiCDOCDataSetList")
+  expect_s3_class(data$indexData, "data.frame")
+  expect_s3_class(data$index_mat_chr, "data.frame")
+})
+
+test_that("normalizeCount function", {
+
+  # Check the output object has the expected class
+  expect_type(norm_count, "list")
+  expect_length(norm_count, length(chromosomes))
+})
+
+test_that("clusterTree function", {
+
+  # Check the output object has the expected class
+  expect_type(resTree, "list")
+
+  # Check the output object has the expected names
+  expect_named(resTree, c("trees", "metadata"))
+  expect_length(resTree$trees, nrow(resTree$metadata))
+})
+
+test_that("create_cluster function", {
+
+  res <- create_cluster(norm_count[[1]])
+  # Check the output object has the expected class
+  expect_type(res, "list")
+
+  cluster <- unique(res)
+  expect_true(length(cluster$merged_clust) > 1)
+})
+
+test_that("create_trees function", {
+
+  cluster <- create_cluster(norm_count[[1]])
+
+  # Get the indices of the rows and columns in the cluster
+  selected <- rownames(cluster)[cluster == 1]
+
+  # Select the sub-matrix
+  red_mat <- ((norm_count[[1]]$index1 %in% selected) & (norm_count[[1]]$index2 %in% selected))
+  red_mat <- norm_count[[1]][red_mat, ]
+
+  res <- create_trees(red_mat, selected)
+  # Check the output object has the expected class
+  expect_type(res, "list")
+  expect_s3_class(res$mat_1, "hclust")
+
+  expect_length(selected, length(res$mat_1$order))
+})
-- 
GitLab