From ea80e69bf7f668a55270261dbd387e2066fd13d9 Mon Sep 17 00:00:00 2001 From: gcardenas <gwendaelle.cardenas@inrae.fr> Date: Wed, 3 May 2023 17:02:10 +0200 Subject: [PATCH] add tests --- R/HiC2Tree.R | 12 ++-- man/HiC2Tree.Rd | 26 +++++++ man/HiCDOCDataSet.Rd | 26 +++++++ man/clusterTree.Rd | 16 +++++ man/normalizeCount.Rd | 15 +++++ tests/testthat/test-HiC2Tree.R | 119 ++++++++++++++++++++++++--------- 6 files changed, 176 insertions(+), 38 deletions(-) diff --git a/R/HiC2Tree.R b/R/HiC2Tree.R index 3dc738e..b89e188 100644 --- a/R/HiC2Tree.R +++ b/R/HiC2Tree.R @@ -26,7 +26,7 @@ #' \item{testRes}{ A list of treediff results for each cluster.} #' } #' -#' @example +#' @examples #' dd <- "../data/" #' replicat <- 1:3 #' cond <- c("90", "110") @@ -130,7 +130,7 @@ HiC2Tree <- function(files, format, binsize = NULL, index = NULL, #' @importFrom purrr reduce #' @importFrom SummarizedExperiment assay #' -#' @example +#' @examples #' dd <- "../data/" #' replicat <- 1:3 #' cond <- c("90", "110") @@ -154,7 +154,7 @@ HiC2Tree <- function(files, format, binsize = NULL, index = NULL, #' files <- unlist(all_mat_chr) #' replicates <- c(3,3) #' -#' DataFrameSet(files, format, binsize, chromosomes, index) +#' HiCDOCDataSet(files, format, binsize, chromosomes, index) #' #' @export @@ -261,7 +261,7 @@ HiCDOCDataSet <- function(files, format, binsize = NULL, chromosomes, #' #' @export #' -#' @example +#' @examples #' nb_row <- 120 #' chromosome <- rep(1, nb_row) #' index1 <- sample(1:100, nb_row, replace = TRUE) @@ -391,7 +391,7 @@ normalizeCount.HiCDOCDataSetList <- function(count_matrice){ #' #' @export #' -#' @example +#' @examples #' n <- 5 #' #' matrice <- matrix(runif(n*n), nrow = n, ncol = n) @@ -403,7 +403,7 @@ normalizeCount.HiCDOCDataSetList <- function(count_matrice){ #' index1 <- sample(1:100, n, replace = TRUE) #' index2 <- sample(1:100, n, replace = TRUE) #' -#' mat <- cbind(chromosome, index1, index2, matrice_symetrique) +#' mat <- cbind(chromosome, index1, index2, matrice) #' #' res <- clusterTree(mat) clusterTree <- function(mat){ diff --git a/man/HiC2Tree.Rd b/man/HiC2Tree.Rd index e3bade9..20439c0 100644 --- a/man/HiC2Tree.Rd +++ b/man/HiC2Tree.Rd @@ -40,3 +40,29 @@ data, the bins size of the Hi-C array, the chromosomes to be included in the analysis, and the number of replicates. It returns a list containing all trees, metadata, index and treediff results. } +\examples{ +dd <- "../data/" +replicat <- 1:3 +cond <- c("90", "110") + +all_begins <- interaction(expand.grid(replicat, cond), sep = "-") +all_begins <- as.character(all_begins) + +nb_chr <- 2 +chromosomes <- 1:nb_chr +all_mat_chr <- lapply(chromosomes, function(chr){ + all_mat <- lapply(all_begins, function(ab) { + mat_file <- paste0(dd, "Rep", ab, "-chr", chr, "_200000.bed") + }) + all_mat <- unlist(all_mat) +}) + +index <- paste0(dd, "index.200000.longest18chr.abs.bed") +format <- rep(rep("HiC-Pro", 6), nb_chr) +binsize <- 200000 +files <- unlist(all_mat_chr) +replicates <- c(3,3) + +HiC2Tree(files, format, binsize, index, chromosomes, replicates) + +} diff --git a/man/HiCDOCDataSet.Rd b/man/HiCDOCDataSet.Rd index b9fd020..854eaf1 100644 --- a/man/HiCDOCDataSet.Rd +++ b/man/HiCDOCDataSet.Rd @@ -38,3 +38,29 @@ This function creates a count matrix from a set of files in different formats, such as tabular, cooler, juicer or HiC-Pro. It returns a list of interaction matrices. } +\examples{ +dd <- "../data/" +replicat <- 1:3 +cond <- c("90", "110") + +all_begins <- interaction(expand.grid(replicat, cond), sep = "-") +all_begins <- as.character(all_begins) + +nb_chr <- 2 +chromosomes <- 1:nb_chr +all_mat_chr <- lapply(chromosomes, function(chr) { + all_mat <- lapply(all_begins, function(ab) { + mat_file <- paste0(dd, "Rep", ab, "-chr", chr, "_200000.bed") + }) + all_mat <- unlist(all_mat) +}) + +index <- paste0(dd, "index.200000.longest18chr.abs.bed") +format <- rep(rep("HiC-Pro", 6), nb_chr) +binsize <- 200000 +files <- unlist(all_mat_chr) +replicates <- c(3,3) + +HiCDOCDataSet(files, format, binsize, chromosomes, index) + +} diff --git a/man/clusterTree.Rd b/man/clusterTree.Rd index 80fa7e2..47b6d15 100644 --- a/man/clusterTree.Rd +++ b/man/clusterTree.Rd @@ -25,3 +25,19 @@ This function creates a hierarchical clustering tree for each group in a given matrix. The function breaks the chromosome into clusters using the "broken stick" method and then converts the clusters into trees. } +\examples{ +n <- 5 + +matrice <- matrix(runif(n*n), nrow = n, ncol = n) +matrice[lower.tri(matrice)] <- t(matrice)[lower.tri(matrice)] +matrice <- as.data.frame(matrice) +names(matrice) <- c("mat_1", "mat_2", "mat_3", "mat_4", "mat_5") + +chromosome <- rep(1, n) +index1 <- sample(1:100, n, replace = TRUE) +index2 <- sample(1:100, n, replace = TRUE) + +mat <- cbind(chromosome, index1, index2, matrice) + +res <- clusterTree(mat) +} diff --git a/man/normalizeCount.Rd b/man/normalizeCount.Rd index 1e2453a..63ac28a 100644 --- a/man/normalizeCount.Rd +++ b/man/normalizeCount.Rd @@ -18,3 +18,18 @@ normalizeCount(count_matrice) This function normalizes the count matrix using loess regression. } +\examples{ +nb_row <- 120 +chromosome <- rep(1, nb_row) +index1 <- sample(1:100, nb_row, replace = TRUE) +index2 <- sample(1:100, nb_row, replace = TRUE) + +m <- data.frame("mat_1" = sample(1:500, nb_row, replace = TRUE), + "mat_2" = sample(1:500, nb_row, replace = TRUE), + "mat_3" = sample(1:500, nb_row, replace = TRUE), + "mat_4" = sample(1:500, nb_row, replace = TRUE)) + +mat <- cbind(chromosome, index1, index2, m) + +normalizeCount(mat) +} diff --git a/tests/testthat/test-HiC2Tree.R b/tests/testthat/test-HiC2Tree.R index 10f81d8..f72c9b8 100644 --- a/tests/testthat/test-HiC2Tree.R +++ b/tests/testthat/test-HiC2Tree.R @@ -1,4 +1,4 @@ -dd <- "path" +dd <- "../../data" replicat <- 1:3 cond <- c("90", "110") @@ -54,58 +54,56 @@ test_that("Test errors", { files_test <- NULL # Test if `files` and `format` is correctly filled in - expect_error(HiC2Tree(files_test, format, binsize, index, chromosomes), - "`files` or `format` is incorrectly filled in.") + expect_error(HiC2Tree(files_test, format, binsize, index, chromosomes, + replicates),"`files`, `chromosomes` or `replicates` is incorrectly filled in.") binsize_test <- "test" # Test if `binzise` is a numeric vector - expect_error(HiC2Tree(files, format, binsize_test, index, chromosomes), - "`binsize` is not a numeric vector.") + expect_error(HiC2Tree(files, format, binsize_test, index, chromosomes, + replicates), "`binsize` is not a numeric vector.") format_test <- format format_test[4] <- "test" # Test if `format` is a correctly filled in - expect_error(HiC2Tree(files, format_test, binsize, index, chromosomes), - "The `format` vector is incorrectly filled in.") + expect_error(HiC2Tree(files, format_test, binsize, index, chromosomes, + replicates), "The `format` vector is incorrectly filled in.") index_test <- NULL # Test if `index` is not empty - expect_error(HiC2Tree(files, format, binsize, index_test, chromosomes), - "`index` is empty.") + expect_error(HiC2Tree(files, format, binsize, index_test, chromosomes, + replicates), "`index` is empty.") binsize_test <- NULL # Test if `binsize` is not empty - expect_error(HiC2Tree(files, format, binsize_test, index, chromosomes), - "`binsize` is empty.") + expect_error(HiC2Tree(files, format, binsize_test, index, chromosomes, + replicates), "`binsize` is empty.") }) data <- HiCDOCDataSet(files, format, binsize, chromosomes, index) -norm_count <- lapply(data$interactionMat, normalizeCount) -resTree <- lapply(norm_count, clusterTree) - -trees1_1 <- lapply(seq_along(resTree[[1]]), function(i){ - list(resTree[[1]][[i]]$mat_1, resTree[[1]][[i]]$mat_2, resTree[[1]][[i]]$mat_3) -}) -trees1_2 <- lapply(seq_along(resTree[[2]]),function(i){ - list(resTree[[2]][[i]]$mat_7, resTree[[2]][[i]]$mat_8, resTree[[2]][[i]]$mat_9) -}) -trees1 <- unlist(c(trees1_1, trees1_2), recursive = FALSE) - -trees2_1 <- lapply(seq_along(resTree[[1]]), function(i){ - list(resTree[[1]][[i]]$mat_4, resTree[[1]][[i]]$mat_5, resTree[[1]][[i]]$mat_6) -}) -trees2_2 <- lapply(seq_along(resTree[[2]]),function(i){ - list(resTree[[2]][[i]]$mat_10, resTree[[2]][[i]]$mat_11, resTree[[2]][[i]]$mat_12) -}) -trees2 <- unlist(c(trees2_1, trees2_2), recursive = FALSE) +norm_count <- normalizeCount(data$HiCDOCDataSet) +resTree <- clusterTree(norm_count) +tail(resTree$metadata) + +trees1 <- resTree$trees[which(resTree$metadata$mat == "mat_1" | + resTree$metadata$mat == "mat_2" | + resTree$metadata$mat == "mat_3" | + resTree$metadata$mat == "mat_7" | + resTree$metadata$mat == "mat_8" | + resTree$metadata$mat == "mat_9")] +trees2 <- resTree$trees[which(resTree$metadata$mat == "mat_4" | + resTree$metadata$mat == "mat_5" | + resTree$metadata$mat == "mat_6" | + resTree$metadata$mat == "mat_10" | + resTree$metadata$mat == "mat_11" | + resTree$metadata$mat == "mat_12")] resTest <- treediff(trees1, trees2, replicates) -test_that("'HiCDOCDataSet' works for simple cases", { +test_that("Comparison HiC2Tree and sub-functions works for simple cases", { # Check the output object has the expected names - expect_named(data, c("interactionMat", "indexData", "index_mat_chr")) + expect_named(data, c("HiCDOCDataSet", "indexData", "index_mat_chr")) # Check that the column names of the `norm_count` data frame are as expected expect_equal(colnames(norm_count[[1]]), c("chromosome", "index1", "index2", @@ -119,7 +117,7 @@ test_that("'HiCDOCDataSet' works for simple cases", { # Check that the `mat_1` element of the first element of `resTree` is an # "hclust" object - expect_is(resTree[[1]][[1]]$mat_1, "hclust") + expect_is(resTree$trees$tree1, "hclust") # Check that `resTest` is of class "treeTest" expect_s3_class(resTest, "treeTest") @@ -131,3 +129,60 @@ test_that("'HiCDOCDataSet' works for simple cases", { # rows in `res$metadata` expect_length(c(trees1, trees2), nrow(res$metadata)) }) + +test_that("HiCDOCDataSet function", { + + # Check the output object has the expected names + expect_named(data, c("HiCDOCDataSet", "indexData", "index_mat_chr")) + + # Check the output object has the expected class + expect_s3_class(data$HiCDOCDataSet, "HiCDOCDataSetList") + expect_s3_class(data$indexData, "data.frame") + expect_s3_class(data$index_mat_chr, "data.frame") +}) + +test_that("normalizeCount function", { + + # Check the output object has the expected class + expect_type(norm_count, "list") + expect_length(norm_count, length(chromosomes)) +}) + +test_that("clusterTree function", { + + # Check the output object has the expected class + expect_type(resTree, "list") + + # Check the output object has the expected names + expect_named(resTree, c("trees", "metadata")) + expect_length(resTree$trees, nrow(resTree$metadata)) +}) + +test_that("create_cluster function", { + + res <- create_cluster(norm_count[[1]]) + # Check the output object has the expected class + expect_type(res, "list") + + cluster <- unique(res) + expect_true(length(cluster$merged_clust) > 1) +}) + +test_that("create_trees function", { + + cluster <- create_cluster(norm_count[[1]]) + + # Get the indices of the rows and columns in the cluster + selected <- rownames(cluster)[cluster == 1] + + # Select the sub-matrix + red_mat <- ((norm_count[[1]]$index1 %in% selected) & (norm_count[[1]]$index2 %in% selected)) + red_mat <- norm_count[[1]][red_mat, ] + + res <- create_trees(red_mat, selected) + # Check the output object has the expected class + expect_type(res, "list") + expect_s3_class(res$mat_1, "hclust") + + expect_length(selected, length(res$mat_1$order)) +}) -- GitLab