diff --git a/R/HiC2Tree.R b/R/HiC2Tree.R index 1cab28eb9858f655c872b08001ce949ffe33c24f..58d2723a28a20adeed9a4b817b673ab65addbae4 100644 --- a/R/HiC2Tree.R +++ b/R/HiC2Tree.R @@ -26,6 +26,7 @@ #' @importFrom data.table as.data.table #' @importFrom data.table setnames #' @importFrom SummarizedExperiment SummarizedExperiment +#' @importFrom SummarizedExperiment assay #' @importFrom future.apply future_lapply #' @importFrom HiCDOC HiCDOCDataSetFromTabular #' @importFrom HiCDOC HiCDOCDataSetFromCool @@ -197,12 +198,12 @@ InteractionDataSet <- function(file, format, binsize = NULL, chromosome, indexData <- as.data.frame(indexData) # For each chromosome, extract the interaction counts - assayChromosome <- SummarizedExperiment::assay(HiCDOCSet) + assayChromosome <- assay(HiCDOCSet) assayChromosome <- as.data.table(assayChromosome) - data.table::setnames(assayChromosome,paste(HiCDOCSet$condition, + data.table::setnames(assayChromosome, paste(HiCDOCSet$condition, HiCDOCSet$replicate, sep = "_")) - interactionsChromosome <- InteractionSet::interactions(HiCDOCSet) + interactionsChromosome <- interactions(HiCDOCSet) interactionsChromosome <- as.data.table(interactionsChromosome) interaction_mat <- cbind(interactionsChromosome[, .( chromosome = chromosome, index1, index2 )], assayChromosome) @@ -233,7 +234,8 @@ create_cluster <- function(res) { res$bindex1 <- match(res$index1, all_bins) res$bindex2 <- match(res$index2, all_bins) - merged_mat <- rowSums(res[ ,-c(1:3)]) + nb_col <- length(res) + merged_mat <- rowSums(res[ ,-c(1:3, nb_col -1, nb_col)]) cur_mat <- matrix(0, ncol = length(all_bins), nrow = length(all_bins)) cur_mat[cbind(res$bindex1, res$bindex2)] <- merged_mat cur_mat[cbind(res$bindex2, res$bindex1)] <- merged_mat @@ -243,6 +245,7 @@ create_cluster <- function(res) { # perform constrained hierarchical clustering merged_res <- adjClust(cur_mat, type = "similarity", h = length(all_bins) - 1) + merged_res$labels <- as.character(all_bins) # select the number of clusters with broken stick method @@ -285,11 +288,17 @@ normalize_count <- function(count_matrice, index_mat_chr){ cur_dge <- SummarizedExperiment(list(counts = counts)) cur_dge$totals <- colSums(count_matrice[ ,-c(1:3)]) + lib.sizes <- cur_dge$totals + # Normalize the counts - offsets <- csaw::normOffsets(cur_dge, se.out = FALSE) + offsets <- normOffsets(cur_dge, se.out = FALSE) + + offsets <- offsets - mean(log(lib.sizes)) + counts <- counts / exp(offsets) count_matrice <- as.data.frame(count_matrice) count_matrice[ ,-c(1:3)] <- data.frame(counts) return(count_matrice) } + diff --git a/man/HiC2Tree.Rd b/man/HiC2Tree.Rd index 026112695ad8569feabe8e1e8bdad4e193b36b37..a4ccd2a59aec29ea8d6fdf94bff476e5910e4ec3 100644 --- a/man/HiC2Tree.Rd +++ b/man/HiC2Tree.Rd @@ -22,10 +22,10 @@ included in the analysis.} \value{ A list contain: \itemize{ - \item{trees}{list of all trees} - \item{metadata}{data frame which contain: files, chromosome, cluster of + \item{trees}{ list of all trees} + \item{metadata}{ data frame which contain: files, chromosome, cluster of each trees} - \item{indexData}{data table which contain correspondence of each bin in + \item{indexData}{ data table which contain correspondence of each bin in the genome} } }