normalization corrections

38351d31 · CARDENAS GWENDAELLE · 078fd711 · 38351d31 · 38351d31
Commit 38351d31 authored 2 years ago by CARDENAS GWENDAELLE
--- a/R/HiC2Tree.R
+++ b/R/HiC2Tree.R
@@ -26,6 +26,7 @@
 #' @importFrom data.table as.data.table
 #' @importFrom data.table setnames
 #' @importFrom SummarizedExperiment SummarizedExperiment
+#' @importFrom SummarizedExperiment assay
 #' @importFrom future.apply future_lapply
 #' @importFrom HiCDOC HiCDOCDataSetFromTabular
 #' @importFrom HiCDOC HiCDOCDataSetFromCool
@@ -197,12 +198,12 @@ InteractionDataSet <- function(file, format, binsize = NULL, chromosome,
    indexData <- as.data.frame(indexData)
    # For each chromosome, extract the interaction counts
-    assayChromosome <- SummarizedExperiment::assay(HiCDOCSet)
+    assayChromosome <- assay(HiCDOCSet)
    assayChromosome <- as.data.table(assayChromosome)
-    data.table::setnames(assayChromosome,paste(HiCDOCSet$condition,
+    data.table::setnames(assayChromosome,   paste(HiCDOCSet$condition,
                                               HiCDOCSet$replicate, sep = "_"))
-    interactionsChromosome <- InteractionSet::interactions(HiCDOCSet)
+    interactionsChromosome <- interactions(HiCDOCSet)
    interactionsChromosome <- as.data.table(interactionsChromosome)
    interaction_mat <- cbind(interactionsChromosome[, .(
      chromosome = chromosome, index1, index2 )], assayChromosome)
@@ -233,7 +234,8 @@ create_cluster <- function(res) {
  res$bindex1 <- match(res$index1, all_bins)
  res$bindex2 <- match(res$index2, all_bins)
-  merged_mat <- rowSums(res[ ,-c(1:3)])
+  nb_col <- length(res)
+  merged_mat <- rowSums(res[ ,-c(1:3, nb_col -1, nb_col)])
  cur_mat <- matrix(0, ncol = length(all_bins), nrow = length(all_bins))
  cur_mat[cbind(res$bindex1, res$bindex2)] <- merged_mat
  cur_mat[cbind(res$bindex2, res$bindex1)] <- merged_mat
@@ -243,6 +245,7 @@ create_cluster <- function(res) {
  # perform constrained hierarchical clustering
  merged_res <- adjClust(cur_mat, type = "similarity", h = length(all_bins) - 1)
  merged_res$labels <- as.character(all_bins)
  # select the number of clusters with broken stick method
@@ -285,11 +288,17 @@ normalize_count <- function(count_matrice, index_mat_chr){
  cur_dge <- SummarizedExperiment(list(counts = counts))
  cur_dge$totals <- colSums(count_matrice[ ,-c(1:3)])
+  lib.sizes <- cur_dge$totals
  # Normalize the counts
-  offsets <- csaw::normOffsets(cur_dge, se.out = FALSE)
+  offsets <- normOffsets(cur_dge, se.out = FALSE)
+  offsets <- offsets - mean(log(lib.sizes))
  counts <- counts / exp(offsets)
  count_matrice <- as.data.frame(count_matrice)
  count_matrice[ ,-c(1:3)] <- data.frame(counts)
  return(count_matrice)
 }
--- a/man/HiC2Tree.Rd
+++ b/man/HiC2Tree.Rd
@@ -22,10 +22,10 @@ included in the analysis.}
 \value{
 A list contain:
 \itemize{
-  \item{trees}{list of all trees}
+  \item{trees}{ list of all trees}
-  \item{metadata}{data frame which contain: files, chromosome, cluster of
+  \item{metadata}{ data frame which contain: files, chromosome, cluster of
  each trees}
-  \item{indexData}{data table which contain correspondence of each bin in
+  \item{indexData}{ data table which contain correspondence of each bin in
  the genome}
 }
 }