Skip to content
Snippets Groups Projects
Commit 38351d31 authored by CARDENAS GWENDAELLE's avatar CARDENAS GWENDAELLE
Browse files

normalization corrections

parent 078fd711
No related branches found
No related tags found
No related merge requests found
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#' @importFrom data.table as.data.table #' @importFrom data.table as.data.table
#' @importFrom data.table setnames #' @importFrom data.table setnames
#' @importFrom SummarizedExperiment SummarizedExperiment #' @importFrom SummarizedExperiment SummarizedExperiment
#' @importFrom SummarizedExperiment assay
#' @importFrom future.apply future_lapply #' @importFrom future.apply future_lapply
#' @importFrom HiCDOC HiCDOCDataSetFromTabular #' @importFrom HiCDOC HiCDOCDataSetFromTabular
#' @importFrom HiCDOC HiCDOCDataSetFromCool #' @importFrom HiCDOC HiCDOCDataSetFromCool
...@@ -197,12 +198,12 @@ InteractionDataSet <- function(file, format, binsize = NULL, chromosome, ...@@ -197,12 +198,12 @@ InteractionDataSet <- function(file, format, binsize = NULL, chromosome,
indexData <- as.data.frame(indexData) indexData <- as.data.frame(indexData)
# For each chromosome, extract the interaction counts # For each chromosome, extract the interaction counts
assayChromosome <- SummarizedExperiment::assay(HiCDOCSet) assayChromosome <- assay(HiCDOCSet)
assayChromosome <- as.data.table(assayChromosome) assayChromosome <- as.data.table(assayChromosome)
data.table::setnames(assayChromosome,paste(HiCDOCSet$condition, data.table::setnames(assayChromosome, paste(HiCDOCSet$condition,
HiCDOCSet$replicate, sep = "_")) HiCDOCSet$replicate, sep = "_"))
interactionsChromosome <- InteractionSet::interactions(HiCDOCSet) interactionsChromosome <- interactions(HiCDOCSet)
interactionsChromosome <- as.data.table(interactionsChromosome) interactionsChromosome <- as.data.table(interactionsChromosome)
interaction_mat <- cbind(interactionsChromosome[, .( interaction_mat <- cbind(interactionsChromosome[, .(
chromosome = chromosome, index1, index2 )], assayChromosome) chromosome = chromosome, index1, index2 )], assayChromosome)
...@@ -233,7 +234,8 @@ create_cluster <- function(res) { ...@@ -233,7 +234,8 @@ create_cluster <- function(res) {
res$bindex1 <- match(res$index1, all_bins) res$bindex1 <- match(res$index1, all_bins)
res$bindex2 <- match(res$index2, all_bins) res$bindex2 <- match(res$index2, all_bins)
merged_mat <- rowSums(res[ ,-c(1:3)]) nb_col <- length(res)
merged_mat <- rowSums(res[ ,-c(1:3, nb_col -1, nb_col)])
cur_mat <- matrix(0, ncol = length(all_bins), nrow = length(all_bins)) cur_mat <- matrix(0, ncol = length(all_bins), nrow = length(all_bins))
cur_mat[cbind(res$bindex1, res$bindex2)] <- merged_mat cur_mat[cbind(res$bindex1, res$bindex2)] <- merged_mat
cur_mat[cbind(res$bindex2, res$bindex1)] <- merged_mat cur_mat[cbind(res$bindex2, res$bindex1)] <- merged_mat
...@@ -243,6 +245,7 @@ create_cluster <- function(res) { ...@@ -243,6 +245,7 @@ create_cluster <- function(res) {
# perform constrained hierarchical clustering # perform constrained hierarchical clustering
merged_res <- adjClust(cur_mat, type = "similarity", h = length(all_bins) - 1) merged_res <- adjClust(cur_mat, type = "similarity", h = length(all_bins) - 1)
merged_res$labels <- as.character(all_bins) merged_res$labels <- as.character(all_bins)
# select the number of clusters with broken stick method # select the number of clusters with broken stick method
...@@ -285,11 +288,17 @@ normalize_count <- function(count_matrice, index_mat_chr){ ...@@ -285,11 +288,17 @@ normalize_count <- function(count_matrice, index_mat_chr){
cur_dge <- SummarizedExperiment(list(counts = counts)) cur_dge <- SummarizedExperiment(list(counts = counts))
cur_dge$totals <- colSums(count_matrice[ ,-c(1:3)]) cur_dge$totals <- colSums(count_matrice[ ,-c(1:3)])
lib.sizes <- cur_dge$totals
# Normalize the counts # Normalize the counts
offsets <- csaw::normOffsets(cur_dge, se.out = FALSE) offsets <- normOffsets(cur_dge, se.out = FALSE)
offsets <- offsets - mean(log(lib.sizes))
counts <- counts / exp(offsets) counts <- counts / exp(offsets)
count_matrice <- as.data.frame(count_matrice) count_matrice <- as.data.frame(count_matrice)
count_matrice[ ,-c(1:3)] <- data.frame(counts) count_matrice[ ,-c(1:3)] <- data.frame(counts)
return(count_matrice) return(count_matrice)
} }
...@@ -22,10 +22,10 @@ included in the analysis.} ...@@ -22,10 +22,10 @@ included in the analysis.}
\value{ \value{
A list containing: A list containing:
\itemize{ \itemize{
\item{trees}{list of all trees} \item{trees}{ list of all trees}
\item{metadata}{data frame which contains: files, chromosome, cluster of \item{metadata}{ data frame which contains: files, chromosome, cluster of
each tree} each tree}
\item{indexData}{data table which contains the correspondence of each bin in \item{indexData}{ data table which contains the correspondence of each bin in
the genome} the genome}
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment