Skip to content
Snippets Groups Projects
Commit 38351d31 authored by CARDENAS GWENDAELLE's avatar CARDENAS GWENDAELLE
Browse files

normalization corrections

parent 078fd711
No related branches found
No related tags found
No related merge requests found
......@@ -26,6 +26,7 @@
#' @importFrom data.table as.data.table
#' @importFrom data.table setnames
#' @importFrom SummarizedExperiment SummarizedExperiment
#' @importFrom SummarizedExperiment assay
#' @importFrom future.apply future_lapply
#' @importFrom HiCDOC HiCDOCDataSetFromTabular
#' @importFrom HiCDOC HiCDOCDataSetFromCool
......@@ -197,12 +198,12 @@ InteractionDataSet <- function(file, format, binsize = NULL, chromosome,
indexData <- as.data.frame(indexData)
# For each chromosome, extract the interaction counts
assayChromosome <- SummarizedExperiment::assay(HiCDOCSet)
assayChromosome <- assay(HiCDOCSet)
assayChromosome <- as.data.table(assayChromosome)
data.table::setnames(assayChromosome,paste(HiCDOCSet$condition,
data.table::setnames(assayChromosome, paste(HiCDOCSet$condition,
HiCDOCSet$replicate, sep = "_"))
interactionsChromosome <- InteractionSet::interactions(HiCDOCSet)
interactionsChromosome <- interactions(HiCDOCSet)
interactionsChromosome <- as.data.table(interactionsChromosome)
interaction_mat <- cbind(interactionsChromosome[, .(
chromosome = chromosome, index1, index2 )], assayChromosome)
......@@ -233,7 +234,8 @@ create_cluster <- function(res) {
res$bindex1 <- match(res$index1, all_bins)
res$bindex2 <- match(res$index2, all_bins)
merged_mat <- rowSums(res[ ,-c(1:3)])
nb_col <- length(res)
merged_mat <- rowSums(res[ ,-c(1:3, nb_col -1, nb_col)])
cur_mat <- matrix(0, ncol = length(all_bins), nrow = length(all_bins))
cur_mat[cbind(res$bindex1, res$bindex2)] <- merged_mat
cur_mat[cbind(res$bindex2, res$bindex1)] <- merged_mat
......@@ -243,6 +245,7 @@ create_cluster <- function(res) {
# perform constrained hierarchical clustering
merged_res <- adjClust(cur_mat, type = "similarity", h = length(all_bins) - 1)
merged_res$labels <- as.character(all_bins)
# select the number of clusters with broken stick method
......@@ -285,11 +288,17 @@ normalize_count <- function(count_matrice, index_mat_chr){
cur_dge <- SummarizedExperiment(list(counts = counts))
cur_dge$totals <- colSums(count_matrice[ ,-c(1:3)])
lib.sizes <- cur_dge$totals
# Normalize the counts
offsets <- csaw::normOffsets(cur_dge, se.out = FALSE)
offsets <- normOffsets(cur_dge, se.out = FALSE)
offsets <- offsets - mean(log(lib.sizes))
counts <- counts / exp(offsets)
count_matrice <- as.data.frame(count_matrice)
count_matrice[ ,-c(1:3)] <- data.frame(counts)
return(count_matrice)
}
......@@ -22,10 +22,10 @@ included in the analysis.}
\value{
A list containing:
\describe{
\item{trees}{list of all trees}
\item{metadata}{data frame which contain: files, chromosome, cluster of
\item{trees}{ list of all trees}
\item{metadata}{ data frame which contains: files, chromosome, cluster of
each tree}
\item{indexData}{data table which contain correspondence of each bin in
\item{indexData}{ data table which contains the correspondence of each bin in
the genome}
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment