Skip to content
Snippets Groups Projects
Commit 9721fe35 authored by CARDENAS GWENDAELLE's avatar CARDENAS GWENDAELLE
Browse files

change interactionset fonction

parent 4904522d
No related branches found
No related tags found
No related merge requests found
HiC2Tree <- function(file, type, binsize, index = NULL,
chromosomes) {
InteractionDataSet <- InteractionDataSet(file, type, binsize, chromosome, index)
HiC2Tree <- function(files, format, binsize, index = NULL,
chromosomes){
# create an InteractionDataSet object
InteractionDataSet <- InteractionDataSet(files, format, binsize, chromosomes,
index)
# Extract the InteractionDataSet and index_mat_chr variables
res <- InteractionDataSet$InteractionDataSet
index_mat_chr <- InteractionDataSet$index_mat_chr
# Perform MA normalization with cyclic loess
norm_matrices <- lapply(res, FUN = normalize_count)
# Find common clustering
# Normalize the count data using cyclic loess
norm_matrices <- normalize_count(res, index_mat_chr)
# Create cluster for each chromosome
cur_clust <- lapply(norm_matrices, FUN = create_cluster)
all_trees <- lapply(seq_along(cur_clust), function(i){
all_clusters <- unique(cur_clust[[i]][, 1])
# Create a list of trees
all_trees <- future_lapply(seq_along(cur_clust), function(i){
# List of all clusters
all_clusters <- unique(cur_clust[[i]][, 1])
# Create a tree for each cluster
trees <- sapply(all_clusters, function(ac) {
cat("cluster", ac, ":\n")
#### for each cluster, create a dendrogram for each sample
# Get the indices of the rows and columns in the cluster
selected <- rownames(cur_clust[[i]])[cur_clust[[i]] == ac]
# Select the sub-matrix
red_mat <- ((norm_matrices[[i]]$index1 %in% selected) &
(norm_matrices[[i]]$index2 %in% selected))
red_mat <- norm_matrices[[i]][red_mat, ]
# Create trees
trees <- create_trees(red_mat, selected)
return(trees)
}, simplify = FALSE)
}, simplify = FALSE)
})
# Count the number of clusters for each chromosome
nb_cluster <- sapply(all_trees, length)
# Count the number of matrices for each chromosome
nb_mat <- sapply(chromosomes, function(chr){
nb_mat <- sum(index_mat_chr$chromosome == chr)
return(nb_mat)
})
# Create a vector of chromosomes for each tree
met_chr <- rep(chromosomes, nb_cluster * nb_mat)
# Create a vector of cluster numbers for each tree
met_cluster <- sapply(seq_along(chromosomes), function(i){
met_cluster <-rep(seq(nb_cluster[i]), each = nb_mat[i])
return(met_cluster)
})
all_trees <- unlist(all_trees, recursive = FALSE)
return(list("all_trees" = all_trees, "metadata" = InteractionDataSet$indexData))
# Create a vector of file names for each tree
files <- sapply(seq_along(chromosomes), function(i){
files <- rep(files[which(index_mat_chr$chromosome == chromosomes[i])],
nb_cluster[i])
return(files)
})
all_trees <- unlist(unlist(all_trees, recursive = FALSE), recursive = FALSE)
# Give each tree a name
names <- sapply(seq_along(all_trees), function(i){
names <- paste0("tree", i)
return(names)
})
names(all_trees) <- names
# Add metadata to the trees
metadata <- data.frame("names" = names, "chromosome" = met_chr,
"cluster" = unlist(met_cluster),
"file" = unlist(files))
# Return a list containing the trees, metadata, and indexData
return(list("trees" = all_trees, "metadata" = metadata,
"index" = InteractionDataSet$indexData))
}
InteractionDataSet <- function(file, type, binsize, chromosome, index = NULL) {
InteractionDataSet <- function(file, format, binsize, chromosome, index = NULL){
# Set the conditions to "mat"
conditions <- "mat"
# Create a list of HiCDOCDataSet objects
HiCDOCDataSet <- sapply(seq_along(file), function(i){
if (type[i] == "tabular") {
HiCDOCDataSet <- HiCDOCDataSetFromTabular(file[i], i, conditions,
if (format[i] == "tabular") {
HiCDOCDataSet <- HiCDOCDataSetFromTabular(file[i], i, conditions,
binsize)
} else if (type[i] == "cooler") {
HiCDOCDataSet <- HiCDOCDataSetFromCool(file[i], i, conditions,
} else if (format[i] == "cooler") {
HiCDOCDataSet <- HiCDOCDataSetFromCool(file[i], i, conditions,
binsize)
} else if (type[i] == "juicer") {
HiCDOCDataSet <- HiCDOCDataSetFromHiC(file[i], i, conditions,
} else if (format[i] == "juicer") {
HiCDOCDataSet <- HiCDOCDataSetFromHiC(file[i], i, conditions,
binsize)
} else if (type[i] == "HiC-Pro") {
HiCDOCDataSet <- HiCDOCDataSetFromHiCPro(file[i], bedPaths = index, i,
} else if (format[i] == "HiC-Pro") {
HiCDOCDataSet <- HiCDOCDataSetFromHiCPro(file[i], bedPaths = index, i,
conditions)
}
return(HiCDOCDataSet)
})
InteractionData <- lapply(HiCDOCDataSet, function(data){
interactionSet <- data@interactions
return(interactionSet)
})
indexData <- sapply(HiCDOCDataSet, function(data){
# Create a matrix of chromosomes and matrix
index_mat_chr <- sapply(HiCDOCDataSet, function(data){
chromosome <- data@chromosomes
variables <- paste(data@colData$condition, data@colData$replicate, sep = "_")
variables <- paste(data@colData$condition, data@colData$replicate,
sep = "_")
return(list("chromosome" = chromosome, "variables" = variables ))
})
indexData <- as.data.frame(t(indexData))
fill <- NA
InteractionDataSet <- lapply(chromosome, function(chr){
mat <- indexData$chromosome == chr
unionInteractions <- Reduce(union, InteractionData[mat])
InteractionSet <- lapply(which(mat), function(i){
over <- match(HiCDOCDataSet[[i]], unionInteractions)
totalColumns <- ncol(HiCDOCDataSet[[i]])
newAssays <- matrix(
rep(fill, length(unionInteractions) * totalColumns),
ncol = totalColumns
)
newAssays[over, ] <- SummarizedExperiment::assay(HiCDOCDataSet[[i]])
InteractionSet <- InteractionSet::InteractionSet(
newAssays,
unionInteractions,
colData = colData(HiCDOCDataSet[[i]]))
return(InteractionSet)
})
assayChromosome <- lapply(InteractionSet, function(data){
assayChromosome <- SummarizedExperiment::assay(data)
assayChromosome <- as.data.table(assayChromosome)
setnames(assayChromosome, paste(data$condition,
data$replicate, sep = "_"))
return(assayChromosome)
})
assayChromosome <- Reduce(cbind, assayChromosome)
unionInteractions <- as.data.table(unionInteractions)
dataplot <- cbind(unionInteractions[, .(chromosome = chromosome, index1,
index2)], assayChromosome)
dataplot[is.na(dataplot)] <- 0
return(dataplot)
index_mat_chr <- as.data.frame(t(index_mat_chr))
index_mat_chr$chromosome <- unlist(index_mat_chr$chromosome)
index_mat_chr$variables <- unlist(index_mat_chr$variables)
# Merge all the InteractionSet objects
suppressWarnings(mergedinteractionSet <- Reduce(
f = HiCDOC:::.mergeInteractionSet,
x = HiCDOCDataSet
))
HiCDOCSet <- new("HiCDOCDataSet", mergedinteractionSet)
HiCDOCSet <- HiCDOC:::.fillHiCDOCDataSet(HiCDOCSet)
# Extract the regions from the HiCDOCSet object
indexData <- HiCDOCSet@interactions@regions
indexData <- as.data.frame(indexData)
# For each chromosome, extract the interaction counts
all_mat_count <- future_lapply(chromosome, function(chr){
rowsChromosome <- (S4Vectors::mcols(HiCDOCSet)$chromosome == chr)
assayChromosome <- SummarizedExperiment::assay(HiCDOCSet[rowsChromosome, ])
assayChromosome <- as.data.table(assayChromosome)
data.table::setnames(
assayChromosome,
paste(HiCDOCSet$condition, HiCDOCSet$replicate, sep = "_")
)
interactionsChromosome <- InteractionSet::interactions(
HiCDOCSet[rowsChromosome, ]
)
interactionsChromosome <- as.data.table(interactionsChromosome)
interaction_mat <- cbind( interactionsChromosome[, .(
chromosome = chromosome, index1, index2 )], assayChromosome)
return(interaction_mat)
})
return(list("InteractionDataSet" = InteractionDataSet, "indexData" = indexData))
mat_count <- Reduce(rbind, all_mat_count)
mat_count[is.na(mat_count)] <- 0
return(list("InteractionDataSet" = mat_count, "indexData" = indexData,
"index_mat_chr" = index_mat_chr))
}
create_cluster <- function(res) {
# extract all unique bin
all_bins <- unique(c(res$index1, res$index2))
# create index mapping for the bins
res$bindex1 <- match(res$index1, all_bins)
res$bindex2 <- match(res$index2, all_bins)
# create a merged squared similarity matrix with sum of normalized counts
merged_mat <- rowSums(res[ ,-c(1:3)])
cur_mat <- matrix(0, ncol = length(all_bins), nrow = length(all_bins))
cur_mat[cbind(res$bindex1, res$bindex2)] <- merged_mat
cur_mat[cbind(res$bindex2, res$bindex1)] <- merged_mat
# log transformation
cur_mat <- log(cur_mat + 1)
# perform constrained HAC (and use bin identifiers as labels)
# perform constrained hierarchical clustering
merged_res <- adjClust(cur_mat, type = "similarity", h = length(all_bins) - 1)
merged_res$labels <- as.character(all_bins)
# select the number of clusters with broken stick
# select the number of clusters with broken stick method
merged_clust <- select(merged_res, type = "bstick")
names(merged_clust) <- all_bins
return(data.frame(merged_clust))
}
create_trees <- function(red_mat, selected){
# match bin indices in the reduced similarity matrix to selected indices
match1 <- match(red_mat$index1, selected)
match2 <- match(red_mat$index2, selected)
match2 <- match(red_mat$index2, selected)
# perform hierarchical clustering for each column
adjclust_out <- sapply(red_mat[, -c(1:3)], function(arep) {
##### create a merged squared similarity matrix with normalized counts
mat_crep <- matrix(0, ncol = length(selected), nrow = length(selected))
mat_crep[cbind(match1, match2)] <- arep
mat_crep[cbind(match2, match1)] <- arep
# perform hierarchical clustering on the similarity matrix
out <- adjClust(log(mat_crep + 1), type = "similarity")
class(out) <- "hclust"
return(out)
}, simplify = FALSE)
return(adjclust_out)
}
normalize_count <- function(count_matrice) {
counts <- as.matrix(count_matrice[ ,-c(1:3)])
colnames(counts) <- NULL
cur_dge <- SummarizedExperiment(list(counts = counts))
cur_dge$totals <- colSums(count_matrice[ ,-c(1:3)])
offsets <- csaw::normOffsets(cur_dge, se.out = FALSE)
rm(cur_dge)
gc(verbose = FALSE)
counts <- counts / exp(offsets)
count_matrice <- as.data.frame(count_matrice)
count_matrice[ ,-c(1:3)] <- data.frame(counts)
rm(counts)
gc(verbose = FALSE)
return(count_matrice)
normalize_count <- function(count_matrice, index_mat_chr){
# Get all unique chromosomes in the count matrix
chromosome <- unique(count_matrice$chromosome)
all_count_matrice <- future_lapply(chromosome, function(chr){
# Subset the count matrix
count_matrice <- count_matrice[which(count_matrice$chromosome == chr), ]
# Subset the index matrix
col <- index_mat_chr$variables[which(index_mat_chr$chromosome == chr)]
# Create a new count matrix
deb <- count_matrice[, c(1:3)]
end <- subset(count_matrice, select = col)
count_matrice <- cbind(deb, end)
# Extract the count values as a matrix
counts <- as.matrix(count_matrice[ ,-c(1:3)])
colnames(counts) <- NULL
# Compute the total number of reads for each sample
cur_dge <- SummarizedExperiment(list(counts = counts))
cur_dge$totals <- colSums(count_matrice[ ,-c(1:3)])
# Normalize the counts
offsets <- csaw::normOffsets(cur_dge, se.out = FALSE)
counts <- counts / exp(offsets)
count_matrice <- as.data.frame(count_matrice)
count_matrice[ ,-c(1:3)] <- data.frame(counts)
return(count_matrice)
})
# Return the list of normalized count matrices
return(all_count_matrice)
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment