Skip to content
Snippets Groups Projects
Commit 20073c40 authored by CARDENAS GWENDAELLE's avatar CARDENAS GWENDAELLE
Browse files

Delete helpers.R

parent 41e4c8ff
No related branches found
No related tags found
No related merge requests found
.mergeInteractionSet <- function(interactionSet1, interactionSet2, fill = NA) {
unionInteractions <- GenomicRanges::union(
InteractionSet::interactions(interactionSet1),
InteractionSet::interactions(interactionSet2)
)
# Complete InteractionSets
interactionSet1 <- .fillInteractionSet(
interactionSet1,
unionInteractions,
fill
)
interactionSet2 <- .fillInteractionSet(
interactionSet2,
unionInteractions,
fill
)
# Merge
newiset <- BiocGenerics::cbind(interactionSet1, interactionSet2)
return(newiset)
}
.fillInteractionSet <- function(
interactionSet,
interactionSetUnion,
fill = NA
) {
over <- GenomicRanges::match(interactionSet, interactionSetUnion)
totalColumns <- ncol(interactionSet)
newAssays <- matrix(
rep(fill, length(interactionSetUnion) * totalColumns),
ncol = totalColumns
)
newAssays[over, ] <- SummarizedExperiment::assay(interactionSet)
return(
InteractionSet::InteractionSet(
newAssays,
interactionSetUnion,
colData = SummarizedExperiment::colData(interactionSet)
)
)
}
.fillHiCDOCDataSet <- function(object) {
# Reduce the levels in interaction part
object <- InteractionSet::reduceRegions(object)
objectRegions <- InteractionSet::regions(object)
chromosomeNames <- unique(as.character(
GenomeInfoDb::seqnames(objectRegions)
))
chromosomeNames <- gtools::mixedsort(chromosomeNames)
GenomeInfoDb::seqlevels(
InteractionSet::regions(object),
pruning.mode = "coarse"
) <- chromosomeNames
# Add chromosome column for split purpose
chromosomes <-
GenomeInfoDb::seqnames(InteractionSet::anchors(object, "first"))
chromosomes <- S4Vectors::Rle(factor(chromosomes, levels = chromosomeNames))
S4Vectors::mcols(object) <- S4Vectors::DataFrame("chromosome" = chromosomes)
# Sorting interactions and assay
ids <- InteractionSet::anchors(object, id = TRUE)
neworder <- order(chromosomes, ids$first, ids$second)
object <- object[neworder, ]
# Fill all other slots than interactionSet part
# Chromosomes and their size (max bin)
object@chromosomes <- chromosomeNames
object@totalBins <- .determineChromosomeSizes(object)
object@parameters <- defaultHiCDOCParameters
# Valid conditions and replicates by chromosome (==not empty)
# maybe do a function for valid conditions and replicates ?
valids <- .determineValids(object)
object@validAssay <- valids
# Weakbins
object@weakBins <- vector("list", length(object@chromosomes))
names(object@weakBins) <- object@chromosomes
return(object)
}
.determineChromosomeSizes <- function(object) {
tabChromosomes <- as.data.table(InteractionSet::regions(object))
tabChromosomes[, minIndex := min(index), by = .(seqnames)]
# minStart to correct chromosomes not starting at the 0 position
tabChromosomes[
index == minIndex,
minStart := round(start / width),
by = .(seqnames)
]
# Comuting chromosome entire size
tabChromosomes <- tabChromosomes[
,
.(binSize = max(index) - min(index) + 1 + max(minStart, na.rm = TRUE)),
by = .(seqnames)
]
totalBins <- tabChromosomes$binSize
names(totalBins) <- tabChromosomes$seqnames
return(totalBins)
}
.determineValids <- function(object) {
valids <- S4Vectors::split(
SummarizedExperiment::assay(object),
S4Vectors::mcols(object)$chromosome,
drop = FALSE
)
valids <- lapply(valids, function(x)
apply(x, 2, stats::var, na.rm=TRUE))
valids <- lapply(valids, function(x) which(x>0 & !is.na(x)))
return(valids)
}
defaultHiCDOCParameters <- list(
smallChromosomeThreshold = 100,
sparseReplicateThreshold = 0.3,
weakPositionThreshold = 1,
loessSampleSize = 20000,
kMeansDelta = 0.0001,
kMeansIterations = 50,
kMeansRestarts = 20,
PC1CheckThreshold = 0.75
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment