diff --git a/R/helpers.R b/R/helpers.R deleted file mode 100644 index 967163469bcd5b1aa6bf8c098122537c4aa0357c..0000000000000000000000000000000000000000 --- a/R/helpers.R +++ /dev/null @@ -1,127 +0,0 @@ -.mergeInteractionSet <- function(interactionSet1, interactionSet2, fill = NA) { - unionInteractions <- GenomicRanges::union( - InteractionSet::interactions(interactionSet1), - InteractionSet::interactions(interactionSet2) - ) - # Complete InteractionSets - interactionSet1 <- .fillInteractionSet( - interactionSet1, - unionInteractions, - fill - ) - interactionSet2 <- .fillInteractionSet( - interactionSet2, - unionInteractions, - fill - ) - - # Merge - newiset <- BiocGenerics::cbind(interactionSet1, interactionSet2) - return(newiset) -} - -.fillInteractionSet <- function( - interactionSet, - interactionSetUnion, - fill = NA -) { - over <- GenomicRanges::match(interactionSet, interactionSetUnion) - totalColumns <- ncol(interactionSet) - newAssays <- matrix( - rep(fill, length(interactionSetUnion) * totalColumns), - ncol = totalColumns - ) - newAssays[over, ] <- SummarizedExperiment::assay(interactionSet) - return( - InteractionSet::InteractionSet( - newAssays, - interactionSetUnion, - colData = SummarizedExperiment::colData(interactionSet) - ) - ) -} - -.fillHiCDOCDataSet <- function(object) { - # Reduce the levels in interaction part - object <- InteractionSet::reduceRegions(object) - objectRegions <- InteractionSet::regions(object) - chromosomeNames <- unique(as.character( - GenomeInfoDb::seqnames(objectRegions) - )) - chromosomeNames <- gtools::mixedsort(chromosomeNames) - GenomeInfoDb::seqlevels( - InteractionSet::regions(object), - pruning.mode = "coarse" - ) <- chromosomeNames - - # Add chromosome column for split purpose - chromosomes <- - GenomeInfoDb::seqnames(InteractionSet::anchors(object, "first")) - chromosomes <- S4Vectors::Rle(factor(chromosomes, levels = chromosomeNames)) - S4Vectors::mcols(object) <- S4Vectors::DataFrame("chromosome" = chromosomes) - - # Sorting interactions and assay - ids <- InteractionSet::anchors(object, id = TRUE) - neworder <- order(chromosomes, ids$first, ids$second) - object <- object[neworder, ] - - # Fill all other slots than interactionSet part - # Chromosomes and their size (max bin) - object@chromosomes <- chromosomeNames - object@totalBins <- .determineChromosomeSizes(object) - object@parameters <- defaultHiCDOCParameters - - # Valid conditions and replicates by chromosome (==not empty) - # maybe do a function for valid conditions and replicates ? - valids <- .determineValids(object) - object@validAssay <- valids - - # Weakbins - object@weakBins <- vector("list", length(object@chromosomes)) - names(object@weakBins) <- object@chromosomes - - return(object) -} - -.determineChromosomeSizes <- function(object) { - tabChromosomes <- as.data.table(InteractionSet::regions(object)) - tabChromosomes[, minIndex := min(index), by = .(seqnames)] - # minStart to correct chromosomes not starting at the 0 position - tabChromosomes[ - index == minIndex, - minStart := round(start / width), - by = .(seqnames) - ] - # Comuting chromosome entire size - tabChromosomes <- tabChromosomes[ - , - .(binSize = max(index) - min(index) + 1 + max(minStart, na.rm = TRUE)), - by = .(seqnames) - ] - totalBins <- tabChromosomes$binSize - names(totalBins) <- tabChromosomes$seqnames - return(totalBins) -} - -.determineValids <- function(object) { - valids <- S4Vectors::split( - SummarizedExperiment::assay(object), - S4Vectors::mcols(object)$chromosome, - drop = FALSE - ) - valids <- lapply(valids, function(x) - apply(x, 2, stats::var, na.rm=TRUE)) - valids <- lapply(valids, function(x) which(x>0 & !is.na(x))) - return(valids) -} - -defaultHiCDOCParameters <- list( - smallChromosomeThreshold = 100, - sparseReplicateThreshold = 0.3, - weakPositionThreshold = 1, - loessSampleSize = 20000, - kMeansDelta = 0.0001, - kMeansIterations = 50, - kMeansRestarts = 20, - PC1CheckThreshold = 0.75 -)