Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
.mergeInteractionSet <- function(interactionSet1, interactionSet2, fill = NA) {
unionInteractions <- GenomicRanges::union(
InteractionSet::interactions(interactionSet1),
InteractionSet::interactions(interactionSet2)
)
# Complete InteractionSets
interactionSet1 <- .fillInteractionSet(
interactionSet1,
unionInteractions,
fill
)
interactionSet2 <- .fillInteractionSet(
interactionSet2,
unionInteractions,
fill
)
# Merge
newiset <- BiocGenerics::cbind(interactionSet1, interactionSet2)
return(newiset)
}
.fillInteractionSet <- function(
interactionSet,
interactionSetUnion,
fill = NA
) {
over <- GenomicRanges::match(interactionSet, interactionSetUnion)
totalColumns <- ncol(interactionSet)
newAssays <- matrix(
rep(fill, length(interactionSetUnion) * totalColumns),
ncol = totalColumns
)
newAssays[over, ] <- SummarizedExperiment::assay(interactionSet)
return(
InteractionSet::InteractionSet(
newAssays,
interactionSetUnion,
colData = SummarizedExperiment::colData(interactionSet)
)
)
}
.fillHiCDOCDataSet <- function(object) {
# Reduce the levels in interaction part
object <- InteractionSet::reduceRegions(object)
objectRegions <- InteractionSet::regions(object)
chromosomeNames <- unique(as.character(
GenomeInfoDb::seqnames(objectRegions)
))
chromosomeNames <- gtools::mixedsort(chromosomeNames)
GenomeInfoDb::seqlevels(
InteractionSet::regions(object),
pruning.mode = "coarse"
) <- chromosomeNames
# Add chromosome column for split purpose
chromosomes <-
GenomeInfoDb::seqnames(InteractionSet::anchors(object, "first"))
chromosomes <- S4Vectors::Rle(factor(chromosomes, levels = chromosomeNames))
S4Vectors::mcols(object) <- S4Vectors::DataFrame("chromosome" = chromosomes)
# Sorting interactions and assay
ids <- InteractionSet::anchors(object, id = TRUE)
neworder <- order(chromosomes, ids$first, ids$second)
object <- object[neworder, ]
# Fill all other slots than interactionSet part
# Chromosomes and their size (max bin)
object@chromosomes <- chromosomeNames
object@totalBins <- .determineChromosomeSizes(object)
object@parameters <- defaultHiCDOCParameters
# Valid conditions and replicates by chromosome (==not empty)
# maybe do a function for valid conditions and replicates ?
valids <- .determineValids(object)
object@validAssay <- valids
# Weakbins
object@weakBins <- vector("list", length(object@chromosomes))
names(object@weakBins) <- object@chromosomes
return(object)
}
.determineChromosomeSizes <- function(object) {
tabChromosomes <- as.data.table(InteractionSet::regions(object))
tabChromosomes[, minIndex := min(index), by = .(seqnames)]
# minStart to correct chromosomes not starting at the 0 position
tabChromosomes[
index == minIndex,
minStart := round(start / width),
by = .(seqnames)
]
# Comuting chromosome entire size
tabChromosomes <- tabChromosomes[
,
.(binSize = max(index) - min(index) + 1 + max(minStart, na.rm = TRUE)),
by = .(seqnames)
]
totalBins <- tabChromosomes$binSize
names(totalBins) <- tabChromosomes$seqnames
return(totalBins)
}
.determineValids <- function(object) {
valids <- S4Vectors::split(
SummarizedExperiment::assay(object),
S4Vectors::mcols(object)$chromosome,
drop = FALSE
)
valids <- lapply(valids, function(x)
apply(x, 2, stats::var, na.rm=TRUE))
valids <- lapply(valids, function(x) which(x>0 & !is.na(x)))
return(valids)
}
defaultHiCDOCParameters <- list(
smallChromosomeThreshold = 100,
sparseReplicateThreshold = 0.3,
weakPositionThreshold = 1,
loessSampleSize = 20000,
kMeansDelta = 0.0001,
kMeansIterations = 50,
kMeansRestarts = 20,
PC1CheckThreshold = 0.75
)