Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
treediff
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Admin message
A compter du 1er avril, attention à vos pipelines :
Nouvelles limitations de Docker Hub
Show more breadcrumbs
SCALES
treediff
Commits
9721fe35
Commit
9721fe35
authored
2 years ago
by
CARDENAS GWENDAELLE
Browse files
Options
Downloads
Patches
Plain Diff
change interactionset fonction
parent
4904522d
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
R/HiC2Tree.R
+192
-134
192 additions, 134 deletions
R/HiC2Tree.R
with
192 additions
and
134 deletions
R/HiC2Tree.R
+
192
−
134
View file @
9721fe35
HiC2Tree
<-
function
(
file
,
type
,
binsize
,
index
=
NULL
,
chromosomes
)
{
InteractionDataSet
<-
InteractionDataSet
(
file
,
type
,
binsize
,
chromosome
,
index
)
HiC2Tree
<-
function
(
files
,
format
,
binsize
,
index
=
NULL
,
chromosomes
){
# create an InteractionDataSet object
InteractionDataSet
<-
InteractionDataSet
(
files
,
format
,
binsize
,
chromosomes
,
index
)
# Extract the InteractionDataSet and index_mat_chr variables
res
<-
InteractionDataSet
$
InteractionDataSet
index_mat_chr
<-
InteractionDataSet
$
index_mat_chr
#
Perform MA normalization with
cyclic loess
norm_matrices
<-
lapply
(
res
,
FUN
=
normalize_count
)
#
Find common clustering
#
Normalize the count data using
cyclic loess
norm_matrices
<-
normalize_count
(
res
,
index_mat_chr
)
#
Create cluster for each chromosome
cur_clust
<-
lapply
(
norm_matrices
,
FUN
=
create_cluster
)
all_trees
<-
lapply
(
seq_along
(
cur_clust
),
function
(
i
){
all_clusters
<-
unique
(
cur_clust
[[
i
]][,
1
])
# Create a list of trees
all_trees
<-
future_lapply
(
seq_along
(
cur_clust
),
function
(
i
){
# List of all clusters
all_clusters
<-
unique
(
cur_clust
[[
i
]][,
1
])
# Create a tree for each cluster
trees
<-
sapply
(
all_clusters
,
function
(
ac
)
{
cat
(
"cluster"
,
ac
,
":\n"
)
#### for each cluster, create a dendrogram for each sample
# Get the indices of the rows and columns in the cluster
selected
<-
rownames
(
cur_clust
[[
i
]])[
cur_clust
[[
i
]]
==
ac
]
# Select the sub-matrix
red_mat
<-
((
norm_matrices
[[
i
]]
$
index1
%in%
selected
)
&
(
norm_matrices
[[
i
]]
$
index2
%in%
selected
))
red_mat
<-
norm_matrices
[[
i
]][
red_mat
,
]
# Create trees
trees
<-
create_trees
(
red_mat
,
selected
)
return
(
trees
)
},
simplify
=
FALSE
)
},
simplify
=
FALSE
)
})
# Count the number of clusters for each chromosome
nb_cluster
<-
sapply
(
all_trees
,
length
)
# Count the number of matrices for each chromosome
nb_mat
<-
sapply
(
chromosomes
,
function
(
chr
){
nb_mat
<-
sum
(
index_mat_chr
$
chromosome
==
chr
)
return
(
nb_mat
)
})
# Create a vector of chromosomes for each tree
met_chr
<-
rep
(
chromosomes
,
nb_cluster
*
nb_mat
)
# Create a vector of cluster numbers for each tree
met_cluster
<-
sapply
(
seq_along
(
chromosomes
),
function
(
i
){
met_cluster
<-
rep
(
seq
(
nb_cluster
[
i
]),
each
=
nb_mat
[
i
])
return
(
met_cluster
)
})
all_trees
<-
unlist
(
all_trees
,
recursive
=
FALSE
)
return
(
list
(
"all_trees"
=
all_trees
,
"metadata"
=
InteractionDataSet
$
indexData
))
# Create a vector of file names for each tree
files
<-
sapply
(
seq_along
(
chromosomes
),
function
(
i
){
files
<-
rep
(
files
[
which
(
index_mat_chr
$
chromosome
==
chromosomes
[
i
])],
nb_cluster
[
i
])
return
(
files
)
})
all_trees
<-
unlist
(
unlist
(
all_trees
,
recursive
=
FALSE
),
recursive
=
FALSE
)
# Give each tree a name
names
<-
sapply
(
seq_along
(
all_trees
),
function
(
i
){
names
<-
paste0
(
"tree"
,
i
)
return
(
names
)
})
names
(
all_trees
)
<-
names
# Add metadata to the trees
metadata
<-
data.frame
(
"names"
=
names
,
"chromosome"
=
met_chr
,
"cluster"
=
unlist
(
met_cluster
),
"file"
=
unlist
(
files
))
# Return a list containing the trees, metadata, and indexData
return
(
list
(
"trees"
=
all_trees
,
"metadata"
=
metadata
,
"index"
=
InteractionDataSet
$
indexData
))
}
InteractionDataSet
<-
function
(
file
,
type
,
binsize
,
chromosome
,
index
=
NULL
)
{
InteractionDataSet
<-
function
(
file
,
format
,
binsize
,
chromosome
,
index
=
NULL
){
# Set the conditions to "mat"
conditions
<-
"mat"
# Create a list of HiCDOCDataSet objects
HiCDOCDataSet
<-
sapply
(
seq_along
(
file
),
function
(
i
){
if
(
type
[
i
]
==
"tabular"
)
{
HiCDOCDataSet
<-
HiCDOCDataSetFromTabular
(
file
[
i
],
i
,
conditions
,
if
(
format
[
i
]
==
"tabular"
)
{
HiCDOCDataSet
<-
HiCDOCDataSetFromTabular
(
file
[
i
],
i
,
conditions
,
binsize
)
}
else
if
(
type
[
i
]
==
"cooler"
)
{
HiCDOCDataSet
<-
HiCDOCDataSetFromCool
(
file
[
i
],
i
,
conditions
,
}
else
if
(
format
[
i
]
==
"cooler"
)
{
HiCDOCDataSet
<-
HiCDOCDataSetFromCool
(
file
[
i
],
i
,
conditions
,
binsize
)
}
else
if
(
type
[
i
]
==
"juicer"
)
{
HiCDOCDataSet
<-
HiCDOCDataSetFromHiC
(
file
[
i
],
i
,
conditions
,
}
else
if
(
format
[
i
]
==
"juicer"
)
{
HiCDOCDataSet
<-
HiCDOCDataSetFromHiC
(
file
[
i
],
i
,
conditions
,
binsize
)
}
else
if
(
type
[
i
]
==
"HiC-Pro"
)
{
HiCDOCDataSet
<-
HiCDOCDataSetFromHiCPro
(
file
[
i
],
bedPaths
=
index
,
i
,
}
else
if
(
format
[
i
]
==
"HiC-Pro"
)
{
HiCDOCDataSet
<-
HiCDOCDataSetFromHiCPro
(
file
[
i
],
bedPaths
=
index
,
i
,
conditions
)
}
return
(
HiCDOCDataSet
)
})
InteractionData
<-
lapply
(
HiCDOCDataSet
,
function
(
data
){
interactionSet
<-
data
@
interactions
return
(
interactionSet
)
})
indexData
<-
sapply
(
HiCDOCDataSet
,
function
(
data
){
# Create a matrix of chromosomes and matrix
index_mat_chr
<-
sapply
(
HiCDOCDataSet
,
function
(
data
){
chromosome
<-
data
@
chromosomes
variables
<-
paste
(
data
@
colData
$
condition
,
data
@
colData
$
replicate
,
sep
=
"_"
)
variables
<-
paste
(
data
@
colData
$
condition
,
data
@
colData
$
replicate
,
sep
=
"_"
)
return
(
list
(
"chromosome"
=
chromosome
,
"variables"
=
variables
))
})
indexData
<-
as.data.frame
(
t
(
indexData
))
fill
<-
NA
InteractionDataSet
<-
lapply
(
chromosome
,
function
(
chr
){
mat
<-
indexData
$
chromosome
==
chr
unionInteractions
<-
Reduce
(
union
,
InteractionData
[
mat
])
InteractionSet
<-
lapply
(
which
(
mat
),
function
(
i
){
over
<-
match
(
HiCDOCDataSet
[[
i
]],
unionInteractions
)
totalColumns
<-
ncol
(
HiCDOCDataSet
[[
i
]])
newAssays
<-
matrix
(
rep
(
fill
,
length
(
unionInteractions
)
*
totalColumns
),
ncol
=
totalColumns
)
newAssays
[
over
,
]
<-
SummarizedExperiment
::
assay
(
HiCDOCDataSet
[[
i
]])
InteractionSet
<-
InteractionSet
::
InteractionSet
(
newAssays
,
unionInteractions
,
colData
=
colData
(
HiCDOCDataSet
[[
i
]]))
return
(
InteractionSet
)
})
assayChromosome
<-
lapply
(
InteractionSet
,
function
(
data
){
assayChromosome
<-
SummarizedExperiment
::
assay
(
data
)
assayChromosome
<-
as.data.table
(
assayChromosome
)
setnames
(
assayChromosome
,
paste
(
data
$
condition
,
data
$
replicate
,
sep
=
"_"
))
return
(
assayChromosome
)
})
assayChromosome
<-
Reduce
(
cbind
,
assayChromosome
)
unionInteractions
<-
as.data.table
(
unionInteractions
)
dataplot
<-
cbind
(
unionInteractions
[,
.
(
chromosome
=
chromosome
,
index1
,
index2
)],
assayChromosome
)
dataplot
[
is.na
(
dataplot
)]
<-
0
return
(
dataplot
)
index_mat_chr
<-
as.data.frame
(
t
(
index_mat_chr
))
index_mat_chr
$
chromosome
<-
unlist
(
index_mat_chr
$
chromosome
)
index_mat_chr
$
variables
<-
unlist
(
index_mat_chr
$
variables
)
# Merge all the InteractionSet objects
suppressWarnings
(
mergedinteractionSet
<-
Reduce
(
f
=
HiCDOC
:::
.mergeInteractionSet
,
x
=
HiCDOCDataSet
))
HiCDOCSet
<-
new
(
"HiCDOCDataSet"
,
mergedinteractionSet
)
HiCDOCSet
<-
HiCDOC
:::
.fillHiCDOCDataSet
(
HiCDOCSet
)
# Extract the regions from the HiCDOCSet object
indexData
<-
HiCDOCSet
@
interactions
@
regions
indexData
<-
as.data.frame
(
indexData
)
# For each chromosome, extract the interaction counts
all_mat_count
<-
future_lapply
(
chromosome
,
function
(
chr
){
rowsChromosome
<-
(
S4Vectors
::
mcols
(
HiCDOCSet
)
$
chromosome
==
chr
)
assayChromosome
<-
SummarizedExperiment
::
assay
(
HiCDOCSet
[
rowsChromosome
,
])
assayChromosome
<-
as.data.table
(
assayChromosome
)
data.table
::
setnames
(
assayChromosome
,
paste
(
HiCDOCSet
$
condition
,
HiCDOCSet
$
replicate
,
sep
=
"_"
)
)
interactionsChromosome
<-
InteractionSet
::
interactions
(
HiCDOCSet
[
rowsChromosome
,
]
)
interactionsChromosome
<-
as.data.table
(
interactionsChromosome
)
interaction_mat
<-
cbind
(
interactionsChromosome
[,
.
(
chromosome
=
chromosome
,
index1
,
index2
)],
assayChromosome
)
return
(
interaction_mat
)
})
return
(
list
(
"InteractionDataSet"
=
InteractionDataSet
,
"indexData"
=
indexData
))
mat_count
<-
Reduce
(
rbind
,
all_mat_count
)
mat_count
[
is.na
(
mat_count
)]
<-
0
return
(
list
(
"InteractionDataSet"
=
mat_count
,
"indexData"
=
indexData
,
"index_mat_chr"
=
index_mat_chr
))
}
create_cluster
<-
function
(
res
)
{
# extract all unique bin
all_bins
<-
unique
(
c
(
res
$
index1
,
res
$
index2
))
# create index mapping for the bins
res
$
bindex1
<-
match
(
res
$
index1
,
all_bins
)
res
$
bindex2
<-
match
(
res
$
index2
,
all_bins
)
# create a merged squared similarity matrix with sum of normalized counts
merged_mat
<-
rowSums
(
res
[
,
-
c
(
1
:
3
)])
cur_mat
<-
matrix
(
0
,
ncol
=
length
(
all_bins
),
nrow
=
length
(
all_bins
))
cur_mat
[
cbind
(
res
$
bindex1
,
res
$
bindex2
)]
<-
merged_mat
cur_mat
[
cbind
(
res
$
bindex2
,
res
$
bindex1
)]
<-
merged_mat
# log transformation
cur_mat
<-
log
(
cur_mat
+
1
)
# perform constrained
HAC (and use bin identifiers as labels)
# perform constrained
hierarchical clustering
merged_res
<-
adjClust
(
cur_mat
,
type
=
"similarity"
,
h
=
length
(
all_bins
)
-
1
)
merged_res
$
labels
<-
as.character
(
all_bins
)
# select the number of clusters with broken stick
# select the number of clusters with broken stick
method
merged_clust
<-
select
(
merged_res
,
type
=
"bstick"
)
names
(
merged_clust
)
<-
all_bins
return
(
data.frame
(
merged_clust
))
}
create_trees
<-
function
(
red_mat
,
selected
){
# match bin indices in the reduced similarity matrix to selected indices
match1
<-
match
(
red_mat
$
index1
,
selected
)
match2
<-
match
(
red_mat
$
index2
,
selected
)
match2
<-
match
(
red_mat
$
index2
,
selected
)
# perform hierarchical clustering for each column
adjclust_out
<-
sapply
(
red_mat
[,
-
c
(
1
:
3
)],
function
(
arep
)
{
##### create a merged squared similarity matrix with normalized counts
mat_crep
<-
matrix
(
0
,
ncol
=
length
(
selected
),
nrow
=
length
(
selected
))
mat_crep
[
cbind
(
match1
,
match2
)]
<-
arep
mat_crep
[
cbind
(
match2
,
match1
)]
<-
arep
# perform hierarchical clustering on the similarity matrix
out
<-
adjClust
(
log
(
mat_crep
+
1
),
type
=
"similarity"
)
class
(
out
)
<-
"hclust"
return
(
out
)
},
simplify
=
FALSE
)
return
(
adjclust_out
)
}
normalize_count
<-
function
(
count_matrice
)
{
counts
<-
as.matrix
(
count_matrice
[
,
-
c
(
1
:
3
)])
colnames
(
counts
)
<-
NULL
cur_dge
<-
SummarizedExperiment
(
list
(
counts
=
counts
))
cur_dge
$
totals
<-
colSums
(
count_matrice
[
,
-
c
(
1
:
3
)])
offsets
<-
csaw
::
normOffsets
(
cur_dge
,
se.out
=
FALSE
)
rm
(
cur_dge
)
gc
(
verbose
=
FALSE
)
counts
<-
counts
/
exp
(
offsets
)
count_matrice
<-
as.data.frame
(
count_matrice
)
count_matrice
[
,
-
c
(
1
:
3
)]
<-
data.frame
(
counts
)
rm
(
counts
)
gc
(
verbose
=
FALSE
)
return
(
count_matrice
)
normalize_count
<-
function
(
count_matrice
,
index_mat_chr
){
# Get all unique chromosomes in the count matrix
chromosome
<-
unique
(
count_matrice
$
chromosome
)
all_count_matrice
<-
future_lapply
(
chromosome
,
function
(
chr
){
# Subset the count matrix
count_matrice
<-
count_matrice
[
which
(
count_matrice
$
chromosome
==
chr
),
]
# Subset the index matrix
col
<-
index_mat_chr
$
variables
[
which
(
index_mat_chr
$
chromosome
==
chr
)]
# Create a new count matrix
deb
<-
count_matrice
[,
c
(
1
:
3
)]
end
<-
subset
(
count_matrice
,
select
=
col
)
count_matrice
<-
cbind
(
deb
,
end
)
# Extract the count values as a matrix
counts
<-
as.matrix
(
count_matrice
[
,
-
c
(
1
:
3
)])
colnames
(
counts
)
<-
NULL
# Compute the total number of reads for each sample
cur_dge
<-
SummarizedExperiment
(
list
(
counts
=
counts
))
cur_dge
$
totals
<-
colSums
(
count_matrice
[
,
-
c
(
1
:
3
)])
# Normalize the counts
offsets
<-
csaw
::
normOffsets
(
cur_dge
,
se.out
=
FALSE
)
counts
<-
counts
/
exp
(
offsets
)
count_matrice
<-
as.data.frame
(
count_matrice
)
count_matrice
[
,
-
c
(
1
:
3
)]
<-
data.frame
(
counts
)
return
(
count_matrice
)
})
# Return the list of normalized count matrices
return
(
all_count_matrice
)
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment