Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
GenEpi
Analyses
rosePigs
Commits
2302d566
Commit
2302d566
authored
Jun 07, 2021
by
Safia Saci
Browse files
Upload New File
parent
19721e53
Changes
1
Hide whitespace changes
Inline
Side-by-side
post_dmrseq.R
0 → 100644
View file @
2302d566
#Packages ------
library
(
"annotatr"
)
library
(
dmrseq
)
library
(
ChIPpeakAnno
)
library
(
"BSgenome.Sscrofa.ENSEMBL.susScr11"
)
library
(
rtracklayer
)
library
(
GenomicFeatures
)
library
(
ggplot2
)
library
(
dplyr
)
library
(
sqldf
)
library
(
EnhancedVolcano
)
# post_DMRseq for line
dmr_line
<-
readRDS
(
"dmrseq_line_gl_.RDS"
)
# how many regions were significant with threshold FDR (valeur q ) < 0,05
sum
(
dmr_line
$
qval
<
0.05
&
!
is.na
(
dmr_line
$
qval
))
#[1] 514
# select just the regions below FDR 0.05 and place in a new data.frame
sigRegions
<-
dmr_line
[
dmr_line
$
qval
<
0.05
&
!
is.na
(
dmr_line
$
qval
),]
# proportion of regions with hyper-methylation
sum
(
sigRegions
$
stat
>
0
)
/
length
(
sigRegions
)
#[1] 0.3677043
#la distribution des valeurs de mthylation et la couverture
png
(
"empDistribution_line.png"
)
plotEmpiricalDistribution
(
my_bsseq
,
testCovariate
=
"line"
)
dev.off
()
#la distribution des valeurs de couverture
png
(
"empDistribution_cov_line.png"
)
plotEmpiricalDistribution
(
my_bsseq
,
testCovariate
=
"line"
,
type
=
"Cov"
,
bySample
=
TRUE
)
dev.off
()
# les diffrences de méthylation moyennes brutes
rawDiff
<-
meanDiff
(
bs
=
my_bsseq
,
dmrs
=
sigRegions
,
testCovariate
=
"line"
)
# > rawDiff
# [1] -0.06697107 0.65499397 -0.72611870 -0.64181925 -0.49965266 -0.45594677
# [7] 0.53302195 0.56387873 -0.50272785 0.55070516 0.43083733 -0.42058421
# [13] 0.27443041 -0.49812904 -0.37921699 -0.37865752 -0.38911153 0.36365112
# [19] -0.39077932 0.31607675 0.52949786 0.29417818 0.45540535 0.33787464
# [25] -0.35948313 -0.45162010 -0.27985297 0.39493516 -0.47083226 0.42744061
# [31] -0.52618477 -0.42351045 0.45146505 -0.32473054 -0.54561380 -0.28381469
# [37] -0.48105534 -0.57115220 -0.36816292 0.34160426 -0.51632695 -0.45488030
# [43] 0.43150438 0.30224684 -0.27807545 -0.36034982 0.26471895 -0.38047931
# [49] 0.29286504 -0.28758397 0.32047644 -0.47214260 -0.50198665 0.49088834
# [55] -0.26503356 0.35151399 0.31452995 -0.29632739 0.48645615 -0.32979596
# [61] -0.30344430 0.33379889 0.36874063 -0.27096727 -0.41348754 -0.36459582
# [67] 0.33567362
# >
str
(
rawDiff
)
sigRegions
$
beta
<-
sort
(
sigRegions
$
beta
,
decreasing
=
TRUE
)
for
(
i
in
1
:
15
)
{
png
(
paste0
(
"sig_dmr_gl_"
,
i
,
".png"
))
plotDMRs
(
my_bsseq
,
extend
=
2500
,
regions
=
sigRegions
[
i
,],
testCovariate
=
"line"
)
dev.off
()
}
# dmrs annotation -------
gtf
<-
import
(
con
=
"Sus_scrofa.Sscrofa11.1.102.gtf.gz"
,
format
=
"GFF"
)
txdb
<-
makeTxDbFromGFF
(
file
=
"Sus_scrofa.Sscrofa11.1.102.gtf.gz"
,
format
=
'gtf'
,
organism
=
"Sus scrofa"
)
dmr_peaks
<-
assignChromosomeRegion
(
sigRegions
,
nucleotideLevel
=
FALSE
,
precedence
=
c
(
"Promoters"
,
"immediateDownstream"
,
"fiveUTRs"
,
"threeUTRs"
,
"Exons"
,
"Introns"
),
TxDb
=
txdb
)
res
<-
as.data.frame
(
dmr_peaks
$
percentage
,
keep.rownames
=
TRUE
)
png
(
"annotate_dmrs_cons.png"
)
ggplot
(
res
,
aes
(
x
=
subjectHits
,
y
=
Freq
))
+
geom_bar
(
stat
=
"identity"
,
width
=
0.6
,
fill
=
"steelblue"
)
+
theme_minimal
()
dev.off
()
# Find the nearest feature and the distance to the feature for the peaklist
annoData
<-
gtf
[
gtf
$
type
==
"gene"
]
names
(
annoData
)
<-
annoData
$
gene_id
nearest_id
<-
nearest
(
sigRegions
,
annoData
,
ignore.strand
=
TRUE
)
keep
<-
!
is.na
(
nearest_id
)
sigRegions
<-
sigRegions
[
keep
,
]
nearest_id
<-
nearest_id
[
keep
]
sigRegions
$
nearest_gene
<-
annoData
[
nearest_id
,
]
$
gene_id
sigRegions
$
symbol
<-
annoData
[
nearest_id
,
]
$
gene_name
sigRegions
$
distance
<-
as.data.frame
(
distanceToNearest
(
sigRegions
,
annoData
))
$
distance
## plot the distance to the feature for the peaklist
hist
(
log10
(
sigRegions
$
distance
+
1
))
## add logFC and pValue of rnaseq analysis to sigRegions
gene_file
<-
read.csv
(
"gene_info.csv"
,
sep
=
";"
)
sigRegions_file
<-
as.data.frame
(
sigRegions
)
### rename sigRegions column
sigRegions_file
<-
sigRegions_file
%>%
rename
(
index_start
=
index.start
,
index_width
=
index.width
,
index_end
=
index.end
)
sigRegions_file
<-
sqldf
(
"select G.seqnames, G.start, G.end, G.width,
G.strand, G.L, G.area, G.beta, G.stat,
G.pval, G.qval, G.index_start, G.index_end,
G.index_width, G.nearest_gene, G.symbol, G.distance,
S.log2fc_line, S.adjpval_line, S.status_line
from gene_file S, sigRegions_file G
where S.gene = G.nearest_gene"
)
write.csv
(
sigRegions_file
,
"sig_dmr_annotate.csv"
)
## Volcano ------
sigRegions_file
$
state
[
sigRegions_file
$
Fold
>
0
]
<-
"HyperMethylated"
sigRegions_file
$
state
[
sigRegions_file
$
Fold
<
0
]
<-
"HypoMethylated"
sigRegions_file
$
diffex
[
sigRegions_file
$
log2fc_line
>
0
&
sigRegions_file
$
adjpval_line
<
0.01
]
<-
"UP"
sigRegions_file
$
diffex
[
sigRegions_file
$
log2fc_line
<
0
&
sigRegions_file
$
adjpval_line
<
0.01
]
<-
"DOWN"
hyper
<-
sigRegions_file
[
which
(
sigRegions_file
$
state
==
"HyperMethylated"
),
]
hypo
<-
sigRegions_file
[
which
(
sigRegions_file
$
state
==
"HypoMethylated"
),
]
volcano_rosepigs
<-
function
(
coef_table
,
title
=
""
)
{
ggplot
(
data
=
coef_table
,
aes
(
x
=
log2fc_line
,
y
=-
log10
(
adjpval_line
),
col
=
diffex
))
+
geom_point
()
+
coord_cartesian
(
xlim
=
c
(
-5
,
5
),
ylim
=
c
(
0
,
8
))
+
annotate
(
"text"
,
x
=
c
(
-4
,
4
),
y
=
8
,
label
=
table
(
coef_table
$
diffex
))
+
labs
(
title
=
title
)
+
xlab
(
"log2 fold change"
)
+
ylab
(
"-log10 adjusted p-value"
)
+
theme_bw
(
base_size
=
10
)
+
theme
(
legend.position
=
"none"
)
}
volcanos
<-
volcano_rosepigs
(
hyper
[
which
(
hyper
$
distance
<
10000
),
],
title
=
"Hyper-methylated with distance < 10000"
)
+
volcano_rosepigs
(
hypo
[
which
(
hypo
$
distance
<
10000
),
],
title
=
"Hypo-methylated with distance < 10000"
)
+
volcano_rosepigs
(
hyper
[
which
(
hyper
$
distance
>
10000
),
],
title
=
"Hyper-methylated with distance > 10000"
)
+
volcano_rosepigs
(
hypo
[
which
(
hypo
$
distance
>
10000
),
],
title
=
"Hypo-methylated with distance > 10000"
)
ggsave
(
"MethylatedVolcano_cons.png"
,
volcanos
,
width
=
10
,
height
=
8
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment