Commit 2302d566 authored by Safia Saci's avatar Safia Saci
Browse files

Upload New File

parent 19721e53
#Packages ------
library("annotatr")
library(dmrseq)
library(ChIPpeakAnno)
library("BSgenome.Sscrofa.ENSEMBL.susScr11")
library(rtracklayer)
library(GenomicFeatures)
library(ggplot2)
library(dplyr)
library(sqldf)
library(EnhancedVolcano)
# post_DMRseq for line
dmr_line <- readRDS("dmrseq_line_gl_.RDS")
# how many regions were significant with threshold FDR (valeur q ) < 0,05
sum(dmr_line$qval < 0.05 & !is.na(dmr_line$qval))
#[1] 514
# select just the regions below FDR 0.05 and place in a new data.frame
sigRegions <- dmr_line[dmr_line$qval < 0.05 & !is.na(dmr_line$qval),]
# proportion of regions with hyper-methylation
sum(sigRegions$stat > 0) / length(sigRegions)
#[1] 0.3677043
#la distribution des valeurs de mthylation et la couverture
png("empDistribution_line.png")
plotEmpiricalDistribution(my_bsseq, testCovariate="line")
dev.off()
#la distribution des valeurs de couverture
png("empDistribution_cov_line.png")
plotEmpiricalDistribution(my_bsseq, testCovariate="line", type="Cov", bySample=TRUE)
dev.off()
# les diffrences de méthylation moyennes brutes
rawDiff <- meanDiff(bs = my_bsseq,
dmrs = sigRegions,
testCovariate = "line")
# > rawDiff
# [1] -0.06697107 0.65499397 -0.72611870 -0.64181925 -0.49965266 -0.45594677
# [7] 0.53302195 0.56387873 -0.50272785 0.55070516 0.43083733 -0.42058421
# [13] 0.27443041 -0.49812904 -0.37921699 -0.37865752 -0.38911153 0.36365112
# [19] -0.39077932 0.31607675 0.52949786 0.29417818 0.45540535 0.33787464
# [25] -0.35948313 -0.45162010 -0.27985297 0.39493516 -0.47083226 0.42744061
# [31] -0.52618477 -0.42351045 0.45146505 -0.32473054 -0.54561380 -0.28381469
# [37] -0.48105534 -0.57115220 -0.36816292 0.34160426 -0.51632695 -0.45488030
# [43] 0.43150438 0.30224684 -0.27807545 -0.36034982 0.26471895 -0.38047931
# [49] 0.29286504 -0.28758397 0.32047644 -0.47214260 -0.50198665 0.49088834
# [55] -0.26503356 0.35151399 0.31452995 -0.29632739 0.48645615 -0.32979596
# [61] -0.30344430 0.33379889 0.36874063 -0.27096727 -0.41348754 -0.36459582
# [67] 0.33567362
# >
str(rawDiff)
sigRegions$beta <- sort(sigRegions$beta, decreasing= TRUE)
for (i in 1:15) {
png(paste0("sig_dmr_gl_", i, ".png"))
plotDMRs(my_bsseq,
extend = 2500,
regions=sigRegions[i,],
testCovariate="line")
dev.off()
}
# dmrs annotation -------
gtf <- import(con = "Sus_scrofa.Sscrofa11.1.102.gtf.gz", format = "GFF")
txdb <- makeTxDbFromGFF(file = "Sus_scrofa.Sscrofa11.1.102.gtf.gz",
format = 'gtf',
organism = "Sus scrofa" )
dmr_peaks<-assignChromosomeRegion(sigRegions,
nucleotideLevel = FALSE,
precedence = c("Promoters", "immediateDownstream", "fiveUTRs", "threeUTRs", "Exons", "Introns"),
TxDb = txdb)
res <- as.data.frame(dmr_peaks$percentage, keep.rownames = TRUE)
png("annotate_dmrs_cons.png")
ggplot(res, aes(x = subjectHits, y = Freq)) +
geom_bar(stat = "identity", width=0.6, fill="steelblue") +
theme_minimal()
dev.off()
# Find the nearest feature and the distance to the feature for the peaklist
annoData <- gtf[gtf$type == "gene"]
names(annoData) <- annoData$gene_id
nearest_id <- nearest(sigRegions, annoData, ignore.strand = TRUE)
keep <- !is.na(nearest_id)
sigRegions <- sigRegions[keep, ]
nearest_id <- nearest_id[keep]
sigRegions$nearest_gene <- annoData[nearest_id, ]$gene_id
sigRegions$symbol <- annoData[nearest_id, ]$gene_name
sigRegions$distance <- as.data.frame(distanceToNearest(sigRegions, annoData))$distance
## plot the distance to the feature for the peaklist
hist(log10(sigRegions$distance + 1))
## add logFC and pValue of rnaseq analysis to sigRegions
gene_file <- read.csv("gene_info.csv", sep = ";")
sigRegions_file <- as.data.frame(sigRegions)
### rename sigRegions column
sigRegions_file <- sigRegions_file %>% rename(index_start = index.start,
index_width = index.width,
index_end = index.end
)
sigRegions_file <- sqldf("select G.seqnames, G.start, G.end, G.width,
G.strand, G.L, G.area, G.beta, G.stat,
G.pval, G.qval, G.index_start, G.index_end,
G.index_width, G.nearest_gene, G.symbol, G.distance,
S.log2fc_line, S.adjpval_line, S.status_line
from gene_file S, sigRegions_file G
where S.gene = G.nearest_gene")
write.csv(sigRegions_file, "sig_dmr_annotate.csv" )
## Volcano ------
sigRegions_file$state[sigRegions_file$Fold > 0 ] <- "HyperMethylated"
sigRegions_file$state[sigRegions_file$Fold < 0 ] <- "HypoMethylated"
sigRegions_file$diffex[sigRegions_file$log2fc_line > 0 & sigRegions_file$adjpval_line < 0.01] <- "UP"
sigRegions_file$diffex[sigRegions_file$log2fc_line < 0 & sigRegions_file$adjpval_line < 0.01] <- "DOWN"
hyper <- sigRegions_file[which(sigRegions_file$state == "HyperMethylated"), ]
hypo <- sigRegions_file[which(sigRegions_file$state == "HypoMethylated"), ]
volcano_rosepigs <- function(coef_table, title = "") {
ggplot(data=coef_table, aes(x=log2fc_line, y=-log10(adjpval_line), col= diffex )) +
geom_point() +
coord_cartesian(xlim = c(-5,5), ylim = c(0,8)) +
annotate("text", x = c(-4,4), y = 8, label = table(coef_table$diffex)) +
labs(title = title) +
xlab("log2 fold change") +
ylab("-log10 adjusted p-value") +
theme_bw(base_size = 10) +
theme(legend.position = "none")
}
volcanos <- volcano_rosepigs(hyper[which(hyper$distance < 10000), ], title = "Hyper-methylated with distance < 10000") +
volcano_rosepigs(hypo[which(hypo$distance < 10000), ], title = "Hypo-methylated with distance < 10000") +
volcano_rosepigs(hyper[which(hyper$distance > 10000), ], title = "Hyper-methylated with distance > 10000") +
volcano_rosepigs(hypo[which(hypo$distance > 10000), ], title = "Hypo-methylated with distance > 10000")
ggsave("MethylatedVolcano_cons.png", volcanos, width = 10, height = 8)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment