Commit fa176ffd authored by Safia Saci's avatar Safia Saci
Browse files

Update PCIT_RIF_analyse.R

parent f0cf74e6
......@@ -7,16 +7,23 @@ library(dplyr)
library(CeTF)
library(data.table)
library(kableExtra)
library(rtracklayer)
library(ComplexHeatmap)
library("RColorBrewer")
library(ggplot2)
# tables with diff expr genes ------
# tables with diff expr genes -----
## Input data -------
coefLine <- read.csv("data/coef_line.csv", sep = ";", header = T)
coefLine <- read.csv("C:/Users/ssaci/Documents/safia/projet/RosePigs_analyse/data/coef_line.csv", sep = ";", header = T)
coefCondition <- read.csv("data/coef_condition.csv", sep = ";", header = T)
coefSex <- read.csv("data/coef_sex.csv", sep = ";", header = T)
## collection of gene_names into gtf :
## GTF file -----
gtf <- import(con = "data/Sus_scrofa.Sscrofa11.1.102.gtf.gz", format = "GFF")
## collection of gene_names into gtf :
gene_name <- function(){
listeGenes <- as.data.frame(gtf$gene_id)
listeGenes <- distinct(listeGenes)
......@@ -28,6 +35,8 @@ gene_name <- function(){
}
geneNames <- gene_name()
# collection of gene_type into gtf :
gene_type <- function(){
listeGenes <- as.data.frame(gtf$gene_id)
......@@ -43,7 +52,6 @@ geneType <- gene_type()
# creation of the tsv file containing the genes, their Expression value and their expression status
gene_file <- sqldf("select C.gene, G.gene_name as gene_symbol, T.gene_biotype as gene_type, C.logFC as log2fc_condition, C.adj_P_Val as adjpval_condition, C.expressed as status_condition,
L.logFC as log2fc_line, L.adj_P_Val as adjpval_line, L.expressed as status_line,
......@@ -56,6 +64,8 @@ gene_file <- sqldf("select C.gene, G.gene_name as gene_symbol, T.gene_biotype as
write.csv(gene_file, "data/gene_info.csv")
# differentially expressed genes in line and in condition:
diff_ex <- sqldf("select C.gene, C.expressed as condition_expressed, L.expressed as line_expressed
from coefLine L, coefCondition C
......@@ -81,6 +91,7 @@ diff_ex <- sqldf("select C.gene, C.expressed as condition_expressed, L.expressed
# 16 ENSSSCG00000032894 DOWN DOWN
# genes Network --------
## with Cor ----------
load("data/expMat2.RData")
......@@ -119,6 +130,11 @@ diff_ex_gene_cond <- sqldf("select C.gene, C.expressed as condition_expressed
from coefCondition C
where C.expressed != 'NS' ")
gene_file_line <- sqldf("select gene, log2fc_line, adjpval_line from gene_file
where gene in (select gene from diff_ex_gene_line)")
gene_file_cond <- sqldf("select * from gene_file
where gene in (select gene from diff_ex_gene_cond)")
# the scores matrix of genes diff expr
de_mat <- expMat2[diff_ex_gene$gene, ]
......@@ -174,6 +190,8 @@ diff_ex_gene$expression <- ifelse(
)
)
# recovery of 12640 genes with the best correlation in abs value:
pcor_mat_dist_filter <- pcor_mat_distinct %>% arrange(desc(abs(pcor_mat_distinct$corr2)))
pcor_dist_filter_table <- pcor_mat_dist_filter[1:12640, ]
......@@ -203,16 +221,9 @@ Go_pigs <- sqldf("select U.Genes_table_ID, U.UniProtKB_Gene_Name_ID, G.gene_prod
from uniprot U, genontology G
where G.ACC_ID == U.UniProtKB_Gene_Name_ID ")
#
# my_genes_de <- diff_ex_gene$gene
# summary(my_genes_de %in% row.names(expMat2))
#
# my_genes_tf <- Go_pigs$Genes_table_ID
# summary(my_genes_tf %in% row.names(expMat2))
# my_genes_tf <- my_genes_tf[my_genes_tf %in% row.names(expMat2)]
# de_mat_tf <- expMat2[c(my_genes_de, my_genes_tf), ]
## line --------------------
## per line --------------------
my_genes_de_line <- diff_ex_gene_line$gene
summary(my_genes_de_line %in% row.names(expMat))
......@@ -230,8 +241,7 @@ RIF_out_line <- RIF(input = de_mat_tf_line,
## cond------------------
# ---
## per cond------------------
my_genes_de_cond <- diff_ex_gene_cond$gene
summary(my_genes_de_cond %in% row.names(expMat))
......@@ -240,8 +250,8 @@ summary(my_genes_tf_cond %in% row.names(expMat))
my_genes_tf_cond <- my_genes_tf_cond[my_genes_tf_cond %in% row.names(expMat)]
de_mat_tf_cond <- expMat[c(my_genes_de_cond, my_genes_tf_cond), ]
## Go_pigs sorting
## Go_pigs sorting
de_mat_tf_cond <- cbind(de_mat_tf_cond[,c("G11-_fasted_F_2884","G11-_fasted_M_2889",
"G11-_fasted_F_2899","G11-_fasted_M_2902",
"G11-_fasted_F_2980","G11-_fasted_M_2987",
......@@ -274,3 +284,109 @@ list_tf <- sqldf("select distinct L.TF, L.avgexpr as avgexpr_line, L.RIF1 as RIF
and C.TF = L.TF")
write.csv(list_tf, "data/RIF_Tf_file.csv")
# Ananlyse RIf resullts-------
list_tf <- read.csv("data/RIF_Tf_file.csv")
# plot 1
rifDF <- read.csv2("data/rif_df.csv")
bplot <- ggplot(rifDF, aes(x = factor(name, levels = c("LRP6","HIF1A","TEAD1","CTNNB1","RLIM","NR1D1","ELK1","CTH","GTF3A","SMAD7","ZNF575","PSMD10","SNAI3","EHF","ONECUT3","MYBL2","PROX2","FOXD4","AIRE","PRMT2")),
y = rif_value, fill = categorie )) +
geom_bar(stat="identity", position="dodge" )+
scale_fill_manual(values=c("indianred1", "indianred4","deepskyblue1", "deepskyblue4")) +
coord_flip() +
ylab("RIF")+
xlab("TFs")+
geom_text(aes(label= rif_value), position=position_dodge(width=0.9))+
theme(panel.grid.major = element_line(colour = "white"),
panel.grid.minor = element_line(colour = "white"))
ggsave(filename = "plots/rif_TF.png", bplot)
# best TFs with RIF value
best_rif <- read.csv2("data/best_rif.csv")
best_rif$val <- as.numeric(best_rif$val)
levels <- group_by(best_rif, name) %>%
summarise(max_rif = max(val)) %>%
arrange(max_rif) %>%
pull(name)
p2 <- ggplot(best_rif, aes(x = categorie, y = val, fill = categorie))+
geom_bar(stat = "identity", position = "dodge", width=0.8) +
scale_fill_manual(values=c("indianred1", "indianred4","deepskyblue1", "deepskyblue4"))+
ylab("RIF")+
coord_flip(ylim = c(0, 4.2), expand = FALSE)+
xlab("TFs")+
facet_wrap(~factor(name, levels = rev(levels)))+
theme_bw()+
#scale_y_discrete(breaks=seq(-1, 4, 0.5))+
theme(panel.grid.major = element_line(colour = "white"),
panel.grid.minor = element_line(colour = "white"),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "none")
ggsave("plots/best_rif2.png", p2, height = 4, width = 6)
# TFs with Gprofiler------------
# listes of TFS from g;profiler
up_cond_tf <- read.csv("data/gprofiler/up_cond.csv")
up_line_tf <- read.csv("data/gprofiler/up_line.csv")
down_cond_tf <- read.csv("data/gprofiler/down_cond.csv")
de_line_tf <- read.csv("data/line_DE_tf_gprofiler.csv")
de_cond_tf <- read.csv("data/gprofiler/DE_cond.csv")
# add "condition" categorie
up_cond_tf$categorie <- "UP_Condition"
up_line_tf$categorie <- "UP_Line"
down_cond_tf$categorie <- "DOWN_Condition"
de_line_tf$categorie <- "DE_Line"
de_cond_tf$categorie <- "DE_Condition"
# order TFs with -log10(pval_adj)
up_cond_tf <- up_cond_tf[order(up_cond_tf$negative_log10_of_adjusted_p_value, decreasing = TRUE), ]
up_line_tf <- up_line_tf[order(up_line_tf$negative_log10_of_adjusted_p_value, decreasing = TRUE), ]
down_cond_tf <- down_cond_tf[order(down_cond_tf$negative_log10_of_adjusted_p_value, decreasing = TRUE), ]
de_cond_tf <- de_cond_tf[order(de_cond_tf$negative_log10_of_adjusted_p_value, decreasing = TRUE), ]
# x <- data.frame(tf = tfs$term_name, val= tfs$negative_log10_of_adjusted_p_value, id = tfs$term_id, categorie = tfs$categorie)
# # save the liste of the best Tfs
# write.csv(x, "data/tsf.csv")
x <- read.csv("data/tsf.csv")
all_tf <- rbind(up_cond_tf, up_line_tf, down_cond_tf, de_cond_tf, de_line_tf)
motifs <- filter(x, grepl(pattern = "_1$", id)) %>%
group_by(categorie) %>%
slice_max(val, n = 5) %>%
pull(id) %>%
unique()
tfs <- mutate(x, categorie = factor(categorie, levels = c("DE_Condition", "UP_Condition", "DOWN_Condition", "DE_Line","UP_Line", "DOWN_Line")))%>%
filter(id %in% motifs)
p4 <- ggplot(tfs, aes(x = categorie, y = val, fill = categorie))+
geom_bar(stat = "identity")+
xlab("")+
ylab("-Log10(pVal_Adj)")+
scale_fill_manual("Categories", values=c("yellowgreen", "forestgreen","goldenrod1", "mediumorchid4", "Blue4", "brown3"), drop = FALSE)+
coord_flip(ylim = c(0, 35), expand = TRUE)+
facet_wrap(~factor(tf,))+
scale_x_discrete(drop = FALSE)+
theme_bw()
png("plots/tfs_gprofiler.png", width = 800, height = 600, pointsize = 20)
p4
dev.off()
ggsave(filename ="plots/tfs_gprofiler.png", p4)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment