Commit d69159a9 authored by Celine Noirot's avatar Celine Noirot
Browse files

Gaelle update

parent 6c319bf7
...@@ -10,7 +10,7 @@ packages <- c("optparse", # to read arguments from a command line ...@@ -10,7 +10,7 @@ packages <- c("optparse", # to read arguments from a command line
for(package in packages){ for(package in packages){
# if package is installed locally, load # if package is installed locally, load
if (!(package %in% rownames(installed.packages()))) { if (!(package %in% rownames(installed.packages()))) {
install.packages(package) install.packages(package, repos="http://cran.univ-paris1.fr")
} }
do.call('library', list(package)) do.call('library', list(package))
} }
...@@ -18,7 +18,7 @@ for(package in packages){ ...@@ -18,7 +18,7 @@ for(package in packages){
#Bioconductor package #Bioconductor package
if (!("topGO" %in% rownames(installed.packages()))) { if (!("topGO" %in% rownames(installed.packages()))) {
source("http://www.bioconductor.org/biocLite.R") source("http://www.bioconductor.org/biocLite.R")
biocLite("topGO", suppressUpdates = TRUE, suppressAutoUpdate = TRUE, ask = FALSE) biocLite("topGO")
} }
library(topGO) # GO enrichment library(topGO) # GO enrichment
...@@ -80,9 +80,9 @@ opt = parse_args(opt_parser); ...@@ -80,9 +80,9 @@ opt = parse_args(opt_parser);
#opt$out <- XXX # for example "C:/My documents/results" #opt$out <- XXX # for example "C:/My documents/results"
#opt$test <- XXX # "fisher" or "KS" #opt$test <- XXX # "fisher" or "KS"
#opt$algorithm <- XXX # ('classic', 'elim' or 'weight') #opt$algorithm <- XXX # ('classic', 'elim' or 'weight')
#opt$paramAlgo <- XXX # ('bonferroni' or 'BH' for classic, 0.01 for elim and 'ratio' or 'log' for weight) #opt$paramAlgo <- XXX # ('bonferroni' or 'BH')
#opt$alpha <- 0.05 # for example 0.05 #opt$alpha <- 0.05 # for example 0.05
#opt$target <- "contigs" # for example contigs, gene... #opt$target <- "contigs" # for example contigs
# table with genes and GO, genes of interest, test and algorithm are needed to # table with genes and GO, genes of interest, test and algorithm are needed to
...@@ -114,34 +114,6 @@ if(is.null(opt$paramAlgo)){ ...@@ -114,34 +114,6 @@ if(is.null(opt$paramAlgo)){
} }
} }
if(opt$algorithm == "classic" & !(opt$paramAlgo == "bonferroni" | opt$paramAlgo == "BH")){
stop("paramAlgo need to be equals to 'bonferroni' or 'BH'\n",
call. = FALSE)
}
if(opt$algorithm == "classic" & !(opt$paramAlgo == "bonferroni" | opt$paramAlgo == "BH")){
stop("paramAlgo need to be equals to 'bonferroni' or 'BH'\n",
call. = FALSE)
}
if(opt$algorithm == "elim"){
opt$paramAlgo <- as.numeric(opt$paramAlgo)
if(is.na(opt$paramAlgo)){
stop("paramAlgo need to be a number\n",
call. = FALSE)
}
if(opt$paramAlgo > 1){
stop("paramAlgo need to be inferior to 1\n",
call. = FALSE)
}
}
if(opt$algorithm == "weight" & !(opt$paramAlgo == "ratio" | opt$paramAlgo == "log")){
stop("paramAlgo need to be equals to 'ratio' or 'log'\n",
call. = FALSE)
}
##################################################### #####################################################
### ---------------------Input data and preprocessing ### ---------------------Input data and preprocessing
...@@ -327,10 +299,10 @@ for(ontology in c("BP", "MF", "CC")){ ...@@ -327,10 +299,10 @@ for(ontology in c("BP", "MF", "CC")){
resultTest@score <- p.adjust(resultTest@score, method = opt$paramAlgo) resultTest@score <- p.adjust(resultTest@score, method = opt$paramAlgo)
} else if (opt$algorithm == "elim"){ } else if (opt$algorithm == "elim"){
resultTest <- runTest(myGOdata, algorithm = opt$algorithm, statistic = opt$test, resultTest <- runTest(myGOdata, algorithm = opt$algorithm, statistic = opt$test,
cutOff = as.numeric(opt$paramAlgo)) cutOff = as.numeric(opt$cutOff))
} else if (opt$algorithm == "weight"){ } else if (opt$algorithm == "weight"){
resultTest <- runTest(myGOdata, algorithm = opt$algorithm, statistic = opt$test, resultTest <- runTest(myGOdata, algorithm = opt$algorithm, statistic = opt$test,
sigRatio = opt$paramAlgo) sigRatio = opt$weightFunc)
} }
sigRes <- resultTest@score[resultTest@score <= opt$alpha] sigRes <- resultTest@score[resultTest@score <= opt$alpha]
...@@ -356,9 +328,7 @@ for(ontology in c("BP", "MF", "CC")){ ...@@ -356,9 +328,7 @@ for(ontology in c("BP", "MF", "CC")){
} }
if(!is.null(GOgeneSig)){ names(GOgeneSig)[4] <- opt$target
names(GOgeneSig)[4] <- opt$target
}
write.table(GOgeneSig, file = normalizePath(file.path(opt$out, write.table(GOgeneSig, file = normalizePath(file.path(opt$out,
paste0("GOgeneSig.csv")), paste0("GOgeneSig.csv")),
mustWork = FALSE), sep = "\t", mustWork = FALSE), sep = "\t",
...@@ -372,7 +342,7 @@ write.table(unique(GOgeneSig[,c("ontology", "GO", "GODef", "pval")]), ...@@ -372,7 +342,7 @@ write.table(unique(GOgeneSig[,c("ontology", "GO", "GODef", "pval")]),
## JSON ## JSON
stat <- list("notInTheAnalysis" = ifelse(!is.na(notInAnalysis[1]), as.character(notInAnalysis), "none")) stat <- list("notInTheAnalysis" = ifelse(!is.na(notInAnalysis[1]), notInAnalysis, "none"))
write(gsub(" ", "", toJSON(list(stat=stat), collapse = "")), write(gsub(" ", "", toJSON(list(stat=stat), collapse = "")),
file = normalizePath(file.path(opt$out, "results.json"), file = normalizePath(file.path(opt$out, "results.json"),
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#common packages #common packages
packages <- c("optparse", # to read arguments from a command line packages <- c("optparse", # to read arguments from a command line
"ggplot2", # nice plots "ggplot2", # nice plots
"scales", #log2 transformation
"reshape2") # reshape grouped data "reshape2") # reshape grouped data
for(package in packages){ for(package in packages){
...@@ -89,12 +90,14 @@ PC_before_log <- log2(dge$counts + 1) ...@@ -89,12 +90,14 @@ PC_before_log <- log2(dge$counts + 1)
PC_before_log_long <- melt(PC_before_log) PC_before_log_long <- melt(PC_before_log)
colnames(PC_before_log_long) <- c("name", "lib", "log2CPM") colnames(PC_before_log_long) <- c("name", "lib", "log2CPM")
PC_before_long <- melt(PC_before)
colnames(PC_before_long) <- c("name", "lib", "CPM")
## boxplot ## boxplot
ggplot(PC_before_log_long, aes(x=lib, y=log2CPM)) + ggplot(PC_before_log_long, aes(x=lib, y=log2CPM)) +
geom_boxplot(fill='gray', color="black") + theme_bw() + xlab(label = "") + geom_boxplot(fill='gray', color="black") + theme_bw() + xlab(label = "") +
ylab(label = expression(log[2]*"(counts + 1)")) + ylab(label = expression(log[2]*"(counts + 1)")) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, colour = "black")) +
ggtitle("Boxplot on raw pseudo-counts") ggtitle("Boxplot on raw pseudo-counts")
ggsave(filename = normalizePath(file.path(opt$out, "rawcount_boxplot.svg"), ggsave(filename = normalizePath(file.path(opt$out, "rawcount_boxplot.svg"),
mustWork = FALSE), width = 11, height = 7) mustWork = FALSE), width = 11, height = 7)
...@@ -112,7 +115,9 @@ if (ncol(PC_before) > 2){ ...@@ -112,7 +115,9 @@ if (ncol(PC_before) > 2){
} }
## density ## density
ggplot(PC_before_log_long, aes(x=log2CPM, colour=lib)) + geom_density() + myLog2_trans <- function() trans_new("myLog2", function(x) log2(x+1), function(x) (2^x) - 1)
ggplot(PC_before_long, aes(x=CPM, colour=lib)) + geom_density(trim = TRUE) +
scale_x_continuous(trans = "myLog2", breaks=c(0,1,5,10,50,100,500,1000,5000,10000)) +
theme_bw() + xlab(label = expression(log[2]*"(counts + 1)")) + theme_bw() + xlab(label = expression(log[2]*"(counts + 1)")) +
ylab(label = "Density") + ggtitle("Density plot on raw pseudo-counts") + ylab(label = "Density") + ggtitle("Density plot on raw pseudo-counts") +
guides(color=guide_legend(title="Library")) guides(color=guide_legend(title="Library"))
...@@ -147,6 +152,12 @@ normalization <- function(dge, method) { ...@@ -147,6 +152,12 @@ normalization <- function(dge, method) {
PC_after <- cpm(dgeNorm) PC_after <- cpm(dgeNorm)
PC_after_log <- log2(PC_after + 1) PC_after_log <- log2(PC_after + 1)
PC_after_log_long <- melt(PC_after_log)
colnames(PC_after_log_long) <- c("name", "lib", "log2CPM")
PC_after_long <- melt(PC_after)
colnames(PC_after_long) <- c("name", "lib", "CPM")
#Normalized data #Normalized data
PC_after <- cbind("#name" = rownames(PC_after), PC_after) PC_after <- cbind("#name" = rownames(PC_after), PC_after)
sample_info <- data.frame(sample.name=rownames(dgeNorm$samples), sample_info <- data.frame(sample.name=rownames(dgeNorm$samples),
...@@ -164,9 +175,6 @@ normalization <- function(dge, method) { ...@@ -164,9 +175,6 @@ normalization <- function(dge, method) {
##### Plots ##### Plots
PC_after_log_long <- melt(PC_after_log)
colnames(PC_after_log_long) <- c("name", "lib", "log2CPM")
#title #title
tit <- method tit <- method
if (method == "upperquartile") { if (method == "upperquartile") {
...@@ -177,6 +185,7 @@ normalization <- function(dge, method) { ...@@ -177,6 +185,7 @@ normalization <- function(dge, method) {
ggplot(PC_after_log_long, aes(x=lib, y=log2CPM)) + ggplot(PC_after_log_long, aes(x=lib, y=log2CPM)) +
geom_boxplot(fill='gray', color="black") + theme_bw() + xlab(label = "") + geom_boxplot(fill='gray', color="black") + theme_bw() + xlab(label = "") +
ylab(label = expression(log[2]*"(counts + 1)")) + ylab(label = expression(log[2]*"(counts + 1)")) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, colour = "black")) +
ggtitle(paste0("Boxplot on pseudo-counts after normalization (method = ", ggtitle(paste0("Boxplot on pseudo-counts after normalization (method = ",
tit, ")")) tit, ")"))
ggsave(filename = normalizePath(file.path(opt$out, paste0(method, ggsave(filename = normalizePath(file.path(opt$out, paste0(method,
...@@ -198,7 +207,8 @@ normalization <- function(dge, method) { ...@@ -198,7 +207,8 @@ normalization <- function(dge, method) {
#density #density
ggplot(PC_after_log_long, aes(x=log2CPM, colour=lib)) + geom_density() + ggplot(PC_after_long, aes(x=CPM, colour=lib)) + geom_density(trim = TRUE) +
scale_x_continuous(trans = "myLog2", breaks=c(0,1,5,10,50,100,500,1000,5000,10000)) +
theme_bw() + xlab(label = expression(log[2]*"(counts + 1)")) + theme_bw() + xlab(label = expression(log[2]*"(counts + 1)")) +
ylab(label = "Density") + ylab(label = "Density") +
ggtitle(paste0("Density plot on pseudo-counts after normalization (method = ", ggtitle(paste0("Density plot on pseudo-counts after normalization (method = ",
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment