Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
bios4biol
bioinfo-utils
Commits
000aeca3
Commit
000aeca3
authored
Sep 28, 2018
by
Celine Noirot
Browse files
Debug new parameters and add package svglite
parent
80056632
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
bin/DEG.R
View file @
000aeca3
This diff is collapsed.
Click to expand it.
bin/GOBarplot.R
View file @
000aeca3
...
...
@@ -4,7 +4,7 @@
##DATE = "03/2016"
##AUTHORS = "Gaëlle Lefort"
##KEYWORDS = "RNAseq gene ontology barplot"
##DESCRIPTION = "GOEnrichment.R carries out a barplot to compare gene ontology between multiple set of files."
##DESCRIPTION = "GOEnrichment.R carries out a barplot to compare gene ontology between multiple set of files."
############################# Enrichment analysis ##############################
...
...
@@ -14,7 +14,8 @@
#common packages
packages
<-
c
(
"optparse"
,
# to read arguments from a command line
"ggplot2"
,
# nice plots
"RJSONIO"
)
# work with JSON file
"RJSONIO"
,
# work with JSON file
"DBI"
)
for
(
package
in
packages
){
# if package is installed locally, load
...
...
@@ -22,7 +23,7 @@ for(package in packages){
install.packages
(
package
)
}
do.call
(
'library'
,
list
(
package
))
}
}
#Bioconductor package
if
(
!
(
"topGO"
%in%
rownames
(
installed.packages
())))
{
...
...
@@ -38,19 +39,19 @@ library(topGO) # GO enrichment
###### Parameter initialization when the script is launched from command line
############
option_list
=
list
(
make_option
(
c
(
"-f"
,
"--files"
),
type
=
"character"
,
default
=
NULL
,
help
=
"geneID2GO files: list of tabulated matrix with the correspondence
make_option
(
c
(
"-f"
,
"--files"
),
type
=
"character"
,
default
=
NULL
,
help
=
"geneID2GO files: list of tabulated matrix with the correspondence
gene to GO separated by ','"
,
metavar
=
"character"
),
make_option
(
c
(
"-n"
,
"--names"
),
type
=
"character"
,
default
=
NULL
,
make_option
(
c
(
"-n"
,
"--names"
),
type
=
"character"
,
default
=
NULL
,
help
=
"names of the samples (same order than files)"
,
metavar
=
"character"
),
make_option
(
c
(
"--fileFormat"
),
type
=
"character"
,
default
=
"topGO"
,
help
=
"two types of file can be used : topGO (gene_id1 GO_1,GO_2,...
make_option
(
c
(
"--fileFormat"
),
type
=
"character"
,
default
=
"topGO"
,
help
=
"two types of file can be used : topGO (gene_id1 GO_1,GO_2,...
\n gene_id2 GO_3,...) or twoColumns (only 2 columns
#gene_id GO_id)"
,
metavar
=
"character"
),
make_option
(
c
(
"-o"
,
"--out"
),
type
=
"character"
,
default
=
NULL
,
help
=
"folder path where results are stored"
,
make_option
(
c
(
"-o"
,
"--out"
),
type
=
"character"
,
default
=
NULL
,
help
=
"folder path where results are stored"
,
metavar
=
"character"
)
);
);
opt_parser
=
OptionParser
(
usage
=
"Carries out a barplot to compare gene ontology between multiple set of files"
,
option_list
=
option_list
);
opt
=
parse_args
(
opt_parser
);
...
...
@@ -66,11 +67,11 @@ opt = parse_args(opt_parser);
#opt$out <- XXX # for example "C:/My documents/results"
# table with genes and GO, genes of interest, test and algorithm are needed to
# table with genes and GO, genes of interest, test and algorithm are needed to
#continue
if
(
is.null
(
opt
$
files
))
{
print_help
(
opt_parser
)
stop
(
"Tables with genes and GO missing.\n"
,
stop
(
"Tables with genes and GO missing.\n"
,
call.
=
FALSE
)
}
...
...
@@ -83,7 +84,7 @@ ifelse(!dir.exists(opt$out), dir.create(opt$out), FALSE)
### ---------------------Input data and preprocessing
#####################################################
## read tables with all genes
l
<-
unlist
(
strsplit
(
opt
$
files
,
','
))
l
<-
unlist
(
strsplit
(
opt
$
files
,
','
))
opt
$
names
<-
unlist
(
strsplit
(
opt
$
names
,
','
))
...
...
@@ -92,50 +93,50 @@ if (opt$fileFormat == "topGO"){
for
(
i
in
seq_along
(
l
)){
geneID2GO
[[
i
]]
<-
readMappings
(
normalizePath
(
l
[
i
]),
sep
=
"\t"
,
IDsep
=
","
)
}
}
else
{
for
(
i
in
seq_along
(
l
)){
go
<-
read.table
(
normalizePath
(
l
[
i
]),
header
=
TRUE
,
sep
=
"\t"
,
go
<-
read.table
(
normalizePath
(
l
[
i
]),
header
=
TRUE
,
sep
=
"\t"
,
comment.char
=
""
)
# obtains a list of GO by genes for all genes (mandatory for topGO package)
geneID2GO
[[
i
]]
<-
list
()
for
(
i
in
unique
(
go
[,
1
])){
gene
<-
go
[
go
[,
1
]
==
i
,]
for
(
x
in
unique
(
go
[,
1
])){
gene
<-
go
[
go
[,
1
]
==
x
,]
listGO
<-
vector
()
for
(
j
in
1
:
nrow
(
gene
)){
listGO
<-
c
(
listGO
,
as.character
(
gene
[
j
,
2
]))
}
geneID2GO
[[
i
]]
<-
c
(
geneID2GO
[[
i
]],
list
(
listGO
))
}
names
(
geneID2GO
[[
i
]])
<-
unique
(
go
[,
1
])
}
}
}
geneList
<-
list
()
for
(
i
in
seq_along
(
geneID2GO
)){
geneList
[[
i
]]
<-
factor
(
rep
(
1
,
length
(
geneID2GO
[[
i
]])))
levels
(
geneList
[[
i
]])
<-
c
(
levels
(
geneList
[[
i
]]),
0
)
names
(
geneList
[[
i
]])
<-
names
(
geneID2GO
[[
i
]])
names
(
geneList
[[
i
]])
<-
names
(
geneID2GO
[[
i
]])
}
######################### Functions
########## get name/definition of a GO (hidden function in topGO)
getTermsDefinition
<-
function
(
whichTerms
,
ontology
,
numChar
=
20
,
getTermsDefinition
<-
function
(
whichTerms
,
ontology
,
numChar
=
20
,
multipLines
=
FALSE
)
{
qTerms
<-
paste
(
paste
(
"'"
,
whichTerms
,
"'"
,
sep
=
""
),
collapse
=
","
)
retVal
<-
dbGetQuery
(
GO_dbconn
(),
retVal
<-
dbGetQuery
(
GO_dbconn
(),
paste
(
"SELECT term, go_id FROM go_term WHERE ontology IN"
,
"('"
,
ontology
,
"') AND go_id IN ("
,
"('"
,
ontology
,
"') AND go_id IN ("
,
qTerms
,
");"
,
sep
=
""
))
termsNames
<-
retVal
$
term
names
(
termsNames
)
<-
retVal
$
go_id
if
(
!
multipLines
)
if
(
!
multipLines
)
shortNames
<-
paste
(
substr
(
termsNames
,
1
,
numChar
),
ifelse
(
nchar
(
termsNames
)
>
numChar
,
'...'
,
''
),
sep
=
''
)
else
...
...
@@ -144,7 +145,7 @@ getTermsDefinition <- function(whichTerms, ontology, numChar = 20,
a
<-
strwrap
(
x
,
numChar
)
return
(
paste
(
a
,
sep
=
""
,
collapse
=
"\\\n"
))
})
names
(
shortNames
)
<-
names
(
termsNames
)
return
(
shortNames
[
whichTerms
])
}
...
...
@@ -152,27 +153,27 @@ getTermsDefinition <- function(whichTerms, ontology, numChar = 20,
########## find GO at levels 2 for ONE ontology
nbLevels2
<-
function
(
geneListI
,
ontology
,
geneID2GO
){
myGOdata
<-
NULL
try
(
myGOdata
<-
new
(
"topGOdata"
,
description
=
"My project"
,
ontology
=
ontology
,
try
(
myGOdata
<-
new
(
"topGOdata"
,
description
=
"My project"
,
ontology
=
ontology
,
allGenes
=
geneListI
,
annot
=
annFUN.gene2GO
,
gene2GO
=
geneID2GO
),
silent
=
TRUE
)
count
<-
0
if
(
!
is.null
(
myGOdata
)){
# calculate levels for each GO in our data and his ancestors
levels
<-
buildLevels
(
myGOdata
@
graph
)
# select only GO at level 2
goLevels2
<-
mget
(
ls
(
levels
$
nodes2level
),
envir
=
levels
$
nodes2level
)
==
2
goLevels2
<-
names
(
goLevels2
)[
goLevels2
]
# count genes in each GO
count
<-
NULL
for
(
i
in
goLevels2
){
count
<-
c
(
count
,
countGenesInTerm
(
myGOdata
,
i
))
}
}
return
(
count
)
}
...
...
@@ -181,29 +182,29 @@ nbLevels2 <- function(geneListI, ontology, geneID2GO){
percentOfGene
<-
function
(
geneList
,
ontology
,
DefOntology
){
prop
<-
list
()
count
<-
NULL
#create a table with GO names and proportion of genes
for
(
i
in
seq_along
(
geneList
)){
prop
[[
i
]]
<-
nbLevels2
(
geneList
[[
i
]],
ontology
,
geneID2GO
[[
i
]])
/
length
(
geneList
[[
i
]])
count
<-
rbind
(
count
,
data.frame
(
name
=
getTermsDefinition
(
names
(
prop
[[
i
]]),
ontology
,
numChar
=
40
),
nbGene
=
as.vector
(
prop
[[
i
]]),
sample
=
opt
$
names
[
i
],
count
<-
rbind
(
count
,
data.frame
(
name
=
getTermsDefinition
(
names
(
prop
[[
i
]]),
ontology
,
numChar
=
40
),
nbGene
=
as.vector
(
prop
[[
i
]]),
sample
=
opt
$
names
[
i
],
ontology
=
DefOntology
))
}
# table with all GO
all
<-
data.frame
(
name
=
rep
(
unique
(
count
$
name
),
length
(
names
)),
nbGene
=
0
,
sample
=
rep
(
opt
$
names
,
all
<-
data.frame
(
name
=
rep
(
unique
(
count
$
name
),
length
(
names
)),
nbGene
=
0
,
sample
=
rep
(
opt
$
names
,
each
=
length
(
unique
(
count
$
name
))),
ontology
=
DefOntology
)
#only GO-name not in count
not_in_count
<-
all
[
!
(
paste0
(
all
$
name
,
"."
,
all
$
sample
)
%in%
not_in_count
<-
all
[
!
(
paste0
(
all
$
name
,
"."
,
all
$
sample
)
%in%
paste0
(
count
$
name
,
"."
,
count
$
sample
)),]
count
<-
rbind
(
count
,
not_in_count
)
return
(
count
)
}
...
...
@@ -221,15 +222,12 @@ ggplot(count, aes(x=as.factor(name), y=nbGene, fill=sample)) +
geom_bar
(
stat
=
"identity"
,
position
=
position_dodge
())
+
facet_grid
(
~
ontology
,
scales
=
"free_x"
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
70
,
vjust
=
1
,
hjust
=
1
,
colour
=
"black"
))
+
scale_fill_discrete
(
name
=
""
)
+
ylab
(
paste0
(
"Percent of "
,
opt
$
target
))
+
scale_fill_discrete
(
name
=
""
)
+
ylab
(
paste0
(
"Percent of "
,
opt
$
target
))
+
xlab
(
"GO term"
)
ggsave
(
filename
=
normalizePath
(
file.path
(
opt
$
out
,
"barplot.svg"
),
ggsave
(
filename
=
normalizePath
(
file.path
(
opt
$
out
,
"barplot.svg"
),
mustWork
=
FALSE
),
width
=
11
,
height
=
7
)
write.table
(
count
,
file
=
normalizePath
(
file.path
(
opt
$
out
,
paste0
(
"barplot.csv"
)),
mustWork
=
FALSE
),
sep
=
"\t"
,
write.table
(
count
,
file
=
normalizePath
(
file.path
(
opt
$
out
,
paste0
(
"barplot.csv"
)),
mustWork
=
FALSE
),
sep
=
"\t"
,
row.names
=
FALSE
,
quote
=
FALSE
)
bin/GOEnrichment.R
View file @
000aeca3
...
...
@@ -4,7 +4,7 @@
##DATE = "03/2016"
##AUTHORS = "Gaëlle Lefort"
##KEYWORDS = "RNAseq GO enrichment"
##DESCRIPTION = "GOEnrichment.R perform an enrichment analysis for Gene Ontology (GO)."
##DESCRIPTION = "GOEnrichment.R perform an enrichment analysis for Gene Ontology (GO)."
############################# Enrichment analysis ##############################
...
...
@@ -14,7 +14,8 @@
#common packages
packages
<-
c
(
"optparse"
,
# to read arguments from a command line
"ggplot2"
,
# nice plots
"RJSONIO"
)
# work with JSON file
"RJSONIO"
,
# work with JSON file
"DBI"
)
for
(
package
in
packages
){
# if package is installed locally, load
...
...
@@ -22,7 +23,7 @@ for(package in packages){
install.packages
(
package
)
}
do.call
(
'library'
,
list
(
package
))
}
}
#Bioconductor package
if
(
!
(
"topGO"
%in%
rownames
(
installed.packages
())))
{
...
...
@@ -38,43 +39,43 @@ library(topGO) # GO enrichment
###### Parameter initialization when the script is launched from command line
############
option_list
=
list
(
make_option
(
c
(
"-f"
,
"--file"
),
type
=
"character"
,
default
=
NULL
,
help
=
"geneID2GO file: tabulated matrix with the correspondence
make_option
(
c
(
"-f"
,
"--file"
),
type
=
"character"
,
default
=
NULL
,
help
=
"geneID2GO file: tabulated matrix with the correspondence
gene to GO"
,
metavar
=
"character"
),
make_option
(
c
(
"--fileFormat"
),
type
=
"character"
,
default
=
"topGO"
,
help
=
"two types of file can be used : topGO (gene_id1 GO_1,GO_2,...
make_option
(
c
(
"--fileFormat"
),
type
=
"character"
,
default
=
"topGO"
,
help
=
"two types of file can be used : topGO (gene_id1 GO_1,GO_2,...
\n gene_id2 GO_3,...) or twoColumns (only 2 columns
#gene_id GO_id)"
,
metavar
=
"character"
),
make_option
(
c
(
"-i"
,
"--interest"
),
type
=
"character"
,
default
=
NULL
,
help
=
"matrix with id of genes of interest in the first column
(all these genes must be in the geneID2GO file)"
,
make_option
(
c
(
"-i"
,
"--interest"
),
type
=
"character"
,
default
=
NULL
,
help
=
"matrix with id of genes of interest in the first column
(all these genes must be in the geneID2GO file)"
,
metavar
=
"character"
),
make_option
(
c
(
"-o"
,
"--out"
),
type
=
"character"
,
default
=
NULL
,
help
=
"folder path where results are stored"
,
make_option
(
c
(
"-o"
,
"--out"
),
type
=
"character"
,
default
=
NULL
,
help
=
"folder path where results are stored"
,
metavar
=
"character"
),
make_option
(
c
(
"-t"
,
"--test"
),
type
=
"character"
,
default
=
NULL
,
help
=
"test used to find significant GO ('fisher' or 'KS'
make_option
(
c
(
"-t"
,
"--test"
),
type
=
"character"
,
default
=
NULL
,
help
=
"test used to find significant GO ('fisher' or 'KS'
Kolmogorov-Smirnov)"
,
metavar
=
"character"
),
make_option
(
c
(
"-a"
,
"--algorithm"
),
type
=
"character"
,
default
=
NULL
,
help
=
"algorithm used to correct p-value ('classic', 'elim'
make_option
(
c
(
"-a"
,
"--algorithm"
),
type
=
"character"
,
default
=
NULL
,
help
=
"algorithm used to correct p-value ('classic', 'elim'
or 'weight')"
,
metavar
=
"character"
),
make_option
(
c
(
"--paramAlgo"
),
type
=
"character"
,
default
=
NULL
,
make_option
(
c
(
"--paramAlgo"
),
type
=
"character"
,
default
=
NULL
,
help
=
"- for 'classic' algorithm: method used to correct
p-values ('bonferroni' or 'BH') [default=BH]
- for 'elim' algorithm: value used to remove genes from
significant GO [default=0.01]
- for 'weight' algorithm: weighted function used
('ratio' or 'log') [default=ratio] "
,
('ratio' or 'log') [default=ratio] "
,
metavar
=
"character"
),
make_option
(
c
(
"--alpha"
),
type
=
"character"
,
default
=
0.05
,
help
=
"significance level of the tests [default=%default]"
,
make_option
(
c
(
"--alpha"
),
type
=
"character"
,
default
=
0.05
,
help
=
"significance level of the tests [default=%default]"
,
metavar
=
"character"
),
make_option
(
c
(
"--target"
),
type
=
"character"
,
default
=
"gene"
,
help
=
"type of data [default=%default]"
,
make_option
(
c
(
"--target"
),
type
=
"character"
,
default
=
"gene"
,
help
=
"type of data [default=%default]"
,
metavar
=
"character"
)
);
);
opt_parser
=
OptionParser
(
usage
=
"Perform an enrichment analysis for
opt_parser
=
OptionParser
(
usage
=
"Perform an enrichment analysis for
Gene Ontology (GO)"
,
option_list
=
option_list
);
opt
=
parse_args
(
opt_parser
);
...
...
@@ -94,12 +95,12 @@ opt = parse_args(opt_parser);
#opt$target <- "contigs" # for example contigs, gene...
# table with genes and GO, genes of interest, test and algorithm are needed to
# table with genes and GO, genes of interest, test and algorithm are needed to
#continue
if
(
is.null
(
opt
$
file
)
||
is.null
(
opt
$
interest
)
||
is.null
(
opt
$
test
)
||
if
(
is.null
(
opt
$
file
)
||
is.null
(
opt
$
interest
)
||
is.null
(
opt
$
test
)
||
is.null
(
opt
$
algorithm
))
{
print_help
(
opt_parser
)
stop
(
"Table with genes and GO, genes of interest, test or algorithm missing.\n"
,
stop
(
"Table with genes and GO, genes of interest, test or algorithm missing.\n"
,
call.
=
FALSE
)
}
...
...
@@ -109,7 +110,7 @@ ifelse(!dir.exists(opt$out), dir.create(opt$out), FALSE)
#Stop the script if files doesn't exist
if
(
!
file.exists
(
normalizePath
(
opt
$
file
)))
{
print_help
(
opt_parser
)
stop
(
"table with genes and GO doesn't exist.\n"
,
stop
(
"table with genes and GO doesn't exist.\n"
,
call.
=
FALSE
)
}
...
...
@@ -124,12 +125,12 @@ if(is.null(opt$paramAlgo)){
}
if
(
opt
$
algorithm
==
"classic"
&
!
(
opt
$
paramAlgo
==
"bonferroni"
|
opt
$
paramAlgo
==
"BH"
)){
stop
(
"paramAlgo need to be equals to 'bonferroni' or 'BH'\n"
,
stop
(
"paramAlgo need to be equals to 'bonferroni' or 'BH'\n"
,
call.
=
FALSE
)
}
if
(
opt
$
algorithm
==
"classic"
&
!
(
opt
$
paramAlgo
==
"bonferroni"
|
opt
$
paramAlgo
==
"BH"
)){
stop
(
"paramAlgo need to be equals to 'bonferroni' or 'BH'\n"
,
stop
(
"paramAlgo need to be equals to 'bonferroni' or 'BH'\n"
,
call.
=
FALSE
)
}
...
...
@@ -137,17 +138,17 @@ if(opt$algorithm == "classic" & !(opt$paramAlgo == "bonferroni" | opt$paramAlgo
if
(
opt
$
algorithm
==
"elim"
){
opt
$
paramAlgo
<-
as.numeric
(
opt
$
paramAlgo
)
if
(
is.na
(
opt
$
paramAlgo
)){
stop
(
"paramAlgo need to be a number\n"
,
stop
(
"paramAlgo need to be a number\n"
,
call.
=
FALSE
)
}
if
(
opt
$
paramAlgo
>
1
){
stop
(
"paramAlgo need to be inferior to 1\n"
,
stop
(
"paramAlgo need to be inferior to 1\n"
,
call.
=
FALSE
)
}
}
if
(
opt
$
algorithm
==
"weight"
&
!
(
opt
$
paramAlgo
==
"ratio"
|
opt
$
paramAlgo
==
"log"
)){
stop
(
"paramAlgo need to be equals to 'ratio' or 'log'\n"
,
stop
(
"paramAlgo need to be equals to 'ratio' or 'log'\n"
,
call.
=
FALSE
)
}
...
...
@@ -159,7 +160,7 @@ if(opt$algorithm == "weight" & !(opt$paramAlgo == "ratio" | opt$paramAlgo == "lo
if
(
opt
$
fileFormat
==
"topGO"
){
geneID2GO
<-
readMappings
(
normalizePath
(
opt
$
file
),
sep
=
"\t"
,
IDsep
=
","
)
}
else
{
go
<-
read.table
(
normalizePath
(
opt
$
file
),
header
=
TRUE
,
sep
=
"\t"
,
go
<-
read.table
(
normalizePath
(
opt
$
file
),
header
=
TRUE
,
sep
=
"\t"
,
comment.char
=
""
)
# obtains a list of GO by genes for all genes (mandatory for topGO package)
geneID2GO
<-
list
()
...
...
@@ -171,19 +172,19 @@ if (opt$fileFormat == "topGO"){
}
geneID2GO
<-
c
(
geneID2GO
,
list
(
listGO
))
}
names
(
geneID2GO
)
<-
unique
(
go
[,
1
])
}
## read table with genes of interest
genesOfInterest
<-
read.table
(
normalizePath
(
opt
$
interest
),
header
=
TRUE
,
sep
=
"\t"
,
genesOfInterest
<-
read.table
(
normalizePath
(
opt
$
interest
),
header
=
TRUE
,
sep
=
"\t"
,
comment.char
=
""
)
genesOfInterest
<-
genesOfInterest
[,
1
]
#names of all genes
geneUniverse
<-
names
(
geneID2GO
)
geneUniverse
<-
names
(
geneID2GO
)
# =1 if gene is a gene of interest
geneList
<-
factor
(
as.integer
(
geneUniverse
%in%
genesOfInterest
))
...
...
@@ -196,19 +197,19 @@ notInAnalysis <- genesOfInterest[!(genesOfInterest %in% geneUniverse)]
######################### Functions
########## get name/definition of a GO (hidden function in topGO)
getTermsDefinition
<-
function
(
whichTerms
,
ontology
,
numChar
=
20
,
getTermsDefinition
<-
function
(
whichTerms
,
ontology
,
numChar
=
20
,
multipLines
=
FALSE
)
{
qTerms
<-
paste
(
paste
(
"'"
,
whichTerms
,
"'"
,
sep
=
""
),
collapse
=
","
)
retVal
<-
dbGetQuery
(
GO_dbconn
(),
retVal
<-
dbGetQuery
(
GO_dbconn
(),
paste
(
"SELECT term, go_id FROM go_term WHERE ontology IN"
,
"('"
,
ontology
,
"') AND go_id IN ("
,
"('"
,
ontology
,
"') AND go_id IN ("
,
qTerms
,
");"
,
sep
=
""
))
termsNames
<-
retVal
$
term
names
(
termsNames
)
<-
retVal
$
go_id
if
(
!
multipLines
)
if
(
!
multipLines
)
shortNames
<-
paste
(
substr
(
termsNames
,
1
,
numChar
),
ifelse
(
nchar
(
termsNames
)
>
numChar
,
'...'
,
''
),
sep
=
''
)
else
...
...
@@ -217,7 +218,7 @@ getTermsDefinition <- function(whichTerms, ontology, numChar = 20,
a
<-
strwrap
(
x
,
numChar
)
return
(
paste
(
a
,
sep
=
""
,
collapse
=
"\\\n"
))
})
names
(
shortNames
)
<-
names
(
termsNames
)
return
(
shortNames
[
whichTerms
])
}
...
...
@@ -225,27 +226,27 @@ getTermsDefinition <- function(whichTerms, ontology, numChar = 20,
########## find GO at levels 2 for ONE ontology
nbLevels2
<-
function
(
geneList
,
ontology
){
myGOdata
<-
NULL
try
(
myGOdata
<-
new
(
"topGOdata"
,
description
=
"My project"
,
ontology
=
ontology
,
try
(
myGOdata
<-
new
(
"topGOdata"
,
description
=
"My project"
,
ontology
=
ontology
,
allGenes
=
geneList
,
annot
=
annFUN.gene2GO
,
gene2GO
=
geneID2GO
),
silent
=
TRUE
)
count
<-
0
if
(
!
is.null
(
myGOdata
)){
# calculate levels for each GO in our data and his ancestors
levels
<-
buildLevels
(
myGOdata
@
graph
)
# select only GO at level 2
goLevels2
<-
mget
(
ls
(
levels
$
nodes2level
),
envir
=
levels
$
nodes2level
)
==
2
goLevels2
<-
names
(
goLevels2
)[
goLevels2
]
# count genes in each GO
count
<-
NULL
for
(
i
in
goLevels2
){
count
<-
c
(
count
,
countGenesInTerm
(
myGOdata
,
i
))
}
}
return
(
count
)
}
...
...
@@ -254,43 +255,43 @@ nbLevels2 <- function(geneList, ontology){
percentOfGene
<-
function
(
geneList
,
ontology
,
DefOntology
){
## Not genes of interest
All
<-
nbLevels2
(
geneList
,
ontology
)
/
length
(
geneList
)
## genes of interest
geneListGoI
<-
geneList
[
geneList
==
1
]
GoI
<-
nbLevels2
(
geneListGoI
,
ontology
)
/
length
(
geneListGoI
)
## percent of gene by GO
count
<-
data.frame
(
name
=
getTermsDefinition
(
names
(
All
),
ontology
,
numChar
=
40
),
nbGene
=
as.vector
(
All
),
GoI
=
"All genes"
,
count
<-
data.frame
(
name
=
getTermsDefinition
(
names
(
All
),
ontology
,
numChar
=
40
),
nbGene
=
as.vector
(
All
),
GoI
=
"All genes"
,
ontology
=
DefOntology
)
if
(
GoI
!=
0
){
count
<-
rbind
(
count
,
data.frame
(
name
=
getTermsDefinition
(
names
(
GoI
),
ontology
,
numChar
=
40
),
nbGene
=
as.vector
(
GoI
),
GoI
=
"Genes of interest"
,
data.frame
(
name
=
getTermsDefinition
(
names
(
GoI
),
ontology
,
numChar
=
40
),
nbGene
=
as.vector
(
GoI
),
GoI
=
"Genes of interest"
,
ontology
=
DefOntology
))
}
## to obtain the same list of GO between GoI and not GoI, we add line with a percent
## of genes equal to 0
if
(
length
(
count
$
name
[
!
(
count
$
name
[
count
$
GoI
==
"All genes"
]
%in%
if
(
length
(
count
$
name
[
!
(
count
$
name
[
count
$
GoI
==
"All genes"
]
%in%
count
$
name
[
count
$
GoI
==
"Genes of interest"
])])
>
0
){
count
<-
rbind
(
count
,
data.frame
(
name
=
count
$
name
[
!
(
count
$
name
[
count
$
GoI
==
"All genes"
]
%in%
count
$
name
[
count
$
GoI
==
"Genes of interest"
])],
nbGene
=
0
,
GoI
=
"Genes of interest"
,
ontology
=
DefOntology
))
}
if
(
length
(
count
$
name
[
!
(
count
$
name
[
count
$
GoI
==
"Genes of interest"
]
%in%
if
(
length
(
count
$
name
[
!
(
count
$
name
[
count
$
GoI
==
"Genes of interest"
]
%in%
count
$
name
[
count
$
GoI
==
"All genes"
])])
>
0
){
count
<-
rbind
(
count
,
data.frame
(
name
=
count
$
name
[
!
(
count
$
name
[
count
$
GoI
==
"Genes of interest"
]
data.frame
(
name
=
count
$
name
[
!
(
count
$
name
[
count
$
GoI
==
"Genes of interest"
]
%in%
count
$
name
[
count
$
GoI
==
"All genes"
])],
nbGene
=
0
,
GoI
=
"All genes"
,
ontology
=
DefOntology
))
}
return
(
count
)
}
...
...
@@ -309,14 +310,14 @@ ggplot(count, aes(x=as.factor(name), y=nbGene, fill=as.factor(GoI))) +
geom_bar
(
stat
=
"identity"
,
position
=
position_dodge
())
+
facet_grid
(
~
ontology
,
scales
=
"free_x"
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
70
,
vjust
=
1
,
hjust
=
1
,
colour
=
"black"
))
+
scale_fill_discrete
(
name
=
""
)
+
ylab
(
paste0
(
"Percent of "
,
opt
$
target
))
+
scale_fill_discrete
(
name
=
""
)
+
ylab
(
paste0
(
"Percent of "
,
opt
$
target
))
+
xlab
(
"GO term"
)
ggsave
(
filename
=
normalizePath
(
file.path
(
opt
$
out
,
"barplot.svg"
),
ggsave
(
filename
=
normalizePath
(
file.path
(
opt
$
out
,
"barplot.svg"
),
mustWork
=
FALSE
),
width
=
11
,
height
=
7
)
write.table
(
count
,
file
=
normalizePath
(
file.path
(
opt
$
out
,
paste0
(
"barplot.csv"
)),
mustWork
=
FALSE
),
sep
=
"\t"
,
write.table
(
count
,
file
=
normalizePath
(
file.path
(
opt
$
out
,
paste0
(
"barplot.csv"
)),
mustWork
=
FALSE
),
sep
=
"\t"
,
row.names
=
FALSE
,
quote
=
FALSE
)
...
...
@@ -326,56 +327,56 @@ write.table(count, file = normalizePath(file.path(opt$out,
GOgeneSig
<-
NULL
for
(
ontology
in
c
(
"BP"
,
"MF"
,
"CC"
)){
## GO object
myGOdata
<-
new
(
"topGOdata"
,
description
=
"My project"
,
ontology
=
ontology
,
myGOdata
<-
new
(
"topGOdata"
,
description
=
"My project"
,
ontology
=
ontology
,
allGenes
=
geneList
,
annot
=
annFUN.gene2GO
,
gene2GO
=
geneID2GO
)
## Test and p-value correction if algorithm = 'classic'
resultTest
<-
NULL
if
(
opt
$
algorithm
==
"classic"
){
resultTest
<-
runTest
(
myGOdata
,
algorithm
=
opt
$
algorithm
,
statistic
=
opt
$
test
)
resultTest
@
score
<-
p.adjust
(
resultTest
@
score
,
method
=
opt
$
paramAlgo
)
}
else
if
(
opt
$
algorithm
==
"elim"
){
resultTest
<-
runTest
(
myGOdata
,
algorithm
=
opt
$
algorithm
,
statistic
=
opt
$
test
,
resultTest
<-
runTest
(
myGOdata
,
algorithm
=
opt
$
algorithm
,
statistic
=
opt
$
test
,