From bddff5337923650da50956aed5b3e050083c54d4 Mon Sep 17 00:00:00 2001 From: Aurelien Brionne <aurelien.brionne@inrae.fr> Date: Wed, 15 Jul 2020 14:42:48 +0200 Subject: [PATCH] merge_enrich_terms pvalue cutoff upgrade --- DESCRIPTION | 2 +- NEWS | 1 + R/enrich_GO_terms.R | 38 +++++++++++++++-------- R/merge_enrich_terms.R | 50 ++++++++++++++++++------------- README.md | 2 +- vignettes/mouse_bionconductor.Rmd | 14 +++++---- 6 files changed, 68 insertions(+), 39 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e2b177a..0748244 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: ViSEAGO -Version: 1.3.5 +Version: 1.3.6 Title: ViSEAGO: a Bioconductor package for clustering biological functions using Gene Ontology and semantic similarity Author: Aurelien Brionne [aut, cre], Amelie Juanchich [aut], diff --git a/NEWS b/NEWS index 8dd0ac6..4f5f07d 100644 --- a/NEWS +++ b/NEWS @@ -71,4 +71,5 @@ CHANGES IN VERSION 1.3 o Ensembl2GO() biomart update o show_heatmap() upgrade o upset print update + o merge_enrich_terms upgrade pvalue cutoff diff --git a/R/enrich_GO_terms.R b/R/enrich_GO_terms.R index cfb8b3d..8fa5d88 100644 --- a/R/enrich_GO_terms.R +++ b/R/enrich_GO_terms.R @@ -32,19 +32,33 @@ setMethod( # Extract pvalues Data<-Data[,grep("\\.pvalue",names(Data)),with=FALSE] - # count significant pvalues by condition - Data<-Data[,lapply(.SD,function(x){sum(x<0.01,na.rm = TRUE)}),.SDcols=seq_len(ncol(Data))] + # pvalues threshold according to condition + p<-vapply(slot(object,"topGO"),function(x){ - # melt the table - Data<-melt.data.table( - Data, - measure.vars=names(Data), - variable.name = "conditions", - value.name = "significant GO terms number" - ) + # unlist + x=unlist(x) + + # extract pvalue threshold + as.numeric( + sub( + "^.+<", + "", + x[grep("test_name",names(x))] + ) + ) + },0) - # remove .pvalue in conditions column - Data[,"conditions":=gsub("\\.pvalue","",Data$conditions)] + # count significant pvalues by condition +
Data<-lapply(seq_len(ncol(Data)),function(x){ + + data.table( + conditions=sub("\\.pvalue","",names(Data)[x]), + `significant GO terms number`=sum(Data[,x,with=FALSE]<p[x],na.rm=TRUE) + ) + }) + + # bind results + Data<-rbindlist(Data) # get topGO information topGO<-slot(object,"topGO") @@ -86,7 +100,7 @@ setMethod( "\n- input:\n ", paste(paste(names(object@input), vapply(slot(object,"input"),function(x){paste(x,collapse=", ")},""),sep=": "),collapse="\n "), "\n- topGO summary:\n ", topGO, - "\n- enrich GOs data.table (p<0.01 in at least one list): ",nrow(slot(object,"data"))," GO terms of ",nrow(Data)," conditions.", + "\n- enrich GOs (in at least one list): ",nrow(slot(object,"data"))," GO terms of ",nrow(Data)," conditions.", paste("\n ",Data$conditions,":",Data$`significant GO terms number`,"terms"),sep="" ) } diff --git a/R/merge_enrich_terms.R b/R/merge_enrich_terms.R index d1f4c18..27e29f6 100644 --- a/R/merge_enrich_terms.R +++ b/R/merge_enrich_terms.R @@ -154,7 +154,7 @@ setMethod( # extract quering objects names x=Input[[x]] - # check existance + # check existence values<-ls(envir=envir) # check if available @@ -171,7 +171,7 @@ setMethod( # objects type obj.type=vapply(x,class,"") - # extract ontoloy type + # extract ontology type vapply(seq_along(x),function(y){ # extract ontology slot @@ -265,8 +265,8 @@ setMethod( # scored GOs GO_scored=length(slot(x[[y]],"score")), - # significant GOs - GO_significant=table(slot(x[[y]],"score")<0.01)[2], + # significant GOs according to cutOff + GO_significant=table(slot(x[[y]],"score")<as.numeric(sub("^.+[[:space:]]","",slot(x[[y]],"testName"))))[2], # feasibles genes feasible_genes=slot(x[[y]],"geneData")[1], @@ -326,7 +326,7 @@ setMethod( # tested algorithm algorithms<-Data[pos] - # extract significvant pvalues results + # extract significant pvalues results unlist( lapply(algorithms,function(y){ @@ -334,7 +334,11 @@ setMethod( pvalues<-topGO::score(y) # extract names of enrich terms -
as.vector(names(pvalues[pvalues<0.01])) + as.vector( + names( + pvalues[pvalues<as.numeric(sub("^.+[[:space:]]","",slot(y,"testName")))] + ) + ) }) ) }) @@ -375,7 +379,7 @@ setMethod( stop("No enrich GO terms available in at least one condition") } - # initialyse input + # initialize input input=list() # combine results @@ -709,23 +713,23 @@ setMethod( pvalues=data.table(do.call("cbind",pvalues)) # algoritms - algorithms=vapply(algorithms,function(x){slot(x,"algorithm")},"") + algo=vapply(algorithms,function(x){slot(x,"algorithm")},"") # if use of different algorithms - if(length(algorithms)>1){ + if(length(algo)>1){ # add names - names(pvalues)[-1]<-paste(rep(algorithms,each=2),names(pvalues)[-1],sep=".") - } + names(pvalues)[-1]<-paste(rep(algo,each=2),names(pvalues)[-1],sep=".") + } - # return input params - assign( - "input", - c(input,list(algorithms)), - inherits=TRUE - ) + # return input params + assign( + "input", + c(input,list(algo)), + inherits=TRUE + ) - ## combine results# + ## combine results # all results in list Results<-list( @@ -751,8 +755,14 @@ setMethod( # remove NA in GO.Id column Results<-Results[!is.na(Results$GO.ID)] + # extract pvalue threshold + p<-unique(vapply(algorithms,function(x){ as.numeric(sub("^.+[[:space:]]","",slot(x,"testName")))},0)) + + # stop if more than one pvalue threshold among comparisons + if(length(p)>1){stop("Only one pvalue threshold is allowed by list element.")} + # Remove gene ID and symbol if GO term not significant - Results[Results$pvalue>=0.01,`:=`(Significant_genes=NA,Significant_genes_symbol=NA)] + Results[Results$pvalue>=p,`:=`(Significant_genes=NA,Significant_genes_symbol=NA)] if(!is.null(names(Input))){ @@ -817,7 +827,7 @@ setMethod( allResults[,"GO.ID":=NULL] ) - # rename the fisrt 3 columns + # rename the first 3 columns names(allResults)[seq_len(3)]<-c("GO.ID","term","definition") # significant results in at least one condition diff --git a/README.md b/README.md index f078327..5c35505 100644 ---
a/README.md +++ b/README.md @@ -27,7 +27,7 @@ remotes::install_gitlab( devtools::build("ViSEAGO") # install package (from R console) - install.packages("ViSEAGO_1.3.5.tar.gz", repos = NULL, type = "source") + install.packages("ViSEAGO_1.3.6.tar.gz", repos = NULL, type = "source") ``` ## Citation diff --git a/vignettes/mouse_bionconductor.Rmd b/vignettes/mouse_bionconductor.Rmd index 8e035c3..c9ff44e 100644 --- a/vignettes/mouse_bionconductor.Rmd +++ b/vignettes/mouse_bionconductor.Rmd @@ -179,23 +179,26 @@ Now, we perform the GO enrichment tests for BP category with Fisher's exact test <u>NB</u>: p-values of enriched GO terms are not adjusted and considered significant if below 0.01. ```{r Enrichment_data_tests} -# perform TopGO tests +# perform topGO tests elim_BP_PregnantvsLactate<-topGO::runTest( BP_PregnantvsLactate, algorithm ="elim", - statistic = "fisher" + statistic = "fisher", + cutOff=0.01 ) elim_BP_VirginvsLactate<-topGO::runTest( BP_VirginvsLactate, algorithm ="elim", - statistic = "fisher" + statistic = "fisher", + cutOff=0.01 ) elim_BP_VirginvsPregnant<-topGO::runTest( BP_VirginvsPregnant, algorithm ="elim", - statistic = "fisher" + statistic = "fisher", + cutOff=0.01 ) ``` @@ -206,7 +209,8 @@ The printed table contains for each enriched GO terms, additional columns includ ```{r Enrichment_merge} # merge topGO results -BP_sResults<-ViSEAGO::merge_enrich_terms( +BP_sResults<- +ViSEAGO::merge_enrich_terms( Input=list( PregnantvsLactate=c( "BP_PregnantvsLactate", -- GitLab