From bddff5337923650da50956aed5b3e050083c54d4 Mon Sep 17 00:00:00 2001
From: Aurelien Brionne <aurelien.brionne@inrae.fr>
Date: Wed, 15 Jul 2020 14:42:48 +0200
Subject: [PATCH] merge_enrich_terms pvalue cutoff upgrade

---
 DESCRIPTION                       |  2 +-
 NEWS                              |  1 +
 R/enrich_GO_terms.R               | 38 +++++++++++++++--------
 R/merge_enrich_terms.R            | 50 ++++++++++++++++++-------------
 README.md                         |  2 +-
 vignettes/mouse_bionconductor.Rmd | 14 +++++----
 6 files changed, 68 insertions(+), 39 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index e2b177a..0748244 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: ViSEAGO
-Version: 1.3.5
+Version: 1.3.6
 Title: ViSEAGO: a Bioconductor package for clustering biological functions using Gene Ontology and semantic similarity
 Author: Aurelien Brionne [aut, cre],
         Amelie Juanchich [aut],
diff --git a/NEWS b/NEWS
index 8dd0ac6..4f5f07d 100644
--- a/NEWS
+++ b/NEWS
@@ -71,4 +71,5 @@ CHANGES IN VERSION 1.3
  o Ensembl2GO() biomart update
  o show_heatmap() upgrade
  o upset print update
+ o merge_enrich_terms() pvalue cutoff upgrade
 
diff --git a/R/enrich_GO_terms.R b/R/enrich_GO_terms.R
index cfb8b3d..8fa5d88 100644
--- a/R/enrich_GO_terms.R
+++ b/R/enrich_GO_terms.R
@@ -32,19 +32,33 @@ setMethod(
         # Extract pvalues
         Data<-Data[,grep("\\.pvalue",names(Data)),with=FALSE]
 
-        # count significant pvalues by condition
-        Data<-Data[,lapply(.SD,function(x){sum(x<0.01,na.rm = TRUE)}),.SDcols=seq_len(ncol(Data))]
+        # pvalue threshold for each condition
+        p<-vapply(slot(object,"topGO"),function(x){
 
-        # melt the table
-        Data<-melt.data.table(
-            Data,
-            measure.vars=names(Data),
-            variable.name = "conditions",
-            value.name = "significant GO terms number"
-        )
+            #  unlist
+            x=unlist(x)
+
+            # extract pvalue threshold
+            as.numeric(
+                sub(
+                    "^.+<",
+                    "",
+                    x[grep("test_name",names(x))]
+                )
+            )
+        },0)
 
-        # remove .pvalue in conditions column
-        Data[,"conditions":=gsub("\\.pvalue","",Data$conditions)]
+        # count significant pvalues by condition
+        Data<-lapply(seq_len(ncol(Data)),function(x){
+            
+            data.table(
+                conditions=sub("\\.pvalue","",names(Data)[x]),
+                `significant GO terms number`=sum(Data[,x,with=FALSE]<p[x],na.rm=TRUE)
+            )
+        })
+        
+        # bind results
+        Data<-rbindlist(Data)
 
         # get topGO information
         topGO<-slot(object,"topGO")
@@ -86,7 +100,7 @@ setMethod(
             "\n- input:\n        ", paste(paste(names(object@input),
             vapply(slot(object,"input"),function(x){paste(x,collapse=", ")},""),sep=": "),collapse="\n        "),
             "\n- topGO summary:\n ", topGO,
-            "\n- enrich GOs data.table (p<0.01 in at least one list): ",nrow(slot(object,"data"))," GO terms of ",nrow(Data)," conditions.",
+            "\n- enrich GOs (in at least one list): ",nrow(slot(object,"data"))," GO terms of ",nrow(Data)," conditions.",
             paste("\n       ",Data$conditions,":",Data$`significant GO terms number`,"terms"),sep=""
         )
     }
diff --git a/R/merge_enrich_terms.R b/R/merge_enrich_terms.R
index d1f4c18..27e29f6 100644
--- a/R/merge_enrich_terms.R
+++ b/R/merge_enrich_terms.R
@@ -154,7 +154,7 @@ setMethod(
                     # extract  quering objects names
                     x=Input[[x]]
 
-                    # check existance
+                    # check existence
                     values<-ls(envir=envir)
 
                     # check if available
@@ -171,7 +171,7 @@ setMethod(
                     # objects type
                     obj.type=vapply(x,class,"")
 
-                    # extract ontoloy type
+                    # extract ontology type
                     vapply(seq_along(x),function(y){
 
                         # extract ontology slot
@@ -265,8 +265,8 @@ setMethod(
                         # scored GOs
                         GO_scored=length(slot(x[[y]],"score")),
 
-                        # significant GOs
-                        GO_significant=table(slot(x[[y]],"score")<0.01)[2],
+                        # significant GOs according to the cutOff
+                        GO_significant=table(slot(x[[y]],"score")<as.numeric(sub("^.+[[:space:]]","",slot(x[[y]],"testName"))))[2],
 
                         # feasibles genes
                         feasible_genes=slot(x[[y]],"geneData")[1],
@@ -326,7 +326,7 @@ setMethod(
             # tested algorithm
             algorithms<-Data[pos]
 
-            # extract significvant pvalues results
+            # extract significant pvalues results
             unlist(
                 lapply(algorithms,function(y){
 
@@ -334,7 +334,11 @@ setMethod(
                     pvalues<-topGO::score(y)
 
                     # extract names of enrich terms
-                    as.vector(names(pvalues[pvalues<0.01]))
+                    as.vector(
+                        names(
+                            pvalues[pvalues<as.numeric(sub("^.+[[:space:]]","",slot(y,"testName")))]
+                        )
+                    )
                 })
             )
         })
@@ -375,7 +379,7 @@ setMethod(
             stop("No enrich GO terms available in at least one condition")
         }
 
-        # initialyse input
+        # initialize input
         input=list()
 
         # combine results
@@ -709,23 +713,23 @@ setMethod(
             pvalues=data.table(do.call("cbind",pvalues))
 
             # algoritms
-            algorithms=vapply(algorithms,function(x){slot(x,"algorithm")},"")
+            algo=vapply(algorithms,function(x){slot(x,"algorithm")},"")
 
             # if use of different algorithms
-            if(length(algorithms)>1){
+            if(length(algo)>1){
 
                 # add names
-                names(pvalues)[-1]<-paste(rep(algorithms,each=2),names(pvalues)[-1],sep=".")
-                }
+                names(pvalues)[-1]<-paste(rep(algo,each=2),names(pvalues)[-1],sep=".")
+            }
 
-                # return input params
-                assign(
-                    "input",
-                    c(input,list(algorithms)),
-                    inherits=TRUE
-                )
+            # return input params
+            assign(
+                "input",
+                c(input,list(algo)),
+                inherits=TRUE
+            )
 
-            ## combine results#
+            ## combine results
 
             # all results in list
             Results<-list(
@@ -751,8 +755,14 @@ setMethod(
             # remove NA in GO.Id column
             Results<-Results[!is.na(Results$GO.ID)]
 
+            # extract pvalue threshold
+            p<-unique(vapply(algorithms,function(x){ as.numeric(sub("^.+[[:space:]]","",slot(x,"testName")))},0))
+
+            # stop if more than one pvalue threshold is found among the comparisons
+            if(length(p)>1){stop("Only one pvalue threshold is allowed by list element.")}
+
             # Remove gene ID and symbol if GO term not significant
-            Results[Results$pvalue>=0.01,`:=`(Significant_genes=NA,Significant_genes_symbol=NA)]
+            Results[Results$pvalue>=p,`:=`(Significant_genes=NA,Significant_genes_symbol=NA)]
 
             if(!is.null(names(Input))){
 
@@ -817,7 +827,7 @@ setMethod(
             allResults[,"GO.ID":=NULL]
         )
 
-        # rename the fisrt 3 columns
+        # rename the first 3 columns
         names(allResults)[seq_len(3)]<-c("GO.ID","term","definition")
 
         # significant results in at least one condition
diff --git a/README.md b/README.md
index f078327..5c35505 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,7 @@ remotes::install_gitlab(
     devtools::build("ViSEAGO")
 
     # install package (from R console)
-    install.packages("ViSEAGO_1.3.5.tar.gz", repos = NULL, type = "source")
+    install.packages("ViSEAGO_1.3.6.tar.gz", repos = NULL, type = "source")
 ```
 
 ## Citation
diff --git a/vignettes/mouse_bionconductor.Rmd b/vignettes/mouse_bionconductor.Rmd
index 8e035c3..c9ff44e 100644
--- a/vignettes/mouse_bionconductor.Rmd
+++ b/vignettes/mouse_bionconductor.Rmd
@@ -179,23 +179,26 @@ Now, we perform the GO enrichment tests for BP category with Fisher's exact test
 <u>NB</u>: p-values of enriched GO terms are not adjusted and considered significant if below 0.01.
 
 ```{r Enrichment_data_tests}
-# perform TopGO tests
+# perform topGO tests
 elim_BP_PregnantvsLactate<-topGO::runTest(
     BP_PregnantvsLactate,
     algorithm ="elim",
-    statistic = "fisher"
+    statistic = "fisher",
+    cutOff=0.01
 )
 
 elim_BP_VirginvsLactate<-topGO::runTest(
     BP_VirginvsLactate,
     algorithm ="elim",
-    statistic = "fisher"
+    statistic = "fisher",
+    cutOff=0.01
 )
 
 elim_BP_VirginvsPregnant<-topGO::runTest(
     BP_VirginvsPregnant,
     algorithm ="elim",
-    statistic = "fisher"
+    statistic = "fisher",
+    cutOff=0.01
 )
 ```
 
@@ -206,7 +209,8 @@ The printed table contains for each enriched GO terms, additional columns includ
 
 ```{r Enrichment_merge}
 # merge topGO results
-BP_sResults<-ViSEAGO::merge_enrich_terms(
+# the pvalue threshold is taken from each topGO result (see cutOff in runTest)
+BP_sResults<-ViSEAGO::merge_enrich_terms(
     Input=list(
         PregnantvsLactate=c(
             "BP_PregnantvsLactate",
-- 
GitLab