Commit b7efced1 authored by Sebastien Theil's avatar Sebastien Theil
Browse files

Merge branch 'master' of forgemia.inra.fr:umrf/ranomaly

parents 1bbdab90 47d964ac
Pipeline #33236 passed with stage
in 17 seconds
......@@ -22,9 +22,13 @@ Imports:
vctrs,
biomformat,
Biostrings,
Biobase,
BiocGenerics,
gtools,
ShortRead,
dada2,
decontam,
dplyr,
microbiome,
DESeq2,
metagenomeSeq,
......@@ -33,11 +37,11 @@ Imports:
pairwiseAdonis,
phyloseq,
psadd,
DESeq2,
VennDiagram,
digest,
futile.logger,
ggplot2,
glue,
grid,
gridExtra,
vegan,
......@@ -48,9 +52,14 @@ Imports:
taxa,
rmarkdown,
ranacapa,
reshape2,
doParallel,
foreach,
DECIPHER,
mixOmics,
venn
venn,
htmlwidgets,
scales
Remotes:
github::omegahat/XML,
bioc::3.10/biomformat,
......@@ -61,11 +70,11 @@ Remotes:
bioc::3.10/metagenomeSeq,
bioc::3.10/phyloseq,
bioc::3.10/microbiome,
bioc::3.10/DESeq2,
bioc::3.10/DECIPHER,
bioc::3.10/mixOmics,
github::pmartinezarbizu/pairwiseAdonis/pairwiseAdonis,
github::cpauvert/psadd,
github::gauravsk/ranacapa,
github::grunwaldlab/metacoder,
github::mixOmicsTeam/mixOmics
github::mixOmicsTeam/mixOmics,
github::mikelove/DESeq2
......@@ -6,6 +6,7 @@ export(aggregate_fun)
export(assign_taxo_fun)
export(bars_fun)
export(check_tax_fun)
export(core_soft_fun)
export(csv2phyloseq_fun)
export(dada2_fun)
export(decontam_fun)
......@@ -21,6 +22,7 @@ export(heatmap_fun)
export(idTaxa_assign)
export(idtaxa_assign_fasta_fun)
export(idtaxa_traindb)
export(list_venn_fun)
export(metacoder_fun)
export(metagenomeseq_fun)
export(phy2cyto_fun)
......@@ -81,6 +83,8 @@ importFrom(metacoder,heat_tree)
importFrom(metacoder,parse_phyloseq)
importFrom(metacoder,zero_low_counts)
importFrom(microbiome,aggregate_top_taxa)
importFrom(microbiome,core_members)
importFrom(microbiome,transform)
importFrom(nlme,lme)
importFrom(phangorn,NJ)
importFrom(phangorn,dist.ml)
......
......@@ -234,10 +234,10 @@ ASVenn_fun <- function(data = data, output = "./ASVenn/", rank = "ASV",
#'
#' @param TF list containing vectors to plot.
#' @param mode = 1: length(TF)<=5, mode = 2 5<length(TF)<7
#' @param TITRE
#' @param output
#' @param refseq1
#' @param alltax
#' @param TITRE Plot title.
#' @param output Output path.
#' @param refseq1 Reference sequences.
#' @param alltax Taxonomy table.
#'
#'
#' @return Exports a venn diagram with corresponding tabulated file.
......
......@@ -12,6 +12,7 @@
#' @param verbose Verbose level. (1: quiet, 3: verbal)
#' @param rank Taxonomy rank to merge features that have same taxonomy at a certain taxonomic rank (among rank_names(data), or 'ASV' for no glom)
#' @param comp Comparison to test. Comma separated and comparisons are informed with a tilde (A~C,A~B,B~C). If empty, test all combination.
#' @param returnval Boolean for function to return values.
#'
#' @return Export final CSV files, barplot with top significant ASV and Venn Digramm.
#'
......
......@@ -8,6 +8,7 @@
#' @param verbose Verbose level. (1: quiet, 3: verbal)
#' @param confidence Bootstrap threshold 0...100
#' @param returnval Boolean to return values in console or not.
#' @param ncpu Number of cpus to use.
#'
#'
#' @return Return a taxonomy table with multiple ancestor checking and incongruence checking when 2 databases are used.
......@@ -22,7 +23,7 @@
assign_taxo_fun <- function(dada_res = dada_res, output = "./idtaxa/", id_db = "/PathToDB/UNITE_idtaxa.Rdata", confidence = 50, verbose = 1, returnval = TRUE){
assign_taxo_fun <- function(dada_res = dada_res, output = "./idtaxa/", id_db = "/PathToDB/UNITE_idtaxa.Rdata", confidence = 50, verbose = 1, returnval = TRUE, ncpu=NULL){
if(verbose == 3){
......@@ -47,7 +48,7 @@ assign_taxo_fun <- function(dada_res = dada_res, output = "./idtaxa/", id_db =
taxid_list=vector("list", length(db_list)+1)
for (i in 1:length(db_list)){
db_file <- db_list[i]
taxid_list[[i]] <- idTaxa_assign(db_file, dna, colnames(dada_res$seqtab.export), confidence)
taxid_list[[i]] <- idTaxa_assign(db_file, dna, colnames(dada_res$seqtab.export), confidence, ncpu)
}
if(verbose == 3){
......
......@@ -41,6 +41,7 @@ rarefaction <- function(data = data, col = NULL, step = 100, ggplotly = TRUE){
#' @param Fact1 Variable used to change X axis tick labels and color (when split = FALSE)
#' @param split if TRUE make a facet_wrap like grouped by Ord1 (default FALSE)
#' @param relative Plot relative (TRUE, default) or raw abundance plot (FALSE)
#' @param outfile Output html file.
#'
#' @return Returns barplots in an interactive plotly community plot
#'
......
#' core_soft_fun
#'
#' @description Define core microbiome, soft micriome and transitory microbiome.
#'
#' @param data A phyloseq object
#' @param fact Factor to test (must be in sample_variables(data))
#' @param group Choose which level of the factor, if NULL generate a list for each level.
#' @param freq frequence threshold of microbiome::core_members function
#' @param prev prevalence threshold of microbiome::core_members function
#' @param rank Taxonomy rank.
#'
#' @importFrom microbiome transform core_members
#'
#' @export
core_soft_fun <- function(data = NULL, fact = NULL, group = NULL, freq = 0.001, prev = 0.5, rank = "ASV"){
if(is.null(data)|is.null(fact)){stop("Require a phyloseq object, factor and group...")}
if(! any( sample_variables(data) == fact ) ){stop(glue::glue("{fact} is not in sample_variables(phyloseq_object)..."))}
if(rank == "ASV"){
glomps <- data
}else{
glomps <- tax_glom(data, taxrank = rank)
taxa_names(glomps) = glomps@tax_table@.Data[,rank]
}
fun2 <- function(glomps = glomps, fact = fact, group = group){
LL = list()
fun <- glue::glue("sub_phy <- subset_samples(glomps, {fact} == '{group}')")
eval(parse(text=fun))
sub_phy <- prune_taxa(taxa_sums(sub_phy) > 0, sub_phy)
sub_phy.rel <- microbiome::transform(sub_phy, "compositional")
#core
LL$core <- core.taxa.standard <- core_members(sub_phy.rel, detection = freq, prevalence = prev)
#soft in at least 1 sample
core.plus <- core_members(sub_phy.rel, detection = freq, prevalence = 1/nsamples(data))
LL$soft = setdiff(core.plus, core.taxa.standard)
# transit
LL$transit = setdiff(taxa_names(sub_phy.rel), core.plus)
return(LL)
}
if(!is.null(group)){
return(fun2(glomps = glomps, fact = fact , group = group))
}else{
LL2 = list()
for(group in levels(as.data.frame(as.matrix(sample_data(data)))[,fact])){
nlist = gsub(" ", "_" ,group)
fun <- glue::glue("LL2${nlist} <- fun2(glomps = glomps, fact = fact, group = '{group}')")
eval(parse(text=fun))
}
return(LL2)
}
}
#' List generator for venndiagram
#'
#' @description Output list to generate venn diagram with VENNFUN function.
#'
#' @param inlist Output of core_soft_fun() function
#' @param part which part of microbiome to output (core, soft or transit)
#'
#' @export
list_venn_fun <- function(inlist = NULL, part="core"){
if(! any(part == c("core", "soft", "transit"))){stop("Choose 'core', 'soft' or 'transit' for 'part' argument")}
outlist = list()
for( i in names(inlist)){
fun = glue::glue("outlist${i} = inlist${i}${part}")
eval(parse(text=fun))
}
return(outlist)
}
......@@ -5,6 +5,7 @@
#' @param taxtable Tabulated taxonomy table file path.
#' @param seq Tabulated sequence file path.
#' @param metadata Tabulated metadata file path.
#' @param generateTree Boolean to generate the phylogenetic tree.
#' @param output Output directory
#' @param returnval Boolean to return values in console or not.
#'
......
......@@ -5,7 +5,7 @@
#' @param amplicon Choose amplipcon "16S" or "ITS"
#' @param path Read files folder path
#' @param outpath output .Rdata file name
#' @param pool option for dada function (FALSE, TRUE or "pseudo"), default is "pseudo". See ? dada.
#' @param dadapool option for dada function (FALSE, TRUE or "pseudo"), default is "pseudo". See ? dada.
#' @param f_trunclen Forward read tuncate length (only for paired end 16S)
#' @param r_trunclen Reverse read tuncate length (only for paired end 16S)
#' @param f_primer Forward primer sequence (only for ITS)
......
......@@ -4,7 +4,7 @@
#'
#' @param data a phyloseq object (output from decontam or generate_phyloseq)
#' @param output Output directory
#' @param correc If TRUE, correct metadata to replace most common special characters (eg. é -> e), save the new file in meta_stampOK.tsv.
#' @param correc If TRUE, correct metadata to replace most common special characters, save the new file in meta_stampOK.tsv.
#'
#' @return Export 2 text files ready to use with STAMP.
#'
......
......@@ -3,7 +3,7 @@
#'
#'
#' @param dada_res Results of dada2_fun()
#' @param taxtable Results of assign_taxo_fun()
#' @param tax.table Results of assign_taxo_fun()
#' @param tree Results of generate_tree_fun()
#' @param metadata Path of metadata file (tab separated with header)
#' @param output Output directory
......
......@@ -3,7 +3,9 @@
#'
#'
#' @param dada_res Results of dada2_fun()
#' @param output Output directory
#' @param output Output directory.
#' @param psobj Phyloseq object with sequences.
#' @param verbose Verbosity level.
#' @param returnval Boolean to return values in console or not.
#'
#' @return Return a formatted tree object ready to use in phyloseq.
......
......@@ -55,6 +55,7 @@ fill_tax_fun <- function(taxtable = taxtable, prefix = TRUE){
#'
#' @param taxtable Output from fill_tax_table function.
#' @param output output path
#' @param rank Deepest taxonomy rank at which correction begin (7 for Species, 6 for Genus etc...).
#' @param verbose verbose mode
#' @param returnval Boolean to return values in console or not.
......@@ -64,23 +65,23 @@ fill_tax_fun <- function(taxtable = taxtable, prefix = TRUE){
check_tax_fun <- function(taxtable = taxtable, output = NULL, verbose=3, returnval = TRUE){
check_tax_fun <- function(taxtable = taxtable, output = NULL, rank = 7, verbose=3, returnval = TRUE){
RANKS = c("_domain","_phylum","_class","_order","_family","_genus","_species")
print("Check taxonomy consistency...")
# Check for multiple ancestors at each rank, choose first occurence for each problematic taxon
sink(paste('./check_tax_fun.log', sep=""), split = TRUE)
for(rank in 7:2){
if(verbose==3){flog.info(paste(colnames(taxtable)[rank],".",sep=""))}
for(rk in rank:2){
if(verbose==3){flog.info(paste(colnames(taxtable)[rk],".",sep=""))}
stockLres = 100; nloop=1
while(max(stockLres)>1){
if(verbose==3){flog.info(paste("Loop",nloop,".",sep=""))}
stockLres = NULL
allTax = apply(taxtable[,1:rank] ,1, function(x){paste(x, collapse = ";")})
allTax = apply(taxtable[,1:rk] ,1, function(x){paste(x, collapse = ";")})
uniqTax <- unique(allTax)
#Test if unique Genus has unique taxonomy.
for (i in unique(taxtable[,rank])){
for (i in unique(taxtable[,rk])){
# if(verbose==3){flog.info(paste(i,".",sep=""))}
res=grep( paste(';',i,'$', sep="" ) , uniqTax)
stockLres = c(stockLres,length(res))
......@@ -89,7 +90,7 @@ check_tax_fun <- function(taxtable = taxtable, output = NULL, verbose=3, returnv
cat("\n");print(i);
print(uniqTax[res]);
tax2 <- apply(taxtable[taxtable[,rank]==i,1:rank], 1, function(x){paste(x, collapse = ";")})
tax2 <- apply(taxtable[taxtable[,rk]==i,1:rk], 1, function(x){paste(x, collapse = ";")})
#Taxonomy with no filltax function annotation are chosen first.
nofill = !grepl(paste(RANKS, collapse = "|"), tax2, ignore.case = FALSE, perl = FALSE, fixed = FALSE)
if(all(nofill == FALSE)){
......@@ -104,11 +105,11 @@ check_tax_fun <- function(taxtable = taxtable, output = NULL, verbose=3, returnv
{paste(ftax, collapse = ';')}" )
); cat("\n")
#Change taxonomy with final ftax. the most common in taxtable
for(j in row.names(taxtable[taxtable[,rank]==i,])){
if(rank == 7){
for(j in row.names(taxtable[taxtable[,rk]==i,])){
if(rk == 7){
taxtable[j,] = ftax
}else{
taxtable[j,] = c(ftax, taxtable[j,(rank+1):ncol(taxtable)])
taxtable[j,] = c(ftax, taxtable[j,(rk+1):ncol(taxtable)])
}
}
}
......@@ -135,8 +136,8 @@ check_tax_fun <- function(taxtable = taxtable, output = NULL, verbose=3, returnv
#' @param taxtable data.frame
#' @param seqs path to fasta file or readDNAStringSet
#' @param prunedb maximum number of sequences per unique taxa.
#' @param outputDIR
#' @output
#' @param outputDIR Output directory.
#' @return List with taxonomy table and corresponding sequences.
#'
......
......@@ -6,6 +6,7 @@
#' @param dna A ‘DNAStringSet’ of unaligned sequences.
#' @param asv_names sequences IDs in same order.
#' @param confidence Bootstrap threshold 0...100
#' @param ncpu Number of cpu to use
#'
#' @return return taxonomic assignment of given sequences.
#' @import futile.logger
......@@ -14,10 +15,10 @@
#' @export
idTaxa_assign = function(db_file, dna, asv_names, confidence){
idTaxa_assign = function(db_file, dna, asv_names, confidence, ncpu = NULLS){
flog.info(paste('Using database ',db_file,sep=''))
toto <- load(db_file)
ids <- IdTaxa(dna, trainingSet, strand="both", processors=NULL, verbose=TRUE)
ids <- IdTaxa(dna, trainingSet, strand="both", processors=ncpu, verbose=TRUE)
names(ids) <- asv_names
flog.info("Confidence filtering...")
IDCONF = as.numeric(confidence)
......
......@@ -97,12 +97,12 @@ phy2cyto_fun <- function(data = data, output = "./cytoscape/", column1 = NULL, r
LINKS = cbind(rep(asv, length(srcs)), glue("type_{srcs}"), srcs)
}
} else{
#sinon partage et création d'un lien pour chaque environnement source.
#sinon partage et creation d'un lien pour chaque environnement source.
LINKS = NULL
for(rep in reps){
sdat3 = sdat2[sdat2[,repl]==rep,]
srcs = as.character(unique(sdat3[,column1]))
# Lien seulement si l'asv présent dans même sample à 2 environnements ou plus
# Lien seulement si l'asv present dans meme sample a 2 environnements ou plus
if(length(srcs)>1){
link = cbind(rep(asv, length(srcs)), glue("type_{srcs}"), srcs)
# print(glue("{rep} {srcs} envs"))
......
#' subset_fastx
#'
#' Allows subset fastq or fasta files at a given threshold. This fonction can convert fastq to fasta.
#' Allows subset fastq or fasta files at a given threshold. This function can convert fastq to fasta.
#'
#' @param path Path to the fastq files directory
#' @param format fasta or fastq format are allowed.
......@@ -36,7 +36,7 @@ subset_fastx <- function(path = NULL, format = "fastq", outformat = "fastq", out
writeXStringSet(X, glue::glue("{output}/{L2[i]}.{outformat}"), format = outformat, compress=compress)
return("Done")
}
if(length(X)>nbseq){
if(random){
if(!is.null(seed)){set.seed(seed)}
......
......@@ -59,4 +59,4 @@ Murali, Adithya, Aniruddha Bhargava, et Erik S. Wright. « IDTAXA: a novel appr
GPL 3.0
## Copyright
2021 INRA
2021 INRAE
......@@ -18,7 +18,13 @@ VENNFUN(
\item{mode}{= 1: length(TF)<=5, mode = 2 5<length(TF)<7}
\item{alltax}{}
\item{TITRE}{Plot title.}
\item{output}{Output path.}
\item{refseq1}{Reference sequences.}
\item{alltax}{Taxonomy table.}
}
\value{
Exports a venn diagram with corresponding tabulated file.
......
......@@ -38,6 +38,8 @@ aggregate_fun(
\item{rank}{Taxonomy rank to merge features that have same taxonomy at a certain taxonomic rank (among rank_names(data), or 'ASV' for no glom)}
\item{comp}{Comparison to test. Comma separated and comparisons are informed with a tilde (A~C,A~B,B~C). If empty, test all combination.}
\item{returnval}{Boolean for function to return values.}
}
\value{
Export final CSV files, barplot with top significant ASV and Venn Digramm.
......
......@@ -10,7 +10,8 @@ assign_taxo_fun(
id_db = "/PathToDB/UNITE_idtaxa.Rdata",
confidence = 50,
verbose = 1,
returnval = TRUE
returnval = TRUE,
ncpu = NULL
)
}
\arguments{
......@@ -25,6 +26,8 @@ assign_taxo_fun(
\item{verbose}{Verbose level. (1: quiet, 3: verbal)}
\item{returnval}{Boolean to return values in console or not.}
\item{ncpu}{Number of cpus to use.}
}
\value{
Return a taxonomy table with multiple ancestor checking and incongruence checking when 2 databases are used.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment