Commit c049a1be authored by Sebastien Theil's avatar Sebastien Theil
Browse files

Merge branch 'master' into 'assign2'

# Conflicts:
#   R/assign_taxo_fun.R
parents bb3e4f2a b7efced1
......@@ -22,9 +22,13 @@ Imports:
vctrs,
biomformat,
Biostrings,
Biobase,
BiocGenerics,
gtools,
ShortRead,
dada2,
decontam,
dplyr,
microbiome,
DESeq2,
metagenomeSeq,
......@@ -33,11 +37,11 @@ Imports:
pairwiseAdonis,
phyloseq,
psadd,
DESeq2,
VennDiagram,
digest,
futile.logger,
ggplot2,
glue,
grid,
gridExtra,
vegan,
......@@ -48,9 +52,14 @@ Imports:
taxa,
rmarkdown,
ranacapa,
reshape2,
doParallel,
foreach,
DECIPHER,
mixOmics,
venn
venn,
htmlwidgets,
scales
Remotes:
github::omegahat/XML,
bioc::3.10/biomformat,
......@@ -61,11 +70,11 @@ Remotes:
bioc::3.10/metagenomeSeq,
bioc::3.10/phyloseq,
bioc::3.10/microbiome,
bioc::3.10/DESeq2,
bioc::3.10/DECIPHER,
bioc::3.10/mixOmics,
github::pmartinezarbizu/pairwiseAdonis/pairwiseAdonis,
github::cpauvert/psadd,
github::gauravsk/ranacapa,
github::grunwaldlab/metacoder,
github::mixOmicsTeam/mixOmics
github::mixOmicsTeam/mixOmics,
github::mikelove/DESeq2
......@@ -134,18 +134,13 @@ ASVenn_fun <- function(data = data, output = "./ASVenn/", rank = "ASV",
# write.table(TT, paste(output,"/otu_table_sp_",LOC,".csv",sep=""), sep="\t", quote=FALSE, col.names=NA)
}
# print(names(TFtax))
# print(TFdata)
print(str(TFtax))
# stop()
## Venn diag
flog.info('Defining unique taxa ...')
alltax <- do.call(rbind, TFtax)
alltax <- alltax[!duplicated(alltax[,1]),]
row.names(alltax)=alltax[,1]
# print(alltax)
# stop()
# print(alltax)
flog.info('Plotting ...')
# Specific use to screen taxonomic composition of shared taxa...
......@@ -158,6 +153,7 @@ ASVenn_fun <- function(data = data, output = "./ASVenn/", rank = "ASV",
TF2 <- list(env1, others1)
names(TF2) <- c(krona, "others")
#Venn 2
# pdf(NULL)
venn.plot <- venn.diagram(TF2, filename = NULL, col = "black",
fill = rainbow(length(TF2)), alpha = 0.50,
cex = 1.5, cat.col = 1, lty = "blank",
......@@ -193,7 +189,7 @@ ASVenn_fun <- function(data = data, output = "./ASVenn/", rank = "ASV",
TFbak <- TF <- sapply(TFtax, row.names, simplify = FALSE)
names(TFbak) = names(TF) = level1
names(TFbak) = names(TF) = lvls
# print( length(unique(unlist(TF))) )
if(length(lvls)>5){
......@@ -238,10 +234,10 @@ ASVenn_fun <- function(data = data, output = "./ASVenn/", rank = "ASV",
#'
#' @param TF list containing vectors to plot.
#' @param mode Plot mode: 1 when length(TF) <= 5, 2 when 5 < length(TF) < 7.
#' @param TITRE
#' @param output
#' @param refseq1
#' @param alltax
#' @param TITRE Plot title.
#' @param output Output path.
#' @param refseq1 Reference sequences.
#' @param alltax Taxonomy table.
#'
#'
#' @return Exports a venn diagram with corresponding tabulated file.
......@@ -253,7 +249,8 @@ ASVenn_fun <- function(data = data, output = "./ASVenn/", rank = "ASV",
VENNFUN <- function(TF = TF, mode = 1, TITRE = TITRE, output = "./", refseq1 = NULL, alltax=NULL){
if(mode==1){
venn::venn(TF, zcol = rainbow(7), ilcs = 2, sncs = 2) #, col=rainbow(7)
# pdf(NULL)
venn::venn(TF, zcol = rainbow(7), ilcs = 2, sncs = 2, ggplot = FALSE) #, col=rainbow(7)
venn.plot <- recordPlot()
invisible(dev.off())
......@@ -298,8 +295,8 @@ VENNFUN <- function(TF = TF, mode = 1, TITRE = TITRE, output = "./", refseq1 = N
write.table(TABf, paste(output,"/",TITRE,"_venn_table.csv",sep=""), sep="\t", quote=FALSE, row.names=FALSE)
}
} else if(mode == 2){ # more than 5 environments
venn.plot <- venn::venn(TF, zcol = rainbow(7), ilcs = 2, sncs = 2) #, col=rainbow(7)
# pdf(NULL)
venn.plot <- venn::venn(TF, zcol = rainbow(7), ilcs = 2, sncs = 2, ggplot = FALSE) #, col=rainbow(7)
venn.plot <- recordPlot()
invisible(dev.off())
......
......@@ -12,6 +12,7 @@
#' @param verbose Verbose level. (1: quiet, 3: verbal)
#' @param rank Taxonomy rank to merge features that have same taxonomy at a certain taxonomic rank (among rank_names(data), or 'ASV' for no glom)
#' @param comp Comparison to test. Comma separated and comparisons are informed with a tilde (A~C,A~B,B~C). If empty, test all combination.
#' @param returnval Boolean for function to return values.
#'
#' @return Export final CSV files, barplot with top significant ASV and Venn diagram.
#'
......
......@@ -8,6 +8,7 @@
#' @param verbose Verbose level. (1: quiet, 3: verbal)
#' @param confidence Bootstrap threshold 0...100
#' @param returnval Boolean to return values in console or not.
#' @param ncpu Number of cpus to use.
#'
#'
#' @return Return a taxonomy table with multiple ancestor checking and incongruence checking when 2 databases are used.
......@@ -22,7 +23,7 @@
assign_taxo_fun <- function(dada_res = dada_res, output = "./idtaxa/", id_db = "/PathToDB/UNITE_idtaxa.Rdata", confidence = 50, verbose = 1, returnval = TRUE){
assign_taxo_fun <- function(dada_res = dada_res, output = "./idtaxa/", id_db = "/PathToDB/UNITE_idtaxa.Rdata", confidence = 50, verbose = 1, returnval = TRUE, ncpu=NULL){
if(verbose == 3){
......
......@@ -41,6 +41,7 @@ rarefaction <- function(data = data, col = NULL, step = 100, ggplotly = TRUE){
#' @param Fact1 Variable used to change X axis tick labels and color (when split = FALSE)
#' @param split if TRUE make a facet_wrap like grouped by Ord1 (default FALSE)
#' @param relative Plot relative (TRUE, default) or raw abundance plot (FALSE)
#' @param outfile Output html file.
#'
#' @return Returns barplots in an interactive plotly community plot
#'
......
......@@ -7,6 +7,7 @@
#' @param group Choose which level of the factor, if NULL generate a list for each level.
#' @param freq frequence threshold of microbiome::core_members function
#' @param prev prevalence threshold of microbiome::core_members function
#' @param rank Taxonomy rank.
#'
#' @importFrom microbiome transform core_members
#'
......
......@@ -5,6 +5,7 @@
#' @param taxtable Tabulated taxonomy table file path.
#' @param seq Tabulated sequence file path.
#' @param metadata Tabulated metadata file path.
#' @param generateTree Boolean to generate the phylogenetic tree.
#' @param output Output directory
#' @param returnval Boolean to return values in console or not.
#'
......
......@@ -5,7 +5,7 @@
#' @param amplicon Choose amplicon "16S" or "ITS"
#' @param path Read files folder path
#' @param outpath output .Rdata file name
#' @param pool option for dada function (FALSE, TRUE or "pseudo"), default is "pseudo". See ? dada.
#' @param dadapool option for dada function (FALSE, TRUE or "pseudo"), default is "pseudo". See ? dada.
#' @param f_trunclen Forward read truncate length (only for paired end 16S)
#' @param r_trunclen Reverse read truncate length (only for paired end 16S)
#' @param f_primer Forward primer sequence (only for ITS)
......@@ -31,9 +31,19 @@
#' @import futile.logger
#' @import digest
#' @import phyloseq
#' @import stringr
#' @export
get.sample.name <- function(fname){
# Derive a sample name from a read file path: the text located before the
# first underscore of the file's base name (directory part is discarded).
# Only the first element of `fname` is considered, so callers apply this
# function element-wise (e.g. through sapply) on a vector of paths.
#
# A stricter, regex-based alternative was previously sketched here:
#   stringr::str_match(basename(fname), "^(.*)_R[12]\\.fastq\\.gz$")[1,2]
fields <- strsplit(basename(fname), "_")[[1]]
fields[1]
}
# DADA2 function
dada2_fun <- function(amplicon = "16S", path = "", outpath = "./dada2_out/", f_trunclen = 240, r_trunclen = 240, dadapool = "pseudo",
......@@ -70,8 +80,8 @@ dada2_fun <- function(amplicon = "16S", path = "", outpath = "./dada2_out/", f_t
}
flog.debug("File list...")
flog.debug(fnFs)
flog.debug(fnRs)
flog.debug(length(fnFs))
flog.debug(length(fnRs))
flog.info('Done.')
if(amplicon=="ITS"){
......@@ -104,11 +114,11 @@ dada2_fun <- function(amplicon = "16S", path = "", outpath = "./dada2_out/", f_t
# print(fnRs.filtN)
flog.info('filterAndTrim...')
# if(! dir.exists(paste(path,'/filtN',sep=''))){
filterAndTrim(fwd = fnFs, filt = fnFs.filtN, rev = fnRs, filt.rev = fnRs.filtN, maxN = 0, multithread = TRUE, verbose=TRUE, rm.phix = TRUE, compress=compress)
# }else{
# flog.info('Filtered files exist, skipping...')
# }
if(! dir.exists(paste(path,'/filtN',sep=''))){
filterAndTrim(fwd = fnFs, filt = fnFs.filtN, rev = fnRs, filt.rev = fnRs.filtN, maxN = 0, multithread = TRUE, verbose=TRUE, rm.phix = TRUE, compress=compress)
}else{
flog.info('Filtered files exist, skipping...')
}
flog.info('Done.')
......@@ -174,7 +184,8 @@ dada2_fun <- function(amplicon = "16S", path = "", outpath = "./dada2_out/", f_t
cutRs <- sort(list.files(path.cut, pattern = "_R2.fastq", full.names = TRUE))
# Extract sample names, assuming filenames have format:
get.sample.name <- function(fname) strsplit(basename(fname), "_")[[1]][1]
# get.sample.name <- function(fname) strsplit(basename(fname), "_")[[1]][1]
sample.names <- unname(sapply(cutFs, get.sample.name))
#head(sample.names)
......@@ -189,6 +200,15 @@ dada2_fun <- function(amplicon = "16S", path = "", outpath = "./dada2_out/", f_t
names(filtFs) <- sample.names
names(filtRs) <- sample.names
flog.debug(length(cutFs))
print(head(cutFs))
flog.debug(length(filtFs))
print(head(filtFs))
flog.debug(length(cutRs))
print(head(cutRs))
flog.debug(length(filtRs))
print(head(filtRs))
out <- filterAndTrim(cutFs, filtFs, cutRs, filtRs, maxN = 0, maxEE = c(2, 2),
truncQ = 2, minLen = 50, compress = FALSE, multithread = TRUE) # on windows, set multithread = FALSE
#head(out)
......@@ -209,7 +229,9 @@ dada2_fun <- function(amplicon = "16S", path = "", outpath = "./dada2_out/", f_t
flog.debug(length(fnFs))
flog.debug(length(fnRs))
sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1)
sample.names <- unname(sapply(fnFs, get.sample.name))
flog.debug(head(sample.names))
# sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1)
if(compress){
filtFs <- file.path(path, "filtered", paste0(sample.names, "_F_filt.fastq.gz"))
......
......@@ -4,7 +4,7 @@
#'
#' @param data a phyloseq object (output from decontam or generate_phyloseq)
#' @param output Output directory
#' @param correc If TRUE, correct metadata to replace most common special characters (eg. é -> e), save the new file in meta_stampOK.tsv.
#' @param correc If TRUE, correct metadata to replace most common special characters, save the new file in meta_stampOK.tsv.
#'
#' @return Export 2 text files ready to use with STAMP.
#'
......
......@@ -3,7 +3,7 @@
#'
#'
#' @param dada_res Results of dada2_fun()
#' @param taxtable Results of assign_taxo_fun()
#' @param tax.table Results of assign_taxo_fun()
#' @param tree Results of generate_tree_fun()
#' @param metadata Path of metadata file (tab separated with header)
#' @param output Output directory
......
......@@ -3,7 +3,9 @@
#'
#'
#' @param dada_res Results of dada2_fun()
#' @param output Output directory
#' @param output Output directory.
#' @param psobj Phyloseq object with sequences.
#' @param verbose Verbosity level.
#' @param returnval Boolean to return values in console or not.
#'
#' @return Return a formatted tree object ready to use in phyloseq.
......
......@@ -137,8 +137,8 @@ check_tax_fun <- function(taxtable = taxtable, output = NULL, rank = 7, verbose=
#' @param taxtable data.frame
#' @param seqs path to fasta file or readDNAStringSet
#' @param prunedb maximum number of sequences per unique taxa.
#' @param outputDIR
#' @output
#' @param outputDIR Output directory.
#' @return List with taxonomy table and corresponding sequences.
#'
......
......@@ -6,6 +6,7 @@
#' @param dna A ‘DNAStringSet’ of unaligned sequences.
#' @param asv_names sequences IDs in same order.
#' @param confidence Bootstrap threshold 0...100
#' @param ncpu Number of cpu to use
#'
#' @return return taxonomic assignment of given sequences.
#' @import futile.logger
......@@ -14,10 +15,10 @@
#' @export
idTaxa_assign = function(db_file, dna, asv_names, confidence){
idTaxa_assign = function(db_file, dna, asv_names, confidence, ncpu = NULL){
flog.info(paste('Using database ',db_file,sep=''))
toto <- load(db_file)
ids <- IdTaxa(dna, trainingSet, strand="both", processors=NULL, verbose=TRUE)
ids <- IdTaxa(dna, trainingSet, strand="both", processors=ncpu, verbose=TRUE)
names(ids) <- asv_names
flog.info("Confidence filtering...")
IDCONF = as.numeric(confidence)
......
......@@ -97,12 +97,12 @@ phy2cyto_fun <- function(data = data, output = "./cytoscape/", column1 = NULL, r
LINKS = cbind(rep(asv, length(srcs)), glue("type_{srcs}"), srcs)
}
} else{
#sinon partage et création d'un lien pour chaque environnement source.
#sinon partage et creation d'un lien pour chaque environnement source.
LINKS = NULL
for(rep in reps){
sdat3 = sdat2[sdat2[,repl]==rep,]
srcs = as.character(unique(sdat3[,column1]))
# Lien seulement si l'asv présent dans même sample à 2 environnements ou plus
# Lien seulement si l'asv present dans meme sample a 2 environnements ou plus
if(length(srcs)>1){
link = cbind(rep(asv, length(srcs)), glue("type_{srcs}"), srcs)
# print(glue("{rep} {srcs} envs"))
......
#' subset_fastx
#'
#' Allows subset fastq or fasta files at a given threshold. This fonction can convert fastq to fasta.
#' Allows subset fastq or fasta files at a given threshold. This function can convert fastq to fasta.
#'
#' @param path Path to the fastq files directory
#' @param format fasta or fastq format are allowed.
......@@ -36,7 +36,7 @@ subset_fastx <- function(path = NULL, format = "fastq", outformat = "fastq", out
writeXStringSet(X, glue::glue("{output}/{L2[i]}.{outformat}"), format = outformat, compress=compress)
return("Done")
}
if(length(X)>nbseq){
if(random){
if(!is.null(seed)){set.seed(seed)}
......
......@@ -59,4 +59,4 @@ Murali, Adithya, Aniruddha Bhargava, et Erik S. Wright. « IDTAXA: a novel appr
GPL 3.0
## Copyright
2021 INRA
2021 INRAE
......@@ -18,7 +18,13 @@ VENNFUN(
\item{mode}{Plot mode: 1 when length(TF) <= 5, 2 when 5 < length(TF) < 7.}
\item{alltax}{}
\item{TITRE}{Plot title.}
\item{output}{Output path.}
\item{refseq1}{Reference sequences.}
\item{alltax}{Taxonomy table.}
}
\value{
Exports a venn diagram with corresponding tabulated file.
......
......@@ -38,6 +38,8 @@ aggregate_fun(
\item{rank}{Taxonomy rank to merge features that have same taxonomy at a certain taxonomic rank (among rank_names(data), or 'ASV' for no glom)}
\item{comp}{Comparison to test. Comma separated and comparisons are informed with a tilde (A~C,A~B,B~C). If empty, test all combination.}
\item{returnval}{Boolean for function to return values.}
}
\value{
Export final CSV files, barplot with top significant ASV and Venn diagram.
......
......@@ -10,7 +10,8 @@ assign_taxo_fun(
id_db = "/PathToDB/UNITE_idtaxa.Rdata",
confidence = 50,
verbose = 1,
returnval = TRUE
returnval = TRUE,
ncpu = NULL
)
}
\arguments{
......@@ -25,6 +26,8 @@ assign_taxo_fun(
\item{verbose}{Verbose level. (1: quiet, 3: verbal)}
\item{returnval}{Boolean to return values in console or not.}
\item{ncpu}{Number of cpus to use.}
}
\value{
Return a taxonomy table with multiple ancestor checking and incongruence checking when 2 databases are used.
......
......@@ -29,6 +29,8 @@ bars_fun(
\item{split}{if TRUE make a facet_wrap like grouped by Ord1 (default FALSE)}
\item{relative}{Plot relative (TRUE, default) or raw abundance plot (FALSE)}
\item{outfile}{Output html file.}
}
\value{
Returns barplots in an interactive plotly community plot
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment