Commit 7e4f6567 authored by Sebastien Theil's avatar Sebastien Theil
Browse files

Merge branch 'master' of forgemia.inra.fr:umrf/ranomaly

parents 461a4e16 5b8fc635
# Generated by roxygen2: do not edit by hand
export(ASVenn_fun)
export(VENNFUN)
export(aggregate_fun)
export(assign_taxo_fun)
export(bars_fun)
......@@ -28,6 +29,7 @@ export(plsda_fun)
export(prune_db_fun)
export(rarefaction)
export(split_table_fun)
export(subset_fastx)
export(taxid_fun)
export(update_metadata_fun)
import(Biostrings)
......@@ -37,6 +39,8 @@ import(VennDiagram)
import(dada2)
import(decontam)
import(digest)
import(doParallel)
import(foreach)
import(futile.logger)
import(ggplot2)
import(metagenomeSeq)
......@@ -56,6 +60,9 @@ importFrom(DECIPHER,AlignSeqs)
importFrom(DESeq2,DESeq)
importFrom(DESeq2,results)
importFrom(DESeq2,resultsNames)
importFrom(dplyr,across)
importFrom(dplyr,group_by)
importFrom(dplyr,group_map)
importFrom(ggpubr,ggtexttable)
importFrom(ggpubr,text_grob)
importFrom(ggpubr,ttheme)
......@@ -63,6 +70,7 @@ importFrom(glue,glue)
importFrom(grid,grid.draw)
importFrom(gridExtra,grid.arrange)
importFrom(gridExtra,marrangeGrob)
importFrom(gtools,mixedsort)
importFrom(htmlwidgets,saveWidget)
importFrom(metacoder,calc_n_samples)
importFrom(metacoder,calc_obs_props)
......
......@@ -16,11 +16,11 @@
rarefaction <- function(data = data, col = NULL, step = 100, ggplotly = TRUE){
plot_rare <- ggrare(data, step = step, color = col, plot = FALSE)
plot_rare <- plot_rare + facet_wrap(col, ncol = 4) + theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust=1),
legend.position = "none",axis.text=element_text(size=18, angle = 45, hjust=1),
axis.title=element_text(size=16,face="bold"),
strip.text.x = element_text(size = 18,face="bold"),
title=element_text(size=16,face="bold"))
theme(axis.text.x = element_text(angle = 45, hjust=1),
legend.position = "none",axis.text=element_text(size=18, angle = 45, hjust=1),
axis.title=element_text(size=16,face="bold"),
strip.text.x = element_text(size = 18,face="bold"),
title=element_text(size=16,face="bold"))
if(ggplotly){
......@@ -37,8 +37,9 @@ rarefaction <- function(data = data, col = NULL, step = 100, ggplotly = TRUE){
#' @param data a phyloseq object (output from decontam or generate_phyloseq)
#' @param rank Taxonomy rank to merge features that have same taxonomy at a certain taxonomic rank (among rank_names(data), or 'ASV' for no glom)
#' @param top Number of top taxa to plot
#' @param Ord1 Variable used to order sample (X axis)
#' @param Fact1 Variable used to change X axis tick labels and color
#' @param Ord1 Variable used to order sample (X axis) or split the barplot if split = TRUE
#' @param Fact1 Variable used to change X axis tick labels and color (when split = FALSE)
#' @param split if TRUE make a facet_wrap like grouped by Ord1 (default FALSE)
#' @param relative Plot relative (TRUE, default) or raw abundance plot (FALSE)
#'
#' @return Returns barplots in an interactive plotly community plot
......@@ -47,98 +48,127 @@ rarefaction <- function(data = data, col = NULL, step = 100, ggplotly = TRUE){
#' @importFrom microbiome aggregate_top_taxa
#' @importFrom reshape2 melt
#' @importFrom gtools mixedsort
#' @importFrom dplyr group_map group_by across
#'
#'
#' @export
bars_fun <- function(data = data, rank = "Genus", top = 10, Ord1 = NULL, Fact1 = NULL, relative = TRUE, outfile="plot_compo.html"){
bars_fun <- function(data = data, rank = "Genus", top = 10, Ord1 = NULL, Fact1 = NULL, split = FALSE,
relative = TRUE,
outfile="plot_compo.html"){
# Fdata <- prune_samples(sample_names(r$data16S())[r$rowselect()], r$data16S())
# Fdata <- prune_taxa(taxa_sums(Fdata) > 0, Fdata)
# if( r$RankGlom() == "ASV"){
# Fdata <- prune_taxa(r$asvselect(), Fdata)
# }
# Fdata <- prune_samples(sample_names(r$data16S())[r$rowselect()], r$data16S())
# Fdata <- prune_taxa(taxa_sums(Fdata) > 0, Fdata)
# if( r$RankGlom() == "ASV"){
# Fdata <- prune_taxa(r$asvselect(), Fdata)
# }
Fdata = data
# print("top")
psobj.top <- aggregate_top_taxa(Fdata, rank, top = top)
Fdata = data
# print("top")
psobj.top <- aggregate_top_taxa(Fdata, rank, top = top)
# print("get data")
sdata = as.data.frame(sample_data(psobj.top))
sdata$sample.id = sample_names(psobj.top)
otable = as.data.frame(otu_table(psobj.top))
row.names(otable) = tax_table(psobj.top)[,rank]
# print("get data")
sdata = as.data.frame(sample_data(psobj.top))
sdata$sample.id = sample_names(psobj.top)
otable = as.data.frame(otu_table(psobj.top))
row.names(otable) = tax_table(psobj.top)[,rank]
# print("melt data")
dat <- as.data.frame(t(otable))
dat <- cbind.data.frame(sdata, dat)
meltdat <- reshape2::melt(dat, id.vars=1:ncol(sdata))
tt <- levels(meltdat$variable)
meltdat$variable <- factor(meltdat$variable, levels= c("Other", tt[tt!="Other"]))
# print("melt data")
dat <- as.data.frame(t(otable))
dat <- cbind.data.frame(sdata, dat)
meltdat <- reshape2::melt(dat, id.vars=1:ncol(sdata))
tt <- levels(meltdat$variable)
meltdat$variable <- factor(meltdat$variable, levels= c("Other", tt[tt!="Other"]))
LL=list()
# print(head(meltdat))
# print(levels(meltdat$sample.id))
# save(list = ls(all.names = TRUE), file = "debug.rdata", envir = environment())
LL=list()
# print(head(meltdat))
# print(levels(meltdat$sample.id))
# save(list = ls(all.names = TRUE), file = "debug.rdata", envir = environment())
# TODO: Message d'erreur si factor n'est pas dans les sample_data
# TODO: Message d'erreur si factor n'est pas dans les sample_data
fun = glue( "xform <- list(categoryorder = 'array',
fun = glue( "xform <- list(categoryorder = 'array',
categoryarray = unique(meltdat$sample.id[gtools::mixedorder(meltdat${Ord1})]),
title = 'Samples',
tickmode = 'array',
tickvals = 0:nrow(sdata),
ticktext = sdata[unique(meltdat$sample.id[gtools::mixedorder(meltdat${Ord1})]), '{Fact1}']@.Data[[1]],
tickangle = -90)")
eval(parse(text=fun))
# subplot to vizualize groups
# print(head(sdata))
df1 <- cbind.data.frame(x=sdata[unique(meltdat$sample.id[gtools::mixedorder(meltdat[,Ord1])]), "sample.id"]@.Data[[1]],
g=sdata[unique(meltdat$sample.id[gtools::mixedorder(meltdat[,Ord1])]), Fact1]@.Data[[1]],
y=1)
subp1 <- df1 %>% plot_ly(
type = 'bar',
x = ~x,
y = ~y,
color = ~g,
legendgroup = ~g,
showlegend = FALSE
) %>% layout(xaxis = list(zeroline = FALSE,showline = FALSE, showgrid = FALSE),
yaxis=list(showticklabels = FALSE,title = "",showgrid = FALSE))
if(relative){
#relative abondance
otable=apply(otable,2, function(x){Tot=sum(x); x/Tot})
dat= as.data.frame(t(otable))
dat <- cbind.data.frame(sdata, dat)
meltdat = reshape2::melt(dat, id.vars=1:ncol(sdata))
tt=levels(meltdat$variable)
meltdat$variable = factor(meltdat$variable, levels= c("Other", tt[tt!="Other"]))
p1=plot_ly(meltdat, x = ~sample.id, y = ~value, type = 'bar', name = ~variable, color = ~variable) %>% #, color = ~variable
layout(title="Relative abundance", yaxis = list(title = 'Relative abundance'), xaxis = xform, barmode = 'stack')
p1 <- subplot(p1, subp1, nrows = 2, shareX = T, heights=c(0.95,0.05)) %>%
layout(xaxis = xform)
}else{
#raw abundance
p1=plot_ly(meltdat, x = ~sample.id, y = ~value, type = 'bar', name = ~variable, color = ~variable) %>% #, color = ~variable
layout(title="Raw abundance", yaxis = list(title = 'Raw abundance'), xaxis = xform, barmode = 'stack')
p1 <- subplot(p1, subp1, nrows = 2, shareX = T, heights=c(0.95,0.05)) %>%
layout(xaxis = xform)
}
eval(parse(text=fun))
# subplot to vizualize groups
# print(head(sdata))
df1 <- cbind.data.frame(x=sdata[unique(meltdat$sample.id[gtools::mixedorder(meltdat[,Ord1])]), "sample.id"]@.Data[[1]],
g=sdata[unique(meltdat$sample.id[gtools::mixedorder(meltdat[,Ord1])]), Fact1]@.Data[[1]],
y=1)
subp1 <- df1 %>% plot_ly(
type = 'bar',
x = ~x,
y = ~y,
color = ~g,
legendgroup = ~g,
showlegend = FALSE
) %>% layout(xaxis = list(zeroline = FALSE,showline = FALSE, showgrid = FALSE),
yaxis=list(showticklabels = FALSE,title = "",showgrid = FALSE))
if(relative){
#relative abondance
plottitle = "Relative abundance"
otable=apply(otable,2, function(x){Tot=sum(x); x/Tot})
dat= as.data.frame(t(otable))
dat <- cbind.data.frame(sdata, dat)
meltdat = reshape2::melt(dat, id.vars=1:ncol(sdata))
tt=levels(meltdat$variable)
meltdat$variable = factor(meltdat$variable, levels= c("Other", tt[tt!="Other"]))
# dir.create(outpath, recursive = TRUE)
# htmlwidgets::saveWidget(p1, glue::glue("{outpath}/{outfile}"))
if(!is.null(outfile)){
htmlwidgets::saveWidget(p1, outfile)
}
p1=plot_ly(meltdat, x = ~sample.id, y = ~value, type = 'bar', name = ~variable, color = ~variable) %>% #, color = ~variable
layout(title="Relative abundance", yaxis = list(title = 'Relative abundance'), xaxis = xform, barmode = 'stack')
return(p1)
if(length(df1$x) != length(unique(df1$g))){
p1 <- subplot(p1, subp1, nrows = 2, shareX = T, heights=c(0.95,0.05)) %>%
layout(xaxis = xform)
}
}else{
#raw abundance
plottitle = "Raw abundance"
p1=plot_ly(meltdat, x = ~sample.id, y = ~value, type = 'bar', name = ~variable, color = ~variable) %>% #, color = ~variable
layout(title="Raw abundance", yaxis = list(title = 'Raw abundance'), xaxis = xform, barmode = 'stack')
if(length(df1$x) != length(unique(df1$g))){
p1 <- subplot(p1, subp1, nrows = 2, shareX = T, heights=c(0.95,0.05)) %>%
layout(xaxis = xform)
}
}
# dir.create(outpath, recursive = TRUE)
# htmlwidgets::saveWidget(p1, glue::glue("{outpath}/{outfile}"))
if(!is.null(outfile)){
htmlwidgets::saveWidget(p1, outfile)
}
# facet_wrap output
if(!split) {
return(p1)
} else {
p1 = meltdat %>% group_by(across({Ord1})) %>%
dplyr::group_map(~ plot_ly(data=., x = ~sample.id, y = ~value, type = 'bar',
name = ~variable,
color = ~variable, legendgroup = ~variable,
showlegend = (.y == levels(meltdat[, Ord1])[1])),
keep = TRUE) %>%
subplot(nrows = 1, shareX = TRUE, shareY=TRUE, titleX = FALSE) %>%
layout(title=plottitle,
xaxis = list(title = glue("{Ord1} = {unique(meltdat[, Ord1])[1]}")),
yaxis = list(title = 'Relative abundance'),
barmode = 'stack')
for (i in 2:length(unique(meltdat[, Ord1]))) {
p1$x$layoutAttrs[[1]][[paste0("xaxis", i)]] = NULL
p1$x$layoutAttrs[[1]][[paste0("xaxis", i)]]$title <- glue("{Ord1} = {unique(meltdat[, Ord1])[i]}")
}
return(p1)
}
}
#' subset_fastx
#'
#' Allows subset fastq or fasta files at a given threshold. This fonction can convert fastq to fasta.
#'
#' @param path Path to the fastq files directory
#' @param format fasta or fastq format are allowed.
#' @param output Path to the output directory
#' @param nbseq Number of sequences to output per fastq files; if NULL no subset.
#' @param ncores Number of CPU used to process.
#' @param compress set on TRUE to get .fastx.gz compressed files
#' @param verbose Print information while processing
#' @param random Pick randomly 'nbseq' sequences.
#' @param seed Fix seed with numeric to reproduce random picking (eg. 123)
#'
#' @return Output fastx files in the output directory
#'
#' @import Biostrings
#' @import foreach
#' @import doParallel
#' @export
subset_fastx <- function(path = NULL, format = "fastq", outformat = "fastq", output = "./subset_fastq/", nbseq = 10000, ncores = 3, compress=FALSE, verbose=FALSE, random = FALSE, seed = NULL){
if(is.null(path)){stop("Require path to fastq files directory...")}
if(format != "fasta" & format != "fastq"){stop("fastq and fasta format are allowed...")}
registerDoParallel(ncores)
if(!dir.exists(output)){dir.create(output, recursive=TRUE)}
L1 = list.files(path, pattern = glue::glue("*{format}*"), full.names = TRUE)
L2 = tools::file_path_sans_ext(list.files(path, pattern = glue::glue("*{format}*"), full.names = FALSE))
X=NULL
foreach (i=1:length(L1)) %dopar% {
X <- readDNAStringSet(L1[i], format=format, with.qualities=TRUE)
if(verbose){print(L2[i]);print(length(X))}
if(is.null(nbseq)){
writeXStringSet(X, glue::glue("{output}/{L2[i]}.{outformat}"), format = outformat, compress=compress)
return("Done")
}
if(length(X)>nbseq){
if(random){
if(!is.null(seed)){set.seed(seed)}
rdm = sample(1:length(X),nbseq)
Xout = X[rdm]
}else{
Xout = X[1:nbseq]
}
}else{Xout = X}
writeXStringSet(Xout, glue::glue("{output}/{L2[i]}.{outformat}"), format = outformat, compress=compress)
}
return("Done")
}
......@@ -9,10 +9,9 @@
<!-- badges: end -->
rANOMALY is the R Package of the first version of
[ANOMALY](https://forgemia.inra.fr/umrf/anomaly). [Here the
poster](https://hal.archives-ouvertes.fr/hal-02340484/)
presenting this workflow.
rANOMALY is an R Package integrating AmplicoN wOrkflow for Microbial community AnaLYsis. [Here the
F1000 reference paper](https://f1000research.com/articles/10-7) and [the
poster](https://hal.archives-ouvertes.fr/hal-02340484/) presenting this workflow.
## Installation
......@@ -41,3 +40,15 @@ wikipage](https://forgemia.inra.fr/umrf/ranomaly/-/wikis/home)
IDTAXA formatted references databases for 16S and ITS are [available
here](https://nextcloud.inrae.fr/s/YHi3fmDdEJt5cqR).
## Citation
Sebastien Theil and Etienne Rifa. « RANOMALY: AmplicoN WOrkflow for Microbial Community AnaLYsis ». F1000Research 10 (07/01/2021): 7. https://doi.org/10.12688/f1000research.27268.1.
If you use rANOMALY, please cite following tools:
Callahan, Benjamin J., Paul J. McMurdie, Michael J. Rosen, Andrew W. Han, Amy Jo A. Johnson, et Susan P. Holmes. « DADA2: High-Resolution Sample Inference from Illumina Amplicon Data ». Nature Methods 13, nᵒ 7 (juillet 2016): 581‑83. https://doi.org/10.1038/nmeth.3869.
McMurdie, Paul J., et Susan Holmes. « Phyloseq: An R Package for Reproducible Interactive Analysis and Graphics of Microbiome Census Data ». PLOS ONE 8, nᵒ 4 (22 avril 2013): e61217. https://doi.org/10.1371/journal.pone.0061217.
Murali, Adithya, Aniruddha Bhargava, et Erik S. Wright. « IDTAXA: a novel approach for accurate taxonomic classification of microbiome sequences ». Microbiome 6, nᵒ 1 (9 août 2018): 140. https://doi.org/10.1186/s40168-018-0521-5.
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ASVenn_fun.R
\name{VENNFUN}
\alias{VENNFUN}
\title{VENNFUN}
\usage{
VENNFUN(
TF = TF,
mode = 1,
TITRE = TITRE,
output = "./",
refseq1 = NULL,
alltax = NULL
)
}
\arguments{
\item{TF}{list containing vectors to plot.}
\item{mode}{= 1: length(TF)<=5, mode = 2 5<length(TF)<7}
\item{alltax}{}
}
\value{
Exports a venn diagram with corresponding tabulated file.
}
\description{
Plotting Venn Function and table of shared feature generation
}
......@@ -10,8 +10,8 @@ bars_fun(
top = 10,
Ord1 = NULL,
Fact1 = NULL,
split = FALSE,
relative = TRUE,
outpath = "./",
outfile = "plot_compo.html"
)
}
......@@ -22,18 +22,16 @@ bars_fun(
\item{top}{Number of top taxa to plot}
\item{Ord1}{Variable used to order sample (X axis)}
\item{Ord1}{Variable used to order sample (X axis) or split the barplot if split = TRUE}
\item{Fact1}{Variable used to change X axis tick labels and color}
\item{Fact1}{Variable used to change X axis tick labels and color (when split = FALSE)}
\item{relative}{Plot relative (TRUE, default) or raw abundance plot (FALSE)}
\item{outpath}{Output directory}
\item{split}{if TRUE make a facet_wrap like grouped by Ord1 (default FALSE)}
\item{outfile}{Output file name}
\item{relative}{Plot relative (TRUE, default) or raw abundance plot (FALSE)}
}
\value{
Exports barplots in an interactive plotly community plot
Returns barplots in an interactive plotly community plot
}
\description{
Barplots plotly
......
......@@ -17,7 +17,10 @@ dada2_fun(
compress = FALSE,
verbose = 1,
torrent_single = FALSE,
returnval = TRUE
returnval = TRUE,
paired = TRUE,
trim_l = 15,
trim_r = 0
)
}
\arguments{
......@@ -45,7 +48,15 @@ dada2_fun(
\item{returnval}{Boolean to return values in console or not.}
\item{paired}{Boolean for Illumina Paired End Reads.}
\item{trim_l}{Trim left size.}
\item{trim_r}{Trim right size.}
\item{pool}{option for dada function (FALSE, TRUE or "pseudo"), default is "pseudo". See ? dada.}
\item{torrent_trim}{Sequence length to trim at 3' and 5' for single end torrent data. (0 = no trim)}
}
\value{
Return raw otu table in phyloseq object and export it in an Rdata file.
......
......@@ -6,7 +6,7 @@
\usage{
decontam_fun(
data = data,
domain = TRUE,
domain = "Bacteria",
output = "./decontam_out/",
number = 4000,
prev = 2,
......
......@@ -11,7 +11,8 @@ diversity_alpha_fun(
column2 = "",
column3 = "",
supcovs = "",
measures = c("Observed", "Shannon", "Simpson", "InvSimpson")
measures = c("Observed", "Shannon", "Simpson", "InvSimpson"),
verbose = TRUE
)
}
\arguments{
......@@ -28,6 +29,8 @@ diversity_alpha_fun(
\item{supcovs}{One or more supplementary covariables to test in anova (provided as comma separated vector)}
\item{measures}{Diversity indices (provided as comma separated vector)}
\item{verbose}{Boolean}
}
\value{
Return plots and tests as list and export them in the output directory.
......
......@@ -9,6 +9,7 @@ phy2cyto_fun(
output = "./cytoscape/",
column1 = NULL,
repl = NULL,
displayedRank = "Phylum",
verbose = 1
)
}
......@@ -21,6 +22,8 @@ phy2cyto_fun(
\item{repl}{Replicates column.}
\item{displayedRank}{The rank that will be in the node table.}
\item{verbose}{Verbose level. (1: quiet, 3: verbal)}
}
\value{
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/subset_fastx.R
\name{subset_fastx}
\alias{subset_fastx}
\title{subset_fastx}
\usage{
subset_fastx(
path = NULL,
format = "fastq",
outformat = "fastq",
output = "./subset_fastq/",
nbseq = 10000,
ncores = 3,
compress = FALSE,
verbose = FALSE,
random = FALSE,
seed = NULL
)
}
\arguments{
\item{path}{Path to the fastq files directory}
\item{format}{fasta or fastq format are allowed.}
\item{output}{Path to the output directory}
\item{nbseq}{Number of sequences to output per fastq files; if NULL no subset.}
\item{ncores}{Number of CPU used to process.}
\item{compress}{set on TRUE to get .fastx.gz compressed files}
\item{verbose}{Print information while processing}
\item{random}{Pick randomly 'nbseq' sequences.}
\item{seed}{Fix seed with numeric to reproduce random picking (eg. 123)}
}
\value{
Output fastx files in the output directory
}
\description{
Allows subset fastq or fasta files at a given threshold. This fonction can convert fastq to fasta.
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment