Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
umrf
ranomaly
Commits
c049a1be
Commit
c049a1be
authored
Jun 08, 2021
by
Sebastien Theil
Browse files
Merge branch 'master' into 'assign2'
# Conflicts: # R/assign_taxo_fun.R
parents
bb3e4f2a
b7efced1
Changes
29
Hide whitespace changes
Inline
Side-by-side
DESCRIPTION
View file @
c049a1be
...
...
@@ -22,9 +22,13 @@ Imports:
vctrs,
biomformat,
Biostrings,
Biobase,
BiocGenerics,
gtools,
ShortRead,
dada2,
decontam,
dplyr,
microbiome,
DESeq2,
metagenomeSeq,
...
...
@@ -33,11 +37,11 @@ Imports:
pairwiseAdonis,
phyloseq,
psadd,
DESeq2,
VennDiagram,
digest,
futile.logger,
ggplot2,
glue,
grid,
gridExtra,
vegan,
...
...
@@ -48,9 +52,14 @@ Imports:
taxa,
rmarkdown,
ranacapa,
reshape2,
doParallel,
foreach,
DECIPHER,
mixOmics,
venn
venn,
htmlwidgets,
scales
Remotes:
github::omegahat/XML,
bioc::3.10/biomformat,
...
...
@@ -61,11 +70,11 @@ Remotes:
bioc::3.10/metagenomeSeq,
bioc::3.10/phyloseq,
bioc::3.10/microbiome,
bioc::3.10/DESeq2,
bioc::3.10/DECIPHER,
bioc::3.10/mixOmics,
github::pmartinezarbizu/pairwiseAdonis/pairwiseAdonis,
github::cpauvert/psadd,
github::gauravsk/ranacapa,
github::grunwaldlab/metacoder,
github::mixOmicsTeam/mixOmics
github::mixOmicsTeam/mixOmics,
github::mikelove/DESeq2
R/ASVenn_fun.R
View file @
c049a1be
...
...
@@ -134,18 +134,13 @@ ASVenn_fun <- function(data = data, output = "./ASVenn/", rank = "ASV",
# write.table(TT, paste(output,"/otu_table_sp_",LOC,".csv",sep=""), sep="\t", quote=FALSE, col.names=NA)
}
# print(names(TFtax))
# print(TFdata)
print
(
str
(
TFtax
))
# stop()
## Venn diag
flog.info
(
'Defining unique taxa ...'
)
alltax
<-
do.call
(
rbind
,
TFtax
)
alltax
<-
alltax
[
!
duplicated
(
alltax
[,
1
]),]
row.names
(
alltax
)
=
alltax
[,
1
]
# print(alltax)
# stop()
# print(alltax)
flog.info
(
'Plotting ...'
)
# Specific use to screen taxonomic composition of shared taxa...
...
...
@@ -158,6 +153,7 @@ ASVenn_fun <- function(data = data, output = "./ASVenn/", rank = "ASV",
TF2
<-
list
(
env1
,
others1
)
names
(
TF2
)
<-
c
(
krona
,
"others"
)
#Venn 2
# pdf(NULL)
venn.plot
<-
venn.diagram
(
TF2
,
filename
=
NULL
,
col
=
"black"
,
fill
=
rainbow
(
length
(
TF2
)),
alpha
=
0.50
,
cex
=
1.5
,
cat.col
=
1
,
lty
=
"blank"
,
...
...
@@ -193,7 +189,7 @@ ASVenn_fun <- function(data = data, output = "./ASVenn/", rank = "ASV",
TFbak
<-
TF
<-
sapply
(
TFtax
,
row.names
,
simplify
=
FALSE
)
names
(
TFbak
)
=
names
(
TF
)
=
l
evel1
names
(
TFbak
)
=
names
(
TF
)
=
l
vls
# print( length(unique(unlist(TF))) )
if
(
length
(
lvls
)
>
5
){
...
...
@@ -238,10 +234,10 @@ ASVenn_fun <- function(data = data, output = "./ASVenn/", rank = "ASV",
#'
#' @param TF list containing vectors to plot.
#' @param mode = 1: length(TF)<=5, mode = 2 5<length(TF)<7
#' @param TITRE
#' @param output
#' @param refseq1
#' @param alltax
#' @param TITRE
Plot title.
#' @param output
Output path.
#' @param refseq1
Reference sequences.
#' @param alltax
Taxonomy table.
#'
#'
#' @return Exports a venn diagram with corresponding tabulated file.
...
...
@@ -253,7 +249,8 @@ ASVenn_fun <- function(data = data, output = "./ASVenn/", rank = "ASV",
VENNFUN
<-
function
(
TF
=
TF
,
mode
=
1
,
TITRE
=
TITRE
,
output
=
"./"
,
refseq1
=
NULL
,
alltax
=
NULL
){
if
(
mode
==
1
){
venn
::
venn
(
TF
,
zcol
=
rainbow
(
7
),
ilcs
=
2
,
sncs
=
2
)
#, col=rainbow(7)
# pdf(NULL)
venn
::
venn
(
TF
,
zcol
=
rainbow
(
7
),
ilcs
=
2
,
sncs
=
2
,
ggplot
=
FALSE
)
#, col=rainbow(7)
venn.plot
<-
recordPlot
()
invisible
(
dev.off
())
...
...
@@ -298,8 +295,8 @@ VENNFUN <- function(TF = TF, mode = 1, TITRE = TITRE, output = "./", refseq1 = N
write.table
(
TABf
,
paste
(
output
,
"/"
,
TITRE
,
"_venn_table.csv"
,
sep
=
""
),
sep
=
"\t"
,
quote
=
FALSE
,
row.names
=
FALSE
)
}
}
else
if
(
mode
==
2
){
# more than 5 environments
venn.plot
<-
venn
::
venn
(
TF
,
zcol
=
rainbow
(
7
),
ilcs
=
2
,
sncs
=
2
)
#, col=rainbow(7)
# pdf(NULL)
venn.plot
<-
venn
::
venn
(
TF
,
zcol
=
rainbow
(
7
),
ilcs
=
2
,
sncs
=
2
,
ggplot
=
FALSE
)
#, col=rainbow(7)
venn.plot
<-
recordPlot
()
invisible
(
dev.off
())
...
...
R/aggregate_fun.R
View file @
c049a1be
...
...
@@ -12,6 +12,7 @@
#' @param verbose Verbose level. (1: quiet, 3: verbal)
#' @param rank Taxonomy rank to merge features that have same taxonomy at a certain taxonomic rank (among rank_names(data), or 'ASV' for no glom)
#' @param comp Comparison to test. Comma separated and comparisons are informed with a tilde (A~C,A~B,B~C). If empty, test all combination.
#' @param returnval Boolean for function to return values.
#'
#' @return Export final CSV files, barplot with top significant ASV and Venn diagram.
#'
...
...
R/assign_taxo_fun.R
View file @
c049a1be
...
...
@@ -8,6 +8,7 @@
#' @param verbose Verbose level. (1: quiet, 3: verbal)
#' @param confidence Bootstrap threshold 0...100
#' @param returnval Boolean to return values in console or not.
#' @param ncpu Number of cpus to use.
#'
#'
#' @return Return a taxonomy table with multiple ancestor checking and incongruence checking when 2 databases are used.
...
...
@@ -22,7 +23,7 @@
assign_taxo_fun
<-
function
(
dada_res
=
dada_res
,
output
=
"./idtaxa/"
,
id_db
=
"/PathToDB/UNITE_idtaxa.Rdata"
,
confidence
=
50
,
verbose
=
1
,
returnval
=
TRUE
){
assign_taxo_fun
<-
function
(
dada_res
=
dada_res
,
output
=
"./idtaxa/"
,
id_db
=
"/PathToDB/UNITE_idtaxa.Rdata"
,
confidence
=
50
,
verbose
=
1
,
returnval
=
TRUE
,
ncpu
=
NULL
){
if
(
verbose
==
3
){
...
...
R/bars_fun.R
View file @
c049a1be
...
...
@@ -41,6 +41,7 @@ rarefaction <- function(data = data, col = NULL, step = 100, ggplotly = TRUE){
#' @param Fact1 Variable used to change X axis tick labels and color (when split = FALSE)
#' @param split if TRUE make a facet_wrap like grouped by Ord1 (default FALSE)
#' @param relative Plot relative (TRUE, default) or raw abundance plot (FALSE)
#' @param outfile Output html file.
#'
#' @return Returns barplots in an interactive plotly community plot
#'
...
...
R/core_soft.R
View file @
c049a1be
...
...
@@ -7,6 +7,7 @@
#' @param group Choose which level of the factor, if NULL generate a list for each level.
#' @param freq frequency threshold of microbiome::core_members function
#' @param prev prevalence threshold of microbiome::core_members function
#' @param rank Taxonomy rank.
#'
#' @importFrom microbiome transform core_members
#'
...
...
R/csv2phyloseq_fun.R
View file @
c049a1be
...
...
@@ -5,6 +5,7 @@
#' @param taxtable Tabulated taxonomy table file path.
#' @param seq Tabulated sequence file path.
#' @param metadata Tabulated metadata file path.
#' @param generateTree Boolean to generate the phylogenetic tree.
#' @param output Output directory
#' @param returnval Boolean to return values in console or not.
#'
...
...
R/dada2_fun.R
View file @
c049a1be
...
...
@@ -5,7 +5,7 @@
#' @param amplicon Choose amplicon "16S" or "ITS"
#' @param path Read files folder path
#' @param outpath output .Rdata file name
#' @param pool option for dada function (FALSE, TRUE or "pseudo"), default is "pseudo". See ? dada.
#' @param
dada
pool option for dada function (FALSE, TRUE or "pseudo"), default is "pseudo". See ? dada.
#' @param f_trunclen Forward read truncate length (only for paired end 16S)
#' @param r_trunclen Reverse read truncate length (only for paired end 16S)
#' @param f_primer Forward primer sequence (only for ITS)
...
...
@@ -31,9 +31,19 @@
#' @import futile.logger
#' @import digest
#' @import phyloseq
#' @import stringr
#' @export
get.sample.name <- function(fname) {
  # Derive a sample name from a read-file path: take the file's base name and
  # keep the text that precedes the first underscore.
  # Only the first element of `fname` is considered, so callers are expected to
  # apply this helper one path at a time (e.g. through sapply()).
  # NOTE(review): the roxygen block directly above this definition appears to
  # describe dada2_fun (amplicon, path, outpath, ...); confirm that those tags
  # and the @export are not being attached to this helper instead.
  name_parts <- strsplit(basename(fname), "_")[[1]]
  name_parts[1]
}
# DADA2 function
dada2_fun
<-
function
(
amplicon
=
"16S"
,
path
=
""
,
outpath
=
"./dada2_out/"
,
f_trunclen
=
240
,
r_trunclen
=
240
,
dadapool
=
"pseudo"
,
...
...
@@ -70,8 +80,8 @@ dada2_fun <- function(amplicon = "16S", path = "", outpath = "./dada2_out/", f_t
}
flog.debug
(
"File list..."
)
flog.debug
(
fnFs
)
flog.debug
(
fnRs
)
flog.debug
(
length
(
fnFs
)
)
flog.debug
(
length
(
fnRs
)
)
flog.info
(
'Done.'
)
if
(
amplicon
==
"ITS"
){
...
...
@@ -104,11 +114,11 @@ dada2_fun <- function(amplicon = "16S", path = "", outpath = "./dada2_out/", f_t
# print(fnRs.filtN)
flog.info
(
'filterAndTrim...'
)
#
if(! dir.exists(paste(path,'/filtN',sep=''))){
filterAndTrim
(
fwd
=
fnFs
,
filt
=
fnFs.filtN
,
rev
=
fnRs
,
filt.rev
=
fnRs.filtN
,
maxN
=
0
,
multithread
=
TRUE
,
verbose
=
TRUE
,
rm.phix
=
TRUE
,
compress
=
compress
)
#
}else{
#
flog.info('Filtered files exist, skipping...')
#
}
if
(
!
dir.exists
(
paste
(
path
,
'/filtN'
,
sep
=
''
))){
filterAndTrim
(
fwd
=
fnFs
,
filt
=
fnFs.filtN
,
rev
=
fnRs
,
filt.rev
=
fnRs.filtN
,
maxN
=
0
,
multithread
=
TRUE
,
verbose
=
TRUE
,
rm.phix
=
TRUE
,
compress
=
compress
)
}
else
{
flog.info
(
'Filtered files exist, skipping...'
)
}
flog.info
(
'Done.'
)
...
...
@@ -174,7 +184,8 @@ dada2_fun <- function(amplicon = "16S", path = "", outpath = "./dada2_out/", f_t
cutRs
<-
sort
(
list.files
(
path.cut
,
pattern
=
"_R2.fastq"
,
full.names
=
TRUE
))
# Extract sample names, assuming filenames have format:
get.sample.name
<-
function
(
fname
)
strsplit
(
basename
(
fname
),
"_"
)[[
1
]][
1
]
# get.sample.name <- function(fname) strsplit(basename(fname), "_")[[1]][1]
sample.names
<-
unname
(
sapply
(
cutFs
,
get.sample.name
))
#head(sample.names)
...
...
@@ -189,6 +200,15 @@ dada2_fun <- function(amplicon = "16S", path = "", outpath = "./dada2_out/", f_t
names
(
filtFs
)
<-
sample.names
names
(
filtRs
)
<-
sample.names
flog.debug
(
length
(
cutFs
))
print
(
head
(
cutFs
))
flog.debug
(
length
(
filtFs
))
print
(
head
(
filtFs
))
flog.debug
(
length
(
cutRs
))
print
(
head
(
cutRs
))
flog.debug
(
length
(
filtRs
))
print
(
head
(
filtRs
))
out
<-
filterAndTrim
(
cutFs
,
filtFs
,
cutRs
,
filtRs
,
maxN
=
0
,
maxEE
=
c
(
2
,
2
),
truncQ
=
2
,
minLen
=
50
,
compress
=
FALSE
,
multithread
=
TRUE
)
# on windows, set multithread = FALSE
#head(out)
...
...
@@ -209,7 +229,9 @@ dada2_fun <- function(amplicon = "16S", path = "", outpath = "./dada2_out/", f_t
flog.debug
(
length
(
fnFs
))
flog.debug
(
length
(
fnRs
))
sample.names
<-
sapply
(
strsplit
(
basename
(
fnFs
),
"_"
),
`[`
,
1
)
sample.names
<-
unname
(
sapply
(
fnFs
,
get.sample.name
))
flog.debug
(
head
(
sample.names
))
# sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1)
if
(
compress
){
filtFs
<-
file.path
(
path
,
"filtered"
,
paste0
(
sample.names
,
"_F_filt.fastq.gz"
))
...
...
R/export_to_stamp_fun.R
View file @
c049a1be
...
...
@@ -4,7 +4,7 @@
#'
#' @param data a phyloseq object (output from decontam or generate_phyloseq)
#' @param output Output directory
#' @param correc If TRUE, correct metadata to replace most common special characters
(eg. é -> e)
, save the new file in meta_stampOK.tsv.
#' @param correc If TRUE, correct metadata to replace most common special characters, save the new file in meta_stampOK.tsv.
#'
#' @return Export 2 text files ready to use with STAMP.
#'
...
...
R/generate_phyloseq_fun.R
View file @
c049a1be
...
...
@@ -3,7 +3,7 @@
#'
#'
#' @param dada_res Results of dada2_fun()
#' @param taxtable Results of assign_taxo_fun()
#' @param tax
.
table Results of assign_taxo_fun()
#' @param tree Results of generate_tree_fun()
#' @param metadata Path of metadata file (tab separated with header)
#' @param output Output directory
...
...
R/generate_tree_fun.R
View file @
c049a1be
...
...
@@ -3,7 +3,9 @@
#'
#'
#' @param dada_res Results of dada2_fun()
#' @param output Output directory
#' @param output Output directory.
#' @param psobj Phyloseq object with sequences.
#' @param verbose Verbosity level.
#' @param returnval Boolean to return values in console or not.
#'
#' @return Return a formatted tree object ready to use in phyloseq.
...
...
R/idtaxaDB_formatting_functions.R
View file @
c049a1be
...
...
@@ -137,8 +137,8 @@ check_tax_fun <- function(taxtable = taxtable, output = NULL, rank = 7, verbose=
#' @param taxtable data.frame
#' @param seqs path to fasta file or readDNAStringSet
#' @param prunedb maximum number of sequences per unique taxa.
#' @param outputDIR
#' @output
#' @param outputDIR
Output directory.
#' @return List with taxonomy table and corresponding sequences.
#'
...
...
R/idtaxa_assign_fun.R
View file @
c049a1be
...
...
@@ -6,6 +6,7 @@
#' @param dna A ‘DNAStringSet’ of unaligned sequences.
#' @param asv_names sequences IDs in same order.
#' @param confidence Bootstrap threshold 0...100
#' @param ncpu Number of cpu to use
#'
#' @return return taxonomic assignment of given sequences.
#' @import futile.logger
...
...
@@ -14,10 +15,10 @@
#' @export
idTaxa_assign
=
function
(
db_file
,
dna
,
asv_names
,
confidence
){
idTaxa_assign = function(db_file, dna, asv_names, confidence, ncpu = NULL){
  # `ncpu` is forwarded unchanged to IdTaxa(processors = ncpu) below. The
  # default must be the NULL constant: the previous default `NULLS` is an
  # undefined symbol, so any call that omitted `ncpu` failed with
  # "object 'NULLS' not found" as soon as the argument was evaluated.
  # NOTE(review): presumably NULL lets IdTaxa pick the number of processors
  # itself -- confirm against the DECIPHER documentation.
flog.info
(
paste
(
'Using database '
,
db_file
,
sep
=
''
))
toto
<-
load
(
db_file
)
ids
<-
IdTaxa
(
dna
,
trainingSet
,
strand
=
"both"
,
processors
=
NULL
,
verbose
=
TRUE
)
ids
<-
IdTaxa
(
dna
,
trainingSet
,
strand
=
"both"
,
processors
=
ncpu
,
verbose
=
TRUE
)
names
(
ids
)
<-
asv_names
flog.info
(
"Confidence filtering..."
)
IDCONF
=
as.numeric
(
confidence
)
...
...
R/phy2cyto_fun.R
View file @
c049a1be
...
...
@@ -97,12 +97,12 @@ phy2cyto_fun <- function(data = data, output = "./cytoscape/", column1 = NULL, r
LINKS
=
cbind
(
rep
(
asv
,
length
(
srcs
)),
glue
(
"type_{srcs}"
),
srcs
)
}
}
else
{
#sinon partage et cr
é
ation d'un lien pour chaque environnement source.
#sinon partage et cr
e
ation d'un lien pour chaque environnement source.
LINKS
=
NULL
for
(
rep
in
reps
){
sdat3
=
sdat2
[
sdat2
[,
repl
]
==
rep
,]
srcs
=
as.character
(
unique
(
sdat3
[,
column1
]))
# Lien seulement si l'asv pr
é
sent dans m
ê
me sample
à
2 environnements ou plus
# Lien seulement si l'asv pr
e
sent dans m
e
me sample
a
2 environnements ou plus
if
(
length
(
srcs
)
>
1
){
link
=
cbind
(
rep
(
asv
,
length
(
srcs
)),
glue
(
"type_{srcs}"
),
srcs
)
# print(glue("{rep} {srcs} envs"))
...
...
R/subset_fastx.R
View file @
c049a1be
#' subset_fastx
#'
#' Allows subset fastq or fasta files at a given threshold. This f
o
nction can convert fastq to fasta.
#' Allows subset fastq or fasta files at a given threshold. This f
u
nction can convert fastq to fasta.
#'
#' @param path Path to the fastq files directory
#' @param format fasta or fastq format are allowed.
...
...
@@ -36,7 +36,7 @@ subset_fastx <- function(path = NULL, format = "fastq", outformat = "fastq", out
writeXStringSet
(
X
,
glue
::
glue
(
"{output}/{L2[i]}.{outformat}"
),
format
=
outformat
,
compress
=
compress
)
return
(
"Done"
)
}
if
(
length
(
X
)
>
nbseq
){
if
(
random
){
if
(
!
is.null
(
seed
)){
set.seed
(
seed
)}
...
...
README.md
View file @
c049a1be
...
...
@@ -59,4 +59,4 @@ Murali, Adithya, Aniruddha Bhargava, et Erik S. Wright. « IDTAXA: a novel appr
GPL 3.0
## Copyright
2021 INRA
2021 INRA
E
man/VENNFUN.Rd
View file @
c049a1be
...
...
@@ -18,7 +18,13 @@ VENNFUN(
\item{mode}{= 1: length(TF)<=5, mode = 2 5<length(TF)<7}
\item{alltax}{}
\item{TITRE}{Plot title.}
\item{output}{Output path.}
\item{refseq1}{Reference sequences.}
\item{alltax}{Taxonomy table.}
}
\value{
Exports a venn diagram with corresponding tabulated file.
...
...
man/aggregate_fun.Rd
View file @
c049a1be
...
...
@@ -38,6 +38,8 @@ aggregate_fun(
\item{rank}{Taxonomy rank to merge features that have same taxonomy at a certain taxonomic rank (among rank_names(data), or 'ASV' for no glom)}
\item{comp}{Comparison to test. Comma separated and comparisons are informed with a tilde (A~C,A~B,B~C). If empty, test all combination.}
\item{returnval}{Boolean for function to return values.}
}
\value{
Export final CSV files, barplot with top significant ASV and Venn Digramm.
...
...
man/assign_taxo_fun.Rd
View file @
c049a1be
...
...
@@ -10,7 +10,8 @@ assign_taxo_fun(
id_db = "/PathToDB/UNITE_idtaxa.Rdata",
confidence = 50,
verbose = 1,
returnval = TRUE
returnval = TRUE,
ncpu = NULL
)
}
\arguments{
...
...
@@ -25,6 +26,8 @@ assign_taxo_fun(
\item{verbose}{Verbose level. (1: quiet, 3: verbal)}
\item{returnval}{Boolean to return values in console or not.}
\item{ncpu}{Number of cpus to use.}
}
\value{
Return a taxonomy table with multiple ancestor checking and incongruence checking when 2 databases are used.
...
...
man/bars_fun.Rd
View file @
c049a1be
...
...
@@ -29,6 +29,8 @@ bars_fun(
\item{split}{if TRUE make a facet_wrap like grouped by Ord1 (default FALSE)}
\item{relative}{Plot relative (TRUE, default) or raw abundance plot (FALSE)}
\item{outfile}{Output html file.}
}
\value{
Returns barplots in an interactive plotly community plot
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment