diff --git a/NAMESPACE b/NAMESPACE index 5ce1a3d4a78cd156409c7a9869371af560fa9c6c..30872a739dd77ee4843eaa34449f390a71617df5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(ASVenn_fun) export(VENNFUN) export(aggregate_fun) +export(aggregate_top_taxa) export(assign_taxo_fun) export(bars_fun) export(check_tax_fun) @@ -65,6 +66,7 @@ importFrom(DESeq2,resultsNames) importFrom(dplyr,across) importFrom(dplyr,group_by) importFrom(dplyr,group_map) +importFrom(dplyr,left_join) importFrom(ggpubr,ggtexttable) importFrom(ggpubr,text_grob) importFrom(ggpubr,ttheme) @@ -84,8 +86,9 @@ importFrom(metacoder,heat_tree) importFrom(metacoder,parse_phyloseq) importFrom(metacoder,taxon_names) importFrom(metacoder,zero_low_counts) -importFrom(microbiome,aggregate_top_taxa) +importFrom(microbiome,aggregate_taxa) importFrom(microbiome,core_members) +importFrom(microbiome,top_taxa) importFrom(microbiome,transform) importFrom(nlme,lme) importFrom(phangorn,NJ) @@ -95,4 +98,5 @@ importFrom(phangorn,pml) importFrom(plotly,ggplotly) importFrom(ranacapa,ggrare) importFrom(reshape2,melt) +importFrom(tibble,rownames_to_column) importFrom(venn,venn) diff --git a/R/bars_fun.R b/R/bars_fun.R index d131ce3fdfe297e9c46892cb3509ee3a8ac4b902..4b1001964d7bed20733f0f946002d875bf6d8177 100644 --- a/R/bars_fun.R +++ b/R/bars_fun.R @@ -31,6 +31,32 @@ rarefaction <- function(data = data, col = NULL, step = 100, ggplotly = TRUE){ } +#' aggregate_top_taxa from microbiome package +#' +#' +#' @param x phyloseq object +#' @param top Keep the top-n taxa, and merge the rest under the category 'Other'. Instead of top-n numeric this can also be a character vector listing the groups to combine. +#' @param level Summarization level (from ‘rank_names(pseq)’) +#' +#' @importFrom microbiome aggregate_taxa +#' @importFrom microbiome top_taxa +#' +#' @export + +aggregate_top_taxa <- function (x, top, level){ + x <- aggregate_taxa(x, level) + tops <- top_taxa(x, top) + tax <- tax_table(x) + inds <- which(!rownames(tax) %in% tops) + tax[inds, level] <- "Other" + tax_table(x) <- tax + tt <- tax_table(x)[, level] + tax_table(x) <- tax_table(tt) + aggregate_taxa(x, level) +} + + + #' Barplots plotly #' #' @@ -38,16 +64,16 @@ rarefaction <- function(data = data, col = NULL, step = 100, ggplotly = TRUE){ #' @param rank Taxonomy rank to merge features that have same taxonomy at a certain taxonomic rank (among rank_names(data), or 'ASV' for no glom) #' @param top Number of top taxa to plot #' @param Ord1 Variable used to order sample (X axis) or split the barplot if split = TRUE -#' @param Fact1 Variable used to change X axis tick labels and color (when split = FALSE) +#' @param sample_labels If true, x axis labels are sample IDS, if false labels displayed are levels from Ord1 argument. Ignored if split = TRUE (FALSE) #' @param split if TRUE make a facet_wrap like grouped by Ord1 (default FALSE) #' @param relative Plot relative (TRUE, default) or raw abundance plot (FALSE) +#' @param autoorder Automatic ordering xaxis labels based on Ord1 factor levels with gtools::mixedorder function (TRUE). #' @param ylab Y axis title ("Abundance") #' @param outfile Output html file. #' #' @return Returns barplots in an interactive plotly community plot #' #' @import plotly -#' @importFrom microbiome aggregate_top_taxa #' @importFrom reshape2 melt #' @importFrom gtools mixedsort #' @importFrom dplyr group_map group_by across @@ -56,8 +82,8 @@ rarefaction <- function(data = data, col = NULL, step = 100, ggplotly = TRUE){ #' @export -bars_fun <- function(data = data, rank = "Genus", top = 10, Ord1 = NULL, Fact1 = NULL, split = FALSE, - relative = TRUE, ylab = "Abundance", outfile="plot_compo.html", verbose = TRUE){ +bars_fun <- function(data = data, rank = "Genus", top = 10, Ord1 = NULL, sample_labels = FALSE, split = FALSE, split_sid_order = FALSE, + relative = TRUE, autoorder = TRUE, ylab = "Abundance", outfile="plot_compo.html", verbose = TRUE){ if(verbose){ invisible(flog.threshold(INFO)) @@ -66,15 +92,14 @@ bars_fun <- function(data = data, rank = "Genus", top = 10, Ord1 = NULL, Fact1 = } -if( all(Ord1 != sample_variables(data)) | all(Fact1 != sample_variables(data))){ - stop(paste("Wrong value in Ord1 or Fact1 arguments, please use variables existing in the phyloseq object:", toString(sample_variables(data)))) +if( all(Ord1 != sample_variables(data))){ + stop(paste("Wrong value in Ord1, please use variables existing in the phyloseq object:", toString(sample_variables(data)))) } -#{paste(sample_variables(data), collapse = ",")} + flog.info('Preprocess...') Fdata = data psobj.top <- aggregate_top_taxa(Fdata, rank, top = top) - # print("get data") sdata <- as.data.frame(sample_data(psobj.top), stringsAsFactors = TRUE) sdata$sample.id = sample_names(psobj.top) otable = as.data.frame(otu_table(psobj.top)) @@ -83,6 +108,8 @@ if( all(Ord1 != sample_variables(data)) | all(Fact1 != sample_variables(data))){ # print("melt data") dat <- as.data.frame(t(otable)) dat <- cbind.data.frame(sdata, dat) + + flog.info(' Melting table...') meltdat <- reshape2::melt(dat, id.vars=1:ncol(sdata)) tt <- levels(meltdat$variable) meltdat$variable <- factor(meltdat$variable, levels= c("Other", tt[tt!="Other"])) @@ -92,29 +119,49 @@ if( all(Ord1 != sample_variables(data)) | all(Fact1 != sample_variables(data))){ # print(levels(meltdat$sample.id)) # save(list = ls(all.names = TRUE), file = "debug.rdata", envir = environment()) - # TODO: Message d'erreur si factor n'est pas dans les sample_data - fun = glue( "xform <- list(categoryorder = 'array', - categoryarray = unique(meltdat$sample.id[gtools::mixedorder(as.character(meltdat${Ord1}))]), - title = 'Samples', - tickmode = 'array', - tickvals = 0:nrow(sdata), - ticktext = sdata[unique(meltdat$sample.id[gtools::mixedorder(as.character(meltdat${Ord1}))]), '{Fact1}']@.Data[[1]], - tickangle = -90)") - eval(parse(text=fun)) + if(autoorder){ + flog.info(' Ordering samples...') + fun = glue( "labs = gtools::mixedorder(as.character(meltdat${Ord1}))" ) + eval(parse(text=fun)) - # subplot to vizualize groups + orderedIDS <- unique(meltdat$sample.id[gtools::mixedorder(as.character(meltdat[,Ord1]))]) + orderedOrd1 <- meltdat[,Ord1][gtools::mixedorder(as.character(meltdat[,Ord1]))] + orderedOrd1 <- factor(orderedOrd1, levels = gtools::mixedsort(levels(orderedOrd1))) + }else{ + labs = 1:nrow(meltdat) + + orderedIDS <- unique(meltdat$sample.id) + orderedOrd1 <- meltdat[,Ord1] + } + + + + if(sample_labels){ + lab1 = "sample.id" + }else{ + lab1 = Ord1 + } + + flog.info(' Set labels...') + xform <- list(categoryorder = 'array', + categoryarray = unique(meltdat$sample.id[labs]), + title = 'Samples', + tickmode = 'array', + tickvals = 0:nrow(sdata), + ticktext = sdata[as.character(unique(meltdat$sample.id[labs])), lab1]@.Data[[1]], + tickangle = -90) - orderedIDS <- unique(meltdat$sample.id[gtools::mixedorder(as.character(meltdat[,Ord1]))]) - orderedOrd1 <- meltdat[,Ord1][gtools::mixedorder(as.character(meltdat[,Ord1]))] + # subplot to vizualize groups + flog.info(' Subplot...') df1 <- cbind.data.frame(x=sdata[orderedIDS, "sample.id"]@.Data[[1]], - g=sdata[orderedIDS, Fact1]@.Data[[1]], + g=sdata[orderedIDS, Ord1]@.Data[[1]], y=1) fun = glue( "df1$g <- factor(df1$g, levels = as.character(unique(orderedOrd1)))") eval(parse(text=fun)) - fun = glue( "meltdat${Ord1} <- factor(meltdat${Ord1}, levels = as.character(unique(orderedOrd1)))") + fun = glue( "meltdat${Ord1} <- factor(meltdat${Ord1}, levels = as.character(unique(orderedOrd1)))") # keep sample ids order from metadata. eval(parse(text=fun)) subp1 <- df1 %>% plot_ly( @@ -134,6 +181,7 @@ if( all(Ord1 != sample_variables(data)) | all(Fact1 != sample_variables(data))){ otable=apply(otable,2, function(x){Tot=sum(x); x/Tot}) dat= as.data.frame(t(otable)) dat <- cbind.data.frame(sdata, dat) + meltdat <- reshape2::melt(dat, id.vars=1:ncol(sdata)) tt <- levels(meltdat$variable) meltdat$variable <- factor(meltdat$variable, levels= c("Other", tt[tt!="Other"])) @@ -160,7 +208,7 @@ if( all(Ord1 != sample_variables(data)) | all(Fact1 != sample_variables(data))){ } } - # facet_wrap output + # Splitted plot output if(!split) { if(!is.null(outfile)){ htmlwidgets::saveWidget(p1, outfile) @@ -169,15 +217,20 @@ if( all(Ord1 != sample_variables(data)) | all(Fact1 != sample_variables(data))){ return(p1) } else { flog.info('Splitted plot...') - p1 = meltdat %>% group_by(across({Ord1})) %>% - dplyr::group_map(~ plot_ly(data=., x = ~sample.id, y = ~value, type = 'bar', - name = ~variable, - color = ~variable, legendgroup = ~variable, - showlegend = (.y == levels(meltdat[, Ord1])[1])), - keep = TRUE) %>% + if(split_sid_order){ + meltdat$sample.id = factor(meltdat$sample.id, levels = unique(meltdat$sample.id)) # keep sample ids order from metadata. + } + + p1 = meltdat %>% arrange(across({Ord1})) %>% group_by(across({Ord1})) %>% + mutate(across(where(is.character), as.factor)) %>% + dplyr::group_map(~ plot_ly(data=., x = ~sample.id, y = ~value, type = 'bar', + name = ~variable, + color = ~variable, legendgroup = ~variable, + showlegend = (.y == levels(meltdat[, Ord1])[1])), + keep = TRUE) %>% subplot(nrows = 1, shareX = TRUE, shareY=TRUE, titleX = FALSE) %>% layout(title="", - xaxis = list(title = glue("{Ord1} = {unique(meltdat[, Ord1])[1]}")), + xaxis = list(title = glue("{Ord1} = {levels(meltdat[, Ord1])[1]}")), yaxis = list(title = ylab), barmode = 'stack') diff --git a/R/dada2_fun.R b/R/dada2_fun.R index b25e5c8ea15ff04b80a0ce2fb57a7d30b1d353a1..471dc974e786490a03fdb51279e9138424bce102 100644 --- a/R/dada2_fun.R +++ b/R/dada2_fun.R @@ -30,6 +30,8 @@ #' @import futile.logger #' @import digest #' @import phyloseq +#' @importFrom tibble rownames_to_column +#' @importFrom dplyr left_join #' @export # DADA2 function @@ -235,19 +237,27 @@ dada2_fun <- function(amplicon = "16S", path = "", outpath = "./dada2_out/", f_t flog.info('Filtering reads...') - - out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(f_trunclen,r_trunclen), + out0 <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(f_trunclen,r_trunclen), maxN=0, maxEE=c(2,2), truncQ=2, rm.phix=TRUE, trimLeft=trim_l, compress=compress, multithread=TRUE) + row.names(out0) = sample.names + out <- as.data.frame(out0) %>%tibble::rownames_to_column(var = "sample.id") + + + filtFs_out <- grep("F_filt",list.files(glue::glue("{path}/filtered/"), full.names = TRUE), value = TRUE) + filtRs_out <- grep("R_filt",list.files(glue::glue("{path}/filtered/"), full.names = TRUE), value = TRUE) + + samples_names <- grep(".fastq",list.files("./data_arch_links/", full.names = FALSE), value = TRUE) + flog.info('Done.') } - + #COMMON flog.info('Learning error model...') - errF <- learnErrors(filtFs, multithread=TRUE) - errR <- learnErrors(filtRs, multithread=TRUE) + errF <- learnErrors(filtFs_out, multithread=TRUE) + errR <- learnErrors(filtRs_out, multithread=TRUE) flog.info('Done.') @@ -269,8 +279,8 @@ dada2_fun <- function(amplicon = "16S", path = "", outpath = "./dada2_out/", f_t flog.info('Dereplicating fastq...') - derepFs <- derepFastq(filtFs, verbose=TRUE) - derepRs <- derepFastq(filtRs, verbose=TRUE) + derepFs <- derepFastq(filtFs_out, verbose=TRUE) + derepRs <- derepFastq(filtRs_out, verbose=TRUE) flog.info('Done.') flog.info('dada2...') @@ -285,7 +295,6 @@ dada2_fun <- function(amplicon = "16S", path = "", outpath = "./dada2_out/", f_t seqtab <- makeSequenceTable(mergers) - flog.info('Removing chimeras...') seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE) flog.debug(sum(seqtab.nochim)) @@ -295,19 +304,26 @@ dada2_fun <- function(amplicon = "16S", path = "", outpath = "./dada2_out/", f_t flog.info('Done.') - track <- cbind.data.frame(out, stockFs, stockRs, sapply(mergers, getN), rowSums(seqtab.nochim)) + track0 <- cbind.data.frame(stockFs, stockRs, sapply(mergers, getN), rowSums(seqtab.nochim)) + rownames(track0) <- stringr::str_remove(rownames(track0), "_F_filt.fastq") + # If processing a single sample, remove the sapply calls: e.g. replace sapply(dadaFs, getN) with getN(dadaFs) - colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim") - rownames(track) <- sample.names - head(track) - write.table(track, paste(outpath,"/read_tracking.csv",sep=''), sep="\t", row.names=TRUE, col.names=NA, quote=FALSE) + track <- track0 %>% tibble::rownames_to_column(var="sample.id") + + + final_track <- out %>% dplyr::left_join(y = track, by = "sample.id") + colnames(final_track) <- c("sample.id", "input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim") + head(final_track) + + write.table(final_track, paste(outpath,"/read_tracking.csv",sep=''), sep="\t", row.names=TRUE, col.names=NA, quote=FALSE) seqtab.export <- seqtab.nochim colnames(seqtab.export) <- sapply(colnames(seqtab.export), digest::digest, algo="md5") otu.table <- phyloseq::otu_table(t(seqtab.export), taxa_are_rows = TRUE) + colnames(otu.table) <- stringr::str_remove(colnames(otu.table), "_F_filt.fastq") flog.info('Writing raw tables.') write.table(cbind(t(seqtab.export), "Sequence" = colnames(seqtab.nochim)), paste(outpath,"/raw_otu-table.csv",sep=''), sep="\t", row.names=TRUE, col.names=NA, quote=FALSE) diff --git a/R/decontam_fun.R b/R/decontam_fun.R index 0f36706c13a9737d0b59d8f5b5be841baa85128a..446d9272201b7886e6954d1678e89f96845ad9c6 100644 --- a/R/decontam_fun.R +++ b/R/decontam_fun.R @@ -272,7 +272,12 @@ decontam_fun <- function(data = data, domain = "Bacteria", output = "./decontam_ TABf=cbind.data.frame( TABtest_filt, TABf ) names(TABf)[1] = names(TF)[j] } - TABff <- cbind(as.matrix(TABf), as.matrix(data_no_filtering@tax_table[row.names(TABf),])) + + if(!is.null(data@tax_table)){ + TABff <- cbind(as.matrix(TABf), as.matrix(data_no_filtering@tax_table[row.names(TABf),])) + }else{ + TABff <- as.matrix(TABf) + } write.table(TABff, file = paste(output,'/Exclu_out.csv',sep=''), sep = "\t", col.names=NA) @@ -312,11 +317,25 @@ decontam_fun <- function(data = data, domain = "Bacteria", output = "./decontam_ flog.info(paste("AFTER FILTERING: ",nsamples(data), "samples and", ntaxa(data),"ASVs in otu table") ) flog.info('Writing raw tables.') - write.table(cbind(otu_table(data),"Consensus Lineage" = apply(tax_table(data), 1, paste, collapse = ";"), "sequences"=as.data.frame(refseq(data)) ), paste(output,"/raw_otu-table.csv",sep=''), sep="\t", row.names=TRUE, col.names=NA, quote=FALSE) + if(!is.null(data@tax_table)){ + write.table(cbind(otu_table(data),"Consensus Lineage" = apply(tax_table(data), 1, paste, collapse = ";"), "sequences"=as.data.frame(refseq(data)) ), paste(output,"/raw_otu-table.csv",sep=''), sep="\t", row.names=TRUE, col.names=NA, quote=FALSE) + + }else{ + write.table(cbind(otu_table(data), "sequences"=as.data.frame(refseq(data)) ), paste(output,"/raw_otu-table.csv",sep=''), sep="\t", row.names=TRUE, col.names=NA, quote=FALSE) + + } flog.info('Writing relative tables.') data_rel <- transform_sample_counts(data, function(x) x / sum(x) ) - write.table(cbind(otu_table(data_rel),"Consensus Lineage" = apply(tax_table(data_rel), 1, paste, collapse = ";"), "sequences"=as.data.frame(refseq(data_rel))),paste(output,"/relative_otu-table.csv",sep=''), sep="\t", row.names=TRUE, col.names=NA, quote=FALSE) + if(!is.null(data@tax_table)){ + write.table(cbind(otu_table(data_rel),"Consensus Lineage" = apply(tax_table(data_rel), 1, paste, collapse = ";"), "sequences"=as.data.frame(refseq(data_rel))),paste(output,"/relative_otu-table.csv",sep=''), sep="\t", row.names=TRUE, col.names=NA, quote=FALSE) + + + }else{ + write.table(cbind(otu_table(data_rel), "sequences"=as.data.frame(refseq(data_rel))),paste(output,"/relative_otu-table.csv",sep=''), sep="\t", row.names=TRUE, col.names=NA, quote=FALSE) + + } + flog.info('Saving R objects.') save(data, file=paste(output,'/robjects.Rdata',sep='')) diff --git a/R/diversity_alpha_fun.R b/R/diversity_alpha_fun.R index f3b64a39fd5d8227ffbcd499f4fac5d0a4d7ae13..a775bb25a2a7ee362f94f0e57dc36a844cab873e 100644 --- a/R/diversity_alpha_fun.R +++ b/R/diversity_alpha_fun.R @@ -15,10 +15,11 @@ alphaPlot <- function(data = data, col1 = "", col2 = "", measures = c("Shannon") p <- plot_richness(data,x=col1, color=col2, measures=measures) } p$layers <- p$layers[-1] + if(col2 != ""){legpos = "right"}else{legpos = "none"} p <- p + ggtitle('Alpha diversity indexes') + geom_boxplot(alpha = 1, outlier.shape = NA) + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust=1), - legend.position = "none",axis.text=element_text(size=15), + legend.position = legpos, axis.text=element_text(size=15), axis.title=element_text(size=16,face="bold"), strip.text.x = element_text(size = 18,face="bold"), title=element_text(size=16,face="bold")) @@ -44,7 +45,7 @@ alphaPlotly <- function(data=data, alpha=alpha, col1='', col2='', measures=c("Sh #' #' @param data a phyloseq object (output from decontam or generate_phyloseq) #' @param output Output directory -#' @param column1 Column name of first factor to test (covariable in ANOVA). +#' @param column1 Column name of first factor to test (covariable in ANOVA), or principal factor if only one factor to test. #' @param column2 Column name of second factor to test (last factor in ANOVA). #' @param column3 Column name of subjects in case of repeated mesures (mixed model) #' @param supcovs One or more supplementary covariables to test in anova (provided as comma separated vector) @@ -104,8 +105,8 @@ diversity_alpha_fun <- function(data = data, output = "./plot_div_alpha/", colum #alpha diversité tableau resAlpha = list() + flog.info('Alpha diversity tab ...') - resAlpha$alphatable <- estimate_richness(data, measures = measures ) # row.names(resAlpha$alphatable) <- gsub("X","",row.names(resAlpha$alphatable)) write.table(resAlpha$alphatable,paste(output,'/alphaDiversity_table.csv',sep=''), sep="\t", row.names=TRUE, col.names=NA, quote=FALSE) flog.info('Done.') @@ -115,9 +116,10 @@ diversity_alpha_fun <- function(data = data, output = "./plot_div_alpha/", colum resAlpha$plot = p ggsave(paste(output,'/alpha_diversity.eps',sep=''), plot=p, height = 15, width = 30, units="cm", dpi = 500, device="eps") - - anova_data <- cbind(sample_data(data), resAlpha$alphatable) + alphatable <- estimate_richness(data, measures = measures ) + anova_data <- cbind(sample_data(data), alphatable) anova_data$Depth <- sample_sums(data) + resAlpha$alphatable <- anova_data if(length(levels(as.factor(anova_data[,column1])))>1 & mean(table(anova_data[,column1])) != 1 ){ flog.info('ANOVA ...') # variables <- paste(sep=" + ", "Depth", var1) @@ -163,7 +165,7 @@ diversity_alpha_fun <- function(data = data, output = "./plot_div_alpha/", colum # } flog.info(paste("\n##pvalues of pairwise wilcox test on ", m, "with FDR correction \n"), sep="") - fun <- paste("wilcox_res1 <- pairwise.wilcox.test(anova_data$",m,", anova_data[,column1], p.adjust.method='fdr')", sep="") + fun <- paste("wilcox_res1 <- pairwise.wilcox.test(anova_data$",m,", anova_data[,column1], p.adjust.method='none')", sep="") eval(parse(text = fun)) # print(round(wilcox_res1$p.value,3)) wilcox_col1 <- round(wilcox_res1$p.value,3) @@ -175,7 +177,7 @@ diversity_alpha_fun <- function(data = data, output = "./plot_div_alpha/", colum if(column2 != ''){ flog.info(paste("\n##pvalues of pairwise wilcox test on ", m, "with FDR correction \n"), sep="") - fun <- paste("wilcox_res1 <- pairwise.wilcox.test(anova_data$",m,", anova_data[,column2], p.adjust.method='fdr')", sep="") + fun <- paste("wilcox_res1 <- pairwise.wilcox.test(anova_data$",m,", anova_data[,column2], p.adjust.method='none')", sep="") eval(parse(text = fun)) wilcox_col2_fdr <- round(wilcox_res1$p.value,3) diff --git a/R/diversity_beta_light.R b/R/diversity_beta_light.R index 9f9e53054e772350d2f0c99eb8f61613c56a1a78..dd5987355bfe35ff797d0e0cc29ee5554e598be4 100644 --- a/R/diversity_beta_light.R +++ b/R/diversity_beta_light.R @@ -94,9 +94,18 @@ diversity_beta_light <- function(psobj, rank = "ASV", col = NULL, cov = NULL, di }else{ - p1 <- plot_samples(data_rank, ordinate(data_rank, ord0, dist0), color = col, axes = axes ) + - theme_bw() + ggtitle(glue::glue("{ord0} + {dist0}")) - if(ellipse){p1 <- p1 + stat_ellipse()} + + p1 <- phyloseq::plot_ordination(physeq = data_rank, ordination = ordinate(data_rank, ord0, dist0), axes = axes) + p1$layers[[1]] <- NULL + + sample.id = sample_names(data_rank) + sdata <- sample_data(data_rank) + fact <- as.matrix(sdata[,col]) + p1 <- p1 + aes(color = fact, sample.id = sample.id) + p1 <- p1 + stat_ellipse(aes(group = fact)) + p1 <- p1 + geom_point() + theme_bw() + resBeta$plotly1 <- ggplotly(p1, tooltip=c("x", "y", "sample.id")) %>% config(toImageButtonOptions = list(format = "svg")) + } # plot(p1) flog.info('Plot ok...') diff --git a/R/generate_phyloseq_fun.R b/R/generate_phyloseq_fun.R index fce0935f9a17a355d5f72a9c721a878933bc42aa..02cb34e65065ae411bef2ffd98baefb1dbd693b4 100644 --- a/R/generate_phyloseq_fun.R +++ b/R/generate_phyloseq_fun.R @@ -42,8 +42,16 @@ generate_phyloseq_fun <- function(dada_res = dada_res, tax.table = tax.table, tr flog.info('Done.') if(length(setdiff(colnames(dada_res$otu.table), sampledata$sample.id)) > 0){ - flog.info(setdiff(colnames(dada_res$otu.table), sampledata$sample.id)) - stop("ERROR: number of samples in metadata differ from otu table.") + message("ERROR: sample names in otu table differs from sample metadata.") + print(setdiff(colnames(dada_res$otu.table), sampledata$sample.id)) + stop("Please check metadata table.",call. = FALSE) + } + + if(length(setdiff(sampledata$sample.id, colnames(dada_res$otu.table))) > 0){ + message("WARNING: One or more samples in metadata are not present in otu table:") + print(setdiff(sampledata$sample.id, colnames(dada_res$otu.table))) + + sample.metadata <- sample.metadata[colnames(dada_res$otu.table),] } flog.info("Sequences..") @@ -51,13 +59,31 @@ generate_phyloseq_fun <- function(dada_res = dada_res, tax.table = tax.table, tr flog.debug('Generate MD5 ids...') names(sequences) <- sapply(sequences,digest,algo='md5') - if(is.null(tree)){ - flog.info('Building phyloseq object without tree...') - data <- phyloseq(dada_res$otu.table, tax_table(as.matrix(tax.table)), sample.metadata, DNAStringSet(sequences)) - } else{ - flog.info('Building phyloseq object with tree...') - data <- phyloseq(dada_res$otu.table, tax_table(as.matrix(tax.table)), sample.metadata, phy_tree(tree), DNAStringSet(sequences)) + if(!is.null(tax.table)){ + + if(is.null(tree)){ + flog.info('Building phyloseq object without tree...') + data <- phyloseq(dada_res$otu.table[,row.names(sample.metadata)], tax_table(as.matrix(tax.table)), sample.metadata, DNAStringSet(sequences)) + } else{ + flog.info('Building phyloseq object with tree...') + data <- phyloseq(dada_res$otu.table[,row.names(sample.metadata)], tax_table(as.matrix(tax.table)), sample.metadata, phy_tree(tree), DNAStringSet(sequences)) + } + + }else{ + + if(is.null(tree)){ + flog.info('Building phyloseq object without tree...') + data <- phyloseq(dada_res$otu.table[,row.names(sample.metadata)], sample.metadata, DNAStringSet(sequences)) + } else{ + flog.info('Building phyloseq object with tree...') + data <- phyloseq(dada_res$otu.table[,row.names(sample.metadata)], sample.metadata, phy_tree(tree), DNAStringSet(sequences)) + } + + } + + + flog.info('Done.') data_rel <- transform_sample_counts(data, function(x) x / sum(x) ) diff --git a/man/aggregate_top_taxa.Rd b/man/aggregate_top_taxa.Rd new file mode 100644 index 0000000000000000000000000000000000000000..82707c5f84b8ec70c828d814ff82b241467139be --- /dev/null +++ b/man/aggregate_top_taxa.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bars_fun.R +\name{aggregate_top_taxa} +\alias{aggregate_top_taxa} +\title{aggregate_top_taxa from microbiome package} +\usage{ +aggregate_top_taxa(x, top, level) +} +\arguments{ +\item{x}{phyloseq object} + +\item{top}{Keep the top-n taxa, and merge the rest under the category 'Other'. Instead of top-n numeric this can also be a character vector listing the groups to combine.} + +\item{level}{Summarization level (from ‘rank_names(pseq)’)} +} +\description{ +aggregate_top_taxa from microbiome package +} diff --git a/man/bars_fun.Rd b/man/bars_fun.Rd index 371979201a9b73d5f853fac985e6ce625c33e9e0..44373c9cda4802255c11ed3f61f118a6152b7e3e 100644 --- a/man/bars_fun.Rd +++ b/man/bars_fun.Rd @@ -9,9 +9,11 @@ bars_fun( rank = "Genus", top = 10, Ord1 = NULL, - Fact1 = NULL, + sample_labels = FALSE, split = FALSE, + split_sid_order = FALSE, relative = TRUE, + autoorder = TRUE, ylab = "Abundance", outfile = "plot_compo.html", verbose = TRUE @@ -26,12 +28,14 @@ bars_fun( \item{Ord1}{Variable used to order sample (X axis) or split the barplot if split = TRUE} -\item{Fact1}{Variable used to change X axis tick labels and color (when split = FALSE)} +\item{sample_labels}{If true, x axis labels are sample IDS, if false labels displayed are levels from Ord1 argument. Ignored if split = TRUE (FALSE)} \item{split}{if TRUE make a facet_wrap like grouped by Ord1 (default FALSE)} \item{relative}{Plot relative (TRUE, default) or raw abundance plot (FALSE)} +\item{autoorder}{Automatic ordering xaxis labels based on Ord1 factor levels with gtools::mixedorder function (TRUE).} + \item{ylab}{Y axis title ("Abundance")} \item{outfile}{Output html file.} diff --git a/man/diversity_alpha_fun.Rd b/man/diversity_alpha_fun.Rd index 4f60f8d3b6db8079a37a898271a741811c8aba8c..714642e6bc092d32ebfabb7f8f392979af2cc146 100644 --- a/man/diversity_alpha_fun.Rd +++ b/man/diversity_alpha_fun.Rd @@ -20,7 +20,7 @@ diversity_alpha_fun( \item{output}{Output directory} -\item{column1}{Column name of first factor to test (covariable in ANOVA).} +\item{column1}{Column name of first factor to test (covariable in ANOVA), or principal factor if only one factor to test.} \item{column2}{Column name of second factor to test (last factor in ANOVA).} diff --git a/man/diversity_beta_light.Rd b/man/diversity_beta_light.Rd index 87425ad5ad6dee4957be5cc2bded53235c3484f6..357f044afdb36ba9663cd11363ad7fa8d1cf8a04 100644 --- a/man/diversity_beta_light.Rd +++ b/man/diversity_beta_light.Rd @@ -12,7 +12,10 @@ diversity_beta_light( dist0 = "bray", ord0 = "MDS", output = "./plot_div_beta/", - tests = TRUE + axes = c(1, 2), + tests = TRUE, + verbose = 2, + ellipse = TRUE ) } \arguments{ @@ -30,7 +33,13 @@ diversity_beta_light( \item{output}{The output file directory.} +\item{axes}{Axes to plot (c(1,2))} + \item{tests}{Whether to compute tests or not (TRUE/FALSE)} + +\item{verbose}{Verbose level. (1: quiet, 2: print infos, 3: print infos + debug)} + +\item{ellipse}{Plot ellipse (TRUE)} } \value{ Return specific plots and tests in list and output them in the output directory.