Commit 92462db4 authored by Olivier Rue's avatar Olivier Rue
Browse files

amplicon comparison

parent 76548b16
---
title: "Amplicon comparison (ITS1/ITS2/D1D1/RPB2)"
description: |
METABARFOOD project.
author: Olivier Rué
date: '`r Sys.Date()`'
output:
html_document:
self_contained: false
number_sections: FALSE
code_folding: "show"
toc: true
toc_depth: 5
toc_float: true
---
<link href="https://cdnjs.cloudflare.com/ajax/libs/noUiSlider/7.0.10/jquery.nouislider.min.css" rel="stylesheet"/>
<script src="https://cdnjs.cloudflare.com/ajax/libs/noUiSlider/7.0.10/jquery.nouislider.min.js"></script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/selectize.js/0.12.0/css/selectize.bootstrap3.min.css" rel="stylesheet" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/selectize.js/0.12.0/js/standalone/selectize.min.js"></script>
```{r setup, include=FALSE}
knitr::opts_chunk$set(eval=FALSE, echo =FALSE, cache = FALSE, message = FALSE, warning = FALSE, cache.lazy = FALSE, fig.height = 3.5, fig.width = 10.5)
```
```{r, setup1, echo=FALSE, message = FALSE, warning = FALSE, eval=TRUE}
library(DT)
library(ggpubr)
library(kableExtra)
library(seqinr)
library(gridExtra)
library(phyloseq)
library(phyloseq.extended)
library(tidyverse)
library(plotly)
library(qiime2R)
library(tidyverse)
library(ComplexHeatmap)
library(phyloseqCompanion)
library(circlize)
```
```{r datatable global options, echo=FALSE, eval=TRUE}
options(DT.options = list(pageLength = 10,
scrollX = TRUE,
language = list(search = 'Filter:'),
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
```
```{r, eval=TRUE}
source("functions.R")
metabarfood <- TRUE
amplicons <- c("ITS1","ITS2", "D1D2", "RPB2")
types <- c("ADN","PCR")
absent_color <- "#F6CFB8"
tools <- c("DADA2_FROGS", "FROGS", "USEARCH", "DADA2-pe", "DADA2-se", "QIIME-se", "QIIME-pe")
#tools <- c("DADA2_FROGS")
dedicated_palette <- c("#cccc00", "#8eadac", "#000000", "#56B4E9", "#8a6479", "#8a8964", "#ad8e9f", "#adac8e")
method_palette <- setNames(
object = dedicated_palette,
nm = c("EXPECTED", "FROGS", "DADA2_FROGS", "USEARCH", "DADA2-se", "QIIME-se", "DADA2-pe", "QIIME-pe")
)
dedicated_palette_wo_real <- c("#8eadac", "#000000", "#56B4E9", "#8a6479", "#8a8964", "#ad8e9f", "#adac8e")
method_palette_wo_real <- setNames(
object = dedicated_palette_wo_real,
nm = c("FROGS", "DADA2_FROGS", "USEARCH", "DADA2-se", "QIIME-se", "DADA2-pe", "QIIME-pe")
)
```
# Amplicon comparison
## MEAT {.tabset}
```{r, eval=TRUE}
sp <- "MEAT"
```
```{r, fig.height=10, results="asis", eval=TRUE}
for(tool in tools){
cat(" \n###",tool, " \n\n")
htmp_list <- c()
htmp_list_presence <- c()
min <- 0
max <- 0
for(its in amplicons){
for(type in types){
path <- file.path(sp,its,type)
mat_file <- file.path(path,paste0(sp,"_",its,"_",type,"_abundmat.rds"))
mat_presence_file <- file.path(path,paste0(sp,"_",its,"_",type,"_presencemat.rds"))
mat <- readRDS(mat_file)
mat_presence <- readRDS(mat_presence_file)
submat <- mat %>% as_tibble() %>% select(tool) %>% as.matrix()
submat_presence <- mat_presence %>% as_tibble() %>% select(tool) %>% as.matrix()
rownames(submat) <- rownames(mat)
rownames(submat_presence) <- rownames(mat_presence)
submat[submat==0] <- NA
#submat_presence[submat_presence==0] <- NA
max <- max(max,max(submat,na.rm = TRUE))
colnames(submat) <- paste0(its," ",type)
colnames(submat_presence) <- paste0(its," ",type)
ht <- build_heatmap_one_tool(submat, min, max)
ht_presence <- build_heatmap_one_tool_presence(submat_presence, 0, 1)
htmp_list <- c(htmp_list,ht)
htmp_list_presence <- c(htmp_list_presence,ht_presence)
}
}
l <- c()
for(i in htmp_list){
l <- l + i
}
l_presence <- c()
for(i in htmp_list_presence){
l_presence <- l_presence + i
}
draw(l, row_title = "Expected species",
column_title_gp = gpar(fontsize = 16), ht_gap = unit(0.1, "cm"), annotation_legend_list = list(
Legend(at = 1, title = "", labels=c("Absent"), legend_gp = gpar(fill=absent_color))
))
cat(" \n")
# draw(l_presence, row_title = "Expected species",
# column_title_gp = gpar(fontsize = 16), ht_gap = unit(0.1, "cm"), annotation_legend_list = list(
# Legend(at = 1, title = "", labels=c("Absent"), legend_gp = gpar(fill=absent_color)),
# Legend(at = 2, title = "", labels=c("Not well reconstructed"), legend_gp = gpar(fill="#b1c5c4")),
# Legend(at = 3, title = "", labels=c("Perfect"), legend_gp = gpar(fill="#86a5a4"))
# )
# )
# cat(" \n")
}
```
## CHEESE {.tabset}
```{r, eval=TRUE}
sp <- "CHEESE"
```
```{r, fig.height=10, results="asis", eval=TRUE}
for(tool in tools){
cat(" \n###",tool, " \n\n")
htmp_list <- c()
htmp_list_presence <- c()
min <- 0
max <- 0
for(its in amplicons){
for(type in types){
path <- file.path(sp,its,type)
mat_file <- file.path(path,paste0(sp,"_",its,"_",type,"_abundmat.rds"))
mat_presence_file <- file.path(path,paste0(sp,"_",its,"_",type,"_presencemat.rds"))
mat <- readRDS(mat_file)
mat_presence <- readRDS(mat_presence_file)
submat <- mat %>% as_tibble() %>% select(tool) %>% as.matrix()
submat_presence <- mat_presence %>% as_tibble() %>% select(tool) %>% as.matrix()
rownames(submat) <- rownames(mat)
rownames(submat_presence) <- rownames(mat_presence)
submat[submat==0] <- NA
#submat_presence[submat_presence==0] <- NA
max <- max(max,max(submat,na.rm = TRUE))
colnames(submat) <- paste0(its," ",type)
colnames(submat_presence) <- paste0(its," ",type)
ht <- build_heatmap_one_tool(submat, min, max)
ht_presence <- build_heatmap_one_tool_presence(submat_presence, 0, 1)
htmp_list <- c(htmp_list,ht)
htmp_list_presence <- c(htmp_list_presence,ht_presence)
}
}
l <- c()
for(i in htmp_list){
l <- l + i
}
l_presence <- c()
for(i in htmp_list_presence){
l_presence <- l_presence + i
}
draw(l, row_title = "Expected species",
column_title_gp = gpar(fontsize = 16), ht_gap = unit(0.1, "cm"), annotation_legend_list = list(
Legend(at = 1, title = "", labels=c("Absent"), legend_gp = gpar(fill=absent_color))
))
cat(" \n")
# draw(l_presence, row_title = "Expected species",
# column_title_gp = gpar(fontsize = 16), ht_gap = unit(0.1, "cm"), annotation_legend_list = list(
# Legend(at = 1, title = "", labels=c("Absent"), legend_gp = gpar(fill=absent_color)),
# Legend(at = 2, title = "", labels=c("Not well reconstructed"), legend_gp = gpar(fill="#b1c5c4")),
# Legend(at = 3, title = "", labels=c("Perfect"), legend_gp = gpar(fill="#86a5a4"))
# )
# )
# cat(" \n")
}
```
## BREAD {.tabset}
```{r, eval=TRUE}
sp <- "BREAD"
```
```{r, fig.height=10, results="asis", eval=TRUE}
for(tool in tools){
cat(" \n###",tool, " \n\n")
htmp_list <- c()
htmp_list_presence <- c()
min <- 0
max <- 0
for(its in amplicons){
for(type in types){
path <- file.path(sp,its,type)
mat_file <- file.path(path,paste0(sp,"_",its,"_",type,"_abundmat.rds"))
mat_presence_file <- file.path(path,paste0(sp,"_",its,"_",type,"_presencemat.rds"))
mat <- readRDS(mat_file)
mat_presence <- readRDS(mat_presence_file)
submat <- mat %>% as_tibble() %>% select(tool) %>% as.matrix()
submat_presence <- mat_presence %>% as_tibble() %>% select(tool) %>% as.matrix()
rownames(submat) <- rownames(mat)
rownames(submat_presence) <- rownames(mat_presence)
submat[submat==0] <- NA
#submat_presence[submat_presence==0] <- NA
max <- max(max,max(submat,na.rm = TRUE))
colnames(submat) <- paste0(its," ",type)
colnames(submat_presence) <- paste0(its," ",type)
ht <- build_heatmap_one_tool(submat, min, max)
ht_presence <- build_heatmap_one_tool_presence(submat_presence, 0, 1)
htmp_list <- c(htmp_list,ht)
htmp_list_presence <- c(htmp_list_presence,ht_presence)
}
}
l <- c()
for(i in htmp_list){
l <- l + i
}
l_presence <- c()
for(i in htmp_list_presence){
l_presence <- l_presence + i
}
draw(l, row_title = "Expected species",
column_title_gp = gpar(fontsize = 16), ht_gap = unit(0.1, "cm"), annotation_legend_list = list(
Legend(at = 1, title = "", labels=c("Absent"), legend_gp = gpar(fill=absent_color))
))
cat(" \n")
# draw(l_presence, row_title = "Expected species",
# column_title_gp = gpar(fontsize = 16), ht_gap = unit(0.1, "cm"), annotation_legend_list = list(
# Legend(at = 1, title = "", labels=c("Absent"), legend_gp = gpar(fill=absent_color)),
# Legend(at = 2, title = "", labels=c("Not well reconstructed"), legend_gp = gpar(fill="#b1c5c4")),
# Legend(at = 3, title = "", labels=c("Perfect"), legend_gp = gpar(fill="#86a5a4"))
# )
# )
# cat(" \n")
}
```
## WINE {.tabset}
```{r, eval=TRUE}
sp <- "WINE"
```
```{r, fig.height=10, results="asis", eval=TRUE}
for(tool in tools){
cat(" \n###",tool, " \n\n")
htmp_list <- c()
htmp_list_presence <- c()
min <- 0
max <- 0
for(its in amplicons){
for(type in types){
path <- file.path(sp,its,type)
mat_file <- file.path(path,paste0(sp,"_",its,"_",type,"_abundmat.rds"))
mat_presence_file <- file.path(path,paste0(sp,"_",its,"_",type,"_presencemat.rds"))
mat <- readRDS(mat_file)
mat_presence <- readRDS(mat_presence_file)
submat <- mat %>% as_tibble() %>% select(tool) %>% as.matrix()
submat_presence <- mat_presence %>% as_tibble() %>% select(tool) %>% as.matrix()
rownames(submat) <- rownames(mat)
rownames(submat_presence) <- rownames(mat_presence)
submat[submat==0] <- NA
#submat_presence[submat_presence==0] <- NA
max <- max(max,max(submat,na.rm = TRUE))
colnames(submat) <- paste0(its," ",type)
colnames(submat_presence) <- paste0(its," ",type)
ht <- build_heatmap_one_tool(submat, min, max)
ht_presence <- build_heatmap_one_tool_presence(submat_presence, 0, 1)
htmp_list <- c(htmp_list,ht)
htmp_list_presence <- c(htmp_list_presence,ht_presence)
}
}
l <- c()
for(i in htmp_list){
l <- l + i
}
l_presence <- c()
for(i in htmp_list_presence){
l_presence <- l_presence + i
}
draw(l, row_title = "Expected species",
column_title_gp = gpar(fontsize = 16), ht_gap = unit(0.1, "cm"), annotation_legend_list = list(
Legend(at = 1, title = "", labels=c("Absent"), legend_gp = gpar(fill=absent_color))
))
cat(" \n")
# draw(l_presence, row_title = "Expected species",
# column_title_gp = gpar(fontsize = 16), ht_gap = unit(0.1, "cm"), annotation_legend_list = list(
# Legend(at = 1, title = "", labels=c("Absent"), legend_gp = gpar(fill=absent_color)),
# Legend(at = 2, title = "", labels=c("Not well reconstructed"), legend_gp = gpar(fill="#b1c5c4")),
# Legend(at = 3, title = "", labels=c("Perfect"), legend_gp = gpar(fill="#86a5a4"))
# )
# )
# cat(" \n")
}
```
```{r, fig.height=10, results="asis", eval=F}
create_frogs_object <- function(path, marker, type, file, multifile){
biom <- file.path(path,marker,type,file)
physeq <- import_frogs(biom, taxMethod = "blast")
metadata_file <- file.path(path,marker,type,"metadata.tsv")
metadata <- read.table(metadata_file, row.names=1, header=TRUE, sep="\t", stringsAsFactors = FALSE)
sample_data(physeq) <- metadata
multiaff_file <- file.path(path,marker,type,multifile)
physeq <- update_frogs_physeq_with_multi(physeq, multiaff_file)
physeq = transform_sample_counts(physeq, function(x) x / sum(x) )
sample_names(physeq) <- glue::glue(paste("{sample_names(physeq)}","frogs",marker, sep="_"))
sample_data(physeq)$method <- "DADA2_FROGS"
sample_data(physeq)$Type <- type
sample_data(physeq)$Marker <- marker
physeq_aglom <- phyloseq.extended::fast_tax_glom(physeq, taxrank = "Species")
#physeq_aglom <- prune_taxa(tax_table(physeq_aglom)[,7] %in% references, physeq_aglom)
taxa_names(physeq_aglom) <- cbind(tax_table(physeq_aglom)[,7])
return(physeq_aglom)
}
update_frogs_physeq_with_multi <- function(physeq, multiaff_file){
infos <- readr::read_tsv(file = multiaff_file) %>%
as_tibble() %>%
distinct(`#observation_name`, .keep_all = T) %>%
select(`#observation_name`, blast_taxonomy) %>%
separate(blast_taxonomy, ";(?=[\\S])", into = c("Kingdom","Phylum","Class","Order","Family","Genus","Species")) %>%
#mutate(across(Kingdom:Species, ~ stringr::str_remove(.x, ".:"))) %>%
tibble::column_to_rownames(var = "#observation_name") %>%
as("matrix")
good <- tax_table(subset_taxa(physeq, Species != "Multi-affiliation" & Genus != "Multi-affiliation" & Family != "Multi-affiliation" & Order != "Multi-affiliation" & Class != "Multi-affiliation" & Phylum != "Multi-affiliation" & Kingdom != "Multi-affiliation" )) %>% as("matrix")
#na <- tax_table(subset_taxa(physeq, is.na(Species))) %>% as("matrix")
all_infos <- unique(rbind(infos,good))#,na) # case where two species are identical but are multi-affiliated
tax_table(physeq) <- all_infos
return(physeq)
}
keep_only_interest_species <- function(physeq, true_species){
tax_tab <- tax_table(physeq) %>% as("matrix")
tax_tab <- cbind(tax_tab[ , 1:7],
Status = ifelse(tax_tab[ , "Species"] %in% true_species, "true species", "artefact"))
tax_table(physeq) <- tax_tab
return(physeq)
}
path <- "MEAT/ITS1/ADN"
metadata_file <- file.path(path,"metadata.tsv")
path <- "MEAT"
metadata <- read.table(metadata_file, row.names=1, header=TRUE, sep="\t", stringsAsFactors = FALSE)
real_biom <- file.path("MEAT/ITS1/ADN/real.biom")
real <- create_phyloseq_object(real_biom, "real", metadata, "blast")
taxa_names(real) <- cbind(tax_table(real)[,7])
true_species <- as.character(tax_table(real)[ , "Species"])
tax_tab <- tax_table(real) %>% as("matrix")
tax_tab <- cbind(tax_tab[ , 1:7],
Status = ifelse(tax_tab[ , "Species"] %in% true_species, "true species", "artefact"))
tax_table(real) <- tax_tab
biomfile <- "dada2_frogs.biom"
multiafffile <- "dada2_frogs_multiaff.tsv"
frogs_its1_adn <- create_frogs_object(path,"ITS1", "ADN", biomfile, multiafffile)
frogs_its1_adn <- keep_only_interest_species(frogs_its1_adn, true_species)
frogs_its1_pcr <- create_frogs_object(path,"ITS1", "PCR", biomfile, multiafffile)
frogs_its1_pcr <- keep_only_interest_species(frogs_its1_pcr, true_species)
frogs_its2_adn <- create_frogs_object(path,"ITS2", "ADN", biomfile, multiafffile)
frogs_its2_adn <- keep_only_interest_species(frogs_its2_adn, true_species)
frogs_its2_pcr <- create_frogs_object(path,"ITS2", "PCR", biomfile, multiafffile)
frogs_its2_pcr <- keep_only_interest_species(frogs_its2_pcr, true_species)
frogs_d1d2_adn <- create_frogs_object(path,"D1D2", "ADN", biomfile, multiafffile)
frogs_d1d2_adn <- keep_only_interest_species(frogs_d1d2_adn, true_species)
frogs_d1d2_pcr <- create_frogs_object(path,"D1D2", "PCR", biomfile, multiafffile)
frogs_d1d2_pcr <- keep_only_interest_species(frogs_d1d2_pcr, true_species)
frogs_rpb2_adn <- create_frogs_object(path,"RPB2", "ADN", biomfile, multiafffile)
frogs_rpb2_adn <- keep_only_interest_species(frogs_rpb2_adn, true_species)
frogs_rpb2_pcr <- create_frogs_object(path,"RPB2", "PCR", biomfile, multiafffile)
frogs_rpb2_pcr <- keep_only_interest_species(frogs_rpb2_pcr, true_species)
sample_data(frogs_its1_adn)$Type <- "DNA"
sample_data(frogs_its2_adn)$Type <- "DNA"
sample_data(frogs_d1d2_adn)$Type <- "DNA"
sample_data(frogs_rpb2_adn)$Type <- "DNA"
all <- merge_phyloseq(real, frogs_its1_adn, frogs_its1_pcr, frogs_its2_adn, frogs_its2_pcr, frogs_d1d2_adn, frogs_d1d2_pcr, frogs_rpb2_adn, frogs_rpb2_pcr)
sample_data(all)$Type <- factor(sample_data(all)$Type, levels=c("DNA","PCR"))
sample_data(all)$Marker <- factor(sample_data(all)$Marker, levels=c("ITS1","ITS2","D1D2","RPB2"))
Method <- get_variable(all, "method")
p <- plot_heatmap(
all,
"NMDS", "bray", "Type", taxa.label = "Species",
low="#66CCFF", high="#000033", na.value="white", title = paste0("Relative abundances with ",tool), sample.order = row.names(sample_data(all)), taxa.order = sort(true_species, decreasing=TRUE))
p <- p + facet_grid(~Marker, scales = "free") + guides(fill=guide_legend(title="White: 0"))
cat("\n")
print(p)
cat("\n")
```
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="author" content="Olivier Rué" />
<meta name="date" content="2021-09-22" />
<title>Amplicon comparison (ITS1/ITS2/D1D1/RPB2)</title>
<script src="amplicon_comparison_files/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="amplicon_comparison_files/bootstrap-3.3.5/css/bootstrap.min.css" rel="stylesheet" />
<script src="amplicon_comparison_files/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="amplicon_comparison_files/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="amplicon_comparison_files/bootstrap-3.3.5/shim/respond.min.js"></script>
<style>h1 {font-size: 34px;}
h1.title {font-size: 38px;}
h2 {font-size: 30px;}
h3 {font-size: 24px;}
h4 {font-size: 18px;}
h5 {font-size: 16px;}
h6 {font-size: 12px;}
code {color: inherit; background-color: rgba(0, 0, 0, 0.04);}
pre:not([class]) { background-color: white }</style>
<script src="amplicon_comparison_files/jqueryui-1.11.4/jquery-ui.min.js"></script>
<link href="amplicon_comparison_files/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="amplicon_comparison_files/tocify-1.9.1/jquery.tocify.js"></script>
<script src="amplicon_comparison_files/navigation-1.1/tabsets.js"></script>
<script src="amplicon_comparison_files/navigation-1.1/codefolding.js"></script>
<link href="amplicon_comparison_files/highlightjs-9.12.0/default.css" rel="stylesheet" />
<script src="amplicon_comparison_files/highlightjs-9.12.0/highlight.js"></script>
<style type="text/css">
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>
<style type="text/css">code{white-space: pre;}</style>
<script type="text/javascript">
if (window.hljs) {
hljs.configure({languages: []});
hljs.initHighlightingOnLoad();
if (document.readyState && document.readyState === "complete") {
window.setTimeout(function() { hljs.initHighlighting(); }, 0);
}
}
</script>
<style type = "text/css">
.main-container {
max-width: 940px;
margin-left: auto;
margin-right: auto;
}
img {
max-width:100%;
}
.tabbed-pane {
padding-top: 12px;
}
.html-widget {
margin-bottom: 20px;
}
button.code-folding-btn:focus {
outline: none;
}
summary {
display: list-item;
}
pre code {
padding: 0;
}
</style>
<!-- tabsets -->
<style type="text/css">
.tabset-dropdown > .nav-tabs {
display: inline-table;
max-height: 500px;
min-height: 44px;
overflow-y: auto;
border: 1px solid #ddd;
border-radius: 4px;
}
.tabset-dropdown > .nav-tabs > li.active:before {
content: "";
font-family: 'Glyphicons Halflings';
display: inline-block;
padding: 10px;
border-right: 1px solid #ddd;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open > li.active:before {
content: "&#xe258;";
border: none;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open:before {
content: "";
font-family: 'Glyphicons Halflings';
display: inline-block;
padding: 10px;
border-right: 1px solid #ddd;
}
.tabset-dropdown > .nav-tabs > li.active {
display: block;
}
.tabset-dropdown > .nav-tabs > li > a,
.tabset-dropdown > .nav-tabs > li > a:focus,
.tabset-dropdown > .nav-tabs > li > a:hover {
border: none;
display: inline-block;
border-radius: 4px;
background-color: transparent;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open > li {
display: block;
float: none;
}
.tabset-dropdown > .nav-tabs > li {
display: none;
}
</style>
<!-- code folding -->
<style type="text/css">
.code-folding-btn { margin-bottom: 4px; }
</style>
<style type="text/css">
#TOC {
margin: 25px 0px 20px 0px;
}
@media (max-width: 768px) {
#TOC {
position: relative;
width: 100%;
}
}
@media print {
.toc-content {