Commit 46666453 authored by Olivier Rue's avatar Olivier Rue
Browse files

real samples

parent 00af4935
Pipeline #48275 passed with stage
in 16 seconds
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
......@@ -20,7 +20,7 @@ output:
<script src="https://cdnjs.cloudflare.com/ajax/libs/selectize.js/0.12.0/js/standalone/selectize.min.js"></script>
```{r setup, include=FALSE}
knitr::opts_chunk$set(eval=FALSE, echo =T, cache = FALSE, message = FALSE, warning = FALSE, cache.lazy = FALSE, fig.height = 3.5, fig.width = 10.5)
knitr::opts_chunk$set(eval=FALSE, echo =F, cache = FALSE, message = FALSE, warning = FALSE, cache.lazy = FALSE, fig.height = 3.5, fig.width = 10.5)
```
```{r, setup1, echo=FALSE, message = FALSE, warning = FALSE, eval=TRUE}
......@@ -115,14 +115,11 @@ raw_data <- read.csv2("data/metadata_real_reads.tsv" , sep = "\t", stringsAsFact
```{r, eval=T}
dir <- paste("REAL",eco, sep="_")
ecos <- c("MEAT")
```{r, eval=T, echo=F}
ecos <- c("MEAT","WINE","CHEESE")
markers <- c("ITS1","ITS2","D1D2","RPB2")
for(eco in ecos){
dir <- paste("REAL",eco, sep="_")
for(marker in markers){
#if (!file.exists(file.path(dir,paste0(marker,".rds")))) {
biomfile <- file.path(dir,paste0(marker,".biom"))
......@@ -172,1336 +169,58 @@ for(eco in ecos){
}
```
# Sequencing depth {.tabset}
## MEAT {.tabset}
### ITS1
```{r, eval=T}
physeq <- readRDS("REAL_MEAT/ITS1.rds")
physeq_final <- readRDS("REAL_MEAT/ITS1_final.rds")
physeq_all <- merge_phyloseq(physeq, physeq_final)
p <- plot_composition(physeq_all, "Kingdom", "Fungi", "Family", x = "Type") +
scale_y_continuous(expand = expansion(0, 0)) +
scale_x_discrete(expand = expansion(0, 0)) +
facet_grid(~ SampleID)
p$data$Type <- factor(p$data$Type, levels = c("Raw","Curated"))
p
p <- plot_bar(physeq_all, x="Type") + facet_grid(~SampleID)
p$data$Type <- factor(p$data$Type, levels = c("Raw","Curated"))
p
```
### ITS2
```{r, eval=T}
physeq <- readRDS("REAL_MEAT/ITS2.rds")
physeq_final <- readRDS("REAL_MEAT/ITS2_final.rds")
physeq_all <- merge_phyloseq(physeq, physeq_final)
p <- plot_composition(physeq_all, "Kingdom", "Fungi", "Family", x = "Type") +
scale_y_continuous(expand = expansion(0, 0)) +
scale_x_discrete(expand = expansion(0, 0)) +
facet_grid(~ SampleID)
p$data$Type <- factor(p$data$Type, levels = c("Raw","Curated"))
p
p <- plot_bar(physeq_all, x="Type") + facet_grid(~SampleID)
p$data$Type <- factor(p$data$Type, levels = c("Raw","Curated"))
p
```
### D1D2
```{r, eval=T}
physeq <- readRDS("REAL_MEAT/D1D2.rds")
physeq_final <- readRDS("REAL_MEAT/D1D2_final.rds")
physeq_all <- merge_phyloseq(physeq, physeq_final)
p <- plot_composition(physeq, "Kingdom", "Fungi", "Family", x = "Type") +
scale_y_continuous(expand = expansion(0, 0)) +
scale_x_discrete(expand = expansion(0, 0)) +
facet_grid(~ SampleID)
p$data$Type <- factor(p$data$Type, levels = c("Raw","Curated"))
p
p <- plot_composition(physeq_final, "Kingdom", "Fungi", "Family", x = "Type") +
scale_y_continuous(expand = expansion(0, 0)) +
scale_x_discrete(expand = expansion(0, 0)) +
facet_grid(~ SampleID)
p$data$Type <- factor(p$data$Type, levels = c("Raw","Curated"))
p
p <- plot_composition(physeq_all, "Kingdom", "Fungi", "Family", x = "Type") +
scale_y_continuous(expand = expansion(0, 0)) +
scale_x_discrete(expand = expansion(0, 0)) +
facet_grid(~ SampleID)
p$data$Type <- factor(p$data$Type, levels = c("Raw","Curated"))
p
p <- plot_bar(physeq_all, x="Type") + facet_grid(~SampleID)
p$data$Type <- factor(p$data$Type, levels = c("Raw","Curated"))
p
```
### RPB2
```{r, eval=T}
physeq <- readRDS("REAL_MEAT/RPB2.rds")
physeq_final <- readRDS("REAL_MEAT/RPB2_final.rds")
physeq_all <- merge_phyloseq(physeq, physeq_final)
p <- plot_composition(physeq_all, "Kingdom", "Fungi", "Family", x = "Type") +
scale_y_continuous(expand = expansion(0, 0)) +
scale_x_discrete(expand = expansion(0, 0)) +
facet_grid(~ SampleID)
p$data$Type <- factor(p$data$Type, levels = c("Raw","Curated"))
p
p <- plot_bar(physeq_all, x="Type") + facet_grid(~SampleID)
p$data$Type <- factor(p$data$Type, levels = c("Raw","Curated"))
p
```
```{r}
#if (!file.exists(file.path(dir,paste0(marker,".rds")))) {
biomfile <- file.path(dir,paste0(marker,".biom"))
physeq <- import_frogs(biomfile, taxMethod = "blast")
tax_table(physeq) <- gsub(tax_table(physeq), pattern = "[a-z]__", replacement = "")
metadata <- read.table(file.path(dir,"metadata.tsv"), row.names = 1, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
sample_data(physeq) <- metadata
sample_data(physeq)$Marker <- marker
sample_data(physeq)$Type <- "Raw"
sample_data(physeq)$Reads <- raw_data %>% filter(Ecosystem == eco) %>% filter(Marker == marker) %>% pull(Reads)
sample_names(physeq) <- glue::glue(paste("{sample_names(physeq)}",marker,"RAW",sep="_"))
saveRDS(physeq,file.path(dir,paste0(marker,".rds")))
if(eco == "MEAT"){
if(marker == "ITS1") physeq_final <- curation_meat_its1(physeq)
if(marker == "ITS2") physeq_final <- curation_meat_its2(physeq)
if(marker == "D1D2") physeq_final <- curation_meat_d1d2(physeq)
if(marker == "RPB2") physeq_final <- curation_meat_rpb2(physeq)
}else if(eco == "CHEESE"){
if(marker == "ITS1") physeq_final <- curation_cheese_its1(physeq)
if(marker == "ITS2") physeq_final <- curation_cheese_its2(physeq)
if(marker == "D1D2") physeq_final <- curation_cheese_d1d2(physeq)
if(marker == "RPB2") physeq_final <- curation_cheese_rpb2(physeq)
}else if(eco == "BREAD"){
if(marker == "ITS1") physeq_final <- curation_bread_its1(physeq)
if(marker == "ITS2") physeq_final <- curation_bread_its2(physeq)
if(marker == "D1D2") physeq_final <- curation_bread_d1d2(physeq)
if(marker == "RPB2") physeq_final <- curation_bread_rpb2(physeq)
}else if(eco == "WINE"){
if(marker == "ITS1") physeq_final <- curation_wine_its1(physeq)
if(marker == "ITS2") physeq_final <- curation_wine_its2(physeq)
if(marker == "D1D2") physeq_final <- curation_wine_d1d2(physeq)
if(marker == "RPB2") physeq_final <- curation_wine_rpb2(physeq)
}
if(marker == "ITS1"){
physeq_final <- curation_meat_its1(physeq)
```{r, eval=T, results="asis"}
ecos <- c("MEAT","CHEESE","WINE")#,"WINE","CHEESE")
markers <- c("ITS1","ITS2","D1D2","RPB2")
for(eco in ecos){
dir <- paste("REAL",eco, sep="_")
cat("\n## ",eco," {.tabset}\n\n")
for(marker in markers){
cat("\n### ", marker, "\n\n")
physeq <- readRDS(file.path(dir,paste0(marker,".rds")))
physeq_final <- readRDS(file.path(dir,paste0(marker,"_final.rds")))
taxa_names(physeq) <- paste(taxa_names(physeq), "2", sep = "_")
physeq_all <- merge_phyloseq(physeq_final, physeq)
cat("\n**Abundances** \n")
p <- plot_bar(physeq_all, x="Type") + facet_grid(~SampleID)
p$data$Type <- factor(p$data$Type, levels = c("Raw","Curated"))
print(p)
cat(" \n")
cat("\n**Compositions** \n")
p <- plot_composition(physeq_all, "Kingdom", "Fungi", "Family", x = "Type") +
scale_y_continuous(expand = expansion(0, 0)) +
scale_x_discrete(expand = expansion(0, 0)) +
facet_grid(~ SampleID)
p$data$Type <- factor(p$data$Type, levels = c("Raw","Curated"))
print(p)
cat("\n")
p <- plot_composition(physeq_all, "Kingdom", "Fungi", "Genus", x = "Type") +
scale_y_continuous(expand = expansion(0, 0)) +
scale_x_discrete(expand = expansion(0, 0)) +
facet_grid(~ SampleID)
p$data$Type <- factor(p$data$Type, levels = c("Raw","Curated"))
print(p)
cat("\n")
cat("\n**Alpha-diversity** \n")
p <- plot_richness(physeq = physeq_all, x="SampleID", color = "Type", shape = NULL, title = "Alpha diversity graphics", measures = c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson"))
p$data$Type <- factor(p$data$Type, levels = c("Raw","Curated"))
p <- p + geom_boxplot() + NULL
print(p)
cat("\n")
#cat("\n#### beta-diversity \n")
#beta.dist <- distance(physeq_all, method = "bray")
#ord <- ordinate(physeq_all, method = "MDS", distance = beta.dist)
#p <- plot_ordination(physeq = physeq_all, ordination = ord, type = "samples", axes = c(1, 2), color = "SampleID", shape = NULL, label = NULL, title = "Samples ordination graphic, bray-curtis distance")
#p <- p + theme_bw()
#print(p)
#cat("\n")
}
sample_data(physeq_final)$Type <- "Curated"
sample_data(physeq_final)$Reads <- sample_sums(physeq_final)
sample_names(physeq_final) <- glue::glue(paste("{sample_data(physeq)$Sample}",marker,"CURATED", sep="_"))
saveRDS(physeq_final,file.path(dir,paste0(marker,"_final.rds")))
#} else {
# physeq_its1 <- readRDS("REAL_MEAT/ITS1.rds")
#}
#if (!file.exists("REAL_MEAT/ITS1_final.rds")) {
physeq_its1_final <- curation_meat_its1(physeq_its1)
sample_data(physeq_its1_final)$Type <- "Curated"
sample_data(physeq_its1_final)$Reads <- sample_sums(physeq_its1_final)
sample_names(physeq_its1_final) <- glue::glue(paste("{sample_data(physeq_its1)$Sample}","ITS1","CURATED", sep="_"))
saveRDS(physeq_its1_final,"REAL_MEAT/ITS1_final.rds")
#} else {
# physeq_its1_final <- readRDS("REAL_MEAT/ITS1_final.rds")
#}
#if (!file.exists("REAL_MEAT/ITS2.rds")) {
biomfile <- "REAL_MEAT/ITS2.biom"
physeq_its2 <- import_frogs(biomfile, taxMethod = "blast")
tax_table(physeq_its2) <- gsub(tax_table(physeq_its2), pattern = "[a-z]__", replacement = "")
metadata <- read.table("REAL_MEAT/metadata.tsv", row.names = 1, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
sample_data(physeq_its2) <- metadata
sample_data(physeq_its2)$Marker <- "ITS2"
sample_data(physeq_its2)$Type <- "Raw"
sample_data(physeq_its2)$Reads <- raw_data %>% filter(Ecosystem == "MEAT") %>% filter(Marker == "ITS2") %>% pull(Reads)
sample_names(physeq_its2) <- glue::glue(paste("{sample_names(physeq_its2)}","ITS2", sep="_"))
saveRDS(physeq_its2,"REAL_MEAT/ITS2.rds")
#} else {
#physeq_its2 <- readRDS("REAL_MEAT/ITS2.rds")
#}
#if (!file.exists("REAL_MEAT/ITS2_final.rds")) {
physeq_its2_final <- curation_meat_its2(physeq_its2)
sample_data(physeq_its2_final)$Type <- "Curated"
sample_data(physeq_its2_final)$Reads <- sample_sums(physeq_its2_final)
sample_names(physeq_its2_final) <- glue::glue(paste("{sample_data(physeq_its2)$Sample}","ITS2","CURATED", sep="_"))
saveRDS(physeq_its2_final,"REAL_MEAT/ITS2_final.rds")
#} else {
# physeq_its2_final <- readRDS("REAL_MEAT/ITS2_final.rds")
#}
#if (!file.exists("REAL_MEAT/D1D2.rds")) {
biomfile <- "REAL_MEAT/D1D2.biom"
physeq_d1d2 <- import_frogs(biomfile, taxMethod = "blast")
tax_table(physeq_d1d2) <- gsub(tax_table(physeq_d1d2), pattern = "[a-z]__", replacement = "")
tax_table(physeq_d1d2) <- gsub(tax_table(physeq_d1d2), pattern = "Eukaryota", replacement = "Fungi")
metadata <- read.table("REAL_MEAT/metadata.tsv", row.names = 1, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
sample_data(physeq_d1d2) <- metadata
sample_data(physeq_d1d2)$Marker <- "D1D2"
sample_data(physeq_d1d2)$Type <- "Raw"
sample_data(physeq_d1d2)$Reads <- raw_data %>% filter(Ecosystem == "MEAT") %>% filter(Marker == "D1D2") %>% pull(Reads)
sample_names(physeq_d1d2) <- glue::glue(paste("{sample_names(physeq_d1d2)}","D1D2", sep="_"))
saveRDS(physeq_d1d2,"REAL_MEAT/D1D2.rds")
#} else {
# physeq_d1d2 <- readRDS("REAL_MEAT/D1D2.rds")
#}
#if (!file.exists("REAL_MEAT/RPB2.rds")) {
biomfile <- "REAL_MEAT/RPB2.biom"
physeq_rpb2 <- import_frogs(biomfile, taxMethod = "blast")
tax_table(physeq_rpb2) <- gsub(tax_table(physeq_rpb2), pattern = "[a-z]__", replacement = "")
metadata <- read.table("REAL_MEAT/metadata.tsv", row.names = 1, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
sample_data(physeq_rpb2) <- metadata
sample_data(physeq_rpb2)$Marker <- "RPB2"
sample_data(physeq_rpb2)$Type <- "Raw"
sample_data(physeq_rpb2)$Reads <- raw_data %>% filter(Ecosystem == "MEAT") %>% filter(Marker == "RPB2") %>% pull(Reads)
sample_names(physeq_rpb2) <- glue::glue(paste("{sample_names(physeq_rpb2)}","RPB2", sep="_"))
saveRDS(physeq_rpb2,"REAL_MEAT/RPB2.rds")
#} else {
# physeq_rpb2 <- readRDS("REAL_MEAT/RPB2.rds")
#}
```
```{r, eval=T}
#if (!file.exists("REAL_MEAT/meat.rds")){
meat_all <- merge_phyloseq(physeq_its1, physeq_its2, physeq_d1d2, physeq_rpb2)
sample_data(meat_all)$Marker <- factor(sample_data(meat_all)$Marker, levels=c("ITS1","ITS2","D1D2","RPB2"))
saveRDS(meat_all,"REAL_MEAT/meat.rds")
#}else{
# meat_all <- readRDS("REAL_MEAT/meat.rds")
#}
```
### Sequencing depth {.tabset}
```{r, eval=T}
df <- sample_data(meat_all) %>% as("data.frame") %>%
as_tibble(rownames = "SampleID") %>%
mutate(Final = sample_sums(meat_all)) %>%
rename(Initial = Reads)
ggplot(df %>% pivot_longer(cols = c(Initial, Final), names_to = "Step", values_to = "Reads"),
aes(x = SampleID, y = Reads, fill=Step)) +
geom_bar(stat='identity', position='identity') +
facet_grid(~Marker, scales = "free_x") +
scale_fill_brewer(palette = "Reds", direction = -1) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
```
### Compositions
```{r, eval=T}
p <- plot_composition(physeq = meat_all, taxaRank1 = "Kingdom", taxaSet1 = "Fungi", taxaRank2 = "Species", numberOfTaxa = 22, x = "Sample")
p + facet_grid(". ~ Marker", scales = "free_x", space = "free")
```
### Richness
```{r, eval=T}
p <- plot_richness(physeq = meat_all, x = "Marker", color = "Marker", shape = NULL, title = "Alpha diversity graphics", measures = c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson"))
p + geom_boxplot() + NULL
```
### $\beta$ diversity
```{r, eval=T}
beta.dist <- distance(meat_all, method = "bray")
ord <- ordinate(meat_all, method = "MDS", distance = beta.dist)
p <- plot_ordination(physeq = meat_all, ordination = ord, type = "samples", axes = c(1, 2), color = "Marker", shape = NULL, label = NULL, title = "Samples ordination graphic, bray-curtis distance")
p <- p + stat_ellipse(aes_string(group = "Marker"))
p + theme_bw()
```
### Common families
```{r, eval=T}
x <- list(
ITS1 = as_tibble(tax_table(physeq_its1))$Family %>% as.vector(),
ITS2 = as_tibble(tax_table(physeq_its2))$Family %>% as.vector(),
D1D2 = as_tibble(tax_table(physeq_d1d2))$Family %>% as.vector(),
RPB2 = as_tibble(tax_table(physeq_rpb2))$Family %>% as.vector()
)
ggVennDiagram(x,label_alpha = 0) +
scale_fill_gradient(low = "#F4FAFE", high = "#4981BF")
```
### Common genus
```{r, eval=T}
x <- list(
ITS1 = as_tibble(tax_table(physeq_its1))$Genus %>% as.vector(),
ITS2 = as_tibble(tax_table(physeq_its2))$Genus %>% as.vector(),
D1D2 = as_tibble(tax_table(physeq_d1d2))$Genus %>% as.vector(),
RPB2 = as_tibble(tax_table(physeq_rpb2))$Genus %>% as.vector()
)
ggVennDiagram(x,label_alpha = 0) +
scale_fill_gradient(low = "#F4FAFE", high = "#4981BF")
```
## Manual curation {.tabset}
```{r, eval=T}
physeq_its1_final <- curation_meat_its1(physeq_its1)
sample_data(physeq_its1_final)$Type <- "Curated"
sample_data(physeq_its1_final)$Reads <- sample_sums(physeq_its1_final)
sample_names(physeq_its1_final) <- glue::glue(paste("{sample_data(physeq_its1)$Sample}","ITS1","CURATED", sep="_"))
toto <- merge_phyloseq(physeq_its1, physeq_its1_final)
p <- plot_composition(physeq = toto, taxaRank1 = "Kingdom", taxaSet1 = "Fungi", taxaRank2 = "Species", numberOfTaxa = 22, x = "Sample")
p <- p + facet_grid(". ~ Sample", scales = "free_x", space = "free")
p + theme(legend.position="bottom")
p
saveRDS(physeq_its1_final,"REAL_MEAT/ITS1_final.rds")
physeq_its2_final <- curation_meat_its2(physeq_its2)
saveRDS(physeq_its2_final,"REAL_MEAT/ITS2_final.rds")
physeq_d1d2_final <- curation_meat_d1d2(physeq_d1d2)
saveRDS(physeq_d1d2_final,"REAL_MEAT/D1D2_final.rds")
physeq_rpb2_final <- curation_meat_rpb2(physeq_rpb2)
saveRDS(physeq_rpb2_final,"REAL_MEAT/RPB2_final.rds")
```
```{r, eval=T}
#if (!file.exists("REAL_MEAT/meat_final.rds")){
meat_all_final <- merge_phyloseq(physeq_its1_final, physeq_its2_final, physeq_d1d2_final, physeq_rpb2_final)
sample_data(meat_all_final)$Marker <- factor(sample_data(meat_all_final)$Marker, levels=c("ITS1","ITS2","D1D2","RPB2"))
saveRDS(meat_all_final,"REAL_MEAT/meat_final.rds")
#}else{
# meat_all_final <- readRDS("REAL_MEAT/meat_final.rds")
#}
```
### Sequencing depth {.tabset}
```{r, eval=T}
df <- sample_data(meat_all_final) %>% as("data.frame") %>%
as_tibble(rownames = "SampleID") %>%
mutate(Final = sample_sums(meat_all_final)) %>%
rename(Initial = Reads)
ggplot(df %>% pivot_longer(cols = c(Initial, Final), names_to = "Step", values_to = "Reads"),
aes(x = SampleID, y = Reads, fill=Step)) +
geom_bar(stat='identity', position='identity') +
facet_grid(~Marker, scales = "free_x") +
scale_fill_brewer(palette = "Reds", direction = -1) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
```
### Compositions
```{r, eval=T}
p <- plot_composition(physeq = meat_all_final, taxaRank1 = "Kingdom", taxaSet1 = "Fungi", taxaRank2 = "Species", numberOfTaxa = 22, x = "Sample")
p <- p + facet_grid(". ~ Marker", scales = "free_x", space = "free")
p + theme(legend.position="bottom")
```
### Richness
```{r, eval=T}
p <- plot_richness(physeq = meat_all_final, x = "Marker", color = "Marker", shape = NULL, title = "Alpha diversity graphics", measures = c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson"))
p + geom_boxplot() + NULL
```
### $\beta$ diversity
```{r, eval=T}
beta.dist <- distance(meat_all_final, method = "bray")
ord <- ordinate(meat_all_final, method = "MDS", distance = beta.dist)
p <- plot_ordination(physeq = meat_all_final, ordination = ord, type = "samples", axes = c(1, 2), color = "Marker", shape = NULL, label = NULL, title = "Samples ordination graphic, bray-curtis distance")
p <- p + stat_ellipse(aes_string(group = "Marker"))
p + theme_bw()
```
### Common families
```{r, eval=T}
x <- list(
ITS1 = as_tibble(tax_table(physeq_its1_final))$Family %>% as.vector(),
ITS2 = as_tibble(tax_table(physeq_its2_final))$Family %>% as.vector(),
D1D2 = as_tibble(tax_table(physeq_d1d2_final))$Family %>% as.vector(),
RPB2 = as_tibble(tax_table(physeq_rpb2_final))$Family %>% as.vector()
)
ggVennDiagram(x,label_alpha = 0) +
scale_fill_gradient(low = "#F4FAFE", high = "#4981BF")
```
### Common genus
```{r, eval=T}
x <- list(
ITS1 = as_tibble(tax_table(physeq_its1_final))$Genus %>% as.vector(),
ITS2 = as_tibble(tax_table(physeq_its2_final))$Genus %>% as.vector(),
D1D2 = as_tibble(tax_table(physeq_d1d2_final))$Genus %>% as.vector(),
RPB2 = as_tibble(tax_table(physeq_rpb2_final))$Genus %>% as.vector()
)
ggVennDiagram(x,label_alpha = 0) +
scale_fill_gradient(low = "#F4FAFE", high = "#4981BF")
```
# CHEESE
```{bash, eval=F}
# Genologin
cd /work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/scripts
sh dada2_frogs.sh
```
```{bash, eval=F}
scp orue@genologin.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/ITS1/affiliation.biom REAL_CHEESE/ITS1.biom
scp orue@genologin.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/ITS2/affiliation.biom REAL_CHEESE/ITS2.biom
scp orue@genologin.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/D1D2/affiliation.biom REAL_CHEESE/D1D2.biom
scp orue@genologin.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/RPB2/affiliation.biom REAL_CHEESE/RPB2.biom
scp orue@genologin.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/ITS1/affiliation.tsv REAL_CHEESE/ITS1.tsv
scp orue@genologin.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/ITS2/affiliation.tsv REAL_CHEESE/ITS2.tsv
scp orue@genologin.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/D1D2/affiliation.tsv REAL_CHEESE/D1D2.tsv
scp orue@genologin.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/RPB2/affiliation.tsv REAL_CHEESE/RPB2.tsv
#scp orue@genologin.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/ITS1/multiaff.tsv REAL_CHEESE/ITS1_multiaff.tsv
#scp orue@genologin.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/ITS2/multiaff.tsv REAL_CHEESE/ITS2_multiaff.tsv
#scp orue@genologin.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/D1D2/multiaff.tsv REAL_CHEESE/D1D2_multiaff.tsv
#scp orue@genologin.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/RPB2/multiaff.tsv REAL_CHEESE/RPB2_multiaff.tsv
```
```{bash}
### GET DATA FROM GENOTOUL
scp orue@genotoul.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/RPB2/affiliation.biom .
scp orue@genotoul.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/RPB2/filters.fasta .
##########################
otu_filters.py --input-biom affiliation.biom --input-fasta filters.fasta --contaminant /db/frogs_databanks/assignation/Unite_Fungi_8.2_20200204/Unite_Fungi_8.2_20200204.fasta --output-biom 1.biom --output-fasta 1.fasta --nb-cpus 4
otu_filters.py --input-biom 1.biom --input-fasta 1.fasta --contaminant /db/frogs_databanks/assignation/SILVA_132_LSU/SILVA_132_LSU.fasta --nb-cpus 4 --output-biom 2.biom --output-fasta 2.fasta
otu_filters.py --input-biom 2.biom --input-fasta 2.fasta --contaminant /db/frogs_databanks/assignation/silva_138_SSU/silva_138_SSU.fasta --nb-cpus 4 --output-biom 3.biom --output-fasta 3.fasta
otu_filters.py --input-biom 3.biom --input-fasta 3.fasta --contaminant /db/frogs_databanks/assignation/Unite_Euka_8.2_20200204/Unite_Euka_8.2_20200204.fasta --output-biom 4.biom --output-fasta 4.fasta --nb-cpus 4
biom_to_tsv.py --input-biom 4.biom --output-tsv 4.tsv
head -n 1 4.tsv > 4-2.tsv
sed "s/no data/NA;NA;NA;NA;NA;NA;NA/" 4.tsv |sed "s/no data/0/g" | grep 'FROGS_combined' >> 4-2.tsv
tsv_to_biom.py --input-tsv 4-2.tsv --output-biom 4-2.biom
affiliation_filters.py --input-fasta 4.fasta --input-biom 4-2.biom --min-blast-identity 0.1 --delete
biom_to_tsv.py --input-biom affiliation-filtered.biom --output-tsv affiliation-filtered.tsv
affiliation_OTU.py --input-fasta affiliation-filtered.fasta --input-biom affiliation-filtered.biom --nb-cpus 8 --output-biom affiliation.biom --summary affiliation.html --reference ~/work/D1D2.fasta
biom_to_tsv.py --input-biom affiliation.biom --output-tsv affiliation.tsv
head -n 1 4.tsv > final.tsv
awk -F'\t' '{ if ($4 < 80 && $5 > 95 && length($8 > 400)) { print } }' affiliation.tsv >> final.tsv
awk -F'\t' '{print $8}' <(grep -v "^#" final.tsv) > to_keep.lst
head -n 1 4.tsv > affiliation-filtered-final.tsv
grep -f to_keep.lst affiliation-filtered.tsv >> affiliation-filtered-final.tsv
# pour avoir les séquences à rajouter à to_remove :
grep -v -f to_keep.lst affiliation.tsv | grep -v "^#" | awk -F'\t' '{print $8}'
```
## Analysis of raw BIOM files {.tabset}
```{r, eval=T}
if (!file.exists("REAL_CHEESE/ITS1.rds")) {
biomfile <- "REAL_CHEESE/ITS1.biom"
physeq_its1 <- import_frogs(biomfile, taxMethod = "blast")
tax_table(physeq_its1) <- gsub(tax_table(physeq_its1), pattern = "[a-z]__", replacement = "")
metadata <- read.table("REAL_CHEESE/metadata.tsv", row.names = 1, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
sample_data(physeq_its1) <- metadata
sample_data(physeq_its1)$Marker <- "ITS1"
sample_data(physeq_its1)$Reads <- raw_data %>% filter(Ecosystem == "CHEESE") %>% filter(Marker == "ITS1") %>% pull(Reads)
sample_names(physeq_its1) <- glue::glue(paste("{sample_names(physeq_its1)}","ITS1", sep="_"))
saveRDS(physeq_its1,"REAL_CHEESE/ITS1.rds")
} else {
physeq_its1 <- readRDS("REAL_CHEESE/ITS1.rds")
}
if (!file.exists("REAL_CHEESE/ITS2.rds")) {
biomfile <- "REAL_CHEESE/ITS2.biom"
physeq_its2 <- import_frogs(biomfile, taxMethod = "blast")
tax_table(physeq_its2) <- gsub(tax_table(physeq_its2), pattern = "[a-z]__", replacement = "")
metadata <- read.table("REAL_CHEESE/metadata.tsv", row.names = 1, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
sample_data(physeq_its2) <- metadata
sample_data(physeq_its2)$Marker <- "ITS2"
sample_data(physeq_its2)$Reads <- raw_data %>% filter(Ecosystem == "CHEESE") %>% filter(Marker == "ITS2") %>% pull(Reads)
sample_names(physeq_its2) <- glue::glue(paste("{sample_names(physeq_its2)}","ITS2", sep="_"))
saveRDS(physeq_its2,"REAL_CHEESE/ITS2.rds")
} else {
physeq_its2 <- readRDS("REAL_CHEESE/ITS2.rds")
}
if (!file.exists("REAL_CHEESE/D1D2.rds")) {
biomfile <- "REAL_CHEESE/D1D2.biom"
physeq_d1d2 <- import_frogs(biomfile, taxMethod = "blast")
tax_table(physeq_d1d2) <- gsub(tax_table(physeq_d1d2), pattern = "[a-z]__", replacement = "")
tax_table(physeq_d1d2) <- gsub(tax_table(physeq_d1d2), pattern = "Eukaryota", replacement = "Fungi")
metadata <- read.table("REAL_CHEESE/metadata.tsv", row.names = 1, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
sample_data(physeq_d1d2) <- metadata
sample_data(physeq_d1d2)$Marker <- "D1D2"
sample_data(physeq_d1d2)$Reads <- raw_data %>% filter(Ecosystem == "CHEESE") %>% filter(Marker == "D1D2") %>% pull(Reads)
sample_names(physeq_d1d2) <- glue::glue(paste("{sample_names(physeq_d1d2)}","D1D2", sep="_"))
saveRDS(physeq_d1d2,"REAL_CHEESE/D1D2.rds")
} else {
physeq_d1d2 <- readRDS("REAL_CHEESE/D1D2.rds")
}
if (!file.exists("REAL_CHEESE/RPB2.rds")) {
biomfile <- "REAL_CHEESE/RPB2.biom"
physeq_rpb2 <- import_frogs(biomfile, taxMethod = "blast")
tax_table(physeq_rpb2) <- gsub(tax_table(physeq_rpb2), pattern = "[a-z]__", replacement = "")
metadata <- read.table("REAL_CHEESE/metadata.tsv", row.names = 1, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
sample_data(physeq_rpb2) <- metadata
sample_data(physeq_rpb2)$Marker <- "RPB2"
sample_data(physeq_rpb2)$Reads <- raw_data %>% filter(Ecosystem == "CHEESE") %>% filter(Marker == "RPB2") %>% pull(Reads)
sample_names(physeq_rpb2) <- glue::glue(paste("{sample_names(physeq_rpb2)}","RPB2", sep="_"))
saveRDS(physeq_rpb2,"REAL_CHEESE/RPB2.rds")
} else {
physeq_rpb2 <- readRDS("REAL_CHEESE/RPB2.rds")
}
```
```{r, eval=T}
if (!file.exists("REAL_CHEESE/cheese.rds")){
cheese_all <- merge_phyloseq(physeq_its1, physeq_its2, physeq_d1d2, physeq_rpb2)
sample_data(cheese_all)$Marker <- factor(sample_data(cheese_all)$Marker, levels=c("ITS1","ITS2","D1D2","RPB2"))
saveRDS(cheese_all,"REAL_CHEESE/cheese.rds")
}else{
cheese_all <- readRDS("REAL_CHEESE/cheese.rds")
}
```
### Sequencing depth {.tabset}
```{r, eval=T}
df <- sample_data(cheese_all) %>% as("data.frame") %>%
as_tibble(rownames = "SampleID") %>%
mutate(Final = sample_sums(cheese_all)) %>%
rename(Initial = Reads)
ggplot(df %>% pivot_longer(cols = c(Initial, Final), names_to = "Step", values_to = "Reads"),
aes(x = SampleID, y = Reads, fill=Step)) +
geom_bar(stat='identity', position='identity') +
facet_grid(~Marker, scales = "free_x") +
scale_fill_brewer(palette = "Reds", direction = -1) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))