Commit f1ec2546 authored by Olivier Rue
Browse files

curation its1 cheese

parent 5a4e2bde
Pipeline #42623 passed with stage
in 13 seconds
......@@ -38,7 +38,7 @@ library(tidyverse)
library(ComplexHeatmap)
library(phyloseqCompanion)
library(circlize)
library(ggVennDiagram)
#library(ggVennDiagram)
library(ggplot2)
```
......@@ -215,8 +215,8 @@ x <- list(
D1D2 = as_tibble(tax_table(physeq_d1d2))$Genus %>% as.vector(),
RPB2 = as_tibble(tax_table(physeq_rpb2))$Genus %>% as.vector()
)
ggVennDiagram(x,label_alpha = 0) +
scale_fill_gradient(low = "#F4FAFE", high = "#4981BF")
#ggVennDiagram(x,label_alpha = 0) +
# scale_fill_gradient(low = "#F4FAFE", high = "#4981BF")
......@@ -248,6 +248,32 @@ scp orue@genologin.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABA
#scp orue@genologin.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/RPB2/multiaff.tsv REAL_CHEESE/RPB2_multiaff.tsv
```
```{bash}
### GET DATA FROM GENOTOUL
# Copy the RPB2 DADA2_FROGS files matching filters.[bf]* into the current
# directory (the steps below read filters.biom and filters.fasta).
# NOTE(review): the other scp commands in this document use the host
# genologin.toulouse.inra.fr; confirm that genotoul.toulouse.inra.fr is intended.
scp orue@genotoul.toulouse.inra.fr:/work/frogsfungi/FungiPubli/Real_mock/METABARFOOD/CHEESE_REAL/results/DADA2_FROGS/RPB2/filters.[bf]* .
##########################
# Four chained otu_filters.py passes; each pass reads the previous pass's
# biom/fasta pair and uses a different --contaminant databank:
#   1) Unite Fungi 8.2   2) SILVA 132 LSU   3) SILVA 138 SSU   4) Unite Euka 8.2
otu_filters.py --input-biom filters.biom --input-fasta filters.fasta --contaminant /db/frogs_databanks/assignation/Unite_Fungi_8.2_20200204/Unite_Fungi_8.2_20200204.fasta --output-biom 1.biom --output-fasta 1.fasta --nb-cpus 4
otu_filters.py --input-biom 1.biom --input-fasta 1.fasta --contaminant /db/frogs_databanks/assignation/SILVA_132_LSU/SILVA_132_LSU.fasta --nb-cpus 4 --output-biom 2.biom --output-fasta 2.fasta
otu_filters.py --input-biom 2.biom --input-fasta 2.fasta --contaminant /db/frogs_databanks/assignation/silva_138_SSU/silva_138_SSU.fasta --nb-cpus 4 --output-biom 3.biom --output-fasta 3.fasta
otu_filters.py --input-biom 3.biom --input-fasta 3.fasta --contaminant /db/frogs_databanks/assignation/Unite_Euka_8.2_20200204/Unite_Euka_8.2_20200204.fasta --output-biom 4.biom --output-fasta 4.fasta --nb-cpus 4
# Export the result of the last filtering pass as TSV.
biom_to_tsv.py --input-biom 4.biom --output-tsv 4.tsv
# Build 4-2.tsv: header line of 4.tsv first, then only the FROGS_combined rows.
head -n 1 4.tsv > 4-2.tsv
# First sed: replace the first "no data" on each line with seven ';'-separated NA.
# Second sed: replace every remaining "no data" with 0.
# grep: keep only the lines containing 'FROGS_combined'.
sed "s/no data/NA;NA;NA;NA;NA;NA;NA/" 4.tsv |sed "s/no data/0/g" | grep 'FROGS_combined' >> 4-2.tsv
# Convert the reduced table back to BIOM.
tsv_to_biom.py --input-tsv 4-2.tsv --output-biom 4-2.biom
# Affiliation filter with a 0.1 minimum blast identity, in --delete mode.
# No output names are given here; the following steps read
# affiliation-filtered.biom / affiliation-filtered.fasta, presumably the
# tool's default output names (TODO confirm).
affiliation_filters.py --input-fasta 4.fasta --input-biom 4-2.biom --min-blast-identity 0.1 --delete
biom_to_tsv.py --input-biom affiliation-filtered.biom --output-tsv affiliation-filtered.tsv
# Affiliate the remaining sequences against D1D2.fasta.
# NOTE(review): the input data come from the RPB2 directory while the
# reference is D1D2.fasta; confirm this is the intended reference.
affiliation_OTU.py --input-fasta affiliation-filtered.fasta --input-biom affiliation-filtered.biom --nb-cpus 8 --output-biom affiliation.biom --summary affiliation.html --reference D1D2.fasta
# Export the new affiliations as TSV for the selection step that follows.
biom_to_tsv.py --input-biom affiliation.biom --output-tsv affiliation.tsv
# Start final.tsv with the header line of 4.tsv.
head -n 1 4.tsv > final.tsv
# Append the rows of affiliation.tsv where column 4 < 80, column 5 > 95 and
# column 8 is longer than 400 characters.
# Fix: the original tested length($8 > 400), i.e. the length of the comparison
# result ("0" or "1"), which is always 1, so the length criterion never
# filtered anything. The parenthesis now closes right after $8.
awk -F'\t' '$4 < 80 && $5 > 95 && length($8) > 400' affiliation.tsv >> final.tsv
# Column 8 of every non-header (non '#') line of final.tsv is the list of
# entries to keep.
awk -F'\t' '!/^#/ { print $8 }' final.tsv > to_keep.lst
# Build the final table: header line of 4.tsv, then every line of
# affiliation-filtered.tsv containing one of the kept entries.
# -F: the entries are literal strings, not regular expressions.
head -n 1 4.tsv > affiliation-filtered-final.tsv
grep -F -f to_keep.lst affiliation-filtered.tsv >> affiliation-filtered-final.tsv
```
## Analysis of raw BIOM files {.tabset}
......@@ -306,8 +332,43 @@ if (!file.exists("REAL_CHEESE/RPB2.rds")) {
}
```
## Manual curation
```{r, eval=T}
# Merge of the four marker datasets before any manual curation.
all <- merge_phyloseq(physeq_its1, physeq_its2, physeq_d1d2, physeq_rpb2)
### ITS1
### Approach: all Meyerozyma guilliermondii ASVs were checked and then removed
### as chimeras; every abundant sequence (>200 reads) whose %id or %cov is
### <95% was checked by blast and alignment (against taxonomically close but
### more abundant ASVs).
t <- tax_table(physeq_its1)

# ASVs (taxa names are the sequences themselves) re-assigned to Galactomyces.
galactomyces_asvs <- c(
  "AAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTATGAATTAAAAATATTTGTGAATTTACCACAACAAACAAAAATACTATAGTCAAAACAAAAATAATTAAAACTTTTAACAATGGATCTCTTGGTTCTC",
  "AAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTATGAATTAATAATATTTGTGAATTTACCACAACAAACAAAAATACTATAGTCAAAACAAAAATAATTAAAACTTTTAACAATGGATCTCTTGGTTCTC",
  "AAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTATGAATTAATAATATTTGTGAATTTACCACAGCAAACAAAAATCATACAATCAAAACAAAAATAATTAAAACTTTTAACAATGGATCTCTTGGTTCTC"
)
# ASV whose species (only) is set to Geotrichum candidum.
geotrichum_asv <- "AAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTATGAATTAATAATATTTGTGAATTTACCACAACAAACATCAATCATACAATCAATAATTAAAAAATTAAAACTTTTAACAATGGATCTCTTGGTTCTC"

# Each ASV row is distinct, so the assignments are independent of each other.
t[galactomyces_asvs, "Genus"] <- "Galactomyces"
t[galactomyces_asvs, "Species"] <- "Galactomyces_sp"
t[geotrichum_asv, "Species"] <- "Geotrichum candidum"

# FROGS_combined ASVs discarded after manual inspection.
to_remove <- c("AAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTATGAATTAATAATATTTGTGAAATTTCAACAAACAACATCAATTTTATAGTCTATTATTTTTAATTAAAACTTTTAACAATGGATCTCTTGGTTCTCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTATGAATTAATAATATTTGTGAAATTTCAACAAACAACATCAATTTTATAGTCTATTATTTTTAATTAAAACTTTTAACAACGGATCTCTTGGTTCTC_FROGS_combined",
"AAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTATGAATTAATAATATTTGTGAAATTTCAACAAACAACATCAATTTTATAGTCTATTATTTTTAATTAAAACTTTTAACAATGGATCTCTTGGTTCTCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTATGAATTAATAATATTTGTGAAATTTCAACAAACAACATCAATTTTATAGTCTATTATTTTTAATTAAAACTTTTAACAATGGATCTCTTGGTTCTT_FROGS_combined",
"AAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTATGAATTAATAATATTTGTGAAATTTCAACAAACAACATCAATTTTATAGTCTATTATTTTTAATTAAAACTTTTAACAATGGATCTCTTGGTTCTCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTATGAATTAATAATATTTGTGAAATTTCAACAAACAACATCAATTTTATAGTCTATTATTCTTAATTAAAACTTTTAACAATGGATCTCTTGGTTCTC_FROGS_combined",
"AAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTATGAATTAATAATATTTGTGAATTTACCACAACAAACAACATCAATTTTATAGTCTATTATTTTTAATTAAAACTTTTAACAATGGATCTCTTGGTTCTCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTATGAATTAATAATATTTGTGAATTTACCACAACAAACATCAATTTTATAGTCTATTATTTTTAATTAAAACTTTTAACAATGGATCTCTTGGTTCTC_FROGS_combined",
"AAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTATTGATTTTAGAATACTCTTGTGGATATTTAATTACATTAACTCATCATATCTTTTAATTATAACTATCAACAACGGATCTCTTGGCTCTCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTATTGATTTTAGAATACTCTTGTGGATATTTAATTACATTAAACTCATCATATCATTTTAATTATAACTATCAACAACGGATCTCTTGGCTCTC_FROGS_combined")

# Write the corrected taxonomy back, then drop the discarded ASVs.
tax_table(physeq_its1) <- t
keptTaxa <- setdiff(taxa_names(physeq_its1), to_remove)
# Fix: the original pruned with `goodTaxa`, which is never defined in this
# chunk, while the `keptTaxa` vector computed just above was left unused.
physeq_its1_final <- prune_taxa(keptTaxa, physeq_its1)
saveRDS(physeq_its1_final, "REAL_CHEESE/ITS1_final.rds")
```
```{r, eval=T}
all <- merge_phyloseq(physeq_its1_final, physeq_its2, physeq_d1d2, physeq_rpb2)
sample_data(all)$Marker <- factor(sample_data(all)$Marker, levels=c("ITS1","ITS2","D1D2","RPB2"))
......@@ -336,7 +397,7 @@ x <- list(
D1D2 = as_tibble(tax_table(physeq_d1d2))$Genus %>% as.vector(),
RPB2 = as_tibble(tax_table(physeq_rpb2))$Genus %>% as.vector()
)
ggVennDiagram(x, show_intersect = T)
#ggVennDiagram(x, show_intersect = T)
```
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment