Commit 13ee9457 authored by Olivier Rue
Browse files

curation

parent 1477125a
Pipeline #45011 passed with stage
in 24 seconds
This diff is collapsed.
...@@ -85,12 +85,15 @@ otu_filters.py --input-biom affiliation.biom --input-fasta filters.fasta --conta ...@@ -85,12 +85,15 @@ otu_filters.py --input-biom affiliation.biom --input-fasta filters.fasta --conta
otu_filters.py --input-biom 1.biom --input-fasta 1.fasta --contaminant /db/frogs_databanks/assignation/SILVA_132_LSU/SILVA_132_LSU.fasta --nb-cpus 4 --output-biom 2.biom --output-fasta 2.fasta otu_filters.py --input-biom 1.biom --input-fasta 1.fasta --contaminant /db/frogs_databanks/assignation/SILVA_132_LSU/SILVA_132_LSU.fasta --nb-cpus 4 --output-biom 2.biom --output-fasta 2.fasta
otu_filters.py --input-biom 2.biom --input-fasta 2.fasta --contaminant /db/frogs_databanks/assignation/silva_138_SSU/silva_138_SSU.fasta --nb-cpus 4 --output-biom 3.biom --output-fasta 3.fasta otu_filters.py --input-biom 2.biom --input-fasta 2.fasta --contaminant /db/frogs_databanks/assignation/silva_138_SSU/silva_138_SSU.fasta --nb-cpus 4 --output-biom 3.biom --output-fasta 3.fasta
otu_filters.py --input-biom 3.biom --input-fasta 3.fasta --contaminant /db/frogs_databanks/assignation/Unite_Euka_8.2_20200204/Unite_Euka_8.2_20200204.fasta --output-biom 4.biom --output-fasta 4.fasta --nb-cpus 4 otu_filters.py --input-biom 3.biom --input-fasta 3.fasta --contaminant /db/frogs_databanks/assignation/Unite_Euka_8.2_20200204/Unite_Euka_8.2_20200204.fasta --output-biom 4.biom --output-fasta 4.fasta --nb-cpus 4
biom_to_tsv.py --input-biom 4.biom --output-tsv 4.tsv
biom_to_tsv.py --input-biom 4.biom --output-tsv 4.tsv --input-fasta 4.fasta
head -n 1 4.tsv > 4-2.tsv head -n 1 4.tsv > 4-2.tsv
sed "s/no data/NA;NA;NA;NA;NA;NA;NA/" 4.tsv |sed "s/no data/0/g" | grep 'FROGS_combined' >> 4-2.tsv sed "s/no data/NA;NA;NA;NA;NA;NA;NA/" 4.tsv |sed "s/no data/0/g" | grep 'FROGS_combined' >> 4-2.tsv
tsv_to_biom.py --input-tsv 4-2.tsv --output-biom 4-2.biom tsv_to_biom.py --input-tsv 4-2.tsv --output-biom 4-2.biom --output-fasta 4-2.fasta
affiliation_filters.py --input-fasta 4.fasta --input-biom 4-2.biom --min-blast-identity 0.1 --delete
affiliation_filters.py --input-fasta 4-2.fasta --input-biom 4-2.biom --min-blast-identity 0.1 --delete
biom_to_tsv.py --input-biom affiliation-filtered.biom --output-tsv affiliation-filtered.tsv biom_to_tsv.py --input-biom affiliation-filtered.biom --output-tsv affiliation-filtered.tsv
affiliation_OTU.py --input-fasta affiliation-filtered.fasta --input-biom affiliation-filtered.biom --nb-cpus 8 --output-biom affiliation.biom --summary affiliation.html --reference D1D2.fasta affiliation_OTU.py --input-fasta affiliation-filtered.fasta --input-biom affiliation-filtered.biom --nb-cpus 8 --output-biom affiliation.biom --summary affiliation.html --reference D1D2.fasta
biom_to_tsv.py --input-biom affiliation.biom --output-tsv affiliation.tsv biom_to_tsv.py --input-biom affiliation.biom --output-tsv affiliation.tsv
...@@ -254,29 +257,37 @@ ggVennDiagram(x,label_alpha = 0) + ...@@ -254,29 +257,37 @@ ggVennDiagram(x,label_alpha = 0) +
## Manual curation {.tabset} ## Manual curation {.tabset}
```{r, eval=F}
tax_table(physeq_its1) <- t ```{r, eval=T}
keptTaxa <- setdiff(taxa_names(physeq_its1), to_remove) physeq_its1_final <- curation_meat_its1(physeq_its1)
physeq_its1_final <- prune_taxa(keptTaxa, physeq_its1)
saveRDS(physeq_its1_final,"REAL_MEAT/ITS1_final.rds") saveRDS(physeq_its1_final,"REAL_MEAT/ITS1_final.rds")
physeq_its2_final <- curation_meat_its2(physeq_its2)
saveRDS(physeq_its2_final,"REAL_MEAT/ITS2_final.rds")
physeq_d1d2_final <- curation_meat_d1d2(physeq_d1d2)
saveRDS(physeq_d1d2_final,"REAL_MEAT/D1D2_final.rds")
physeq_rpb2_final <- curation_meat_rpb2(physeq_rpb2)
saveRDS(physeq_rpb2_final,"REAL_MEAT/RPB2_final.rds")
``` ```
```{r, eval=F} ```{r, eval=T}
if (!file.exists("REAL_MEAT/meat_final.rds")){ #if (!file.exists("REAL_MEAT/meat_final.rds")){
meat_all_final <- merge_phyloseq(physeq_its1_final, physeq_its2, physeq_d1d2, physeq_rpb2) meat_all_final <- merge_phyloseq(physeq_its1_final, physeq_its2_final, physeq_d1d2_final, physeq_rpb2_final)
sample_data(meat_all_final)$Marker <- factor(sample_data(meat_all_final)$Marker, levels=c("ITS1","ITS2","D1D2","RPB2")) sample_data(meat_all_final)$Marker <- factor(sample_data(meat_all_final)$Marker, levels=c("ITS1","ITS2","D1D2","RPB2"))
saveRDS(meat_all_final,"REAL_MEAT/meat_final.rds") saveRDS(meat_all_final,"REAL_CHEESE/meat_final.rds")
}else{ #}else{
meat_all_final <- readRDS("REAL_MEAT/meat_final.rds") # meat_all_final <- readRDS("REAL_MEAT/meat_final.rds")
} #}
``` ```
### Sequencing depth {.tabset} ### Sequencing depth {.tabset}
```{r, eval=F} ```{r, eval=T}
df <- sample_data(meat_all_final) %>% as("data.frame") %>% df <- sample_data(meat_all_final) %>% as("data.frame") %>%
as_tibble(rownames = "SampleID") %>% as_tibble(rownames = "SampleID") %>%
mutate(Final = sample_sums(meat_all_final)) %>% mutate(Final = sample_sums(meat_all_final)) %>%
...@@ -293,7 +304,7 @@ ggplot(df %>% pivot_longer(cols = c(Initial, Final), names_to = "Step", values_t ...@@ -293,7 +304,7 @@ ggplot(df %>% pivot_longer(cols = c(Initial, Final), names_to = "Step", values_t
### Compositions ### Compositions
```{r, eval=F} ```{r, eval=T}
p <- plot_composition(physeq = meat_all_final, taxaRank1 = "Kingdom", taxaSet1 = "Fungi", taxaRank2 = "Species", numberOfTaxa = 22, x = "Sample") p <- plot_composition(physeq = meat_all_final, taxaRank1 = "Kingdom", taxaSet1 = "Fungi", taxaRank2 = "Species", numberOfTaxa = 22, x = "Sample")
p + facet_grid(". ~ Marker", scales = "free_x", space = "free") p + facet_grid(". ~ Marker", scales = "free_x", space = "free")
...@@ -301,14 +312,14 @@ p + facet_grid(". ~ Marker", scales = "free_x", space = "free") ...@@ -301,14 +312,14 @@ p + facet_grid(". ~ Marker", scales = "free_x", space = "free")
### Richness ### Richness
```{r, eval=F} ```{r, eval=T}
p <- plot_richness(physeq = meat_all_final, x = "Marker", color = "Marker", shape = NULL, title = "Alpha diversity graphics", measures = c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher")) p <- plot_richness(physeq = meat_all_final, x = "Marker", color = "Marker", shape = NULL, title = "Alpha diversity graphics", measures = c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher"))
p + geom_boxplot() + NULL p + geom_boxplot() + NULL
``` ```
### $\beta$ diversity ### $\beta$ diversity
```{r, eval=F} ```{r, eval=T}
beta.dist <- distance(meat_all_final, method = "bray") beta.dist <- distance(meat_all_final, method = "bray")
ord <- ordinate(meat_all_final, method = "MDS", distance = beta.dist) ord <- ordinate(meat_all_final, method = "MDS", distance = beta.dist)
p <- plot_ordination(physeq = meat_all_final, ordination = ord, type = "samples", axes = c(1, 2), color = "Marker", shape = NULL, label = NULL, title = "Samples ordination graphic, bray-curtis distance") p <- plot_ordination(physeq = meat_all_final, ordination = ord, type = "samples", axes = c(1, 2), color = "Marker", shape = NULL, label = NULL, title = "Samples ordination graphic, bray-curtis distance")
...@@ -318,7 +329,7 @@ p + theme_bw() ...@@ -318,7 +329,7 @@ p + theme_bw()
### Common families ### Common families
```{r, eval=F} ```{r, eval=T}
x <- list( x <- list(
ITS1 = as_tibble(tax_table(physeq_its1_final))$Family %>% as.vector(), ITS1 = as_tibble(tax_table(physeq_its1_final))$Family %>% as.vector(),
ITS2 = as_tibble(tax_table(physeq_its2_final))$Family %>% as.vector(), ITS2 = as_tibble(tax_table(physeq_its2_final))$Family %>% as.vector(),
...@@ -331,7 +342,7 @@ ggVennDiagram(x,label_alpha = 0) + ...@@ -331,7 +342,7 @@ ggVennDiagram(x,label_alpha = 0) +
### Common genus ### Common genus
```{r, eval=F} ```{r, eval=T}
x <- list( x <- list(
ITS1 = as_tibble(tax_table(physeq_its1_final))$Genus %>% as.vector(), ITS1 = as_tibble(tax_table(physeq_its1_final))$Genus %>% as.vector(),
ITS2 = as_tibble(tax_table(physeq_its2_final))$Genus %>% as.vector(), ITS2 = as_tibble(tax_table(physeq_its2_final))$Genus %>% as.vector(),
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
<meta name="author" content="Olivier Rué" /> <meta name="author" content="Olivier Rué" />
<meta name="date" content="2021-11-16" /> <meta name="date" content="2021-11-22" />
<title>Real samples analysis</title> <title>Real samples analysis</title>
...@@ -254,7 +254,7 @@ div.tocify { ...@@ -254,7 +254,7 @@ div.tocify {
<h1 class="title toc-ignore">Real samples analysis</h1> <h1 class="title toc-ignore">Real samples analysis</h1>
<h4 class="author">Olivier Rué</h4> <h4 class="author">Olivier Rué</h4>
<h4 class="date">2021-11-16</h4> <h4 class="date">2021-11-22</h4>
</div> </div>
...@@ -432,17 +432,24 @@ ggVennDiagram(x,label_alpha = 0) + ...@@ -432,17 +432,24 @@ ggVennDiagram(x,label_alpha = 0) +
</div> </div>
<div id="manual-curation" class="section level2 tabset"> <div id="manual-curation" class="section level2 tabset">
<h2>Manual curation</h2> <h2>Manual curation</h2>
<pre class="r"><code>tax_table(physeq_its1) &lt;- t <pre class="r"><code>physeq_its1_final &lt;- curation_meat_its1(physeq_its1)
keptTaxa &lt;- setdiff(taxa_names(physeq_its1), to_remove) saveRDS(physeq_its1_final,&quot;REAL_MEAT/ITS1_final.rds&quot;)
physeq_its1_final &lt;- prune_taxa(keptTaxa, physeq_its1)
saveRDS(physeq_its1_final,&quot;REAL_MEAT/ITS1_final.rds&quot;)</code></pre> physeq_its2_final &lt;- curation_meat_its2(physeq_its2)
<pre class="r"><code>if (!file.exists(&quot;REAL_MEAT/meat_final.rds&quot;)){ saveRDS(physeq_its2_final,&quot;REAL_MEAT/ITS2_final.rds&quot;)
meat_all_final &lt;- merge_phyloseq(physeq_its1_final, physeq_its2, physeq_d1d2, physeq_rpb2)
sample_data(meat_all_final)$Marker &lt;- factor(sample_data(meat_all_final)$Marker, levels=c(&quot;ITS1&quot;,&quot;ITS2&quot;,&quot;D1D2&quot;,&quot;RPB2&quot;)) physeq_d1d2_final &lt;- curation_meat_d1d2(physeq_d1d2)
saveRDS(meat_all_final,&quot;REAL_MEAT/meat_final.rds&quot;) saveRDS(physeq_d1d2_final,&quot;REAL_MEAT/D1D2_final.rds&quot;)
}else{
meat_all_final &lt;- readRDS(&quot;REAL_MEAT/meat_final.rds&quot;) physeq_rpb2_final &lt;- curation_meat_rpb2(physeq_rpb2)
}</code></pre> saveRDS(physeq_rpb2_final,&quot;REAL_MEAT/RPB2_final.rds&quot;)</code></pre>
<pre class="r"><code>#if (!file.exists(&quot;REAL_MEAT/meat_final.rds&quot;)){
meat_all_final &lt;- merge_phyloseq(physeq_its1_final, physeq_its2_final, physeq_d1d2_final, physeq_rpb2_final)
sample_data(meat_all_final)$Marker &lt;- factor(sample_data(meat_all_final)$Marker, levels=c(&quot;ITS1&quot;,&quot;ITS2&quot;,&quot;D1D2&quot;,&quot;RPB2&quot;))
saveRDS(meat_all_final,&quot;REAL_CHEESE/meat_final.rds&quot;)
#}else{
# meat_all_final &lt;- readRDS(&quot;REAL_MEAT/meat_final.rds&quot;)
#}</code></pre>
<div id="sequencing-depth-1" class="section level3 tabset"> <div id="sequencing-depth-1" class="section level3 tabset">
<h3>Sequencing depth</h3> <h3>Sequencing depth</h3>
<pre class="r"><code>df &lt;- sample_data(meat_all_final) %&gt;% as(&quot;data.frame&quot;) %&gt;% <pre class="r"><code>df &lt;- sample_data(meat_all_final) %&gt;% as(&quot;data.frame&quot;) %&gt;%
...@@ -455,16 +462,19 @@ ggplot(df %&gt;% pivot_longer(cols = c(Initial, Final), names_to = &quot;Step&qu ...@@ -455,16 +462,19 @@ ggplot(df %&gt;% pivot_longer(cols = c(Initial, Final), names_to = &quot;Step&qu
facet_grid(~Marker, scales = &quot;free_x&quot;) + facet_grid(~Marker, scales = &quot;free_x&quot;) +
scale_fill_brewer(palette = &quot;Reds&quot;, direction = -1) + scale_fill_brewer(palette = &quot;Reds&quot;, direction = -1) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))</code></pre> theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))</code></pre>
<p><img src="real_samples_files/figure-html/unnamed-chunk-16-1.png" width="1008" /></p>
</div> </div>
<div id="compositions-1" class="section level3"> <div id="compositions-1" class="section level3">
<h3>Compositions</h3> <h3>Compositions</h3>
<pre class="r"><code>p &lt;- plot_composition(physeq = meat_all_final, taxaRank1 = &quot;Kingdom&quot;, taxaSet1 = &quot;Fungi&quot;, taxaRank2 = &quot;Species&quot;, numberOfTaxa = 22, x = &quot;Sample&quot;) <pre class="r"><code>p &lt;- plot_composition(physeq = meat_all_final, taxaRank1 = &quot;Kingdom&quot;, taxaSet1 = &quot;Fungi&quot;, taxaRank2 = &quot;Species&quot;, numberOfTaxa = 22, x = &quot;Sample&quot;)
p + facet_grid(&quot;. ~ Marker&quot;, scales = &quot;free_x&quot;, space = &quot;free&quot;)</code></pre> p + facet_grid(&quot;. ~ Marker&quot;, scales = &quot;free_x&quot;, space = &quot;free&quot;)</code></pre>
<p><img src="real_samples_files/figure-html/unnamed-chunk-17-1.png" width="1008" /></p>
</div> </div>
<div id="richness-1" class="section level3"> <div id="richness-1" class="section level3">
<h3>Richness</h3> <h3>Richness</h3>
<pre class="r"><code>p &lt;- plot_richness(physeq = meat_all_final, x = &quot;Marker&quot;, color = &quot;Marker&quot;, shape = NULL, title = &quot;Alpha diversity graphics&quot;, measures = c(&quot;Observed&quot;, &quot;Chao1&quot;, &quot;ACE&quot;, &quot;Shannon&quot;, &quot;Simpson&quot;, &quot;InvSimpson&quot;, &quot;Fisher&quot;)) <pre class="r"><code>p &lt;- plot_richness(physeq = meat_all_final, x = &quot;Marker&quot;, color = &quot;Marker&quot;, shape = NULL, title = &quot;Alpha diversity graphics&quot;, measures = c(&quot;Observed&quot;, &quot;Chao1&quot;, &quot;ACE&quot;, &quot;Shannon&quot;, &quot;Simpson&quot;, &quot;InvSimpson&quot;, &quot;Fisher&quot;))
p + geom_boxplot() + NULL</code></pre> p + geom_boxplot() + NULL</code></pre>
<p><img src="real_samples_files/figure-html/unnamed-chunk-18-1.png" width="1008" /></p>
</div> </div>
<div id="beta-diversity-1" class="section level3"> <div id="beta-diversity-1" class="section level3">
<h3><span class="math inline">\(\beta\)</span> diversity</h3> <h3><span class="math inline">\(\beta\)</span> diversity</h3>
...@@ -473,6 +483,7 @@ ord &lt;- ordinate(meat_all_final, method = &quot;MDS&quot;, distance = beta.dis ...@@ -473,6 +483,7 @@ ord &lt;- ordinate(meat_all_final, method = &quot;MDS&quot;, distance = beta.dis
p &lt;- plot_ordination(physeq = meat_all_final, ordination = ord, type = &quot;samples&quot;, axes = c(1, 2), color = &quot;Marker&quot;, shape = NULL, label = NULL, title = &quot;Samples ordination graphic, bray-curtis distance&quot;) p &lt;- plot_ordination(physeq = meat_all_final, ordination = ord, type = &quot;samples&quot;, axes = c(1, 2), color = &quot;Marker&quot;, shape = NULL, label = NULL, title = &quot;Samples ordination graphic, bray-curtis distance&quot;)
p &lt;- p + stat_ellipse(aes_string(group = &quot;Marker&quot;)) p &lt;- p + stat_ellipse(aes_string(group = &quot;Marker&quot;))
p + theme_bw()</code></pre> p + theme_bw()</code></pre>
<p><img src="real_samples_files/figure-html/unnamed-chunk-19-1.png" width="1008" /></p>
</div> </div>
<div id="common-families-1" class="section level3"> <div id="common-families-1" class="section level3">
<h3>Common families</h3> <h3>Common families</h3>
...@@ -484,6 +495,7 @@ p + theme_bw()</code></pre> ...@@ -484,6 +495,7 @@ p + theme_bw()</code></pre>
) )
ggVennDiagram(x,label_alpha = 0) + ggVennDiagram(x,label_alpha = 0) +
scale_fill_gradient(low = &quot;#F4FAFE&quot;, high = &quot;#4981BF&quot;) </code></pre> scale_fill_gradient(low = &quot;#F4FAFE&quot;, high = &quot;#4981BF&quot;) </code></pre>
<p><img src="real_samples_files/figure-html/unnamed-chunk-20-1.png" width="1008" /></p>
</div> </div>
<div id="common-genus-1" class="section level3"> <div id="common-genus-1" class="section level3">
<h3>Common genus</h3> <h3>Common genus</h3>
...@@ -495,6 +507,7 @@ ggVennDiagram(x,label_alpha = 0) + ...@@ -495,6 +507,7 @@ ggVennDiagram(x,label_alpha = 0) +
) )
ggVennDiagram(x,label_alpha = 0) + ggVennDiagram(x,label_alpha = 0) +
scale_fill_gradient(low = &quot;#F4FAFE&quot;, high = &quot;#4981BF&quot;) </code></pre> scale_fill_gradient(low = &quot;#F4FAFE&quot;, high = &quot;#4981BF&quot;) </code></pre>
<p><img src="real_samples_files/figure-html/unnamed-chunk-21-1.png" width="1008" /></p>
</div> </div>
</div> </div>
</div> </div>
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment