From 551c860bc9747869cc6ce68dfbbfa41c66d4cb70 Mon Sep 17 00:00:00 2001
From: Thomas Renne <thomas.renne@u-psud.fr>
Date: Thu, 30 Apr 2020 11:50:59 +0200
Subject: [PATCH] Add timsTOF parameter analysis script

---
 doc/Rscripts/param_tims_tof.Rmd | 198 ++++++++++++++++++++++++++++++++
 1 file changed, 198 insertions(+)
 create mode 100644 doc/Rscripts/param_tims_tof.Rmd

diff --git a/doc/Rscripts/param_tims_tof.Rmd b/doc/Rscripts/param_tims_tof.Rmd
new file mode 100644
index 00000000..8001511e
--- /dev/null
+++ b/doc/Rscripts/param_tims_tof.Rmd
@@ -0,0 +1,198 @@
+---
+title: "timsTOF parameter analysis"
+author: "Thomas Renne"
+date: "27/04/2020"
+output: pdf_document
+---
+
+```{r setup, include=FALSE}
+library(ggplot2)
+library(reshape2)
+library(wesanderson)
+library(gridExtra)
+
+# Read the input by explicit path rather than via setwd(): knitr restores the working directory after each chunk, so changing it here is a fragile side effect.
+data <- read.csv(file.path("~/Documents/params_timsTOF", "param_tims.csv"))
+```
+
+# Resolution
+
+```{r include=FALSE}
+data.reso <- subset(data, subset = Changed.parameter == "resolution")  # keep only rows whose Changed.parameter is "resolution"
+colnames(data.reso)[2] <- "reso"  # column 2 becomes the x variable `reso` used by the plot helpers
+
+# Scatter plot of one metric (y.val) against the `reso` column of the global data.reso.
+plot.reso.unique <- function(y.val, y.lab, title) {
+  full.title <- paste(title, "following the resolution parameter")
+  ggplot(data = data.reso, mapping = aes(x = reso, y = y.val)) +
+    geom_point(color = "darkcyan", size = 2) + theme_grey() +
+    labs(x = "resolution", y = y.lab) + ggtitle(full.title)
+}
+
+# Plot several metrics against resolution, one colour per metric; `data` must have `reso` as column 1, metrics after it.
+plot.reso.multiple <- function(data, y.lab, legend.title, legend.labels, title) {
+  # Reshape to long format (reso, variable, value); `id.vars` is spelled out instead of relying on partial matching of `id.var`.
+  dm <- melt(data, id.vars = 1)
+  ggplot(dm, aes(x = reso, y = value, color = variable)) +
+    geom_point(size = 2) +
+    labs(x = "resolution", y = y.lab) +
+    # legend.labels are applied in the column order of `data` (after column 1)
+    scale_color_manual(name = legend.title, labels = legend.labels,
+                       values = wes_palette(name = "Darjeeling1")) +
+    ggtitle(paste(title, "following the resolution parameter")) + theme_grey()
+}
+```
+
+## Information
+
+- **Number of threads** : 5  
+- **Resolution** : 10 000, 20 000, 30 000, 40 000, 50 000  
+- **Smooth width** : 2.0  
+- **Integration width** : 4  
+- **Intensity threshold** : 10.0  
+
+  
+
+## Graphs
+
+```{r graphs, echo=FALSE}
+########### Execution time (data.reso$time) against resolution ###########
+plot.reso.unique(data.reso$time, "time (min)", "Execution time")
+
+########### Number of groups against resolution ###########
+plot.reso.unique(data.reso$nb_groups, "number of groups", "Number of groups")
+
+########### Number of subgroups against resolution ###########
+plot.reso.unique(data.reso$nb_subgroups, "number of subgroups", "Number of subgroups")
+
+########### Number of proteins against resolution ###########
+plot.reso.unique(data.reso$nb_proteins, "number of proteins", "Number of proteins")
+
+########### Number of peptides against resolution ###########
+plot.reso.unique(data.reso$nb_peptides, "number of peptides", "Number of peptides")
+
+########### FDRs: columns 8:10, labelled PSM / Peptides / Proteins in that order ###########
+plot.reso.multiple(data.reso[, c(2, 8:10)], "% of FDR", "FDRs", c("PSM", "Peptides", "Proteins"), "Different FDRs")
+
+########### Mass precision: abs() of columns 11:13 -- NOTE(review): labels here are mean, median, sd, but the smooth-width chunks label the same columns mean, sd, median; confirm against the CSV header ###########
+plot.reso.multiple(abs(data.reso[, c(2, 11:13)]), "mass precision", "Statistical description", c("abs(mean)", "abs(median)", "sd"), "Mass-precision description")
+
+########### Total spectra used against resolution ###########
+plot.reso.unique(data.reso$total_spectra_used, "Number of spectra used", "Total spectra used")
+
+########### Total assigned: columns 15:16, labelled spectra assigned / unique assigned ###########
+plot.reso.multiple(data.reso[, c(2, 15:16)], "number", "total assigned", c("total spectra assigned", "total unique assigned"), "Total of spectra and unique assigned")
+
+########### Percentage of assignment against resolution ###########
+plot.reso.unique(data.reso$percent_assignement, "assignment in %", "Percentage of assignment")
+
+```
+
+# Smooth width
+
+```{r include=FALSE}
+data.smooth <- subset(data, subset = Changed.parameter == "smooth_width")  # keep only rows whose Changed.parameter is "smooth_width"
+colnames(data.smooth)[2] <- "smooth"  # column 2 becomes the x variable `smooth` used by the plot helpers
+
+# Scatter plot of one metric (y.val) against the `smooth` column of `data`.
+plot.smooth.unique <- function(data, y.val, y.lab, title) {
+  full.title <- paste(title, "following the smooth-width parameter")
+  ggplot(data = data, mapping = aes(x = smooth, y = y.val)) +
+    geom_point(color = "darkcyan", size = 2) + theme_grey() +
+    labs(x = "smooth width", y = y.lab) + ggtitle(full.title)
+}
+
+# Plot several metrics against smooth width, one colour per metric; `data` must have `smooth` as column 1, metrics after it.
+plot.smooth.multiple <- function(data, y.lab, legend.title, legend.labels, title) {
+  # Reshape to long format (smooth, variable, value); `id.vars` is spelled out instead of relying on partial matching of `id.var`.
+  dm <- melt(data, id.vars = 1)
+  ggplot(dm, aes(x = smooth, y = value, color = variable)) +
+    geom_point(size = 2) +
+    labs(x = "smooth width", y = y.lab) +
+    # legend.labels are applied in the column order of `data` (after column 1)
+    scale_color_manual(name = legend.title, labels = legend.labels,
+                       values = wes_palette(name = "Darjeeling1")) +
+    ggtitle(paste(title, "following the smooth-width parameter")) + theme_grey()
+}
+```
+
+## Information
+
+- **Number of threads** : 5  
+- **Resolution** : 40 000
+- **Smooth width** : 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 10, 20, 50, 100, 200, 500
+- **Integration width** : 4  
+- **Intensity threshold** : 10.0  
+
+  
+
+## Graphs
+
+```{r graphs_smooth, echo=FALSE}
+########### Execution time (data.smooth$time) against smooth width ###########
+plot.smooth.unique(data.smooth, data.smooth$time, "time (min)", "Execution time")
+
+########### Number of groups against smooth width ###########
+plot.smooth.unique(data.smooth, data.smooth$nb_groups, "number of groups", "Number of groups")
+
+########### Number of subgroups against smooth width ###########
+plot.smooth.unique(data.smooth, data.smooth$nb_subgroups, "number of subgroups", "Number of subgroups")
+
+########### Number of proteins against smooth width ###########
+plot.smooth.unique(data.smooth, data.smooth$nb_proteins, "number of proteins", "Number of proteins")
+
+########### Number of peptides against smooth width ###########
+plot.smooth.unique(data.smooth, data.smooth$nb_peptides, "number of peptides", "Number of peptides")
+
+########### FDRs: columns 8:10, labelled PSM / Peptides / Proteins in that order ###########
+plot.smooth.multiple(data.smooth[, c(2, 8:10)], "% of FDR", "FDRs", c("PSM", "Peptides", "Proteins"), "Different FDRs")
+
+########### Mass precision: abs() of columns 11:13 -- NOTE(review): labels here are mean, sd, median, but the resolution chunk labels the same columns mean, median, sd; confirm against the CSV header ###########
+plot.smooth.multiple(abs(data.smooth[, c(2, 11:13)]), "mass precision", "Statistical description", c("abs(mean)", "sd", "abs(median)"), "Mass-precision description")
+
+########### Total spectra used against smooth width ###########
+plot.smooth.unique(data.smooth, data.smooth$total_spectra_used, "Number of spectra used", "Total spectra used")
+
+########### Total assigned: columns 15:16, labelled spectra assigned / unique assigned ###########
+plot.smooth.multiple(data.smooth[, c(2, 15:16)], "number", "total assigned", c("total spectra assigned", "total unique assigned"), "Total of spectra and unique assigned")
+
+########### Percentage of assignment against smooth width ###########
+plot.smooth.unique(data.smooth, data.smooth$percent_assignement, "assignment in %", "Percentage of assignment")
+
+```
+
+## Focused analysis: smooth width from 1 to 50
+
+```{r graphs_smooth_focused, echo=FALSE}
+data.smooth_f <- head(data.smooth, 10)  # first 10 rows; unlike data.smooth[1:10,], head() does not pad with all-NA rows when fewer than 10 rows exist
+########### Execution time against smooth width (focused subset data.smooth_f) ###########
+plot.smooth.unique(data.smooth_f, data.smooth_f$time, "time (min)", "Execution time")
+
+########### Number of groups (focused subset) ###########
+plot.smooth.unique(data.smooth_f, data.smooth_f$nb_groups, "number of groups", "Number of groups")
+
+########### Number of subgroups (focused subset) ###########
+plot.smooth.unique(data.smooth_f, data.smooth_f$nb_subgroups, "number of subgroups", "Number of subgroups")
+
+########### Number of proteins (focused subset) ###########
+plot.smooth.unique(data.smooth_f, data.smooth_f$nb_proteins, "number of proteins", "Number of proteins")
+
+########### Number of peptides (focused subset) ###########
+plot.smooth.unique(data.smooth_f, data.smooth_f$nb_peptides, "number of peptides", "Number of peptides")
+
+########### FDRs: columns 8:10, labelled PSM / Peptides / Proteins in that order ###########
+plot.smooth.multiple(data.smooth_f[, c(2, 8:10)], "% of FDR", "FDRs", c("PSM", "Peptides", "Proteins"), "Different FDRs")
+
+########### Mass precision: abs() of columns 11:13 -- NOTE(review): labels here are mean, sd, median, but the resolution chunk labels the same columns mean, median, sd; confirm against the CSV header ###########
+plot.smooth.multiple(abs(data.smooth_f[, c(2, 11:13)]), "mass precision", "Statistical description", c("abs(mean)", "sd", "abs(median)"), "Mass-precision description")
+
+########### Total spectra used (focused subset) ###########
+plot.smooth.unique(data.smooth_f, data.smooth_f$total_spectra_used, "Number of spectra used", "Total spectra used")
+
+########### Total assigned: columns 15:16, labelled spectra assigned / unique assigned ###########
+plot.smooth.multiple(data.smooth_f[, c(2, 15:16)], "number", "total assigned", c("total spectra assigned", "total unique assigned"), "Total of spectra and unique assigned")
+
+########### Percentage of assignment (focused subset) ###########
+plot.smooth.unique(data.smooth_f, data.smooth_f$percent_assignement, "assignment in %", "Percentage of assignment")
+
+```
\ No newline at end of file
-- 
GitLab