**Example for one batch**

```
PATH/prokka --outdir PROKKA_Sample --prefix Sample Sample_metaspades_scaffolds.fasta
```

## 4/ Individual CD-HIT (one run per sample)

Clustering is performed on the CDS output by prokka, in order to remove redundant CDS (the 5% threshold corresponds roughly to the same species).

```
cd-hit-est -c 0.95 -d 0 -i Sample.ffn -o Sample.faa.cd-hit-est.095
```

**NB.** The clustering threshold was chosen at 95% identity on the nucleotide CDS.

**NB2.** The .clstr (cluster) file gives the link between the representative of a cluster and its members. The Python script below retrieves this link information as a tab-separated file.

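As an aside, the conversion from the .clstr format to a two-column table can be sketched as follows. This is only a minimal illustration, not the script used in the pipeline: it assumes the standard cd-hit .clstr layout (">Cluster N" header lines, one member per line, the representative marked by a trailing "*"), and the file names are placeholders.

```
awk '
function flush(i) { for (i = 1; i <= n; i++) print rep "\t" mem[i]; n = 0 }
/^>Cluster/ { flush(); next }                  # new cluster block: emit the previous one
{
    id = $3                                    # third field looks like ">sequence_id..."
    sub(/^>/, "", id); sub(/\.\.\.$/, "", id)  # strip the ">" and the trailing "..."
    if ($NF == "*") rep = id                   # line of the cluster representative
    mem[++n] = id
}
END { flush() }
' Sample.faa.cd-hit-est.095.clstr > Sample_table_clstr.tsv
```

Each output line pairs the cluster representative (used as the cluster id) with one member, the representative included.
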
**Concatenation of all the fasta outputs of the individual cd-hit runs, then cd-hit again with the same parameter**

```
cat *cd-hit-est.095 > All-cd-hit-est.095

cd-hit-est -c 0.95 -d 0 -i All-cd-hit-est.095 -o All-cd-hit_cd-hit-est.095
```

## 6/ Retrieving, for each initial protein, the cluster in which it appears

Join the tables to obtain the link between each initial protein and the final proteins, so as to build a protein reference file for the annotation. A schematic version of this join is sketched below.

This step is carried out in the script presented in part 8/ Summary of quantifications.

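Schematically, the join amounts to the following sketch (illustration only, with placeholder file names): map each intermediate cluster id to its global cluster id using the table produced by the global cd-hit, then translate the per-sample (intermediate cluster id, protein id) pairs.

```
awk 'NR==FNR { for (i = 2; i <= NF; i++) glob[$i] = $1; next }  # 1st file: global id, then its intermediate ids
     { print $2 "\t" glob[$1] }                                 # 2nd file: intermediate id, protein id
' table_clstr.txt Sample_table_clstr.tsv > protein2cluster.tsv
```
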
## 7/ Quantification of the reads aligned to each gene for each condition

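The detailed commands of this step are not reproduced in this extract. As an illustration only, a featureCounts call producing the `.featureCounts.count` files consumed in part 8/ could look like the following; the annotation and alignment file names, and the exact options, are assumptions rather than the pipeline's actual commands.

```
# Count the reads aligned to each predicted gene (placeholder file names):
# Sample.gff is the prokka annotation, Sample.bam the alignment of the sample reads.
featureCounts -a Sample.gff -t CDS -g ID -o Sample.featureCounts.count Sample.bam
```

In the resulting table the gene id is in the first column and the count in the seventh, which is what the part 8/ script expects.
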
## 8/ Summary of quantifications

From the quantification files of the different conditions and the cluster correspondence files, build the summary count table.

**I had initially done this with shell commands, but it is better to have a Python program produce the table.**

Python program `Script_quantification_clusters_opti.py`:

```
#!/usr/bin/env python

"""-------------------------------------------------------------------------
Script Name: Script_quantification_clusters.py
Description: Create a file which joins the table of global cluster ids
    and intermediate cluster ids with the tables of intermediate cluster
    ids and contig ids. Create a file which contains the sum of the reads
    aligned to each contig of a cluster.
Input files:
    1st input file: table_clstr.txt (table with global cluster id and corresponding intermediate cluster ids)
    2nd input file: file containing the list of the file names generated with the 1st cd-hit for each sample (intermediate cluster id and contig id).
    3rd input file: file containing the list of the file names generated with featureCounts for each sample (.featureCounts.count files)
Created By: Joanna Fourquet
Date: 2019-04-11
-------------------------------------------------------------------------
"""

# Metadata
__author__ = 'Fourquet Joanna - Plateforme bioinformatique Toulouse'
__copyright__ = 'Copyright (C) 2019 INRA'
__license__ = 'GNU General Public License'
__version__ = '0.1'
__email__ = 'support.bioinfo.genotoul@inra.fr'
__status__ = 'dev'

# Module imports
try:
    import argparse
    from datetime import datetime
except ImportError as error:
    print(error)
    exit(1)

# Print date
print(str(datetime.now()))

# Manage parameters
parser = argparse.ArgumentParser(description='Script which creates a correspondence table between global cluster id and contig id, and a table with the number of aligned reads in each sample for each global cluster id.')

parser.add_argument('-t', '--table', required=True, help='Correspondence table between global cluster id and intermediate cluster id.')

parser.add_argument('-l', '--listoffileCluster', required=True, help='List of files containing the correspondence tables between intermediate cluster id and contig id per sample.')

parser.add_argument('-c', '--listoffileCounts', required=True, help='List of files storing the read counts for each contig per sample.')

parser.add_argument('-oc', '--outputCounts', required=True, help='Name of the output file containing the counts for each global cluster id and each sample.')

parser.add_argument('-oID', '--outputID', required=True, help='Name of the output file containing the correspondence table between global cluster id and contig id.')

parser.add_argument('-v', '--version', action='version', version=__version__)

args = parser.parse_args()

# Recovery of the list of count file names.
with open(args.listoffileCounts) as fcounts_list:
    files_of_counts = fcounts_list.read().split()

# The dictionary dict_cltr_global_cltr stores, for each intermediate cluster
# id (key), the global cluster id (value) read from the file given after -t.
# The dictionary dict_cltr_count will contain, for each global cluster and
# each sample, the sum of the reads of the contigs belonging to this cluster.
dict_cltr_global_cltr = {}
dict_cltr_count = {}

with open(args.table) as fp:
    for cluster in fp:
        # Each line holds a global cluster id followed by its
        # intermediate cluster ids.
        glob_cluster, *int_cluster = cluster.split()
        for c in int_cluster:
            dict_cltr_global_cltr[c] = glob_cluster
        # Initialization of the counts of this global cluster
        # at 0 for every sample.
        dict_cltr_count[glob_cluster] = [0]*len(files_of_counts)

# Print date.
print(str(datetime.now()))

# The dictionary dict_contig_global_cltr will contain, for each contig id
# (key), the corresponding global cluster id (value).
dict_contig_global_cltr = {}

# Opening of the file given after the -l argument.
# This file contains the list of the per-sample file names which contain
# the correspondence between intermediate cluster id and contig id.
with open(args.listoffileCluster) as fcluster_list:
    files_of_contigs = fcluster_list.read().split()

# For each line of each sample file, store in dict_contig_global_cltr the
# global cluster id (taken from dict_cltr_global_cltr) under the contig id.
for cluster_contigs_path in files_of_contigs:
    with open(cluster_contigs_path) as fh:
        for line in fh:
            line_split = line.split()
            intermediate_cluster_id = line_split[0]
            contig_id_from_cluster_contigs_path = line_split[1]
            if contig_id_from_cluster_contigs_path not in dict_contig_global_cltr:
                dict_contig_global_cltr[contig_id_from_cluster_contigs_path] \
                    = dict_cltr_global_cltr[intermediate_cluster_id]
            else:
                # A contig is expected to appear only once across the samples.
                print("Warning: contig seen more than once: "
                      + contig_id_from_cluster_contigs_path)

# Print date.
print(str(datetime.now()))

# For each count file (output of featureCounts), read the lines one by one,
# recover the contig name and the count, and increment the corresponding
# value in dict_cltr_count.
for (ech_idx, counts_path) in enumerate(files_of_counts):
    with open(counts_path) as fh:
        for f_contig_counts in fh:
            # Header lines starting with '#' or 'Geneid' do not contain
            # counts: skip them.
            if f_contig_counts.startswith('#') or f_contig_counts.startswith('Geneid'): continue
            # Recovery of the contig id and of the corresponding count.
            line_split = f_contig_counts.split()
            contig_id = line_split[0].split("_gene")[0]
            contig_count = int(line_split[6])
            dict_cltr_count[dict_contig_global_cltr[contig_id]][ech_idx] += contig_count

# Print date.
print(str(datetime.now()))

#######################################
# Write the output files.
#######################################

# Correspondence table between global cluster ids and contig ids.
with open(args.outputID, "w") as foutput_res_table:
    # Header of the output file: column names.
    foutput_res_table.write("id_cluster" + "\t" + "id_contig" + "\n")
    # Write the cluster id and contig id of each contig line by line.
    for contig_id, global_cluster_id in dict_contig_global_cltr.items():
        foutput_res_table.write(global_cluster_id + "\t" + contig_id + "\n")

# Print date.
print(str(datetime.now()))

# Counts for each global cluster and each sample.
with open(args.outputCounts, "w") as foutput_res_counts:
    # Header of the output file: column names.
    foutput_res_counts.write("id_cluster\t" + "\t".join(files_of_counts) + "\n")
    # Write the cluster id and the counts of each sample line by line.
    for cluster_id, counts in dict_cltr_count.items():
        foutput_res_counts.write(cluster_id + "\t" + "\t".join([str(i) for i in counts]) + "\n")

# Print date.
print(str(datetime.now()))
```
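
For reference, a possible invocation of the script (the options are those defined above; the file names are hypothetical):

```
python Script_quantification_clusters_opti.py \
    -t table_clstr.txt \
    -l list_of_cluster_tables.txt \
    -c list_of_featureCounts_files.txt \
    -oc clusters_counts.tsv \
    -oID clusters_contigs.tsv
```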

########################################################################################################

# Taxonomic classification with Kaiju

- Versions of the software used: Kaiju 1.7.0, KronaTools-2.7 (ktImportText), R-3.5.1 (histograms).

- Version of the database used for kaiju: Complete Reference Genomes from NCBI RefSeq (4 June 2019)

## Downloading and compiling Kaiju 1.7.0

```
git clone https://github.com/bioinformatics-centre/kaiju.git
```

### In `script_index_kaiju.sh` (building the index):

```
#!/bin/bash
#SBATCH -p workq
#SBATCH -t 01-00:00:00 #Acceptable time formats include "minutes", "minutes:seconds", "hours:minutes:seconds", "days-hours", "days-hours:minutes" and "days-hours:minutes:seconds".
#SBATCH -c 5
#SBATCH --mem=200G

#Load modules
module purge
mkdir kaijudb
cd kaijudb
module load bioinfo/kaiju-v1.7.0

kaiju-makedb -s refseq
```

Run on slurm:

`sbatch script_index_kaiju.sh`

## Kaiju MEM

### In `script_kaiju_MEM.sh`:

```
#!/bin/bash
#SBATCH -p workq
#SBATCH -t 24:00:00
#SBATCH --cpus-per-task=25

kaiju -z 25 -t nodes.dmp -f kaiju_db.fmi -i ../SampleWithoutHumanR1.fastq.gz -j ../SampleWithoutHumanR2.fastq.gz -o SampleWithoutHuman_kaiju_MEM.out -a mem
```

Run on slurm:

`sbatch script_kaiju_MEM.sh`

## Kaiju MEM: verbose mode

**To get the maximum match length between the read and a database entry**

### In `script_kaiju_MEM_option_v.sh`:

```
#!/bin/bash
#SBATCH -p workq
#SBATCH -t 10:00:00
#SBATCH --cpus-per-task=25

kaiju -z 25 -t nodes.dmp -f kaiju_db.fmi -i ../SampleWithoutHumanR1.fastq.gz -j ../SampleWithoutHumanR2.fastq.gz -o SampleWithoutHuman_kaiju_MEM_verbose.out -a mem -v
```

Run on slurm:

`sbatch script_kaiju_MEM_option_v.sh`

## Kaiju MEM: generating kronas

### Formatting the kaiju MEM files in order to generate kronas

#### In `script_krona_kaiju_MEM_option_u.sh`:

```
#!/bin/bash
#SBATCH -p workq
#SBATCH -t 1:00:00

kaiju2krona -t nodes.dmp -n names.dmp -i SampleWithoutHuman_kaiju_MEM.out -o SampleWithoutHuman_kaiju_MEM_without_unassigned.out.krona -u
```

Run on slurm:

`sbatch script_krona_kaiju_MEM_option_u.sh`

### Generating the kronas

#### In `script_krona_without_unassigned_MEM.sh`:

```
#!/bin/bash
#SBATCH -p workq
#SBATCH -t 1:00:00

module purge
module load bioinfo/KronaTools-2.7

ktImportText -o SampleWithoutHuman_kaiju_MEM_without_unassigned.out.krona.html SampleWithoutHuman_kaiju_MEM_without_unassigned.out.krona
```

Run on slurm:

`sbatch script_krona_without_unassigned_MEM.sh`

## Kaiju MEM: summarizing the results as a table

**To get a summary; the example here is for phylum, but it can also be generated for class, order, family, genus and species.**

### In `script_kaiju_summary_phylum_MEM.sh`:

```
#!/bin/bash
#SBATCH -p workq
#SBATCH -t 1:00:00

kaiju2table -t nodes.dmp -n names.dmp -r phylum -o SampleWithoutHuman_kaiju_MEM_verbose_summary_phylum.tsv SampleWithoutHuman_kaiju_MEM_verbose.out
```

Run on slurm:

`sbatch script_kaiju_summary_phylum_MEM.sh`

## Kaiju MEM: merging with the kraken files

### Sorting the kaiju files

**Required before merging with the kraken files** (both inputs must be sorted on their second column, the read name)

#### In `Sort_kaijuMEM_files.sh`:

```
#!/bin/bash
#SBATCH -p workq
#SBATCH -t 24:00:00

sort -k2,2 SampleWithoutHuman_kaiju_MEM.out >SampleWithoutHuman_kaiju_MEM.out.sort
```

Run on slurm:

`sbatch Sort_kaijuMEM_files.sh`

### Sorting the kraken files

**Required before merging with the kaiju files**

#### In `Sort_kraken_files.sh`:

```
#!/bin/bash
#SBATCH -p workq
#SBATCH -t 24:00:00

sort -k2,2 krakenReadsSample_Filtered0.1 >krakenReadsSample_Filtered0.1.sort
```

Run on slurm:

`sbatch Sort_kraken_files.sh`

### Merging the sorted kaiju and kraken files (with the default values of the options)

#### In `Kaiju_merge_kraken_noOption.sh`:

```
#!/bin/bash
#SBATCH -p workq
#SBATCH -t 1:00:00

kaiju-mergeOutputs -i SampleWithoutHuman_kaiju_MEM.out.sort -j krakenReadsSample_Filtered0.1.sort -o Sample_combined_noOption.out -v
```

Run on slurm:

`sbatch Kaiju_merge_kraken_noOption.sh`

The `-c lca` option can also be used.

### Venn diagrams in R

The three counts passed to `draw.pairwise.venn` are the number of reads classified by Kaiju, the number classified by Kraken, and the size of the overlap (reads classified by both).

```
library(VennDiagram)

# Sample
grid.newpage()
venn.plot <- draw.pairwise.venn(49379485, 9101807, 8229754, c("Kaiju", "Kraken"), fill = c("#DE2916", "#318CE7"), col = c("#DE2916", "#318CE7"), alpha=0.75, lwd=c(1,1), cat.cex=c(2.5,2.5), cat.pos=c(0,90), cex=c(2.5,2.5,2.5), margin=0.05, cat.dist=c(0.02,0.04), ext.pos=25, ext.length=0.75)
```

## Kaiju MEM: histograms of the distribution of the classified reads according to the maximum match length found by kaiju MEM

#### In `Script_hist_kaiju.R`:

```
#!/usr/bin/env Rscript
args = commandArgs(trailingOnly=TRUE)

if (length(args)==0) {
  stop("At least one argument (input file) must be supplied.", call.=FALSE)
} else {
  for (i in 1:length(args)) {
    tab_init <- read.table(args[i])
    print(head(tab_init))
    # Keep only the classified reads (status "C").
    tab <- tab_init[tab_init$V1=="C",]
    print(head(tab))
    tab_hist <- hist(tab$V4, breaks=seq(10,50,1), plot=FALSE)
    print(paste0(args[i], " breaks and counts sum"))
    print(tab_hist$breaks)
    print(sum(tab_hist$counts))
    print(tab_hist$counts)
    jpeg(paste0(args[i], '_Kaiju_MEM_counts.jpg'))
    plot(tab_hist, ylim=c(0,15000000))
    dev.off()
    # Normalized version.
    tab_hist$counts <- tab_hist$counts/sum(tab_hist$counts)
    jpeg(paste0(args[i], "_Kaiju_MEM_normalized.jpg"))
    plot(tab_hist, ylim=c(0,0.25))
    dev.off()
    print(paste0(args[i], " breaks and counts sum"))
    print(tab_hist$breaks)
    print(sum(tab_hist$counts))
    print(tab_hist$counts)
  }
}
```

### Running the script

```
module purge
module load system/R-3.5.1

Rscript Script_hist_kaiju.R SampleWithoutHuman_kaiju_MEM_verbose.out
```

#######################################################################################

## Kaiju Greedy (5 mismatches): warning, tested only with kaiju v1.6

### In `script_kaiju_greedy.sh`: