|
|
These instructions describe the basic setup procedure for a production environment.
|
|
|
|
|
|
== Prerequisites ==
|
|
|
* [java](http://www.java.com/fr/download/): to run the biomart server (only java 6 is supported)
|
|
|
* [python](http://www.python.org/): the supported versions are 2.7 or higher. Required modules:
|
|
|
** argparse
|
|
|
** MySQLdb
|
|
|
** Numpy
|
|
|
* a [mysql](http://www.mysql.fr/) database: the database should be accessible from the application with a user/password with create/truncate permissions.
|
|
|
* [makeblastdb, blastn, tblastn & blast_formatter](http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download) to perform blast from the graphical user interface.
|
|
|
* [samtools](http://sourceforge.net/projects/samtools/): samtools '''0.1.19''' provides various utilities for manipulating alignments in the SAM format, including sorting, merging, indexing and generating alignments in a per-position format.
|
|
|
* [Makeflow v4.2.2](http://www3.nd.edu/~ccl/software/makeflow/): a workflow engine for executing large complex workflows on clusters, clouds, and grids.
|
|
|
* [GIT](http://git-scm.com/): version control software
|
|
|
* [ant](http://ant.apache.org/): to compile the biomart sources
|
|
|
* [R](http://cran.us.r-project.org/) with [https://cran.r-project.org/web/packages/svglite/ svglite]: to do the differential expression analysis
|
|
|
* [cairo-devel](https://www.cairographics.org/download/): 2d graphics library
|
|
|
|
|
|
== Download the latest copy ==
|
|
|
The latest version can be downloaded from the [https://mulcyber.toulouse.inra.fr/frs/?group_id=149 download page]
|
|
|
|
|
|
== Extract the archive ==
|
|
|
[user@hostname:~] tar xzvf RNAbrowse_v1.3.tar.gz
|
|
|
[user@hostname:~] cd RNAbrowse_v1.3
|
|
|
|
|
|
== Configure the application ==
|
|
|
You then have to edit the application.properties file.
|
|
|
[global]
|
|
|
# uncomment and set if not in the PATH
|
|
|
#makeflow = /usr/bin/makeflow
|
|
|
# batch system type: local, condor, sge, moab, cluster, wq, hadoop, mpi-queue
|
|
|
batch_system_type = local
|
|
|
# add these options to all batch submit files
|
|
|
batch_options =
|
|
|
# add these options to limit the number of jobs submitted in parallel
|
|
|
limit_submission =
|
|
|
|
|
|
# ngspipelines database connection information
|
|
|
[database]
|
|
|
host = localhost
|
|
|
user = ngspipelines
|
|
|
passwd = ngspipelines
|
|
|
dbname = ngspipelines
|
|
|
port = 3306
|
|
|
|
|
|
[storage]
|
|
|
# where should be written the log file
|
|
|
log_file = <path>/ngspipelines.log
|
|
|
# Where should ngspipelines data be stored
|
|
|
save_directory = <path>/ngspipelines
|
|
|
# Where should the pipelines write results, should be accessible
|
|
|
# by all cluster nodes
|
|
|
work_directory = <path>/work
|
|
|
# Where should the pipelines write temporary files, should be
|
|
|
# accessible by all cluster nodes
|
|
|
tmp_directory = <path>/tmp
|
|
|
|
|
|
# where the binaries can be found
|
|
|
[softwares]
|
|
|
# uncomment and set if not in the PATH
|
|
|
#makeblastdb = /usr/bin/makeblastdb
|
|
|
#blastn = /usr/bin/blastn
|
|
|
#tblastn = /usr/bin/tblastn
|
|
|
#blastx = /usr/bin/blastx
|
|
|
#blast_formatter = /usr/bin/blast_formatter
|
|
|
#samtools = /usr/bin/samtools
|
|
|
#mysqlimport = /usr/bin/mysqlimport
|
|
|
#git = /usr/bin/git
|
|
|
#ant = /usr/bin/ant
|
|
|
#getorf = /usr/bin/getorf
|
|
|
#interproscan = /usr/bin/interproscan-5.2-45.0/interproscan.sh
|
|
|
#repeatmasker = /usr/bin/RepeatMasker
|
|
|
#trnascanse = /usr/bin/tRNAscan-SE
|
|
|
#rnammer = /usr/bin/rnammer
|
|
|
#diamond = <path>/diamond-master/diamond
|
|
|
# rnaseqdenovo SNP calling prerequisites:
|
|
|
#require java7
|
|
|
#java = /usr/bin/java
|
|
|
# components developed with gatk 3.3.
|
|
|
#gatk = /home/cnoirot/software/GenomeAnalysisTK-3.0-0/GenomeAnalysisTK.jar
|
|
|
# picard tools :
|
|
|
#add_read_group = /home/cnoirot/software/picard-tools-1.88/AddOrReplaceReadGroups.jar
|
|
|
#reorder_sam = /home/cnoirot/software/picard-tools-1.88/ReorderSam.jar
|
|
|
#create_dictionary = /home/cnoirot/software/picard-tools-1.88/CreateSequenceDictionary.jar
|
|
|
|
|
|
#To annotate snp with snpEff 4.3i:
|
|
|
#transdecoder_longorfs = <path>/TransDecoder-3.0.1/TransDecoder.LongOrfs
|
|
|
#transdecoder_predict = <path>/TransDecoder-3.0.1/TransDecoder.Predict
|
|
|
#snpeff= <path>/snpEff/snpEff.jar
|
|
|
|
|
|
[resources]
|
|
|
biomart_git = https://github.com/biomart/biomart-rc6.git
|
|
|
# rnaseqdenovo resources (needed only if you want to rename contigs with genename --rename):
|
|
|
# The file to describe the GOs (format : OBO).
|
|
|
ontology_desc = <path>/gene_ontology_ext.obo
|
|
|
# NCBI file which contains the link between gene name and gene ID (ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz)
|
|
|
refseq_gene_from_gi = <path>/gene_info_filter
|
|
|
# NCBI file which contains the link between gene ID and accession (ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene2accession.gz).
|
|
|
refseq_gi_from_accession = <path>/gene2accession_filter
|
|
|
snpeff_config=<path>/snpEff/snpEff.config
|
|
|
|
|
|
== Test your installation ==
|
|
|
The application embeds some test files; enter the following command lines to test the system:
|
|
|
[user@hostname:~] python bin/ngspipelines_cli.py addinstance --instance-name myinstance
|
|
|
[user@hostname:~] python ./bin/ngspipelines_cli.py load-rnaseqdenovo --instance-name myinstance --project-name DemoBrain --species "Dicentrarchus labrax" --species-common-name "SeaBass" --project-description "SeaBass brain transcriptome 500 examples contigs" \
|
|
|
--assembly file=workflows/rnaseqdenovo/data/load/contigs.fasta software-name=oases software-parameters="" software-version="0.2.06" comments="Transcript assembly" \
|
|
|
--library library-name=brain_400 sample-name=Brain replicat=1 tissue=Brain type=se insert-size=400 remark="100bp to 400bp insert" sequencer=HiSeq2000 files=workflows/rnaseqdenovo/data/load/brain_400.fastq.gz \
|
|
|
--alignment files=workflows/rnaseqdenovo/data/load/brain_400.bam software-name=bwa software-version=2.2.26 software-parameters=samse --image workflows/rnaseqdenovo/data/DLabrax.png \
|
|
|
--assembly-annot file=workflows/rnaseqdenovo/data/load/best_annotation_file.gff3 software-name=blastall software-parameters="-e 10e-10" software-version="2.2.26" comments="Best annotations against swissprot" is-best
|
|
|
[user@hostname:~] python bin/ngspipelines_cli.py runinstance --instance-name myinstance
|
|
|
Once done, a browser should open a new window with the following display: <br />
|
|
|
[[File:first_install_completed.png]]
|
|
|
|
|
|
== Full test of your installation ==
|
|
|
[user@hostname:~] python bin/ngspipelines_cli.py load-rnaseqdenovo @workflows/rnaseqdenovo/data/load/rnaseqdenovo.cfg
|
|
|
[user@hostname:~] python bin/ngspipelines_cli.py runinstance --instance-name myinstance
|
|
|
Once done, a browser should open a new window with the following display: <br />
|
|
|
[[File:full_install_completed.png]]
|
|
|
|
|
|
== Memory management ==
|
|
|
On real NGS data it is recommended to increase the memory used by several programs.
|
|
|
|
|
|
=== Change memory in application.properties file. ===
|
|
|
SGE example:
|
|
|
[components]
|
|
|
Blast.batch_options = -l mem=20G -l h_vmem=20G -q wflowq
|
|
|
Blast2annot.batch_options = -l mem=20G -l h_vmem=20G -q wflowq
|
|
|
InterProScan.batch_options = -l mem=20G -l h_vmem=20G -q wflowq
|
|
|
BWAIndex.batch_options = -l mem=20G -l h_vmem=20G -q wflowq
|
|
|
BWA.batch_options = -l mem=20G -l h_vmem=20G -q wflowq
|
|
|
SamtoolsIndex.batch_options = -l mem=10G -l h_vmem=10G -q wflowq
|
|
|
IndexFaiDict.batch_options = -l mem=20G -l h_vmem=20G -q wflowq
|
|
|
CountReads.batch_options = -l mem=20G -l h_vmem=20G -q wflowq
|
|
|
GatkHaplotypeCaller.batch_options = -l mem=50G -l h_vmem=50G -q wflowq
|
|
|
GatkVariantFilter.batch_options = -l mem=25G -l h_vmem=25G -q wflowq
|
|
|
GatkPreprocess.batch_options = -l mem=50G -l h_vmem=50G -q wflowq
|
|
|
GatkRecalibration.batch_options = -l mem=25G -l h_vmem=25G -q wflowq
|
|
|
VariantPreprocess.batch_options = -l mem=25G -l h_vmem=25G -q wflowq
|
|
|
tSNPannot.batch_options = -l mem=30G -l h_vmem=30G -q wflowq
|
|
|
|
|
|
=== Change memory in workflows/rnaseqdenovo/__init__.py ===
|
|
|
Example, lines 147 to 150:
|
|
|
def process(self):
|
|
|
JAVA_LARGE_MEM = 20
|
|
|
JAVA_HUGE_MEM = 40
|
|
|
NB_SPLIT = 200 |