Commit d1d97352 authored by Penom Nom
Browse files

Fix illumina_rnaseq (tophat component) and add test data

parent 0f2644f8
......@@ -62,14 +62,14 @@ tmp_directory = <path>/tmp
#CollectInsertSizeMetrics = /usr/bin/CollectInsertSizeMetrics.jar
#MarkDuplicates = /usr/bin/MarkDuplicates.jar
#mothur = /usr/bin/mothur
#bowtie_build = /usr/bin/bowtie2-build
#tophat = /usr/bin/tophat
#gene_body_cov = /usr/bin/geneBody_coverage.py
#infer_experiment = /usr/bin/infer_experiment.py
#inner_distance = /usr/bin/inner_distance.py
#junction_saturation = /usr/bin/junction_saturation.py
#rpkm_saturation = /usr/bin/RPKM_saturation.py
#junction_annotation = /usr/bin/junction_annotation.py
#bowtie2-build = /usr/local/bin/bowtie2-build
#tophat2 = /usr/local/bin/tophat2
#geneBody_coverage.py = /usr/local/bin/geneBody_coverage.py
#infer_experiment.py = /usr/local/bin/infer_experiment.py
#inner_distance.py = /usr/local/bin/inner_distance.py
#junction_saturation.py = /usr/local/bin/junction_saturation.py
#RPKM_saturation.py = /usr/local/bin/scripts/RPKM_saturation.py
#junction_annotation.py = /usr/local/bin/junction_annotation.py
#ustacks = /usr/local/bin/ustacks
#cstacks = /usr/local/bin/cstacks
#trim_galore = /nosave/software/trim_galore
......@@ -88,9 +88,9 @@ tmp_directory = <path>/tmp
[resources]
typo3_src = http://prdownloads.sourceforge.net/typo3/typo3_src+dummy-4.7.12.zip
phix_bwa = /bank/bwadb/phi.fa
ecoli_bwa = /bank/bwadb/ecoli536
yeast_bwa = /bank/bwadb/yeast.nt
#phix_bwa = /bank/bwadb/phi.fa
#ecoli_bwa = /bank/bwadb/ecoli536
#yeast_bwa = /bank/bwadb/yeast.nt
[454_mids]
MID1 = ACGAGTGCGT
......
......@@ -18,28 +18,10 @@
import os
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFileList, InputFile, Formats
from jflow.iotypes import OutputFile, InputFile, Formats
from weaver.function import PythonFunction
from weaver.function import ShellFunction
def bowtie_build(exec_path, databank, input_fasta, stdout_path, stderr_path, output_fasta):
    """Symlink the reference FASTA and run the index builder on it.

    Creates ``output_fasta`` as a symbolic link to ``input_fasta``, then runs
    ``exec_path input_fasta databank`` and stores what the process prints on
    its standard output and standard error in ``stdout_path`` and
    ``stderr_path`` respectively.

    Args:
        exec_path: Path of the executable to launch.
        databank: Second positional argument handed to the executable.
        input_fasta: First positional argument handed to the executable and
            target of the symbolic link.
        stdout_path: File receiving the process standard output.
        stderr_path: File receiving the process standard error.
        output_fasta: Path of the symbolic link to create.

    Raises:
        OSError: If the link cannot be created (for instance when
            ``output_fasta`` already exists), if an output file cannot be
            opened, or if the executable cannot be started.
    """
    from subprocess import Popen

    # Expose the input fasta under the output name.
    os.symlink(input_fasta, output_fasta)

    cmd = [exec_path, input_fasta, databank]
    # Binary mode keeps the captured bytes untouched on both Python 2 and
    # Python 3, and handing the files to the child avoids holding its whole
    # output in memory. The context manager closes both handles even when
    # launching the process fails.
    with open(stdout_path, "wb") as stdout_handle, open(stderr_path, "wb") as stderr_handle:
        process = Popen(cmd, stdout=stdout_handle, stderr=stderr_handle)
        # The exit status is not inspected, as before; only the output is kept.
        process.wait()
class BowtieBuild (Component):
def define_parameters(self, input_fasta):
......@@ -50,8 +32,6 @@ class BowtieBuild (Component):
self.stderr = OutputFile(os.path.join(self.output_directory, "bowtie_build.stderr"))
def process(self):
buildindex = PythonFunction(bowtie_build, cmd_format="{EXE} {ARG} {IN} {OUT}")
buildindex(inputs=self.input_fasta, outputs=[ self.stdout, self.stderr, self.output_fasta], arguments=[self.get_exec_path("bowtie_build"), self.databank])
buildindex = ShellFunction( "ln -s $3 $4 ; $1 $3 $2 1> $5 2> $6 " , cmd_format="{EXE} {ARG} {IN} {OUT}" )
buildindex(inputs=self.input_fasta, outputs=[ self.output_fasta, self.stdout, self.stderr], arguments=[self.get_exec_path("bowtie2-build"), self.databank])
......@@ -185,35 +185,35 @@ class RSeQC (Analysis):
self._save_files(self.bam_files)
def get_version(self):
cmd = [self.get_exec_path("infer_experiment"), "--version"]
cmd = [self.get_exec_path("infer_experiment.py"), "--version"]
p = Popen(cmd, stdout=PIPE, stderr=PIPE)
stdout, stderr = p.communicate()
return stdout.split()[1]
def process(self):
#InferExperiment
infer_exp = ShellFunction(self.get_exec_path("infer_experiment") + " -i $1 -r $2 -s " + self.sample_size + " > $3 2> $4", cmd_format="{EXE} {IN} {OUT}")
infer_exp = ShellFunction(self.get_exec_path("infer_experiment.py") + " -i $1 -r $2 -s " + self.sample_size + " > $3 2> $4", cmd_format="{EXE} {IN} {OUT}")
infer_exp = MultiMap(infer_exp, inputs=[self.bam_files,self.bed_files], outputs=[self.stdout_infer, self.stderr_infer])
#InnerDistance
if self.paired == True:
for bam_file, bed_file , output_prefix, stdout_inner, stderr_inner, r_file_inner in \
zip(self.bam_files, self.bed_files , self.output_prefix, self.stdout_inner, self.stderr_inner, self.r_files_inner):
inner_dist = ShellFunction(self.get_exec_path("inner_distance") + " -i $1 -r $2 -o " + output_prefix +" -l " + \
inner_dist = ShellFunction(self.get_exec_path("inner_distance.py") + " -i $1 -r $2 -o " + output_prefix +" -l " + \
self.min_inner_distance + " -u " + self.max_inner_distance + " > $3 2> $4", cmd_format="{EXE} {IN} {OUT}")
inner_dist(inputs=[bam_file, bed_file], outputs=[stdout_inner, stderr_inner, r_file_inner])
#Junction Annotation
for bam_file, bed_file , output_prefix, stdout_junc_ann, stderr_junc_ann, r_file_junc_ann in \
zip(self.bam_files, self.bed_files , self.output_prefix, self.stdout_junc_ann, self.stderr_junc_ann, self.r_files_junc_ann):
junc_annot = ShellFunction(self.get_exec_path("junction_annotation") + " -i $1 -r $2 -o " + output_prefix + " -m " + \
junc_annot = ShellFunction(self.get_exec_path("junction_annotation.py") + " -i $1 -r $2 -o " + output_prefix + " -m " + \
self.min_intron_size_junc_ann + " > $3 2> $4", cmd_format="{EXE} {IN} {OUT}")
junc_annot(inputs=[bam_file, bed_file], outputs=[stdout_junc_ann, stderr_junc_ann, r_file_junc_ann])
#Junction Saturation
for bam_file, bed_file , output_prefix, stdout_junc_sat, stderr_junc_sat,r_file_junc_sat in \
zip(self.bam_files, self.bed_files , self.output_prefix, self.stdout_junc_sat, self.stderr_junc_sat, self.r_files_junc_sat):
junc_sat = ShellFunction(self.get_exec_path("junction_saturation") + " -i $1 -r $2 -o " + output_prefix + " -m " + \
junc_sat = ShellFunction(self.get_exec_path("junction_saturation.py") + " -i $1 -r $2 -o " + output_prefix + " -m " + \
self.min_intron_size_junc_sat + " -v " + self.min_junc_coverage + " -s " + self.sampling_percent_step_junc_sat \
+ " -l " + self.min_sampling_percent_junc_sat + " -u " + self.max_sampling_percent_junc_sat + " > $3 2> $4", cmd_format="{EXE} {IN} {OUT}")
junc_sat(inputs=[bam_file, bed_file], outputs=[stdout_junc_sat, stderr_junc_sat, r_file_junc_sat])
......@@ -221,18 +221,18 @@ class RSeQC (Analysis):
#GeneBody Coverage
for bam_file, bed_file , output_prefix, stderr_gbc, stdout_gbc, cov_file in \
zip(self.bam_files, self.bed_files , self.output_prefix, self.stderr_gbc, self.stdout_gbc, self.cov_files):
gene_b_cov = ShellFunction(self.get_exec_path("gene_body_cov") + " -i $1 -o " + output_prefix + " -r $2 2> $3 > $4", cmd_format="{EXE} {IN} {OUT}")
gene_b_cov = ShellFunction(self.get_exec_path("geneBody_coverage.py") + " -i $1 -o " + output_prefix + " -r $2 2> $3 > $4", cmd_format="{EXE} {IN} {OUT}")
gene_b_cov(inputs=[bam_file, bed_file], outputs=[stderr_gbc, stdout_gbc, cov_file])
#RPKM Saturation
for bam_file, bed_file , output_prefix, stdout_rpkm_sat, stderr_rpkm_sat, r_file_rpkm_sat in \
zip(self.bam_files, self.bed_files , self.output_prefix, self.stdout_rpkm_sat, self.stderr_rpkm_sat, self.r_files_rpkm_sat):
if self.reads_orientation:
rpkm_sat = ShellFunction(self.get_exec_path("rpkm_saturation") + " -i $1 -r $2 -o " + output_prefix + " -s " + \
rpkm_sat = ShellFunction(self.get_exec_path("RPKM_saturation.py") + " -i $1 -r $2 -o " + output_prefix + " -s " + \
self.sampling_percent_step_rpkm_sat + " -l " + self.min_sampling_percent_rpkm_sat + " -u " + \
self.max_sampling_percent_rpkm_sat + " -d " + self.reads_orientation + " > $3 2> $4", cmd_format="{EXE} {IN} {OUT}")
else :
rpkm_sat = ShellFunction(self.get_exec_path("rpkm_saturation") + " -i $1 -r $2 -o " + output_prefix + " -s " + \
rpkm_sat = ShellFunction(self.get_exec_path("RPKM_saturation.py") + " -i $1 -r $2 -o " + output_prefix + " -s " + \
self.sampling_percent_step_rpkm_sat + " -l " + self.min_sampling_percent_rpkm_sat + " -u " + \
self.max_sampling_percent_rpkm_sat + " > $3 2> $4", cmd_format="{EXE} {IN} {OUT}")
......
......@@ -46,20 +46,20 @@ class TopHat (Analysis):
def define_analysis(self):
self.name = "TopHat Alignment"
self.description = "Spliced transcripts alignment to whole genomes."
self.software = "tophat"
self.software = "tophat2"
def post_process(self):
if self.keep_bam:
self._save_files(self.bam_files)
def get_version(self):
cmd = [self.get_exec_path("tophat"), "-v"]
cmd = [self.get_exec_path("tophat2"), "-v"]
p = Popen(cmd, stdout=PIPE, stderr=PIPE)
stdout, stderr = p.communicate()
return stdout.split()[1]
def process(self):
command = [self.get_exec_path("tophat"), '-r', str(self.mate_inner_dist), "-o",self.output_directory, self.index_basename ]
command = [self.get_exec_path("tophat2"), '-r', str(self.mate_inner_dist), "-o",self.output_directory, self.index_basename ]
if self.read2 :
tophat = ShellFunction( ' '.join(command + ["$1 $2 2> $4"]), cmd_format='{EXE} {IN} {OUT}')
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment