Commit 04473b50 authored by Penom Nom's avatar Penom Nom
Browse files

Added : html & js files for web interface

Bug fixes
Code cleaning
Documentation
Analyzebed: now allows boxplot and barplot in the web interface
Python3 updates
Argparse for scripts
parent 1bd15380
/**
 * Click handler of the "boxplot with outlier" button.
 *
 * Fills the NG6 modal with a Highcharts box plot of SV lengths: one box per
 * checked "chk_sample_<i>" checkbox, with the outliers overlaid as a scatter
 * series. For every checked row <i> the handler reads the hidden inputs
 * "#boxplot_<i>" and "#outliers_<i>" (";"-separated numbers) and "#soft_<i>"
 * (the category label). Nothing happens when no checkbox is checked.
 */
$(document).on("click", ".length-view-btn", function() {
    var checked_samples = $(":checked[id^=chk_sample_]");
    // .length instead of .size(): .size() no longer exists in jQuery 3.
    if (checked_samples.length > 0) {
        $("#modal-label-tmpl").html("NG6 <small> " + $("#analyse_name").val() + "</small>");
        $("#modal-body-tmpl").html('<div id="highcharts_container"></div>');
        $("#modal-foot-tmpl").html('<button class="btn btn-default" data-dismiss="modal" aria-hidden="true"><i class="glyphicon glyphicon-remove"></i> Close</button>');
        $("#highcharts_container").css('width', '845px');
        var all_boxplot = [];
        var all_outliers = [];
        var all_sample_id = [];
        checked_samples.each(function() {
            var index = $(this).attr("id").split("_")[2];
            var val_boxplot = $("#boxplot_" + index).val().split(";");
            var val_outliers = $("#outliers_" + index).val().split(";");
            var val_soft = $("#soft_" + index).val();
            // Boxes are laid out in selection order (0, 1, 2, ...), so outliers
            // must use the position of this sample among the checked rows, not
            // its row index in the table, to be drawn over the right box.
            var category_position = all_sample_id.length;
            all_sample_id.push(val_soft);
            if (val_outliers[0] !== '') {
                $.each(val_outliers, function(position, value) {
                    all_outliers.push([category_position, parseInt(value, 10)]);
                });
            }
            val_boxplot = val_boxplot.map(function(x) {
                return parseInt(x, 10); //cast to int
            });
            all_boxplot.push(val_boxplot);
        });
        var chart = new Highcharts.Chart({
            chart: {
                renderTo: 'highcharts_container',
                type: 'boxplot'
            },
            title: {
                text: 'SV length Box-Plot'
            },
            legend: {
                enabled: false
            },
            xAxis: {
                categories: all_sample_id,
                title: {
                    text: 'Software'
                }
            },
            yAxis: {
                title: {
                    text: 'Sizes'
                }
            },
            series: [{
                name: 'Sizes',
                data: all_boxplot,
                tooltip: {
                    headerFormat: '<em>Software {point.key}</em><br/>'
                }
            }, {
                name: 'Outliers',
                color: Highcharts.getOptions().colors[0],
                type: 'scatter',
                data: all_outliers,
                marker: {
                    fillColor: 'white',
                    lineWidth: 1,
                    lineColor: Highcharts.getOptions().colors[0]
                },
                tooltip: {
                    pointFormat: 'Observation: {point.y}'
                }
            }]
        });
        resize_center_btmodal('#ng6modal', chart.chartWidth + 50);
        //Display
        $("#ng6modal").modal();
    }
});
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Click handler of the "barplot" button.
 *
 * Fills the NG6 modal with a Highcharts bar chart titled "SVR found in at
 * least N individuals", with one series per checked "chk_sample_<i>" checkbox.
 * For every checked row <i> the handler reads the hidden inputs "#ntag_<i>"
 * (";"-separated values of N), "#ncount_<i>" (the count for each N, in the same
 * order) and "#soft_<i>" (the series name). Nothing happens when no checkbox is
 * checked.
 */
$(document).on("click", ".compo-view-btn", function() {
    var checked_samples = $(":checked[id^=chk_sample_]");
    // .length instead of .size(): .size() no longer exists in jQuery 3.
    if (checked_samples.length > 0) {
        $("#modal-label-tmpl").html("NG6 <small> " + $("#analyse_name").val() + "</small>");
        $("#modal-body-tmpl").html('<div id="highcharts_container"></div>');
        $("#modal-foot-tmpl").html('<button class="btn btn-default" data-dismiss="modal" aria-hidden="true"><i class="glyphicon glyphicon-remove"></i> Close</button>');
        $("#highcharts_container").css('width', '845px');
        // Samples do not necessarily share the same list of N values, so the
        // counts are keyed by N instead of being placed by position under the
        // tags of row 0.
        var seen_tags = {};
        var samples = [];
        checked_samples.each(function() {
            var index = $(this).attr("id").split("_")[2];
            var val_tag = $("#ntag_" + index).val().split(";");
            var val_count = $("#ncount_" + index).val().split(";");
            var soft = $("#soft_" + index).val();
            var count_by_tag = {};
            $.each(val_tag, function(position, tag) {
                var count = parseInt(val_count[position], 10); //cast to int
                if (tag !== '' && !isNaN(count)) {
                    count_by_tag[tag] = count;
                    seen_tags[tag] = true;
                }
            });
            samples.push({name: soft, count_by_tag: count_by_tag});
        });
        // Categories: every N seen in the selection, largest first.
        var tags = Object.keys(seen_tags).sort(function(a, b) {
            return parseInt(b, 10) - parseInt(a, 10);
        });
        var all_count = samples.map(function(sample) {
            var val_count = tags.map(function(tag) {
                // A sample with no entry for N has nothing found in at least N individuals.
                if (Object.prototype.hasOwnProperty.call(sample.count_by_tag, tag)) {
                    return sample.count_by_tag[tag];
                }
                return 0;
            });
            return {name: sample.name, data: val_count};
        });
        var chart = new Highcharts.Chart({
            chart: {
                renderTo: 'highcharts_container',
                type: 'bar'
            },
            title: {
                text: 'SVR found in at least N individuals'
            },
            xAxis: {
                categories: tags,
                title: {
                    text: 'At least N individuals',
                    align: 'high'
                },
                labels: {
                    overflow: 'justify'
                }
            },
            yAxis: {
                min: 0,
                title: {
                    text: 'Count',
                    align: 'high'
                },
                labels: {
                    overflow: 'justify'
                }
            },
            plotOptions: {
                bar: {
                    dataLabels: {
                        enabled: true
                    }
                }
            },
            legend: {
                layout: 'vertical',
                align: 'right',
                verticalAlign: 'top',
                x: -40,
                y: 80,
                floating: true,
                borderWidth: 1,
                backgroundColor: ((Highcharts.theme && Highcharts.theme.legendBackgroundColor) || '#FFFFFF'),
                shadow: true
            },
            credits: {
                enabled: false
            },
            series: all_count
        });
        resize_center_btmodal('#ng6modal', chart.chartWidth + 50);
        //Display
        $("#ng6modal").modal();
    }
});
{extends file='AnalysisTemplate.tpl'}
{* Result page listing one table row per entry of $analyse_results, with two buttons that plot the checked rows. *}
{* NOTE(review): the boxplot chart type is presumably provided by highcharts-more.js; this tag sits outside any block of this child template, confirm that it is actually emitted. *}
<script src="https://code.highcharts.com/highcharts-more.js"></script>
{block name=description_update}
<br/>
<div style="float:right;">
{* The "add files" button is only rendered when $is_project_admin is set. *}
{if $is_project_admin }
<button id="add_file" type="button" class="btn btn-sm btn-primary"><i class="glyphicon glyphicon-plus"></i> add files</button>
{/if}
</div>
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
{/block}
{block name=results_title}Structural variations stats{/block}
{block name=results}
<div id="user_information_dialog" title="">
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
<br/>
</div>
<table class="table table-striped table-bordered dataTable analysis-result-table">
<thead>
<tr>
<th><center><input type="checkbox" id="chk_all_sample"></center></th>
<th class="string-sort" >Sample(by software)</th>
<th class="numeric-sort">Median size</th>
<th class="numeric-sort">Mean size</th>
<th class="numeric-sort">Min size</th>
<th class="numeric-sort">Max size</th>
<th class="numeric-sort">Standard deviation</th>
<th class="numeric-sort">Number of SV</th>
</tr>
</thead>
<tbody>
{* $i is the row counter used as numeric suffix of every id below. *}
{assign var="i" value=0}
{foreach from=$analyse_results key=sample item=sample_results}
<tr>
<td>
<center>
{* The hidden fields carry the per-row data that the .length-view-btn and .compo-view-btn click handlers read back by id. *}
<input type="checkbox" id="chk_sample_{$i}" value="sample"/>
<input type="hidden" id="boxplot_{$i}" value="{$sample_results["default"].boxplot}"/>
<input type="hidden" id="outliers_{$i}" value="{$sample_results["default"].outliers}"/>
<input type="hidden" id="soft_{$i}" value="{$sample_results["default"].soft}"/>
<input type="hidden" id="ntag_{$i}" value="{$sample_results["default"].n_tag}"/>
<input type="hidden" id="ncount_{$i}" value="{$sample_results["default"].n_count}"/>
</center>
</td>
<td>{$sample_results["default"].soft}</td>
<td>{$sample_results["default"].median}</td>
<td>{$sample_results["default"].mean}</td>
<td>{$sample_results["default"].min}</td>
<td>{$sample_results["default"].max}</td>
<td>{$sample_results["default"].sd}</td>
<td>{$sample_results["default"].number}</td>
</tr>
{$i = $i +1}
{/foreach}
</tbody>
<tfoot>
<tr>
{* NOTE(review): the header row above declares 8 columns while this colspan is 9 or 10; confirm the intended value. *}
<th align="left" colspan="{if $has_several_samples && ($nb_processed_samples >=1)}10{else}9{/if}">
With selection :
<button type="button" class="btn btn-default multiple-selection-btn length-view-btn" id="boxplot_button"> boxplot with outlier </button>
<button type="button" class="btn btn-default multiple-selection-btn compo-view-btn" id="barplot_button"> barplot </button>
</th>
</tr>
</tfoot>
</table>
{/block}
\ No newline at end of file
......@@ -13,114 +13,61 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import sys
import tempfile
import os
import re
import tempfile
from io import BytesIO
from subprocess import Popen,PIPE
from jflow.workflow import Workflow
from jflow import utils as utils
from ng6.project import Project
from ng6.ng6workflow import NG6Workflow
from ng6.run import Run
from subprocess import Popen,PIPE
from io import BytesIO
import re
#TODO merge the cnvnator results
#TODO reference -> force fasta
class SVDetection (NG6Workflow):
def get_description(self):
    """Return the one-line description of this workflow."""
    return "This pipeline aims to detect structural variations in whole genomes."
def define_parameters(self, function="process"):
    """
    Declare the inputs and options of the workflow.

    Each input or option is registered exactly once: the BAM files, the
    reference genome, then the tuning parameters of the SV callers.

    :param function: name of the workflow function the parameters are defined for
    :type function: string
    """
    self.add_input_file_list("input_bam", "One bam file by individual/condition", file_format="bam", required=True)
    self.add_input_file("reference_genome", "On which genome should the reads being aligned on", required=True) # , file_format="fasta"
    self.add_parameter("chrom", "Studied chromosome (do not forget the chr prefix if there is one)", default="", type=str)
    self.add_parameter("bin_size", "Bin size for cnvnator: Depends on the deep of coverage. X5:500, x10:250, x20-30:100, x100:30", default=100, type=int)
    self.add_parameter("mean_insert", "mean insert size (PINDEL only)", type=int, required=True) #TODO use sample insert
    self.add_parameter("min_sup", "Minimum number of read supporting a SV (3) [Not available for cnvnator]", type=int, default=3)
    self.add_parameter("min_len", "Minimum length of a variant (50)", type=int, default=50)
    self.add_parameter("max_len", "Maximum length of a variant (8 500 000)", type=int, default=8500000) #TODO: compute the maximum as a percentage of the chromosome size
    self.add_parameter("min_mapq", "min. paired-end mapping quality", type=int, default=30)
    self.add_parameter("code", "One char code", type=str, default="X")
    #TODO: in the future, apply min_mapq right after the alignment
def process(self):
# Wire the workflow: run the Delly and Pindel components on the first read file
# of every sample, pass each output through the "Standardisation" component,
# then hand the resulting files to the "Analyzebed" component (added to the project).
# indiv_list = []
# for bam in self.input_bam:
# indiv_name = self.parse_read_groups(bam)
# indiv_list.append(indiv_name)
# results = []
# delly = self.add_component("Delly", [self.input_bam, self.reference_genome])
# delly = self.add_component("Standardisation", [delly.output_file, "delly", self.code, "delly.bed", self.min_len, self.max_len, indiv_list])
# breakdancer = self.add_component("Breakdancer", [self.input_bam, self.reference_genome, self.chrom, self.min_sup, self.min_mapq, self.max_len])
# breakdancer = self.add_component("Standardisation", [breakdancer.output_file, "breakdancer", self.code, "breakdancer.bed", self.min_len, self.max_len, .indiv_list])
# pindel = self.add_component("Pindel", kwargs={"input_bam":self.input_bam,"reference_genome": self.reference_genome,
# "output_file":"pindel.concat.out", "chrom":self.chrom, "mean_insert":self.mean_insert,
# "min_sup":self.min_sup, "indiv_list":indiv_list})
# pindel = self.add_component("Standardisation", [pindel.output_file, "pindel", self.code, "pindel.bed", self.min_len, self.max_len, indiv_list])
# results.append(delly.output_file)
# results.append(pindel.output_file)
# cnvnator = self.add_component("CnvNator", [self.input_bam, self.reference_genome, self.chrom, self.bin_size])
# NOTE(review): placeholder names, overwritten two statements below by the sample names.
indiv_list = ["BOB","BAB","BIB"]
input_bam = [spl.reads1[0] for spl in self.samples]
indiv_list = [spl.name for spl in self.samples]
# NOTE(review): this empty list is replaced by a new list before it is ever used.
results = []
delly = self.add_component("Delly", [input_bam, self.reference_genome])
# breakdancer = self.add_component("Breakdancer", [input_bam, self.reference_genome, self.chrom, self.min_sup, self.min_mapq, self.max_len])
pindel = self.add_component("Pindel", kwargs={"input_bam":input_bam,"reference_genome": self.reference_genome,
"output_file":"pindel.concat.out", "chrom":self.chrom, "mean_insert":self.mean_insert,
"min_sup":self.min_sup, "indiv_list":indiv_list})
# cnvnator = self.add_component("CnvNator", [input_bam, self.reference_genome, self.chrom, self.bin_size])
#=====TEST PURPOSE=====
# NOTE(review): hard-coded files under a developer's home directory are used below as the
# "cnvnator" input because the CnvNator component is commented out; input_name is not used
# anywhere in this method.
input_f = ["/home/yguarr/DataTest/example_cnvnator_raw.txt","/home/yguarr/DataTest/example2_cnvnator_raw.txt","/home/yguarr/DataTest/example3_cnvnator_raw.txt"]
input_name = ["SAMPLE1","SAMPLE2","SAMPLE3"]
#=====TEST PURPOSE=====
# From here on, delly / pindel / cnvnator refer to the Standardisation components.
delly = self.add_component("Standardisation", [delly.output_file, "delly", "delly.bed", self.min_len, self.max_len, indiv_list])
pindel = self.add_component("Standardisation", [pindel.output_file, "pindel", "pindel.bed", self.min_len, self.max_len, indiv_list])
# breakdancer = self.add_component("Standardisation", [breakdancer.output_file, "breakdancer", "breakdancer.bed", self.min_len, self.max_len, indiv_list, input_bam])
cnvnator = self.add_component("Standardisation", [input_f, "cnvnator", "cnvnator.bed", self.min_len, self.max_len, indiv_list])
# results=[cnvnator.output_file]
# self.add_component(component_name="Analyzebed", addto="project", args=[results])
results=[delly.output_file, pindel.output_file, cnvnator.output_file]#, breakdancer.output_file]#, pindel.output_file]
self.add_component(component_name="Analyzebed", addto="project", args=[results])
def post_process(self):
# Intentionally empty: this workflow performs no step of its own here.
pass
def parse_read_groups(self, bam_file):
    """
    Seek for the sample name in a bam file's header

    First: get the header of the bam file with samtools
    Second: keep the read group (@RG) lines
    Third: isolate the SM tag and check its uniqueness

    :param bam_file: a bam file containing only 1 sample
    :type bam_file: string (file path)
    :return: Sample name
    :rtype: string
    :raises SystemExit: when samtools fails, when no sample name is found or
                        when several different sample names are found

    :Example:
    >>> self.parse_read_groups("/home/user/RollingStones/Mick_Jagger_DNA.bam")
    "MJagger"

    :note: Sample name is expected to be alphanumeric only
    """
    import subprocess

    #STEP1: dump the header of the bam file
    samtools = subprocess.Popen(["samtools", "view", "-H", bam_file],
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    header, errors = samtools.communicate()
    if errors or samtools.returncode:
        if errors:
            print(errors.decode(errors="replace"))
        # sys.exit(message) ends with a non-zero status, unlike a bare exit()
        sys.exit("Something went wrong with samtools.\nExiting.")

    #STEP2 + STEP3: read the SM tag of every read group line
    sample_tag = re.compile(rb"SM:(\w+)(?:\s|$)")
    sample_name = None
    for line in header.splitlines():
        # Other header lines (e.g. @PG command lines) may contain "SM" by chance
        if not line.startswith(b"@RG"):
            continue
        match = sample_tag.search(line)
        if match is None:
            continue
        found = match.group(1).decode()
        if sample_name is not None and found != sample_name:
            error = "Several samples names detected. At least : {0} & {1}\nExiting."
            sys.exit(error.format(sample_name, found))
        sample_name = found

    if sample_name is None:
        error = "No samples names (SM) detected in read groups (@RG) of :\n{0}\nExiting."
        sys.exit(error.format(bam_file))
    return sample_name
......@@ -13,25 +13,23 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# from jflow.component import Component
from ng6.analysis import Analysis
from weaver.function import ShellFunction
from weaver.abstraction import Map
import os
from collections import Counter
import os.path
import numpy
#TODO also provide a boxplot without outliers
class Analyzebed (Analysis):
def define_parameters(self, files):
    """
    Declare the files handled by the analysis.

    Registers `files` as the "input_files" list and declares one
    "output_files" entry per input, named with the 'link2_{basename}' pattern.

    :param files: the files to analyse
    :type files: list of strings (file paths)
    """
    self.add_input_file_list( "input_files", "The files to be archived.", default=files, required=True )
    self.add_output_file_list( "output_files", "Link to the files.", pattern='link2_{basename}' , items=self.input_files )
def get_version(self):
    """Return the version label recorded for this analysis."""
    return "version X.5"
def define_analysis(self):
self.name = "Analyzebed"
......@@ -40,16 +38,67 @@ class Analyzebed (Analysis):
self.options = "options"
def post_process(self):
    """
    Compute the statistics of every output BED file and store them as results.

    For each file, the sample name is the file name without extension and the
    software name is what follows the first "_" of the sample name. Each line
    gives one SV: its size is column 3 minus column 2 and the number of
    ";"-separated members of column 8 tells in how many individuals it was found.

    Stored results (one set per sample, files without SV are skipped):
      - n_tag / n_count: for N from the largest number of individuals down to 1,
        the number of SV found in at least N individuals (";"-separated)
      - boxplot: lower whisker, lower quartile, median, upper quartile, upper whisker
      - outliers: every size outside of the whiskers (below as well as above)
      - median, mean, min, max, sd, number

    Finally all the output files are archived in "sv_list_by_soft.tar".
    """
    for bedfile in self.output_files:
        sample = os.path.splitext(os.path.basename(bedfile))[0]
        # [-1] keeps the whole name when it holds no "_" instead of failing
        soft = sample.split("_", 1)[-1]
        sizes = []
        members_by_sv = []
        with open(bedfile) as result_file:
            for line in result_file:
                fields = line.split()
                if not fields:
                    continue  # blank line
                sizes.append(int(fields[2]) - int(fields[1]))
                members_by_sv.append(len(fields[7].split(";")))
        if not sizes:
            continue

        #BARPLOT
        occurrences = Counter(members_by_sv)
        # Go down from the largest observed N: counting the distinct values
        # instead would drop the top categories when some N never occurs
        n_tag = list(range(max(occurrences), 0, -1))
        n_count = []
        total_n = 0
        for n in n_tag:
            total_n = total_n + occurrences[n]
            n_count.append(total_n)
        n_tag = self.js_prepross(n_tag)
        n_count = self.js_prepross(n_count)

        #BOXPLOT
        sizes_list = numpy.array(sizes)
        median = numpy.median(sizes_list)
        upper_quartile = numpy.percentile(sizes_list, 75)
        lower_quartile = numpy.percentile(sizes_list, 25)
        iqr = upper_quartile - lower_quartile
        upper_whisker = sizes_list[sizes_list <= upper_quartile + 1.5 * iqr].max()
        lower_whisker = sizes_list[sizes_list >= lower_quartile - 1.5 * iqr].min()
        # Both sides at once: numpy.append returns a new array, so appending the
        # low outliers without keeping the result silently lost them
        outliers = sizes_list[(sizes_list > upper_whisker) | (sizes_list < lower_whisker)]
        boxplot = self.js_prepross([lower_whisker, lower_quartile, median, upper_quartile, upper_whisker])
        outliers = self.js_prepross(outliers)

        #DATABASE
        self._add_result_element(sample, "soft", soft)
        self._add_result_element(sample, "n_tag", n_tag)
        self._add_result_element(sample, "n_count", n_count)
        self._add_result_element(sample, "outliers", outliers)
        self._add_result_element(sample, "boxplot", boxplot)
        self._add_result_element(sample, "median", median)
        self._add_result_element(sample, "mean", numpy.mean(sizes_list))
        self._add_result_element(sample, "min", min(sizes_list))
        self._add_result_element(sample, "max", max(sizes_list))
        self._add_result_element(sample, "sd", numpy.std(sizes_list))
        self._add_result_element(sample, "number", len(sizes_list))
    self._create_and_archive(self.output_files, "sv_list_by_soft.tar")
def process(self):
    """Create, for every input file, a symbolic link at the matching output path."""
    # One "ln -s <input> <output>" call per (input, output) pair
    symlink = ShellFunction(
        self.get_exec_path("ln") + " -s $1 $2",
        cmd_format='{EXE} {IN} {OUT}',
    )
    Map(symlink, inputs=self.input_files, outputs=self.output_files)
def js_prepross(self, my_list):
    """
    Serialise a list so that it can be split again on the javascript side.

    :param my_list: the values to serialise (any iterable)
    :return: the string form of every value, joined with ";"
    :rtype: string

    :Example:
    >>> self.js_prepross([3, 2, 1])
    "3;2;1"
    """
    return ";".join(str(value) for value in my_list)
\ No newline at end of file
......@@ -3,7 +3,7 @@ class SV_Group(object):
self.variants_list = [variant]
self.ambiguous = False
self.id = iD
self.start = None
self.start = None #TODO doc
self.end = None
variant.group.append(self)
......
......@@ -3,77 +3,66 @@
import re
import sys
import argparse
def get_grid(indiv_list):
def formate_support(support_by_library, soft, indiv_list, alt_names=[] ):
"""
the grid is used to relate one individual to a column number (in a table)
transform number of support by individual from native to table format
:param indiv_list: a list of individuals' name
:type indiv_list: list of string
:return: a grid with individuals' name as keys and individuals' position as values
:rtype: dictionary {'string':integer}
:Example:
>>> get_grid(["riri", "fifi", "loulou"])
{"riri":0, "fifi":1, "loulou":2}
"""
grid = {}
counter = 0
for indiv in indiv_list:
grid[indiv] = counter
counter += 1
return grid
def formate_support(support_by_library, soft, grid):
"""
transform number of support/individual from native to table format
for each individual get the name and support. Using the grid, update
for each individual get the name and support. Using the indiv_list, update
the corresponding column of the support table row.
:param support_by_library: original information of support/individual
:param soft: software used to generate the SV
:param grid: a grid with individuals' name as keys and individuals' position as values
:param indiv_list: a list of individuals' name
:param alt_names: list of bam files used for breakdancer to process names
:type support_by_library: list of strings
:type soft: string
:type grid: dictionary
:type indiv_list: list [strings]
:type alt_names: list of string (pathways)
:return: row of support table (1 individual/column)
:return: row of support table (1 individual/column + IDs)
:rtype: string
:Example:
>>> formate_support_breakdancer(['A.bam|2', 'B.bam|1', 'D.bam|1'], ['A.bam', 'B.bam', 'C.bam', 'D.bam'], 4)
"2 1 0 1"
>>> formate_support(['A.bam|2', 'B.bam|1', 'D.bam|1'], "breakdancer", my_indiv_list)
"A;B;D 2 1 0 1"
>>> formate_support(['SAMPLE1', '0', '4', '20', '20', '25', '25', 'SAMPLE2', '0', '0', '20', '20', '28', '28', 'SAMPLE3', '0', '4', '26', '26', '24', '24']
, "pindel", my_indiv_list)
'SAMPLE1;SAMPLE2;SAMPLE3 45 48 50'
>>> formate_support(["0/1:-27.8789,0,-43.8829:279:PASS:162:13:21:11:12", "0/1:-27.8789,0,-43.8829:279:PASS:162:13:21:11:12", "0/1:-27.8789,0,-43.8829:279:PASS:162:13:21:11:12"],
"delly", my_indiv_list)
'SAMPLE1;SAMPLE2;SAMPLE3 21+12 21+12 21+12'
:note: for delly support = PE + SR
"""
n = len(grid)
n = len(indiv_list)
detected_individuals = []
support_line = ["0"] * n
if soft == "breakdancer": #TODO
if soft == "breakdancer":
for i in support_by_library:
individual, support = i.split("|")
pos = alt_names.index(individual) # breakdancer use file paths instead of sample names
individual = indiv_list[pos] # so get the corresponding name
detected_individuals.append(individual)
pos = grid[individual]
support_line[pos] = str(support)
elif soft == "pindel":
for i in xrange(0, len(support_by_library), 7):
for i in range(0, len(support_by_library), 7):
individual=support_by_library[i]
upstream_support = int(support_by_library[i+4])