Commit 2086c3ad authored by Maxime Manno 🍜
Browse files

Update ont_qc analysis: remove the number of channels, add N50 length

useful data, update visualisation 
parent c24bcf7e
......@@ -81,15 +81,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
{if $metrics['basic']}
<div class="col-md-4 col-lg-4">
<table id="ont_table_basic" class="table table-striped table-bordered dataTable">
<thead><tr><th colspan="2"><h4>Basic matrics report</h4></thead></tr></th>
<thead><tr><th colspan="2"><h4>Basic metrics report</h4></thead></tr></th>
<tbody>
{foreach from=$basic_headers key=k item=head}
<tr>
{if ($head == 'median_yield_per_sec') or ($head == 'mean_yield_per_sec')}
<td><label>{$head}</label></td>
{if ($head == 'median_yield_per_sec')}
<td><label>{$head|replace:'_':' '}</label></td>
<td>{($basic.$head)|number_format:2:',':' '}</td>
{else}
<td><label>{$head}</label></td>
<td><label>{$head|replace:'_':' '}</label></td>
<td>{($basic.$head)|number_format:0:',':' '}</td>
{/if}
</tr>
......@@ -106,14 +106,17 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
<tbody>
{foreach from=$quality_headers key=k item=head}
<tr>
{if ($head == 'median_read_quality') or ($head == 'mean_read_quality')}
<td><label>{$head}</label></td>
{if ($head == 'median_read_quality') or ($head == 'median read quality')}
<td><label>{$head|replace:'_':' '}</label></td>
<td>{($quality.$head)|number_format:2:',':' '}</td>
{elseif ($head == 'N50_read_length_utils') or ($head == 'N50_read_length_useful_data')}
<td><label>N50 read length useful data</label></td>
<td>{($quality.$head)|number_format:0:',':' '}
{elseif $head|strstr:"nb_read"}
<td><label>{$head}</label></td>
<td><label>{$head|replace:'_':' '|replace:'utils':'useful data'}</label></td>
<td>{($quality.$head)|number_format:0:',':' '} ({($quality.$head / $basic.nb_reads*100)|number_format:2:',':' '}%)</td>
{else}
<td><label>{$head}</label></td>
{elseif $head|strstr:"total_bases"}
<td><label>{$head|replace:'_':' '|replace:'utils':'useful data'}</label></td>
<td>{($quality.$head)|number_format:0:',':' '} ({($quality.$head / $basic.total_bases*100)|number_format:2:',':' '}%)</td>
{/if}
</tr>
......@@ -130,7 +133,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
<tbody>
{foreach from=$plots_headers key=k item=head}
<tr>
<td><label>{$head}</label></td>
<td><label>{$head|replace:'_':' '|replace:'utils':'useful data'}</label></td>
<td><a class="imglink" href="{$plots.$head}" ><i class="glyphicon glyphicon-picture"></i></a></td>
</tr>
{$data_col = $data_col + 1}
......@@ -138,7 +141,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
{if $barcode_name_count > 1 }
{foreach from=$plots_barcode_headers key=k item=head}
<tr>
<td><label>{$head}</label></td>
<td><label>{$head|replace:'_':' '|replace:'utils':'useful data'}</label></td>
<td><a class="imglink" href="{$plots_barcode.$head}" ><i class="glyphicon glyphicon-picture"></i></a></td>
</tr>
{$data_col = $data_col + 1}
......@@ -169,29 +172,25 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
{assign var="th_id" value=2}
{foreach from=$barcode_headers key=k item=head}
{if $head == 'total_bases'}
<th class = "numeric-sort" id="th_id_{$th_id}">Total_bases</th>
<th class = "numeric-sort" id="th_id_{$th_id}">Total bases</th>
{elseif $head == 'nb_reads'}
<th class = "numeric-sort" id="th_id_{$th_id}">Nb_reads</th>
<th class = "numeric-sort" id="th_id_{$th_id}">Nb reads</th>
{elseif $head == 'median_read_length'}
<th class = "numeric-sort" id="th_id_{$th_id}">Median read_length</th>
<th class = "numeric-sort" id="th_id_{$th_id}">Median read length</th>
{elseif $head == 'median_read_quality'}
<th class = "numeric-sort" id="th_id_{$th_id}">Median read_quality</th>
<th class = "numeric-sort" id="th_id_{$th_id}">Median read quality</th>
{elseif $head == 'N50_read_length'}
<th class = "numeric-sort" id="th_id_{$th_id}">N50 read_length</th>
<th class = "numeric-sort" id="th_id_{$th_id}">N50 read length</th>
{elseif $head == 'barcode_score'}
<th class = "numeric-sort" id="th_id_{$th_id}">Barcode_score</th>
{elseif $head == 'nb_read_Q>10'}
<th class = "numeric-sort" id="th_id_{$th_id}">Nb_reads Q>10</th>
{elseif $head == 'total_bases_Q>10'}
<th class = "numeric-sort" id="th_id_{$th_id}">Total_bases Q>10</th>
<th class = "numeric-sort" id="th_id_{$th_id}">Barcode score</th>
{elseif $head == 'median_yield_per_sec'}
<th class = "numeric-sort" id="th_id_{$th_id}">Median yield_per_sec</th>
{elseif $head == 'nb_actif_channel'}
<th class = "numeric-sort" id="th_id_{$th_id}">Nb actif_channel</th>
{elseif $head == 'nb_reads_utils'}
<th class = "numeric-sort" id="th_id_{$th_id}">Nb actif_channel</th>
{elseif $head == 'nb_bases_utils'}
<th class = "numeric-sort" id="th_id_{$th_id}">Nb actif_channel</th>
<th class = "numeric-sort" id="th_id_{$th_id}">Median yield per sec</th>
{elseif ($head == 'nb_read_utils') or ($head == 'nb_read_useful_data')}
<th class = "numeric-sort" id="th_id_{$th_id}">Nb reads useful data</th>
{elseif ($head == 'total_bases_utils') or ($head == 'total_bases_useful_data')}
<th class = "numeric-sort" id="th_id_{$th_id}">Total bases useful data</th>
{elseif ($head == 'N50_read_length_useful_data')}
<th class = "numeric-sort" id="th_id_{$th_id}">N50 read length useful data</th>
{/if}
{$th_id = $th_id +1}
{/foreach}
......@@ -221,17 +220,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{elseif $head == 'barcode_score'}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:1:',':' '}</th>
{elseif $head == 'nb_read_Q>10'}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{elseif $head == 'total_bases_Q>10'}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{elseif $head == 'median_yield_per_sec'}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{elseif $head == 'nb_actif_channel'}
{elseif ($head == 'nb_read_utils') or ($head == 'nb_read_useful_data')}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{elseif $head == 'nb_reads_utils'}
{elseif ($head == 'total_bases_utils') or ($head == 'total_bases_useful_data')}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{elseif $head == 'nb_bases_utils'}
{elseif ($head == 'N50_read_length_useful_data')}
<th id="sample_{$i}_col_{$col_id}">{($sample_results.$head)|number_format:0:',':' '}</th>
{/if}
......@@ -291,30 +286,26 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
<div class="tx-nG6-pi1-help">
<img src="" alt="" class="img" />
<p>Help for Basic metrics report :</p>
<span class="meta">
<ul>
<li><strong>nb_reads</strong> :
<li><strong>Nb reads</strong> :
The total number of reads for this Flow Cell (including the reads that are filtered during the basecalling).
</li>
<li><strong>total_bases</strong> :
<li><strong>Total bases</strong> :
The number of bases for this Flow Cell.
</li>
<li><strong>median_read_length</strong> :
The median length of the total reads (bases).
<li><strong>Median/Mean read length</strong> :
The median or mean length of the total reads (bases).
</li>
<li><strong>N50_read_length</strong> :
<li><strong>N50 read length</strong> :
50% of all bases come from reads longer than this value.
</li>
<li><strong>median_yield_per_sec</strong> :
<li><strong>Median yield per sec</strong> :
The median number of bases per second for the pores of this Flow Cell.
</li>
<li><strong>nb_actif_channel</strong> :
The number of activ channels during the sequencing.
</li>
</ul>
</span>
</div>
......@@ -323,18 +314,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
<p>Help for Quality report :</p>
<span class="meta">
<ul>
<li><strong>median_read_quality</strong> :
<li><strong>Median read quality</strong> :
The median quality (qscore ONT) for this Flow Cell.
</li>
<li><strong>nb_read_Q>N</strong> :
The total number of reads that have a quality > N.
</li>
<li><strong>total_bases_Q>N</strong> :
The total number of bases for the reads that have a quality > N.
</li>
<li><strong>nb_read/bases_utils</strong> :
<li><strong>Nb read/bases useful data</strong> :
The total number of reads/bases for the reads that have a quality > 7 and a length > 3000bp.
</li>
<li><strong>N50 read length useful data</strong> :
50% of all bases come from reads longer than this value for the reads that have a quality > 7 and a length > 3000bp.
</li>
</ul>
</span>
</div>
......@@ -344,26 +332,26 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
<p>Help for Plots report :</p>
<span class="meta">
<ul>
<li><strong>cumulyieldperhour</strong> :
<li><strong>Cumulate yield per hour</strong> :
The cumulate number of bases per hour for this Flow Cell.
</li>
<li><strong>outrm_</strong> :
<li><strong>outrm </strong> :
The outsiders have been removed.
</li>
<li><strong>outrm_distriblength</strong> :
<li><strong>Distribution length</strong> :
The length distribution of reads for this Flow Cell.
</li>
<li><strong>outrm_distribqscore</strong> :
<li><strong>Distribution qscore</strong> :
The quality (qscore ONT) distribution of reads for this Flow Cell.
</li>
<li><strong>outrm_lengthvsqscore_density</strong> :
<li><strong> Length vs qscore density</strong> :
The length and qscore distribution of reads with density for this Flow Cell.
</li>
{if $barcode_name_count > 1 }
<li><strong>qscoreboxplot</strong> :
<li><strong> Qscore boxplot</strong> :
Boxplots for each barcode of the mean qscore of reads for this Flow Cell.
</li>
<li><strong>qscorepertimeintervalsboxplot</strong> :
<li><strong> Qscore per time intervals boxplot</strong> :
Boxplots for each barcode of the mean qscore of reads per interval of time for this Flow Cell.
</li>
{/if}
......
......@@ -74,17 +74,11 @@ class Run_stats (Analysis):
if parts[0] == "N50_read_length": stats["N50_read_length"] = parts[1]
if parts[0] == "median_read_quality": stats["median_read_quality"] = parts[1]
if parts[0] == "mean_read_quality": stats["mean_read_quality"] = parts[1]
if parts[0] == "nb_read_Q>7": stats["nb_read_Q>7"] = parts[1]
if parts[0] == "total_bases_Q>7": stats["total_bases_Q>7"] = parts[1]
if parts[0] == "nb_read_Q>9": stats["nb_read_Q>9"] = parts[1]
if parts[0] == "total_bases_Q>9": stats["total_bases_Q>9"] = parts[1]
if parts[0] == "nb_read_Q>12": stats["nb_read_Q>12"] = parts[1]
if parts[0] == "total_bases_Q>12": stats["total_bases_Q>12"] = parts[1]
if parts[0] == "median_yield_per_sec": stats["median_yield_per_sec"] = parts[1]
if parts[0] == "mean_yield_per_sec": stats["mean_yield_per_sec"] = parts[1]
if parts[0] == "nb_actif_channel": stats["nb_actif_channel"] = parts[1]
if parts[0] == "nb_read_utils": stats["nb_read_utils"] = parts[1]
if parts[0] == "total_bases_utils": stats["total_bases_utils"] = parts[1]
if parts[0] == "nb_read_utils": stats["nb_read_useful_data"] = parts[1]
if parts[0] == "total_bases_utils": stats["total_bases_useful_data"] = parts[1]
if parts[0] == "N50_read_length_utils": stats["N50_read_length_useful_data"] = parts[1]
#print(stats)
return stats
......@@ -135,7 +129,7 @@ class Run_stats (Analysis):
group = 'basic'
metrics.append(group)
self._add_result_element("metrics", "headers", ','.join(["nb_reads", "total_bases", "median_read_length", "N50_read_length", "median_yield_per_sec", "nb_actif_channel"]), group)
self._add_result_element("metrics", "headers", ','.join(["nb_reads", "total_bases", "median_read_length","mean_read_length" , "N50_read_length", "median_yield_per_sec"]), group)
self._add_result_element(sample, "nb_reads", str(stat_info["nb_reads"]),group),
self._add_result_element(sample, "total_bases", str(stat_info["total_bases"]),group),
......@@ -144,38 +138,36 @@ class Run_stats (Analysis):
self._add_result_element(sample, "N50_read_length", str(stat_info["N50_read_length"]),group),
self._add_result_element(sample, "median_yield_per_sec", str(stat_info["median_yield_per_sec"]),group),
self._add_result_element(sample, "mean_yield_per_sec", str(stat_info["mean_yield_per_sec"]),group),
self._add_result_element(sample, "nb_actif_channel", str(stat_info["nb_actif_channel"]),group)
group = 'quality'
metrics.append(group)
self._add_result_element("metrics", "headers", ','.join(["median_read_quality", "nb_read_Q>9", "total_bases_Q>9", "nb_read_utils", "total_bases_utils"]), group)
self._add_result_element("metrics", "headers", ','.join(["median_read_quality","nb_read_useful_data", "total_bases_useful_data", "N50_read_length_useful_data"]), group)
self._add_result_element(sample, "median_read_quality", str(stat_info["median_read_quality"]),group),
self._add_result_element(sample, "mean_read_quality", str(stat_info["mean_read_quality"]),group),
self._add_result_element(sample, "nb_read_Q>7", str(stat_info["nb_read_Q>7"]),group),
self._add_result_element(sample, "total_bases_Q>7", str(stat_info["total_bases_Q>7"]),group),
self._add_result_element(sample, "nb_read_Q>9", str(stat_info["nb_read_Q>9"]),group),
self._add_result_element(sample, "total_bases_Q>9", str(stat_info["total_bases_Q>9"]),group),
self._add_result_element(sample, "nb_read_Q>12", str(stat_info["nb_read_Q>12"]),group),
self._add_result_element(sample, "total_bases_Q>12", str(stat_info["total_bases_Q>12"]),group),
self._add_result_element(sample, "nb_read_utils", str(stat_info["nb_read_utils"]),group),
self._add_result_element(sample, "total_bases_utils", str(stat_info["total_bases_utils"]),group),
self._add_result_element(sample, "nb_read_useful_data", str(stat_info["nb_read_useful_data"]),group),
self._add_result_element(sample, "total_bases_useful_data", str(stat_info["total_bases_useful_data"]),group),
self._add_result_element(sample, "N50_read_length_useful_data", str(stat_info["N50_read_length_useful_data"]),group),
group = 'plots'
metrics.append(group)
self._add_result_element("metrics", "headers", ','.join(["cumulyieldperhour", "outrm_distriblength", "outrm_distribqscore", "outrm_lengthvsqscore_density"]), group)
self._add_result_element("metrics", "headers", ','.join(["cumulate_yield_per_hour", "distribution_length", "distribution_length_useful_data", "distribution_qscore", "length_vs_qscore_density"]), group)
if os.path.isfile(os.path.join(self.output_directory, "plot_cumulyieldperhour.png")):
self._add_result_element(sample, "cumulyieldperhour", self._save_file(os.path.join(self.output_directory, "plot_cumulyieldperhour.png"),
self._add_result_element(sample, "cumulate_yield_per_hour", self._save_file(os.path.join(self.output_directory, "plot_cumulyieldperhour.png"),
sample + ".cumulyieldperhour.png"), group)
results_files.append(os.path.join(self.output_directory, "plot_cumulyieldperhour.png"))
if os.path.isfile(os.path.join(self.output_directory, "plot_outrm_distriblength.png")):
self._add_result_element(sample, "outrm_distriblength", self._save_file(os.path.join(self.output_directory, "plot_outrm_distriblength.png"),
self._add_result_element(sample, "distribution_length", self._save_file(os.path.join(self.output_directory, "plot_outrm_distriblength.png"),
sample + ".outrm_distriblength.png"), group)
results_files.append(os.path.join(self.output_directory, "plot_outrm_distriblength.png"))
if os.path.isfile(os.path.join(self.output_directory, "plot_outrm_distriblength_utils.png")):
self._add_result_element(sample, "distribution_length_useful_data", self._save_file(os.path.join(self.output_directory, "plot_outrm_distriblength_utils.png"),
sample + ".outrm_distriblength_utils.png"), group)
results_files.append(os.path.join(self.output_directory, "plot_outrm_distriblength_utils.png"))
if os.path.isfile(os.path.join(self.output_directory, "plot_outrm_distribqscore.png")):
self._add_result_element(sample, "outrm_distribqscore", self._save_file(os.path.join(self.output_directory, "plot_outrm_distribqscore.png"),
self._add_result_element(sample, "distribution_qscore", self._save_file(os.path.join(self.output_directory, "plot_outrm_distribqscore.png"),
sample + ".outrm_distribqscore.png"), group)
results_files.append(os.path.join(self.output_directory, "plot_outrm_distribqscore.png"))
if os.path.isfile(os.path.join(self.output_directory, "plot_outrm_lengthvsqscore_density.png")):
self._add_result_element(sample, "outrm_lengthvsqscore_density", self._save_file(os.path.join(self.output_directory, "plot_outrm_lengthvsqscore_density.png"),
self._add_result_element(sample, "length_vs_qscore_density", self._save_file(os.path.join(self.output_directory, "plot_outrm_lengthvsqscore_density.png"),
sample + ".outrm_lengthvsqscore_density.png"), group)
results_files.append(os.path.join(self.output_directory, "plot_outrm_lengthvsqscore_density.png"))
......@@ -186,7 +178,7 @@ class Run_stats (Analysis):
group = 'barcode'
metrics.append(group)
self._add_result_element("metrics", "headers", ','.join(["barcode_score","nb_reads","total_bases","median_read_length","N50_read_length","median_read_quality","nb_read_Q>9","total_bases_Q>9","median_yield_per_sec","nb_actif_channel","nb_read_utils","total_bases_utils"]), group)
self._add_result_element("metrics", "headers", ','.join(["barcode_score","nb_reads","total_bases","median_read_length","mean_read_length", "N50_read_length","median_read_quality","median_yield_per_sec","nb_read_useful_data","total_bases_useful_data","N50_read_length_useful_data"]), group)
self._add_result_element("metrics", "names", ','.join(barcode_names),group)
for barcode in barcode_names :
......@@ -196,26 +188,25 @@ class Run_stats (Analysis):
self._add_result_element(sample, "nb_reads", str(barcode_info[barcode][2]),group),
self._add_result_element(sample, "total_bases", str(barcode_info[barcode][3]),group),
self._add_result_element(sample, "median_read_length", str(barcode_info[barcode][4]),group),
self._add_result_element(sample, "mean_read_length", str(barcode_info[barcode][5]),group),
self._add_result_element(sample, "N50_read_length", str(barcode_info[barcode][6]),group),
self._add_result_element(sample, "median_read_quality", str(barcode_info[barcode][7]),group),
self._add_result_element(sample, "nb_read_Q>9", str(barcode_info[barcode][11]),group),
self._add_result_element(sample, "total_bases_Q>9", str(barcode_info[barcode][12]),group),
self._add_result_element(sample, "median_yield_per_sec", str(barcode_info[barcode][15]),group),
self._add_result_element(sample, "nb_actif_channel", str(barcode_info[barcode][17]),group),
self._add_result_element(sample, "nb_read_utils", str(barcode_info[barcode][18]),group),
self._add_result_element(sample, "total_bases_utils", str(barcode_info[barcode][19]),group),
self._add_result_element(sample, "median_yield_per_sec", str(barcode_info[barcode][9]),group),
self._add_result_element(sample, "nb_read_useful_data", str(barcode_info[barcode][11]),group),
self._add_result_element(sample, "total_bases_useful_data", str(barcode_info[barcode][12]),group),
self._add_result_element(sample, "N50_read_length_useful_data", str(barcode_info[barcode][13]),group),
group = 'plots_barcode'
metrics.append(group)
self._add_result_element("metrics", "headers", ','.join(["qscoreboxplot", "qscorepertimeintervalsboxplot"]), group)
self._add_result_element("metrics", "headers", ','.join(["qscore_boxplot", "qscore_per_time_intervals_boxplot"]), group)
if os.path.isfile(os.path.join(self.output_directory, "plot_barcoded_qscoreboxplot.png")):
self._add_result_element(sample, "qscoreboxplot", self._save_file(os.path.join(self.output_directory, "plot_barcoded_qscoreboxplot.png"),
self._add_result_element(sample, "qscore_boxplot", self._save_file(os.path.join(self.output_directory, "plot_barcoded_qscoreboxplot.png"),
sample + ".barcoded_qscoreboxplot.png"), group)
results_files.append(os.path.join(self.output_directory, "plot_barcoded_qscoreboxplot.png"))
if os.path.isfile(os.path.join(self.output_directory, "plot_barcoded_qscorepertimeintervalsboxplot.png")):
self._add_result_element(sample, "qscorepertimeintervalsboxplot", self._save_file(os.path.join(self.output_directory, "plot_barcoded_qscorepertimeintervalsboxplot.png"),
self._add_result_element(sample, "qscore_per_time_intervals_boxplot", self._save_file(os.path.join(self.output_directory, "plot_barcoded_qscorepertimeintervalsboxplot.png"),
sample + ".barcoded_qscorepertimeintervalsboxplot.png"), group)
results_files.append(os.path.join(self.output_directory, "plot_barcoded_qscorepertimeintervalsboxplot.png"))
......@@ -227,14 +218,14 @@ class Run_stats (Analysis):
#print (self.sequencing_summary_file)
self.add_shell_execution(self.get_exec_path("Rscript") +" /save/sbsuser/scripts-ngs/current/ont/graph_albacoresummary.R " +' -f '+ '$1' +' --out ' + self.output_directory + " 2> " +' $2',
self.add_shell_execution(self.get_exec_path("Rscript") +" /work/sbsuser/test/mmanno/scripts_ngs/graph_albacoresummary.R " +' -f '+ '$1' +' --out ' + self.output_directory + " 2> " +' $2',
cmd_format='{EXE} {IN} {OUT}' ,
map=False,
inputs = self.sequencing_summary_file,
outputs = self.stderr)
if self.barcoded == "yes" :
self.add_shell_execution(self.get_exec_path("Rscript") +" /save/sbsuser/scripts-ngs/current/ont/graph_albacoresummary_barcode.R " +' -f '+ '$1' +' --out ' + self.output_directory + " 2> " +' $2',
self.add_shell_execution(self.get_exec_path("Rscript") +" /work/sbsuser/test/mmanno/scripts_ngs/graph_albacoresummary_barcode.R " +' -f '+ '$1' +' --out ' + self.output_directory + " 2> " +' $2',
cmd_format='{EXE} {IN} {OUT}' ,
map=False,
inputs = self.sequencing_summary_file,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment