Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
genotoul-bioinfo
ng6
Commits
dc70596f
Commit
dc70596f
authored
Jun 09, 2015
by
Penom Nom
Browse files
correction import highchart. code cleaning
parent
9b7ddd8c
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/ng6/ng6workflow.py
View file @
dc70596f
...
...
@@ -22,6 +22,7 @@ import sys
import
pickle
import
datetime
import
argparse
import
xml.etree.ElementTree
as
ET
from
jflow.workflow
import
Workflow
from
jflow.utils
import
display_error_message
...
...
@@ -240,31 +241,62 @@ class NG6Workflow (BasicNG6Workflow):
elif
self
.
project
:
self
.
project
.
sync
()
def
get_files_from_casava
(
casava_directory
,
lane_number
,
project_name
):
def
get_files_from_casava
(
casava_directory
,
project_name
,
lane_number
):
"""
Retrieve all fastq files of a specific project and lane number from a given casava directory
@param casava_directory : path to CASAVA output directory
@param project_name : project name
@param lane_number : lane number
"""
files
=
[]
with
open
(
os
.
path
.
join
(
casava_directory
,
"SampleSheet.mk"
))
as
fh
:
barcodes_list
=
[]
sample_ids_list
=
[]
subdirs_list
=
[]
for
line
in
fh
:
if
line
.
startswith
(
"l"
+
str
(
lane_number
)
+
"_SUBDIRS"
):
parts
=
line
.
strip
().
split
(
":="
)
subdirs_list
=
parts
[
1
].
split
(
" "
)
# parse samples
for
subdir
in
subdirs_list
:
# filter on project name
if
re
.
match
(
"Project_"
+
project_name
+
"/Sample_.+"
,
subdir
)
or
subdir
.
startswith
(
"Undetermined_indices"
):
for
file
in
os
.
listdir
(
casava_directory
+
"/"
+
subdir
):
filepath
=
casava_directory
+
"/"
+
subdir
+
"/"
+
file
if
file
.
endswith
(
".fastq.gz"
)
and
re
.
search
(
".*_L00"
+
str
(
lane_number
)
+
"_.*"
,
file
):
files
.
append
(
filepath
);
return
files
def bcl2fastq_18(directory, pname, lane):
    """bcl2fastq <= 1.8

    Retrieve the fastq files of a project for one lane, using the
    SampleSheet.mk file located at the root of the CASAVA output directory.
    @param directory : path to CASAVA output directory
    @param pname : project name, matched literally against the Project_<name> subdirectories
    @param lane : lane number
    @return : list of paths of the *.fastq.gz files of this lane found in the
              project sample subdirectories and in the Undetermined_indices ones
    """
    lane_key = "l" + str(lane) + "_SUBDIRS"
    subdirs_list = []
    with open(os.path.join(directory, "SampleSheet.mk")) as fh:
        for line in fh:
            if line.startswith(lane_key):
                # everything after ':=' is a whitespace separated list of
                # subdirectories; a line without ':=' yields an empty list
                subdirs_list = line.strip().partition(":=")[2].split()

    # the project name is escaped so that regexp metacharacters it may contain
    # (e.g. '.') cannot make another project match
    project_regexp = re.compile("Project_" + re.escape(pname) + "/Sample_.+")
    lane_tag = "_L00" + str(lane) + "_"

    files = []
    # parse samples
    for subdir in subdirs_list:
        # filter on project name
        if not (project_regexp.match(subdir) or subdir.startswith("Undetermined_indices")):
            continue
        for filename in os.listdir(directory + "/" + subdir):
            if filename.endswith(".fastq.gz") and lane_tag in filename:
                # plain concatenation is kept on purpose: _process_casava_18
                # rebuilds these paths the same way and compares them as strings
                files.append(directory + "/" + subdir + "/" + filename)
    return files
def bcl2fastq_216(directory, pname, lane):
    """bcl2fastq >= 1.9

    Retrieve the fastq files of a project for one lane, using the
    Stats/DemultiplexingStats.xml report located in the output directory.
    @param directory : path to the bcl2fastq output directory
    @param pname : project name, as declared in DemultiplexingStats.xml; the
                   project files are looked for in <directory>/<pname>
    @param lane : lane number (an integer or its string representation)
    @return : list of paths of the project files named <sample>_S<n>_L<lane>_*,
              empty if the project or the lane is not found in the report
    """
    files = []
    tree = ET.parse(os.path.join(directory, 'Stats', 'DemultiplexingStats.xml'))
    project = tree.getroot().find(".//Project[@name='%s']" % pname)
    if project is None:
        return files
    try:
        wanted_lane = int(lane)
    except (TypeError, ValueError):
        # not a lane number, nothing can match
        return files

    project_files = os.listdir(directory + "/" + pname)
    seen = set()
    for sample in project.findall("./Sample"):
        sample_name = sample.get('name')
        # the 'all' entries are aggregates, not real samples/barcodes
        if sample_name is None or sample_name == 'all':
            continue
        # a barcode holds one <Lane> element per lane: all of them have to be
        # inspected, not only the first one
        sample_lanes = set()
        for barcode in sample.findall('./Barcode'):
            if barcode.get('name') == 'all':
                continue
            for lane_node in barcode.findall('Lane'):
                sample_lanes.add(int(lane_node.get('number')))
        if wanted_lane not in sample_lanes:
            continue
        # sample name is matched literally; the sample number may have several digits
        fileregexp = re.compile(r'%s_S\d+_L%03d_' % (re.escape(sample_name), wanted_lane))
        for pfile in project_files:
            if fileregexp.match(pfile) and pfile not in seen:
                # a sample with several barcodes must not yield its files twice
                seen.add(pfile)
                files.append(os.path.join(directory, pname, pfile))
    return files
if
os
.
path
.
exists
(
os
.
path
.
join
(
casava_directory
,
"SampleSheet.mk"
))
:
return
bcl2fastq_18
(
casava_directory
,
project_name
,
lane_number
)
elif
os
.
path
.
exists
(
os
.
path
.
join
(
casava_directory
,
'Stats'
,
'DemultiplexingStats.xml'
))
:
return
bcl2fastq_216
(
casava_directory
,
project_name
,
lane_number
)
class
CasavaNG6Workflow
(
NG6Workflow
):
...
...
@@ -299,7 +331,7 @@ class CasavaNG6Workflow(NG6Workflow):
def
__create_samples__
(
self
):
"""
Parse SampleSheet.mk file from a casava directory and return a list of Sample objects
Creat
e
s
ample
s object
from a casava directory
if provided
@param casava_directory : path to CASAVA output directory
@param lane_number : files in each sample are sequenced on this lane
"""
...
...
@@ -310,72 +342,23 @@ class CasavaNG6Workflow(NG6Workflow):
lane_number
=
self
.
casava
[
"lane"
]
all_samples
,
all_samples_id
=
[],
[]
# open casava samplesheet again to associate our files with a sample
with
open
(
os
.
path
.
join
(
casava_directory
,
"SampleSheet.mk"
))
as
fh
:
barcodes_list
=
[]
sample_ids_list
=
[]
subdirs_list
=
[]
for
line
in
fh
:
if
line
.
startswith
(
"l"
+
str
(
lane_number
)
+
"_BARCODES"
):
parts
=
line
.
strip
().
split
(
":="
)
barcodes_list
=
[
re
.
sub
(
r
"[-_\s]+"
,
""
,
x
)
for
x
in
parts
[
1
].
split
()
]
elif
line
.
startswith
(
"l"
+
str
(
lane_number
)
+
"_SAMPLEIDS"
):
parts
=
line
.
strip
().
split
(
":="
)
sample_ids_list
=
parts
[
1
].
split
(
" "
)
elif
line
.
startswith
(
"l"
+
str
(
lane_number
)
+
"_SUBDIRS"
):
parts
=
line
.
strip
().
split
(
":="
)
subdirs_list
=
parts
[
1
].
split
(
" "
)
assert
len
(
barcodes_list
)
==
len
(
sample_ids_list
)
==
len
(
subdirs_list
),
"Invalid lane {0} in SampleSheet.mk"
.
format
(
lane_number
)
# get the casava project_name
if
self
.
casava
[
"project"
]
:
project_name
=
self
.
casava
[
"project"
]
else
:
project_name
=
self
.
project_name
project_name
=
project_name
.
replace
(
" "
,
"_"
)
input_files
=
casava_directory
.
get_files
(
lane_number
,
project_name
)
if
len
(
input_files
)
==
0
:
raise
Exception
(
"Error while parsing casava directory %s, invalid project name '%s' for lane %s"
%
(
casava_directory
,
project_name
,
lane_number
))
# parse samples
for
i
in
range
(
len
(
barcodes_list
)):
sample
=
{
'barcode'
:
barcodes_list
[
i
],
'sample_id'
:
sample_ids_list
[
i
],
'subdir'
:
subdirs_list
[
i
],
'reads1'
:
[],
'reads2'
:
[]
}
# filter on project name
if
re
.
match
(
"Project_"
+
project_name
+
"/Sample_.+"
,
sample
[
'subdir'
])
or
sample
[
'subdir'
].
startswith
(
"Undetermined_indices"
):
for
file
in
os
.
listdir
(
casava_directory
+
"/"
+
sample
[
'subdir'
]):
filepath
=
casava_directory
+
"/"
+
sample
[
'subdir'
]
+
"/"
+
file
if
file
.
endswith
(
".fastq.gz"
)
and
re
.
search
(
".*_L00"
+
str
(
lane_number
)
+
"_.*"
,
file
):
for
idx
,
iofile
in
enumerate
(
input_files
)
:
if
iofile
==
filepath
:
if
re
.
search
(
".*_R1_.*"
,
file
):
if
not
sample
[
'subdir'
].
startswith
(
"Undetermined_indices"
):
sample
[
'reads1'
].
append
(
iofile
)
else
:
self
.
undetermined_reads1
.
append
(
iofile
)
if
re
.
search
(
".*_R2_.*"
,
file
):
if
not
sample
[
'subdir'
].
startswith
(
"Undetermined_indices"
):
sample
[
'reads2'
].
append
(
iofile
)
else
:
self
.
undetermined_reads2
.
append
(
iofile
)
input_files
.
pop
(
idx
)
break
if
not
sample
[
'subdir'
].
startswith
(
"Undetermined_indices"
)
:
sp_object
=
Sample
(
sample
[
'barcode'
],
sample
[
'reads1'
],
reads2
=
sample
[
'reads2'
],
name
=
sample
[
'sample_id'
])
sp_object
.
add_metadata
(
'barcode'
,
sample
[
'barcode'
])
sp_object
.
add_metadata
(
'is_casava'
,
True
)
all_samples
.
append
(
sp_object
)
all_samples_id
.
append
(
sample
[
'sample_id'
])
# get the casava project_name
if
self
.
casava
[
"project"
]
:
project_name
=
self
.
casava
[
"project"
]
else
:
project_name
=
self
.
project_name
project_name
=
project_name
.
replace
(
" "
,
"_"
)
input_files
=
casava_directory
.
get_files
(
project_name
,
lane_number
)
if
len
(
input_files
)
==
0
:
raise
Exception
(
"Error while parsing casava directory %s, invalid project name '%s' for lane %s"
%
(
casava_directory
,
project_name
,
lane_number
))
all_samples
,
all_samples_id
=
[],
[]
if
os
.
path
.
exists
(
os
.
path
.
join
(
casava_directory
,
"SampleSheet.mk"
))
:
all_samples
,
all_samples_id
=
self
.
_process_casava_18
(
casava_directory
,
project_name
,
lane_number
,
input_files
)
elif
os
.
path
.
exists
(
os
.
path
.
join
(
casava_directory
,
'Stats'
,
'DemultiplexingStats.xml'
))
:
all_samples
,
all_samples_id
=
self
.
_process_casava_216
(
casava_directory
,
project_name
,
lane_number
,
input_files
)
selected_samples
=
self
.
casava
[
'select_sample_id'
]
if
selected_samples
:
...
...
@@ -395,6 +378,84 @@ class CasavaNG6Workflow(NG6Workflow):
NG6Workflow
.
__preprocess_samples__
(
self
)
if
self
.
is_casava
:
self
.
group_prefix
=
(
Utils
.
get_group_basenames
(
self
.
get_all_reads
(),
"read"
)).
keys
()
def
_process_casava_18
(
self
,
casava_directory
,
project_name
,
lane_number
,
input_files
):
"""
Creates samples from casavadir (<=1.8) using input files
@param casava_directory:
@param project_name:
@param lane_number:
@param input_files:
"""
all_samples
=
[]
all_samples_id
=
[]
# open casava samplesheet again to associate our files with a sample
with
open
(
os
.
path
.
join
(
casava_directory
,
"SampleSheet.mk"
))
as
fh
:
barcodes_list
=
[]
sample_ids_list
=
[]
subdirs_list
=
[]
for
line
in
fh
:
if
line
.
startswith
(
"l"
+
str
(
lane_number
)
+
"_BARCODES"
):
parts
=
line
.
strip
().
split
(
":="
)
barcodes_list
=
[
re
.
sub
(
r
"[-_\s]+"
,
""
,
x
)
for
x
in
parts
[
1
].
split
()
]
elif
line
.
startswith
(
"l"
+
str
(
lane_number
)
+
"_SAMPLEIDS"
):
parts
=
line
.
strip
().
split
(
":="
)
sample_ids_list
=
parts
[
1
].
split
(
" "
)
elif
line
.
startswith
(
"l"
+
str
(
lane_number
)
+
"_SUBDIRS"
):
parts
=
line
.
strip
().
split
(
":="
)
subdirs_list
=
parts
[
1
].
split
(
" "
)
assert
len
(
barcodes_list
)
==
len
(
sample_ids_list
)
==
len
(
subdirs_list
),
"Invalid lane {0} in SampleSheet.mk"
.
format
(
lane_number
)
# parse samples
for
i
in
range
(
len
(
barcodes_list
)):
sample
=
{
'barcode'
:
barcodes_list
[
i
],
'sample_id'
:
sample_ids_list
[
i
],
'subdir'
:
subdirs_list
[
i
],
'reads1'
:
[],
'reads2'
:
[]
}
# filter on project name
if
re
.
match
(
"Project_"
+
project_name
+
"/Sample_.+"
,
sample
[
'subdir'
])
or
sample
[
'subdir'
].
startswith
(
"Undetermined_indices"
):
for
file
in
os
.
listdir
(
casava_directory
+
"/"
+
sample
[
'subdir'
]):
filepath
=
casava_directory
+
"/"
+
sample
[
'subdir'
]
+
"/"
+
file
if
file
.
endswith
(
".fastq.gz"
)
and
re
.
search
(
".*_L00"
+
str
(
lane_number
)
+
"_.*"
,
file
):
for
idx
,
iofile
in
enumerate
(
input_files
)
:
if
iofile
==
filepath
:
if
re
.
search
(
".*_R1_.*"
,
file
):
if
not
sample
[
'subdir'
].
startswith
(
"Undetermined_indices"
):
sample
[
'reads1'
].
append
(
iofile
)
else
:
self
.
undetermined_reads1
.
append
(
iofile
)
if
re
.
search
(
".*_R2_.*"
,
file
):
if
not
sample
[
'subdir'
].
startswith
(
"Undetermined_indices"
):
sample
[
'reads2'
].
append
(
iofile
)
else
:
self
.
undetermined_reads2
.
append
(
iofile
)
input_files
.
pop
(
idx
)
break
if
not
sample
[
'subdir'
].
startswith
(
"Undetermined_indices"
)
:
sp_object
=
Sample
(
sample
[
'barcode'
],
sample
[
'reads1'
],
reads2
=
sample
[
'reads2'
],
name
=
sample
[
'sample_id'
])
sp_object
.
add_metadata
(
'barcode'
,
sample
[
'barcode'
])
sp_object
.
add_metadata
(
'is_casava'
,
True
)
all_samples
.
append
(
sp_object
)
all_samples_id
.
append
(
sample
[
'sample_id'
])
return
all_samples
,
all_samples_id
def
_process_casava_216
(
self
,
casava_directory
,
project_name
,
lane_number
,
input_files
):
"""
Creates samples from casavadir (>=1.9) using input files
@param casava_directory:
@param project_name:
@param lane_number:
@param input_files:
"""
raise
NotImplementedError
def
illumina_process
(
self
):
if
self
.
is_casava
:
...
...
ui/nG6/pi6/class.tx_nG6_pi6.php
View file @
dc70596f
...
...
@@ -60,9 +60,9 @@ class tx_nG6_pi6 extends tslib_pibase {
// Add the jquery libs + the tree plugins and its css
$GLOBALS
[
'TSFE'
]
->
additionalHeaderData
[
$this
->
prefixId
]
=
'
<script type="text/javascript" src="'
.
t3lib_extMgm
::
siteRelPath
(
$this
->
extKey
)
.
'res/js/jquery.min.js"></script>
<script type="text/javascript" src="'
.
t3lib_extMgm
::
siteRelPath
(
$this
->
extKey
)
.
'res/js/highstock.js"></script>
<script type="text/javascript" src="'
.
t3lib_extMgm
::
siteRelPath
(
$this
->
extKey
)
.
'res/js/jquery.highcharts.exporting.js"></script>
<script type="text/javascript" src="'
.
t3lib_extMgm
::
siteRelPath
(
$this
->
extKey
)
.
'res/js/jquery.dataTables.min.js"></script>
<script type="text/javascript" src="'
.
t3lib_extMgm
::
siteRelPath
(
$this
->
extKey
)
.
'res/js/highstock.js"></script>
<script type="text/javascript" src="'
.
t3lib_extMgm
::
siteRelPath
(
$this
->
extKey
)
.
'res/js/bootstrap.min.js"></script>
<script type="text/javascript" src="'
.
t3lib_extMgm
::
siteRelPath
(
$this
->
extKey
)
.
'res/js/dataTables.bootstrap.js"></script>
<script type="text/javascript" src="'
.
t3lib_extMgm
::
siteRelPath
(
$this
->
extKey
)
.
'res/js/typeahead.bundle.js"></script>
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment