Commit 97a9b14f authored by Penom Nom's avatar Penom Nom

No commit message

No commit message
parent 496e0995
#
# Copyright (C) 2009 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
__author__ = 'Plateforme bioinformatique Midi Pyrenees'
__copyright__ = 'Copyright (C) 2009 INRA'
__license__ = 'GNU General Public License'
__version__ = '1.0'
__email__ = 'support.genopole@toulouse.inra.fr'
__status__ = 'beta'
from optparse import *
import os, sys, re, datetime
import shutil
# Sequences to collapse (key) and the text that replaces them (value).
CHARACTHERS_TO_REPLACE = {"__": "_", "--": "-", "_.": "."}


def replace_characthers(string):
    """
    Collapse the repeated separator sequences found in a string
    Only the sequences listed in the CHARACTHERS_TO_REPLACE table are
    considered; each one is substituted until it no longer occurs.
    @param string: the string to modify
    @return: the string with every listed sequence replaced
    """
    changed = True
    # Keep sweeping the table until a whole pass changes nothing, so the
    # result does not depend on the order in which the dict yields its rules.
    while changed:
        changed = False
        for sequence, replacement in CHARACTHERS_TO_REPLACE.items():
            # One str.replace() call only halves a long run ("____" -> "__"),
            # hence the inner loop.
            while sequence in string:
                string = string.replace(sequence, replacement)
                changed = True
    return string
def remove_group(pattern, name):
    """
    Remove from a name the text captured by the first group of a regexp
    @param pattern: the regular expression, applied with re.search
    @param name: the string to search
    @return: name without the span matched by group 1, or None when the
             pattern does not match or defines no capturing group
    """
    match = re.search(pattern, name)
    if match is None or match.re.groups < 1:
        return None
    return name[:match.start(1)] + name[match.end(1):]


def merge_key(basename, concat_regexp, pe_regexp=None):
    """
    Compute the key under which a raw file is grouped with its siblings
    The group 1 span of concat_regexp is removed first, then the group 1
    span of pe_regexp is removed from that result.
    @param basename: the file name to reduce
    @param concat_regexp: the regexp whose group 1 is removed first
    @param pe_regexp: optional regexp whose group 1 is removed next; when
                      empty or None only concat_regexp is applied
    @return: the reduced name, or basename unchanged if a supplied regexp
             does not match
    """
    key = remove_group(concat_regexp, basename)
    if key is not None and pe_regexp:
        key = remove_group(pe_regexp, key)
    if key is None:
        # A supplied pattern did not apply: keep the file in its own group.
        return basename
    return key


def collect_bam_groups(lines, concat_regexp=None, pe_regexp=None):
    """
    Group the .bam names listed in an iterator file
    Lines starting with "$;I_FILE_BASE$;" and blank lines are skipped.
    Each remaining line is split on whitespace: the third field is the
    input file path and the fifth field, suffixed with ".bam", is the
    name stored in the group.
    @param lines: an iterable over the lines of the iterator file
    @param concat_regexp: when set, lines are grouped by merge_key();
                          otherwise each input file name is its own key
    @param pe_regexp: forwarded to merge_key()
    @return: a dict mapping a group key to its list of .bam names
    """
    groups = {}
    for line in lines:
        line = line.strip()
        if not line or line.startswith("$;I_FILE_BASE$;"):
            continue
        parts = line.split()
        basename = os.path.basename(parts[2])
        final_name = parts[4] + ".bam"
        if concat_regexp:
            key = merge_key(basename, concat_regexp, pe_regexp)
            groups.setdefault(key, []).append(final_name)
        else:
            groups[basename] = [final_name]
    return groups


def main(argv=None):
    """
    Build the output iterator list from an iterator result file
    @param argv: the command line arguments; defaults to sys.argv[1:]
    @return: 0 on success
    """
    parser = OptionParser(usage="Usage: %prog")
    igroup = OptionGroup(parser, "options", "")
    igroup.add_option("-r", "--iterator-file", help="The iterator result file.", dest="iterator_file", type="string")
    igroup.add_option("-d", "--database", help="The database name used.", dest="database", type="string")
    igroup.add_option("-p", "--processed-file-list", help="The list of raw file processed.", dest="processed_file_list", type="string")
    igroup.add_option("-c", "--concat-regexp", help="The concat regexp to use to merge (ex for casava1_8 outputs, use *_*_*_*_(*).fastq.gz).", dest="concat_regexp", type="string")
    igroup.add_option("-e", "--pe-regexp", help="The pe regexp to use (ex for casava1_8 outputs, use *_*_*_(*)_*.fastq.gz).", dest="pe_regexp", type="string")
    igroup.add_option("-o", "--output-iterator-list", help="Where the output iterator list should be saved.", dest="output_list", type="string")
    parser.add_option_group(igroup)
    options, _ = parser.parse_args(argv)

    # These three values are used unconditionally below; fail with a usage
    # message instead of an obscure error on None.
    if not options.iterator_file:
        parser.error("the --iterator-file option is required")
    if not options.database:
        parser.error("the --database option is required")
    if not options.output_list:
        parser.error("the --output-iterator-list option is required")

    # The patterns are given glob-style: turn every "*" into ".*".
    if options.concat_regexp:
        options.concat_regexp = options.concat_regexp.replace("*", ".*")
    if options.pe_regexp:
        options.pe_regexp = options.pe_regexp.replace("*", ".*")

    with open(options.iterator_file) as iterator_handle:
        groups = collect_bam_groups(iterator_handle, options.concat_regexp, options.pe_regexp)

    # Read the processed file list once: base name -> first path listed
    # with that base name.
    processed = None
    if options.processed_file_list:
        processed = {}
        with open(options.processed_file_list) as processed_handle:
            for processed_path in processed_handle:
                processed_path = processed_path.strip()
                processed.setdefault(os.path.basename(processed_path), processed_path)

    d_basename = os.path.splitext(os.path.basename(options.database))[0]
    with open(options.output_list, "w") as outf:
        outf.write("$;PREFIX$;\t$;BAM_LIST$;\t$;DATABASE_PATH$;\t$;DATABASE_BASENAME$;\n")
        for name in groups:
            if processed is not None:
                # Names missing from the processed list are left out.
                bams = [processed[bam] for bam in groups[name] if bam in processed]
            else:
                bams = groups[name]
            # Every entry is followed by a space, the last one included.
            bam_list = "".join([bam + " " for bam in bams])
            prefix = os.path.splitext(replace_characthers(name))[0]
            outf.write(prefix + "\t" + bam_list + "\t" + options.database + "\t" + d_basename + "\n")
    return 0


if __name__ == '__main__':
    sys.exit(main())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment