Commit 3e38a2d8 authored by Jerome Mariette's avatar Jerome Mariette

First import

parents
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>nG6-project</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.python.pydev.PyDevBuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.python.pydev.pythonNature</nature>
</natures>
</projectDescription>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?eclipse-pydev version="1.0"?>
<pydev_project>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/nG6-project/core</path>
</pydev_pathproperty>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
</pydev_project>
#!/usr/local/bioinfo/bin/python2.5
from optparse import *
import os
import sys
import numpy as np
from pylab import *
import gzip
try:
from Bio.Sequencing import Ace
except:
print "Import Bio.Sequencing.Ace : Biopython is required to use ace_statistics script !"
try:
import matplotlib.pyplot as plt
except:
print "Import Matplot: Matplot is required to use ace_statistics script !"
"""
AS <number of contigs> <total number of reads in ace file>
CO <contig name> <#bases> <#reads> <#base segments> <U or C>
lines of sequence data
BQ
lines of sequence quality data
AF <read name> <C or U> <padded start consensus position>
BS <start position> <end position> <read name>
RD <read name> <# of padded bases> <# of read info items> <# of read tags>
QA <start> <end> <align start> <align end>
DS CHROMAT_FILE: <name > PHD_FILE: <name > TIME: <date/time phd file>
WR { <tag type> <program > <YYMMDD:HHMMSS> }
RT{ <tag type> <program > < start> <end> <YYMMDD:HHMMSS> }
CT{ <contig name> <tag type> <program> <start> <end> <YYMMDD> (info) }
WA{ <tag type> <program> <YYMMDD:HHMMSS> 1 or more lines of data }
"""
if __name__ == '__main__':
    # Command-line entry point: read an ACE assembly file and write, into the
    # output directory, a per-contig table (<input>.stat), two PNG plots
    # (<input>.length.png, <input>.prof.png) and two text summaries
    # (<input>.length.summary, <input>.prof.summary).
    parser = OptionParser(usage="Usage: ace_statistics.py -i FILE -o DIRECTORY")
    parser.add_option("-i", "--input", dest="input",
                      help="The input ace file", metavar="FILE")
    parser.add_option("-o", "--output", dest="output",
                      help="The output directory", metavar="DIRECTORY")
    (options, args) = parser.parse_args()

    if options.input is None or options.output is None:
        parser.print_help()
        sys.exit(1)

    # Every output file shares this prefix: <output dir>/<input basename>.
    out_prefix = os.path.join(options.output, os.path.basename(options.input))

    # Parse the ACE file first so that no empty output file is left behind
    # when the input cannot be read.
    try:
        if options.input.endswith(".gz"):
            # gzip.open() already returns a readable handle; it must not be
            # wrapped in open() again.
            ace_handle = gzip.open(options.input)
        else:
            ace_handle = open(options.input, 'r')
        try:
            acefilerecord = Ace.read(ace_handle)
        finally:
            ace_handle.close()
    except (IOError, OSError, ValueError):
        print("erreur dans l'ouverture du fichier " + options.input + " \n")
        sys.exit(1)

    try:
        stat_file = open(out_prefix + ".stat", 'w')
    except IOError:
        print("erreur dans l'ouverture du fichier " + str(out_prefix) + ".stat \n")
        sys.exit(1)

    contig_len = []    # number of bases of each contig
    sum_len_read = []  # summed read lengths of each contig
    prof = []          # depth of each contig: summed read length / contig length
    try:
        stat_file.write("#id_contig\tcontig_len\tnb_read\tsum_len_read\n")
        for contig in acefilerecord.contigs:
            read_sum = sum([len(read.rd.sequence) for read in contig.reads])
            stat_file.write(contig.name + "\t" + str(contig.nbases) + "\t"
                            + str(contig.nreads) + "\t" + str(read_sum) + "\n")
            contig_len.append(contig.nbases)
            sum_len_read.append(read_sum)
            # True division: integer division would truncate the depth.
            if contig.nbases:
                prof.append(float(read_sum) / contig.nbases)
            else:
                prof.append(0.0)
    finally:
        stat_file.close()

    if not contig_len:
        # Nothing to plot or summarise; np.min/np.max would raise on empty input.
        print("aucun contig dans le fichier " + options.input + " \n")
        sys.exit(1)

    # Contig length histogram, one bin per 10 bases (at least one bin).
    plt.clf()
    nb_bins = max(1, int(np.max(contig_len)) // 10)
    plt.hist(contig_len, nb_bins, facecolor='blue')
    plt.xlabel('Longueur des contigs')
    plt.ylabel('Nombre de contigs')
    plt.title('Repartition des contigs selon leur taille')
    plt.grid(True)
    plt.savefig(out_prefix + ".length.png")

    # Contig depth: summed read length against contig length.
    plt.clf()
    plt.plot(sum_len_read, contig_len, 'o')
    plt.xlabel('Somme de la longueur des lectures')
    plt.ylabel('Longueur du contig')
    plt.title('Representation de la profondeur des contigs')
    plt.grid(True)
    plt.savefig(out_prefix + ".prof.png")

    # Length summary.
    summary_file = open(out_prefix + ".length.summary", 'w')
    try:
        summary_file.write("Nombre de contig : " + str(acefilerecord.ncontigs) + "\n")
        summary_file.write("Nombre de lecture : " + str(acefilerecord.nreads) + "\n")
        summary_file.write("Longueur minimale : " + str(np.min(contig_len)) + " pb\n")
        summary_file.write("Longueur maximale : " + str(np.max(contig_len)) + " pb\n")
        summary_file.write("Longueur mediane :" + str(round(np.median(contig_len), 2)) + " pb\n")
        summary_file.write("Moyenne des longueurs : " + str(round(np.mean(contig_len), 2)) + " pb\n")
    finally:
        summary_file.close()

    # Depth summary; all values are rounded to two decimals now that the
    # depth is a float.
    summary_file = open(out_prefix + ".prof.summary", 'w')
    try:
        summary_file.write("\nProfondeur minimale : " + str(round(np.min(prof), 2)) + " \n")
        summary_file.write("Profondeur maximale : " + str(round(np.max(prof), 2)) + " \n")
        summary_file.write("Profondeur mediane : " + str(round(np.median(prof), 2)) + " \n")
        summary_file.write("Profondeur moyenne : " + str(round(np.mean(prof), 2)) + " \n")
    finally:
        summary_file.close()
#!/usr/local/bioinfo/bin/python2.5
from optparse import *
from lib.t3MySQLdb import t3MySQLdb
import datetime
import time
import sys
import os
if __name__ == '__main__':
    # Command-line entry point: append one "<key> :\t<value>" line to a log file.
    parser = OptionParser(usage="Usage: add_log.py -f file -k \"blast\" -v \"-p blastn -e 10e-10\"")
    parser.add_option("-f", "--file", dest="file",
                      help="log file", metavar="FILE")
    parser.add_option("-k", "--key", dest="key",
                      help="clee", metavar="KEY")
    parser.add_option("-v", "--value", dest="value",
                      help="parameter", metavar="VALUE")
    (options, args) = parser.parse_args()

    if options.file is None or options.key is None or options.value is None:
        parser.print_help()
        sys.exit(1)

    try:
        log_file = open(options.file, 'a')
    except IOError:
        print("erreur a l'ouverture du fichier " + options.file + "\n")
        sys.exit(1)

    try:
        log_file.write(options.key + " :\t" + options.value + "\n")
    finally:
        # Actually call close() so the appended line is flushed to disk.
        log_file.close()
    sys.exit(0)
#!/usr/local/bioinfo/bin/python2.5
from optparse import *
import os
import sys
import glob
from lib.t3MySQLdb import t3MySQLdb
import datetime
import time
from shutil import copyfile
from lib.Utils import Utils
from lib.ConfigReader import RunConfigReader
from lib.ConfigReader import NG6ConfigReader
if __name__ == '__main__':
    # Command-line entry point: copy the result files of an analysis into a
    # new analyze directory, build an index.html that links or displays them,
    # then register the analysis in the database, attached either to a run
    # (-c) or to a project (-p).
    parser = OptionParser(usage="Usage: analyze2ng6.py -i DIRECTORY -c CFG_FILE | -p project_id -t title -d description -s software -o option -v version")
    parser.add_option("-i", "--in", dest="input",
                      help="The analyze output directory", metavar="DIRECTORY")
    parser.add_option("-c", "--cfg", dest="cfg",
                      help="The ng6 config file", metavar="FILE")
    parser.add_option("-p", "--project_id", dest="project_id",
                      help="project id")
    parser.add_option("-t", "--title", dest="title",
                      help="title of the analyze")
    parser.add_option("-d", "--description", dest="descr",
                      help="description of the analyze")
    parser.add_option("-s", "--software", dest="soft",
                      help="software command line")
    parser.add_option("-o", "--option", dest="option",
                      help="parameters software command line")
    parser.add_option("-v", "--version", dest="version",
                      help="version of the software command line")
    (options, args) = parser.parse_args()

    # The literal string "None" is treated like a missing value for -c and -p.
    has_cfg = options.cfg is not None and options.cfg != "None"
    has_project = options.project_id is not None and options.project_id != "None"

    if options.input is None or options.title is None or not (has_cfg or has_project):
        parser.print_help()
        sys.exit(1)

    # Gets the output folder
    t3mysql = t3MySQLdb()
    ng6conf = NG6ConfigReader()
    analyze_id = t3mysql.getNextAnalyzeID()
    out = Utils.makeAnalyzeDirs(ng6conf.getWorkDirectory(), analyze_id)
    analyze_dir = Utils.getAnalyzeDirs(analyze_id)

    html_file = open(out + "/index.html", 'w')
    try:
        html_file.write('<div class="tx-nG6-analyze">\n')
        for result_path in glob.glob(options.input + "/*/*/*"):
            if not os.path.isfile(result_path):
                # copyfile() only handles regular files; skip directories.
                continue
            result_name = os.path.basename(result_path)
            copyfile(result_path, out + "/" + result_name)
            if result_path.endswith(".png") or result_path.endswith(".jpg"):
                # Images are displayed inline.
                html_file.write('<img src="fileadmin' + analyze_dir + "/" + result_name + '" /><br />\n')
            else:
                # Any other file is offered as a link.
                html_file.write('<a href="fileadmin' + analyze_dir + "/" + result_name + '" target="_blank">' + result_name + '</a><br />\n')
        html_file.write('</div>\n')
    finally:
        html_file.close()

    # Then write it down into the database
    if has_cfg:
        cfg_reader = RunConfigReader(options.cfg)
        t3mysql.addAnalyzeToRun(cfg_reader.getRunID(), analyze_id, options.title, options.descr, datetime.date.today(), analyze_dir, options.soft, options.version, options.option)
    elif has_project:
        t3mysql.addAnalyzeToProject(options.project_id, analyze_id, options.title, options.descr, datetime.date.today(), analyze_dir, options.soft, options.version, options.option)
    else:
        # Defensive only: the option check above already requires -c or -p.
        print("erreur de parametre\n")
        sys.exit(2)
    sys.exit(0)
#!/usr/local/bioinfo/bin/python2.5
from optparse import *
import sys
import os
from shutil import copyfile
from ConfigParser import RawConfigParser
from lib.ConfigReader import NG6ConfigReader
import datetime
import time
import gzip
from Bio import SeqIO
from lib.t3MySQLdb import t3MySQLdb
from lib.Utils import Utils
def get_nb_sequences_andfull_size(fasta_file):
    """
    Count the records of a gzip-compressed fasta file and sum their lengths.

    @param fasta_file : the path of the gzip-compressed fasta file
    @return : [nb_sequences, full_seq_size]
    """
    nb_seq = 0
    full_size = 0
    handle = gzip.open(fasta_file)
    try:
        for seq_record in SeqIO.parse(handle, "fasta"):
            nb_seq += 1
            full_size += len(seq_record)
    finally:
        # Release the file descriptor even if parsing fails part-way.
        handle.close()
    return [nb_seq, full_size]
def _write_list_file(path, entry):
    """Write entry followed by a newline to path; do nothing when no path was given."""
    if path is None:
        return
    handle = open(path, "w")
    try:
        handle.write(entry + "\n")
    finally:
        handle.close()


def _is_existing_file(path):
    """Return True when path is set and names an existing regular file."""
    return path is not None and os.path.isfile(path)


if __name__ == '__main__':
    # Command-line entry point: store the fasta/qual/raw files of a run (and
    # the optional control files) in a new run directory, write the run
    # configuration file and register the run in the database.
    parser = OptionParser(usage="Usage: config_run.py")
    parser.add_option("-a", "--date", help="The run date in dd/mm/yyyy format",
                      action="store", type="string", dest="date")
    parser.add_option("-b", "--fe_group_parent", help="The project fe group parent name.",
                      action="store", type="string", dest="fe_group_parent")
    parser.add_option("-c", "--cfg", dest="cfg",
                      help="Where the cfg file should be write down", metavar="FILE")
    parser.add_option("-d", "--desc", help="Run description",
                      action="store", type="string", dest="desc")
    parser.add_option("-e", "--name", help="The run name",
                      action="store", type="string", dest="name")
    parser.add_option("-f", "--fasta", dest="fasta",
                      help="The fasta file (the qual file as to be name fasta_file.qual)", metavar="FILE")
    parser.add_option("-g", "--fe_group", help="The project fe group name allowed for this project, if the group doesn't exist, it will be created.",
                      action="store", type="string", dest="fe_group")
    parser.add_option("-i", "--fe_group_desc", help="The project fe group description.",
                      action="store", type="string", dest="fe_group_description")
    parser.add_option("-j", "--fasta_control", dest="fasta_control",
                      help="The control fasta file", metavar="FILE")
    parser.add_option("-k", "--qual_control", dest="qual_control",
                      help="The control qual file", metavar="FILE")
    parser.add_option("-l", "--fasta_control_output", dest="fasta_control_output",
                      help="The control fasta list", metavar="FILE")
    parser.add_option("-m", "--qual_control_output", dest="qual_control_output",
                      help="The control qual list", metavar="FILE")
    parser.add_option("-n", "--data_nature", help="The nature of the data",
                      action="store", type="string", dest="data_nature")
    parser.add_option("-p", "--project_desc", help="The project description the run belongs to",
                      action="store", type="string", dest="project_desc")
    parser.add_option("-q", "--sequencer", help="The sequencer name used",
                      action="store", type="string", dest="sequencer")
    parser.add_option("-r", "--project_name", help="The project name the run belongs to. If set, the project description and fe group will be ignored, if not a brand new project will be created.",
                      action="store", type="string", dest="project_name")
    parser.add_option("-s", "--species", help="The species used for this run",
                      action="store", type="string", dest="species")
    parser.add_option("-t", "--type", help="The type of run done",
                      action="store", type="string", dest="type")
    parser.add_option("-u", "--qual_list_output", dest="qual_list_output",
                      help="The qual list", metavar="FILE")
    parser.add_option("-o", "--fasta_list_output", dest="fasta_list_output",
                      help="The fasta list", metavar="FILE")
    parser.add_option("-v", "--raw_list_output", dest="raw_list_output",
                      help="The raw list", metavar="FILE")
    parser.add_option("-w", "--raw", dest="raw",
                      help="The raw file given by the sequencer", metavar="FILE")
    parser.add_option("-x", "--mids-description", dest="mids_description",
                      help="The MIDS description file", metavar="FILE")
    (options, args) = parser.parse_args()

    # --cfg and --date are used unconditionally further down, so they are
    # checked here, before anything is copied or written to the database.
    if options.fasta is None or options.raw is None or options.cfg is None or options.date is None:
        parser.print_help()
        sys.exit(1)

    # Validate the dd/mm/yyyy date before any side effect takes place.
    date = options.date.split("/")
    try:
        run_date = datetime.date(int(date[2]), int(date[1]), int(date[0]))
    except (IndexError, ValueError):
        print("Error: the run date " + str(options.date) + " is not in dd/mm/yyyy format!")
        sys.exit(1)

    # The qual file is expected next to the fasta file, same name with a
    # .qual extension.
    qual_file = os.path.splitext(options.fasta)[0] + ".qual"
    for required_file in (options.fasta, qual_file, options.raw):
        if not os.path.isfile(required_file):
            print("Error when copying file " + str(required_file) + " : file doesn't exists!")
            sys.exit(1)

    t3mysql = t3MySQLdb()
    config = RawConfigParser()
    ng6conf = NG6ConfigReader()

    # Let's get the new run_id
    run_id = t3mysql.getNextRunID()

    # Store the fasta, qual and raw files in the run directory
    [out, control_out] = Utils.makeRunDirs(ng6conf.getWorkDirectory(), run_id)
    saved_fasta = Utils.gzip(options.fasta, out)
    saved_qual = Utils.gzip(qual_file, out)
    saved_raw = Utils.gzip(options.raw, out)

    # Store the control files if both exist; they are optional, so an
    # omitted option (None) must not reach os.path.isfile().
    if _is_existing_file(options.fasta_control) and _is_existing_file(options.qual_control):
        saved_fasta_control = Utils.gzip(options.fasta_control, control_out)
        saved_qual_control = Utils.gzip(options.qual_control, control_out)
    else:
        saved_fasta_control = ""
        saved_qual_control = ""

    # One-line list files; a list whose output option was not given is skipped.
    _write_list_file(options.fasta_list_output, options.fasta)
    _write_list_file(options.qual_list_output, qual_file)
    _write_list_file(options.raw_list_output, options.raw)
    _write_list_file(options.fasta_control_output, saved_fasta_control)
    _write_list_file(options.qual_control_output, saved_qual_control)

    # Then creates and stores the cfg file
    project_id = t3mysql.addProject(options.project_name, options.project_desc,
                                    options.fe_group, options.fe_group_description,
                                    options.fe_group_parent)
    config.add_section("Run")
    config.set("Run", "run_id", run_id)
    config.set("Run", "name", options.name)
    config.set("Run", "directory", Utils.getRunDirs(run_id))
    config.set("Run", "fasta", os.path.basename(saved_fasta))
    config.set("Run", "qual", os.path.basename(saved_qual))
    config.set("Run", "fasta_control", os.path.basename(saved_fasta_control))
    config.set("Run", "qual_control", os.path.basename(saved_qual_control))
    config.set("Run", "raw", os.path.basename(saved_raw))
    config.set("Run", "date", options.date)
    config.set("Run", "species", options.species)
    config.set("Run", "data_nature", options.data_nature)
    config.set("Run", "type", options.type)
    config.set("Run", "description", options.desc)
    config.set("Run", "sequencer", options.sequencer)

    # If it's a run with some mids, let's write down mids description
    if _is_existing_file(options.mids_description):
        config.add_section("MIDS")
        mids_handle = open(options.mids_description)
        try:
            for line in mids_handle:
                vals = line.rstrip().split()
                # Lines with fewer than two fields carry no key/value pair.
                if len(vals) >= 2:
                    config.set("MIDS", vals[0], vals[1])
        finally:
            mids_handle.close()

    # The configuration is written twice: inside the run directory and at
    # the location requested with --cfg.
    for cfg_path in (out + "/run.cfg", options.cfg):
        cfg_handle = open(cfg_path, 'w')
        try:
            config.write(cfg_handle)
        finally:
            cfg_handle.close()

    # Finally write it down into the database
    [nb_sequences, full_seq_size] = get_nb_sequences_andfull_size(saved_fasta)
    t3mysql.addRunToProject(project_id, run_id, options.name, run_date, Utils.getRunDirs(run_id), options.species, options.data_nature, options.type, nb_sequences, full_seq_size, options.desc, options.sequencer)
    sys.exit(0)
\ No newline at end of file
#!/usr/local/bioinfo/bin/python2.5
from optparse import *
from lib.t3MySQLdb import t3MySQLdb
import datetime
import time
import sys, glob, os, re
from lib.ConfigReader import RunConfigReader
from lib.Utils import Utils
from lib.ConfigReader import NG6ConfigReader
from tempfile import NamedTemporaryFile
from shutil import copyfile
def compteNbHitDb(filename):
    """
    Count the '# BLASTN ' header lines of a file and extract its database name.

    @param filename : the path of the file to scan
    @return : [n, db] where n is the number of occurrences of '# BLASTN '
              and db is the text following the first '# Database: ' up to
              the end of that line, or "" when no such line exists
    """
    fd = open(filename, 'r')
    try:
        content = fd.read()
    finally:
        # Close the file even when reading fails.
        fd.close()

    db = ""
    match = re.search("# Database: (.*)", content)
    if match is not None:
        db = match.group(1)
    return [content.count('# BLASTN '), db]
if __name__ == '__main__':
parser = OptionParser(usage="Usage: Contamination2ng6.py -c cfg -f fof | -d directory")
parser.add_option("-c", "--cfg", dest="cfg",
help="configuration file", metavar="FILE")
parser.add_option("-f", "--fof", dest="fof",
help="file of contamination blast file", metavar="FILE")
parser.add_option("-d", "--dir", dest="dir",
help="directory of all contamination analysis without token", metavar="DIR")
(options, args) = parser.parse_args()
if options.cfg == None or ((options.fof == None or options.fof == "") and (options.dir == None or options.dir =="")):
parser.print_help()
exit(1)
else:
try :
cfg_reader = RunConfigReader(options.cfg)
except :
print "erreur a l'ouverture du fichier "+ options.cfg + "\n"
exit(1)
ng6conf = NG6ConfigReader()
#Gets the output folder
t3mysql = t3MySQLdb()
analyze_id = t3mysql.getNextAnalyzeID()
out = Utils.makeAnalyzeDirs(ng6conf.getWorkDirectory(), analyze_id)
#Write it down to the database
new_list=NamedTemporaryFile(mode='w+b', prefix='contaminationFof')
files=[]
group_dir={}
if ( options.dir != None and options.dir !="" ) :
#/work/ng6/ergatis/output_repository/contamination_search/29_yeast/i1/g12/454Reads.MID8.sff_extract.contamination_search.yeast.m8
for file in glob.glob(options.dir+"*/*/*/*.m8"):
files.append(file)
group_dir[os.path.split(os.path.dirname(file))[1]] = 1
else :
try :
fr = open(options.fof, 'r')
files = fr.readlines()
group_dir[os.path.basename(os.path.dirname(files[0]))] = 1
fr.close
except :
print "Erreur durant l\'analyse des fichiers de contamination<br>\n "
exit(1)
filename=os.path.join(out,"index.html")
try :
f = open( filename , 'wr')
f.write( "<div class='tx-nG6-analyze'>\n" )
except :
print "Erreur lors de l'ecriture dans " + filename
exit(1)
if (len (group_dir.keys()) > 1) :
#plusieurs iterations
# tuple correspondant au nom explicite des mids
mid_names = {}
mid_names = cfg_reader.getMIDSnames()
results={}
for file in files :
group_name = os.path.split(os.path.dirname(file))[1]
mid_tag =os.path.basename(file).split(".")[1]
copyfile(file, os.path.join(out, os.path.basename(file)))
[nbcontamination,db]=compteNbHitDb(file)
db=file.split(".")[-2]
if not results.has_key(mid_tag):
results[mid_tag]={}
results[mid_tag][db]=[nbcontamination, file]
### Affichage des options
onekey=results.keys()[0]
for bank in results[onekey] :
dir=os.path.dirname(results[onekey][bank][1])
for file in glob.glob(dir+"/*.log"):
f.write("<div class='tx-nG6-analyze-parameters'>Options utilis&eacute;es contre la banque : <b>"+ bank+ "</b><br/>")
log=open(file,'r')
for line in log.readlines():
f.write(line + "<br/>")
log.close
f.write("</div><br/>")
### Affichage des resultats
msg="Contamination pour chaque MIDS : <ul>"
for mid_tag in Utils.sortMids(results.keys()):
midmsg=""
for bank in results[mid_tag].keys() :
if results[mid_tag][bank][0]>0 :
midmsg += "" + str(results[mid_tag][bank][0]) + "</b> contamination(s) contre la banque <b>" + bank +"</b> : "
filebasename=os.path.basename(results[mid_tag][bank][1])
midmsg += '<a href="fileadmin' + Utils.getAnalyzeDirs(analyze_id) + "/" + str(filebasename) + '" target="_blank">'+ str(filebasename) +'</a>\n'
if midmsg == "" :
midmsg += "Pas de contamination"
title=""
if mid_names.has_key(mid_tag) :
title= mid_names[mid_tag] + " (" + mid_tag + ")"
else :
title= mid_tag
msg += "<li>" + title + " : " + midmsg + "</li>"
msg +="</ul>"
f.write( msg )
else :
for file in files:
file=file.rstrip()
if os.path.isfile(file) :