Commit b07b2d1f authored by Celine Noirot
Browse files

Add idxstat column name

parent 7bfc4b3a
......@@ -59,7 +59,7 @@ args = parser.parse_args()
# Recovery of idxstats file.
# Read as tab-separated text; header=None means no row is used as column names.
idxstats = pd.read_csv(args.idxstats_file, delimiter='\t', header=None)
# Give the four columns explicit labels so they can be referenced by name
# rather than by position.
idxstats.columns = ["contig","len","mapped","unmapped"]
# Recovery of mosdepth file; remove start/end columns
# Read as gzip-compressed, tab-separated text, again with no header row used.
mosdepth = pd.read_csv(args.mosdepth_file, delimiter='\t', header=None,compression='gzip')
mosdepth.columns = ["contig","start","end","depth"]
......@@ -69,13 +69,15 @@ mosdepth.drop(["start","end"], inplace=True,axis=1)
# Read the per-contig table as tab-separated text; dtype=str loads its columns as strings.
percontig = pd.read_csv(args.percontig_file, delimiter='\t', dtype=str)
# Merge idxstats and .percontig.tsv files.
# NOTE(review): the next two statements look like the removed and added versions
# of the same diff line (the -/+ markers are not visible here) -- confirm that only
# the second one is live. The first joins on the positional label 0, the second on
# the named 'contig' column; both are outer joins against percontig's '#contig'.
merge = pd.merge(idxstats,percontig,left_on=0,right_on='#contig', how='outer')
merge = pd.merge(idxstats,percontig,left_on='contig',right_on='#contig', how='outer')
#add depth
# NOTE(review): apparent removed/added pair again -- confirm. Both are outer joins
# of the previous result with mosdepth on mosdepth's 'contig' column.
merge = pd.merge(merge,mosdepth,left_on=0,right_on='contig', how='outer')
merge = pd.merge(merge,mosdepth,left_on='contig',right_on='contig', how='outer')
# Group by lineage and sum number of reads and contigs.
# NOTE(review): apparent removed/added pair again -- confirm. For each
# (consensus_lineage, consensus_tax_id, tax_id_by_level) group the second form joins
# the contig names with ';' and counts them, sums 'mapped' (presumably what the
# positional key 2 referred to before the columns were named), and averages 'depth'.
res = merge.groupby(['consensus_lineage','consensus_tax_id', 'tax_id_by_level']).agg({0 : [';'.join, 'count'], 2: 'sum', 'depth': 'mean'}).reset_index()
res = merge.groupby(['consensus_lineage','consensus_tax_id', 'tax_id_by_level']).agg({'contig' : [';'.join, 'count'], 'mapped': 'sum', 'depth': 'mean'}).reset_index()
# Replace the column labels produced by agg/reset_index with flat names: the three
# group keys first, then the four aggregated values in the order requested above.
res.columns=['lineage_by_level', 'consensus_tax_id', 'tax_id_by_level', 'name_contigs', 'nb_contigs', 'nb_reads', 'depth']
# Fill the NaN by 0.
res.fillna(0, inplace=True)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment