Commit fef2c774 authored by Celine Noirot
Browse files

Merge branch 'dev' of forgemia.inra.fr:genotoul-bioinfo/metagwgs into dev

parents 5add8d6e 58ed058e
Pipeline #46877 passed with stages
in 15 minutes and 25 seconds
...@@ -511,10 +511,10 @@ def main(): ...@@ -511,10 +511,10 @@ def main():
accessions_unfound_in_mapping_prot = accessions_unfound_in_mapping & hit_accessions accessions_unfound_in_mapping_prot = accessions_unfound_in_mapping & hit_accessions
if acessions_with_unfound_taxid_prot: if acessions_with_unfound_taxid_prot:
outdisc.write(f"{protein_id}\tNo taxid in taxdump\t{','.join(acessions_with_unfound_taxid_prot)}\n") outdisc.write(f"{protein_id}\tNo taxid in taxdump\t{','.join(sorted(acessions_with_unfound_taxid_prot))}\n")
if accessions_unfound_in_mapping_prot: if accessions_unfound_in_mapping_prot:
outdisc.write(f"{protein_id}\tNo protid correspondance file\t{','.join(accessions_unfound_in_mapping_prot)}\n") outdisc.write(f"{protein_id}\tNo protid correspondance file\t{','.join(sorted(accessions_unfound_in_mapping_prot))}\n")
consensual_contig_taxid = get_taxid_consensus(contig_collate_hits, main_ranks) consensual_contig_taxid = get_taxid_consensus(contig_collate_hits, main_ranks)
count_rank_affiliation_contig[taxid2rank[consensual_contig_taxid]] += 1 count_rank_affiliation_contig[taxid2rank[consensual_contig_taxid]] += 1
......
...@@ -26,18 +26,18 @@ process GLOBAL_CD_HIT { ...@@ -26,18 +26,18 @@ process GLOBAL_CD_HIT {
label 'CD_HIT' label 'CD_HIT'
input: input:
path "*.fasta" path cluster_fasta
val pct_id val pct_id
output: output:
path "All-cd-hit-est.${pct_id}.fasta" path "All-cd-hit-est.${pct_id}.fasta"
path "table_clstr.txt", emit: clstr_table path "table_clstr.txt", emit: clstr_table
// when: ('06_func_annot' in step)
script: script:
""" """
cat * > All-cd-hit-est.${pct_id} # *fasta is important to get the correct order
cat *.fasta > All-cd-hit-est.${pct_id}
cd-hit-est -c ${pct_id} -i All-cd-hit-est.${pct_id} -o All-cd-hit-est.${pct_id}.fasta -T ${task.cpus} -M ${task.mem} -d 150 cd-hit-est -c ${pct_id} -i All-cd-hit-est.${pct_id} -o All-cd-hit-est.${pct_id}.fasta -T ${task.cpus} -M ${task.mem} -d 150
cat All-cd-hit-est.${pct_id}.fasta.clstr | cd_hit_produce_table_clstr.py > table_clstr.txt cat All-cd-hit-est.${pct_id}.fasta.clstr | cd_hit_produce_table_clstr.py > table_clstr.txt
""" """
...@@ -53,8 +53,10 @@ ch_percentage_identity // channel: val ...@@ -53,8 +53,10 @@ ch_percentage_identity // channel: val
main: main:
INDIVIDUAL_CD_HIT( ch_assembly, ch_percentage_identity ) INDIVIDUAL_CD_HIT( ch_assembly, ch_percentage_identity )
GLOBAL_CD_HIT( INDIVIDUAL_CD_HIT.out.clstr_fasta.collect(), ch_percentage_identity ) ch_individual_clusters = INDIVIDUAL_CD_HIT.out.clstr_fasta.collect()
GLOBAL_CD_HIT(ch_individual_clusters , ch_percentage_identity )
emit: emit:
individual_clstr_table = INDIVIDUAL_CD_HIT.out.clstr_table individual_clstr_table = INDIVIDUAL_CD_HIT.out.clstr_table
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment