Commit 3eaa012b authored by Frederique Bitton
Browse files

Upload snakefile for parallelization of python script TE_genes_1kb.py

parent 1947a4cd
Pipeline #42457 passed with stage
in 1 minute and 6 seconds
__author__ = "INRAE GAFL"
__license__ = "MIT"
__copyright__ = "INRAE, GAFL 2020"
import os
import sys
import re
import pandas as pd
from snakemake.utils import min_version
# snakemake's built-in report function requires at least version 5.1
min_version("5.1.0")

# Sequence identifiers, one per line, read from the file named by
# config["chr_list"]; the rules below use them as values of the {chr} wildcard.
# The file is read inside a `with` block so the handle is closed right away,
# and blank lines are skipped so they cannot yield an empty {chr} value.
with open(config["chr_list"], "r") as chr_handle:
    CHRS = [line.rstrip() for line in chr_handle if line.strip()]

# Paths taken from the workflow configuration.
gff_file_ref = config["gff_file_ref"]
gff_file_sub = config["gff_file_sub"]
# NOTE(review): tmp_dir is not referenced by any rule visible in this file.
tmp_dir = config["tmp_dir"]
rule final_outs:
    # Aggregate target rule: it has no outputs of its own and only requests,
    # for every entry of CHRS, the per-sequence split of both GFF files and
    # the three CSV tables written by get_distances.
    input:
        expand(
            '{ref_files}_{chr}.gff',
            ref_files=[gff_file_ref, gff_file_sub],
            chr=CHRS,
        ),
        expand("data/downstream_{chr}.csv", chr=CHRS),
        expand("data/upstream_{chr}.csv", chr=CHRS),
        expand("data/intersect_{chr}.csv", chr=CHRS),
rule getSplittedByID:
    # Extract from one GFF file the records of a single sequence and write
    # them to '<gff path>_<chr>.gff'.
    input:
        # Only the GFF named by the {ref_files} wildcard is read, so it is the
        # only dependency of a given output.
        gff="{ref_files}",
    output:
        '{ref_files}_{chr}.gff'
    wildcard_constraints:
        # Pin {ref_files} to the two configured GFF paths so that the
        # '<path>_<chr>.gff' pattern cannot be split at the wrong underscore
        # when a sequence name itself contains '_'.
        ref_files="|".join(re.escape(p) for p in (gff_file_ref, gff_file_sub)),
    shell:
        # Keep a record only when its first tab-separated column is exactly
        # the requested sequence name; a substring/regex match anywhere in the
        # line would also select e.g. 'chr10' when asked for 'chr1'.
        # awk exits 0 when nothing matches (empty output file), while a
        # missing or unreadable input still fails the job.
        "awk -F'\\t' -v chrom={wildcards.chr:q} '$1 == chrom' {input.gff:q} > {output:q}"
rule get_distances:
    # Run TE_genes_1kb.py on the two per-sequence GFF splits of one {chr}
    # and collect its three CSV outputs under data/.
    input:
        # '<configured gff path>_<chr>.gff', as produced by getSplittedByID
        IA=config["gff_file_ref"] + "_{chr}.gff",
        IB=config["gff_file_sub"] + "_{chr}.gff",
    output:
        down_file="data/downstream_{chr}.csv",
        up_file="data/upstream_{chr}.csv",
        intersect_file="data/intersect_{chr}.csv",
    shell:
        # '-fa match' and '-fb gene' are passed through unchanged; their
        # meaning is defined by TE_genes_1kb.py, which is resolved relative
        # to the working directory.
        "python TE_genes_1kb.py -ia {input.IA} -ib {input.IB} "
        "-fa match -fb gene "
        "-od {output.down_file} -ou {output.up_file} "
        "-oi {output.intersect_file} "
# rule combinebed:
# input:
# output:
# shell:
# """
# cat {input} > {output}
# """
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment