Commit eb8c3b9d authored by Claire Hoede's avatar Claire Hoede
Browse files

SNP glycosylation annotation in eucaryotes

parent cf756fbb
#!/usr/bin/perl -w
=pod
=head1 NAME
extract_netcglyc_results.pl
=head1 SYNOPSIS
extract_netcglyc_results.pl --infile <file> --encodingfile <file> --outputfile <file> --delta <0..1>
=head1 OPTIONS
--infile string, the path to the protein sequence query file - !!!the sequence name should be the short name!!!
--encodingfile string, the path of the file containing the encoded protein names (long name "..._allele1" <-> short name "_seq")
--outputfile string, the path of the output file for parsed results
--delta string, value between 0 and 1. Discriminant for the score comparison. (0.5 per default)
=head1 DESCRIPTION
extract_netcglyc_results.pl - This program is part of a pipeline of programs for SNP annotation.
It uses a program called netcglyc and predicts C-mannosylation sites in mammalian protein on amino acid:
http://www.cbs.dtu.dk/services/NetCGlyc/output.php
if the 2 alleles of a protein show a different signal then there is loss or gain of signal.
if allele1 has a prediction and allele2 doesn't, then there is loss of signal.
if allele2 has a prediction and allele1 doesn't, then there is gain of signal.
Moreover, the difference in score between allele1 and allele2 is measured. If this difference is greater than the delta set by
the user, then "loss?" or "gain?" is reported.
=head1 DATE
20/02/2012
=head1 AUTHORS
Sabrina Rodriguez
Johann Beghain
=cut
use strict;
# find the absolute path to the local library
use FindBin;
# return the absolute path to the local library
use lib "$FindBin::RealBin/../lib";
#~ use lib '/usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/lib';
use Getopt::Long;
use Pod::Usage;
use formatAlleleSeq;
use runnetcglyc;
#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netcglyc_results.pl --infile /home/sigenae/work/Sabrina/fic/list_snps_coded_seq5_3.SR_netcglyc.res --delta 0.5 --encodingfile /home/sigenae/work/Sabrina/fic/saved_names.txt --outputfile /home/sigenae/work/Sabrina/fic/ncglyc_test_results.txt
#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netcglyc_results.pl --infile /home/sigenae/work/Sabrina/TEST_suite/output.netcglyc --delta 0.5 --encodingfile /work/sigenae/vmergatisdev/output_repository/SR_formatSNPeffect-protseq/94_default/names_encoding.txt --outputfile /home/sigenae/work/Sabrina/TEST_suite/heho.txt
#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netcglyc_results.pl --infile /home/sigenae/work/Sabrina/horse_outputs/netcglyc_res.txt --delta 0.5 --encodingfile /home/sigenae/work/Sabrina/horse_outputs/names_encoding.txt --outputfile /home/sigenae/work/Sabrina/horse_outputs/heho.txt
############################ OPTIONS / PARAMETERS ############################
# Option specifications handed to Getopt::Long; every option takes a string value.
my @getopt_args = qw(
    -infile=s
    -encodingfile=s
    -outputfile=s
    -delta=s
);
# Receives the parsed command-line values.
my %options;
# Fall back to the help text when the command line cannot be parsed.
GetOptions( \%options, @getopt_args ) or usage();
# Print this script's POD documentation as plain text (through the external
# pod2text command) and terminate with exit status 1.
# Takes no arguments and never returns.
sub usage {
    # system() rather than exec(): exec replaces the current process, so the
    # exit below would never run and pod2text's own status (normally 0) would
    # be reported for a usage error. The list form also bypasses the shell,
    # so a script path containing spaces or metacharacters is passed intact.
    system( 'pod2text', $0 );
    exit( 1 );
}
# Mandatory options: show the help text (via usage()) as soon as one is absent.
for my $required (qw(infile encodingfile outputfile)) {
    usage() if ( !exists $options{$required} );
}
############################ PROGRAM ############################
my $encodingfile = $options{'encodingfile'};
my $infile       = $options{'infile'};
my $outputfile   = $options{'outputfile'};
# --delta is documented as optional with a default of 0.5, so fall back to
# that value instead of refusing to run when the option is omitted.
my $delta = exists $options{'delta'} ? $options{'delta'} : 0.5;
# Library routine, not defined in this script (presumably exported by runnetcglyc).
netcglyc_extract_results($delta,$infile,$encodingfile,$outputfile);
#!/usr/bin/perl -w
=pod
=head1 NAME
extract_netnglyc_results.pl
=head1 SYNOPSIS
extract_netnglyc_results.pl --infile <file> --encodingfile <file> --outputfile <file> --delta <0..1> --base_name <string>
=head1 OPTIONS
--infile string, the path to the protein sequence query file - !!!the sequence name should be the short name!!!
--encodingfile string, the path of the file containing the encoded protein names (long name "..._allele1" <-> short name "_seq")
--outputfile string, the path of the output file for parsed results
--delta string, value between 0 and 1. Discriminant for the score comparison. (0.5 per default)
--base_name string, "base name" for sequence shorter name version (_seq)
=head1 DESCRIPTION
extract_netnglyc_results.pl - This program is part of a pipeline of programs for SNP annotation.
It uses a program called netnglyc and predicts N-Glycosylation sites in human proteins on amino acid
using artificial neural networks that examine the sequence context of Asn-Xaa-Ser/Thr sequons:
http://www.cbs.dtu.dk/services/NetNGlyc/output.php
if the 2 alleles of a protein show a different signal then there is loss or gain of signal.
if allele1 has a prediction and allele2 doesn't, then there is loss of signal.
if allele2 has a prediction and allele1 doesn't, then there is gain of signal.
Moreover, the difference in score between allele1 and allele2 is measured. If this difference is greater than the delta set by
the user, then "loss?" or "gain?" is reported.
=head1 DATE
20/02/2012
=head1 AUTHORS
Sabrina Rodriguez
Johann Beghain
=cut
use strict;
# find the absolute path to the local library
use FindBin;
# return the absolute path to the local library
use lib "$FindBin::RealBin/../lib";
#~ use lib '/usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/lib';
use Getopt::Long;
use Pod::Usage;
use formatAlleleSeq;
use runnetnglyc;
#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netnglyc_results.pl --infile /home/sigenae/work/Sabrina/fic/list_snps_coded_seq5_6.SR_netnglyc.res --delta 0.5 --encodingfile /home/sigenae/work/Sabrina/fic/saved_names.txt --outputfile /home/sigenae/work/Sabrina/fic/netnglyc_formatted.txt --base_name "_seq"
#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netnglyc_results.pl --infile /home/sigenae/work/Sabrina/horse_outputs/netnglyc_res.txt --delta 0.5 --encodingfile /home/sigenae/work/Sabrina/horse_outputs/names_encoding.txt --outputfile /home/sigenae/work/Sabrina/horse_outputs/netnglyc_formatted.txt --base_name "_seq"
############################ OPTIONS / PARAMETERS ############################
# Option specifications handed to Getopt::Long; every option takes a string value.
my @getopt_args = qw(
    -infile=s
    -encodingfile=s
    -outputfile=s
    -delta=s
    -base_name=s
);
# Receives the parsed command-line values.
my %options;
# Fall back to the help text when the command line cannot be parsed.
GetOptions( \%options, @getopt_args ) or usage();
# Print this script's POD documentation as plain text (through the external
# pod2text command) and terminate with exit status 1.
# Takes no arguments and never returns.
sub usage {
    # system() rather than exec(): exec replaces the current process, so the
    # exit below would never run and pod2text's own status (normally 0) would
    # be reported for a usage error. The list form also bypasses the shell,
    # so a script path containing spaces or metacharacters is passed intact.
    system( 'pod2text', $0 );
    exit( 1 );
}
# Mandatory options: show the help text (via usage()) as soon as one is absent.
for my $required (qw(infile encodingfile outputfile base_name)) {
    usage() if ( !exists $options{$required} );
}
############################ PROGRAM ############################
my $encodingfile = $options{'encodingfile'};
my $infile       = $options{'infile'};
my $outputfile   = $options{'outputfile'};
# --delta is documented as optional with a default of 0.5, so fall back to
# that value instead of refusing to run when the option is omitted.
my $delta     = exists $options{'delta'} ? $options{'delta'} : 0.5;
my $base_name = $options{'base_name'};
# Library routine, not defined in this script (presumably exported by runnetnglyc).
netnglyc_extract_results($delta,$infile,$encodingfile,$outputfile,$base_name);
#!/usr/bin/perl -w
=pod
=head1 NAME
extract_netoglyc_results.pl
=head1 SYNOPSIS
extract_netoglyc_results.pl --infile <file> --encodingfile <file> --outputfile <file> --delta <0..1> --base_name <string>
=head1 OPTIONS
--infile string, the path to the protein sequence query file - !!!the sequence name should be the short name!!!
--encodingfile string, the path of the file containing the encoded protein names (long name "..._allele1" <-> short name "_seq")
--outputfile string, the path of the output file for parsed results
--delta string, value between 0 and 1. Discriminant for the score comparison. (0.5 per default)
--base_name string, "base name" for sequence shorter name version (_seq)
=head1 DESCRIPTION
extract_netoglyc_results.pl - This program is part of a pipeline of programs for SNP annotation.
It uses a program called netoglyc and predicts O-glycosylation sites in mammalian protein on amino acid
http://www.cbs.dtu.dk/services/NetOGlyc/
if the 2 alleles of a protein show a different signal then there is loss or gain of signal.
if allele1 has a prediction and allele2 doesn't, then there is loss of signal.
if allele2 has a prediction and allele1 doesn't, then there is gain of signal.
Moreover, the difference in score between allele1 and allele2 is measured. If this difference is greater than the delta set by
the user, then "loss?" or "gain?" is reported.
=head1 DATE
20/02/2012
=head1 AUTHORS
Sabrina Rodriguez
Johann Beghain
=cut
use strict;
# find the absolute path to the local library
use FindBin;
# return the absolute path to the local library
use lib "$FindBin::RealBin/../lib";
#~ use lib '/usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/lib';
use Getopt::Long;
use Pod::Usage;
use formatAlleleSeq;
use runnetoglyc;
#~ /usr/local/bioinfo/src/netOglyc/current/netOglyc /home/sigenae/work/Sabrina/fic/res/list_snps_coded6_1.fasta > /home/sigenae/work/Sabrina/fic/netoglyc_result.txt
#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netoglyc_results.pl --infile /home/sigenae/work/Sabrina/fic/netoglyc_result.txt --delta 0.5 --encodingfile /home/sigenae/work/Sabrina/fic/saved_names.txt --outputfile /home/sigenae/work/Sabrina/fic/netoglyc_formatted.txt --base_name "_seq"
#~ /usr/local/bioinfo/src/netOglyc/current/netOglyc /home/sigenae/work/Sabrina/protseq_netoglycPB_input/SNP_proteins_encoded3000_1.fasta > /home/sigenae/work/Sabrina/fic/netoglycPB_result.txt
#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netoglyc_results.pl --infile /home/sigenae/work/Sabrina/fic/netoglycPB_result.txt --delta 0.5 --encodingfile /home/sigenae/work/Sabrina/fic/saved_names.txt --outputfile /home/sigenae/work/Sabrina/fic/netoglycPB.res --base_name "_seq"
#~ /usr/local/bioinfo/src/netOglyc/current/netOglyc /home/sigenae/work/Sabrina/protseq_Horse/LESS-4000/SNP_proteins_encoded3000_1_l4000.fasta > /home/sigenae/work/Sabrina/protseq_Horse/netoglyc_result.txt
#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netoglyc_results.pl --infile /home/sigenae/work/Sabrina/protseq_Horse/netoglyc_result.txt --delta 0.5 --encodingfile /home/sigenae/work/Sabrina/protseq_Horse/names_encoding.txt --outputfile /home/sigenae/work/Sabrina/protseq_Horse/netoglyc_parsed.res --base_name "_seq"
#~ /usr/local/bioinfo/src/netOglyc/current/netOglyc /home/sigenae/work/Sabrina/protseq_Horse/protNseqSizemin/list_snps_coded50_736.fasta > /home/sigenae/work/Sabrina/protseq_Horse/netoglyc_result.txt
#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netoglyc_results.pl --infile /home/sigenae/work/Sabrina/protseq_Horse/netoglyc_result.txt --delta 0.5 --encodingfile /home/sigenae/work/Sabrina/protseq_Horse/names_encoding.txt --outputfile /home/sigenae/work/Sabrina/protseq_Horse/netoglyc_parsed.res --base_name "_seq"
#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netoglyc_results.pl --infile=/work/sigenae/vmergatisdev/output_repository/SR_netoglyc/162_default/output_netoglyc/SNP_proteins_encoded50_3.SR_netoglyc.res --encodingfile=/work/sigenae/vmergatisdev/output_repository/SR_formatSNPeffect-protseq/96_default/names_encoding.txt --delta=0.5 --base_name=_seq --outputfile=/home/sigenae/work/Sabrina/TEST_suite/netoglyc_res.txt
# perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netoglyc_results.pl --infile=/home/sigenae/work/Sabrina/horse_outputs//SNP_proteins_encoded50_3.SR_netoglyc.res --encodingfile=/work/sigenae/vmergatisdev/output_repository/SR_formatSNPeffect-protseq/162_default/names_encoding.txt --delta=0.5 --base_name=_seq --outputfile=/home/sigenae/work/Sabrina/horse_outputs/SNP_proteins_encoded50_3.SR_netoglyc_parsed.res
############################ OPTIONS / PARAMETERS ############################
# Option specifications handed to Getopt::Long; every option takes a string value.
my @getopt_args = qw(
    -infile=s
    -encodingfile=s
    -outputfile=s
    -delta=s
    -base_name=s
);
# Receives the parsed command-line values.
my %options;
# Fall back to the help text when the command line cannot be parsed.
GetOptions( \%options, @getopt_args ) or usage();
# Print this script's POD documentation as plain text (through the external
# pod2text command) and terminate with exit status 1.
# Takes no arguments and never returns.
sub usage {
    # system() rather than exec(): exec replaces the current process, so the
    # exit below would never run and pod2text's own status (normally 0) would
    # be reported for a usage error. The list form also bypasses the shell,
    # so a script path containing spaces or metacharacters is passed intact.
    system( 'pod2text', $0 );
    exit( 1 );
}
# Mandatory options: show the help text (via usage()) as soon as one is absent.
for my $required (qw(infile encodingfile outputfile base_name)) {
    usage() if ( !exists $options{$required} );
}
############################ PROGRAM ############################
my $encodingfile = $options{'encodingfile'};
my $infile       = $options{'infile'};
my $outputfile   = $options{'outputfile'};
# --delta is documented as optional with a default of 0.5, so fall back to
# that value instead of refusing to run when the option is omitted.
my $delta     = exists $options{'delta'} ? $options{'delta'} : 0.5;
my $base_name = $options{'base_name'};
# Library routine, not defined in this script (presumably exported by runnetoglyc).
netoglyc_extract_results($delta,$infile,$encodingfile,$outputfile,$base_name);
#!/usr/bin/perl -w
=pod
=head1 NAME
run_yinoyang.pl
=head1 SYNOPSIS
run_yinoyang.pl --infile <file> --encodingfile <file> --workpath <dir> --outpath <dir> --delta <0..1> --base_name <string> --execommand <path>
=head1 OPTIONS
--infile string, the path to the protein sequence query file - !!!the sequence name should be the short name!!!
--encodingfile string, the path of the file containing the encoded protein names (long name "..._allele1" <-> short name "_seq")
--workpath string, the path to the existing working directory
--outpath string, the path to the existing output directory that will contain the result file
--delta string, value between 0 and 1. Discriminant for the score comparison. (0.5 per default)
--base_name string, "base name" for sequence shorter name version (_seq)
--execommand string, the command to execute with global path to the command
=head1 DESCRIPTION
run_yinoyang.pl - This program is part of a pipeline of programs for SNP annotation. It uses a program called yinOyang, which predicts O-β-GlcNAc attachment sites in eukaryotic protein sequences:
http://www.cbs.dtu.dk/services/YinOYang/output.php. It analyses files with only 1 sequence per file.
if the 2 alleles of a protein show a different signal then there is loss or gain of signal.
if allele1 has a prediction and allele2 doesn't, then there is loss of signal.
if allele2 has a prediction and allele1 doesn't, then there is gain of signal.
Moreover, the difference in score between allele1 and allele2 is measured. If this difference is greater than the delta set by the user, then "loss?" or "gain?" is reported.
=head1 VERSION
Version 1
=head1 DATE
27/02/2012
=head1 AUTHORS
Sabrina Rodriguez
Johann Beghain
=cut
use strict;
# find the absolute path to the local library
use FindBin;
# return the absolute path to the local library
use lib "$FindBin::RealBin/../lib";
use runyinoyang;
use Getopt::Long;
use Pod::Usage;
#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/run_yinoyang.pl --infile /home/sigenae/work/Sabrina/fic/res/list_snps_coded6_1.fasta --outpath /home/sigenae/work/Sabrina/fic/output_yoy --execommand /usr/local/bioinfo/bin/yinOyang --delta 0.5 --workpath /home/sigenae/work/Sabrina/fic/output_yoy --encodingfile /home/sigenae/work/Sabrina/fic/saved_names.txt --base_name _seq
#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/run_yinoyang.pl --infile /home/sigenae/work/Sabrina/yinogyang_input/SNP_proteins_encoded3000_1.fasta --outpath /home/sigenae/work/Sabrina/yinogyang_output --execommand /usr/local/bioinfo/bin/yinOyang --delta 0.5 --workpath /home/sigenae/work/Sabrina/yinogyang_output --encodingfile /work/sigenae/vmergatisdev/output_repository/SR_formatSNPeffect-protseq/21_default/names_encoding.txt --base_name _seq
#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/run_yinoyang.pl --infile /home/sigenae/work/Sabrina/test_yin/SNP_proteins_encoded3000_11.fasta --outpath /home/sigenae/work/Sabrina/test_yin/result --execommand /usr/local/bioinfo/bin/yinOyang --delta 0.5 --workpath /home/sigenae/work/Sabrina/yinogyang_output --encodingfile /home/sigenae/work/Sabrina/test_yin/names_encoding.txt --base_name _seq
#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/run_yinoyang.pl --infile /work/sigenae/vmergatisdev/output_repository/SR_formatSNPeffect-protseq/106_default/seq_fastas_Nseq/SNP_proteins_encoded3000_16.fasta --outpath /home/sigenae/work/Sabrina/test_yin/result --execommand /usr/local/bioinfo/bin/yinOyang --delta 0.5 --workpath /home/sigenae/work/Sabrina/yinogyang_output --encodingfile /home/sigenae/work/Sabrina/test_yin/names_encoding.txt --base_name _seq
#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/run_yinoyang.pl --infile /home/sigenae/work/Sabrina/horse_outputs/horse.fasta --outpath /home/sigenae/work/Sabrina/horse_outputs/yinoyang --execommand /usr/local/bioinfo/bin/yinOyang --delta 0.5 --workpath /home/sigenae/work/Sabrina/horse_outputs/yinoyang --encodingfile /home/sigenae/work/Sabrina/horse_outputs/names_encoding.txt --base_name _seq
############################ OPTIONS / PARAMETERS ############################
# Option specifications handed to Getopt::Long; every option takes a string value.
my @getopt_args = qw(
    -infile=s
    -workpath=s
    -outpath=s
    -execommand=s
    -delta=s
    -base_name=s
    -encodingfile=s
);
# Receives the parsed command-line values.
my %options;
# Fall back to the help text when the command line cannot be parsed.
GetOptions( \%options, @getopt_args ) or usage();
# Print this script's POD documentation as plain text (through the external
# pod2text command) and terminate with exit status 1.
# Takes no arguments and never returns.
sub usage {
    # system() rather than exec(): exec replaces the current process, so the
    # exit below would never run and pod2text's own status (normally 0) would
    # be reported for a usage error. The list form also bypasses the shell,
    # so a script path containing spaces or metacharacters is passed intact.
    system( 'pod2text', $0 );
    exit( 1 );
}
# Mandatory options: show the help text (via usage()) as soon as one is absent.
for my $required (qw(infile workpath outpath execommand encodingfile base_name)) {
    usage() if ( !exists $options{$required} );
}
############################ PROGRAM ############################
#Set options
my $outpath      = $options{'outpath'};
my $infile       = $options{'infile'};
my $workpath     = $options{'workpath'};
my $execommand   = $options{'execommand'};
# --delta is documented as optional with a default of 0.5, so fall back to
# that value instead of refusing to run when the option is omitted.
my $delta        = exists $options{'delta'} ? $options{'delta'} : 0.5;
my $encodingfile = $options{'encodingfile'};
my $base_name    = $options{'base_name'};
# Library routine, not defined in this script (presumably exported by runyinoyang).
runyinoyang($delta,$workpath,$outpath,$infile,$execommand,$encodingfile,$base_name);
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment